import MultipleChoiceQuestion from "../../../../Components/MultipleChoiceQuestion/MultipleChoiceQuestion";
import LessonTemplate, { LessonState, LessonTemplateProps } from "../../../../Components/LessonTemplate/LessonTemplate";
import { JSX } from "react/jsx-runtime";
import MarkdownTextView from "../../../../Components/MarkdownTextView/MarkdownTextView";
import { Fragment } from "react";

/**
 * Component state for this lesson: everything declared by `LessonState`
 * plus the page field below.
 */
interface State extends LessonState {
    /** Page number kept in state; the `C2_ML2` constructor initialises it to 1. */
    currentPage: number;
}

/**
 * Lesson "What is Machine Learning?".
 *
 * Supplies the content of five pages (indices 0-4) through `getPageData`:
 * a diagnostic quiz link, an introduction to machine learning, neural
 * networks and loss functions, gradient descent and local minima, and a
 * post-completion quiz link. Everything else is inherited from
 * `LessonTemplate`.
 */
class C2_ML2 extends LessonTemplate<State> {
    /**
     * @param props Props forwarded unchanged to `LessonTemplate`.
     */
    constructor(props: LessonTemplateProps) {
        // NOTE(review): 5 is presumably the page count (getPageData defines pages 0-4),
        // and the string the lesson title; confirm against LessonTemplate's constructor.
        super(props, 5, "What is Machine Learning?")

        // NOTE(review): this replaces any state assigned by the base constructor, and
        // starts at 1 while getPageData is 0-based; confirm both are intended.
        this.state = {
            currentPage: 1
        }
    }

    /**
     * Returns the content of one lesson page.
     *
     * @param index Zero-based page index; pages 0 through 4 are defined.
     * @returns The JSX for the requested page, or an empty fragment for any
     *          other index.
     */
    getPageData(index: number): JSX.Element {
        switch (index) {
            // Page 0: link to the diagnostic quiz taken before reading.
            case 0:
                return <Fragment>
                    <MarkdownTextView rawText={"### Diagnostic test\nBefore you start reading, please fill out the [diagnostic quiz](https://docs.google.com/forms/d/e/1FAIpQLScJpRJJwaLZjplC9YKh0BDC_Ul9hnkLSk_5VtyipDWIZAotAg/viewform?usp=sf_link). This is meant to see where your starting point is and provide a basis for measuring your learning. It is fine to skip any question that you are unsure of."} />
                </Fragment>

            // Page 1: what machine learning is, plus a question on parameter counts.
            case 1:
                return <Fragment>
                    <MarkdownTextView rawText={"### What is Machine Learning\n<span style='color:#4069a0'>**Machine learning is the process through which an AI system gains its intelligence from data**</span>. An AI system like ChatGPT is not born with intelligence. Instead, its intelligence is gradually gained as it sips through tons of data.\n\nLearning from data is not as straightforward as it sounds. An AI system is a huge machine with lots of controls (e.g. knobs, switches) called **parameters**. Each control has a numerical state. Some combinations of those control states lead to intelligent behaviors, and are what we want to get at. But such states are very rare to find. The challenge of machine learning is to come up with reliable and efficient strategies to arrive at those optimal states for different kinds of AI systems, using data. This process is called **training**. To be clear, different AI models (e.g. decision tree, support vector machine, linear regressor, neural network) learn from data in different ways, and they are all part of the field of machine learning."} enableHTML />

                    {/* <img src="/assets/chapter2/knobs.jpg" width={500} className="centered" />
                    <div className="image-caption">A professional speaker has lots of controls. Imagine an AI system having billions of such controls. Machine learning is the science of how to find an optimal combination of values for those controls.</div> */}

                    <MultipleChoiceQuestion prompt="Do you know how many parameters ChatGPT (GPT 3.5 version) has? Take a guess." options={["Fewer than a million", "Between 1 million and 1 billion", "Between 1 billion and 100 billion", "More than 100 billion"]} correctIndex={3} explanation="GPT 3.5 has 175 billion parameters. Natural language is hard for machines to learn. Today's chatbots are typically powered by hundreds of billions of parameters. Look, if they are not this large, they wouldn't be called “Large Language Models” right?"/>
                </Fragment>

            // Page 2: neural networks and loss functions, plus a question on loss.
            case 2:
                return <Fragment>
                    <MarkdownTextView rawText={"### Neural Networks\nOne popular type of machine learning model is the **neural network**. In neural networks, many simple units, called *neurons*, are interconnected into larger structures in a way that loosely resembles the human brain. The network’s behavior is determined by the weights of the links that interconnect the neurons. These weights make up the parameters of the neural network. When a neural network is first created, we start by **initializing** its parameters to random values."}/>

                    <MarkdownTextView rawText={"### Loss and Loss Function\nWe assess a neural network by comparing its **prediction** with the correct answer, a.k.a. the **label**. For example, in sentiment prediction, the neural network needs to give a score between -1 and 1 indicating how positive it thinks a product review is. Say that for one review, the model predicts 0.2 and the actual label is 1 (meaning that the review is positive, and the model is right but not very confident). To quantify how bad the current prediction is, we need some **loss function** that measures the error between the prediction 0.2 and target 1. A popular choice is the square function because it has nice mathematical properties, although other ones exist. After taking the square, we get $(0.2 - 1)^2 = 0.64$, which is called the **loss**."} />

                    <MultipleChoiceQuestion prompt="Based on the text above, what do you think is true about loss functions?" options={["Loss can be negative", "Loss functions have at least two inputs", "Large loss is bad", "All of the above"]} correctIndex={3} explanation="There is no strict rule that the loss can't be negative, as long as it's bounded below. A loss function needs at least 2 inputs, one for the prediction, one for the actual value, in order to measure their distance. Large loss is bad because we want the error of the prediction to be as small as possible."/>
                </Fragment>

            // Page 3: gradient descent, loss landscape, local minima and learning rate.
            case 3:
                return <Fragment>
                    <MarkdownTextView rawText={"### Gradient Descent and Loss Minimization\nThe loss of neural networks is minimized through an algorithm called *gradient descent*. Given the current state of a neural network, the algorithm calculates (through automatic differentiation) which direction it should adjust each of its parameters in order to result in the sharpest decrease in loss, and move a small step in that direction. This procedure of searching for an optimal state according to some objective is called **optimization**. The step size, called **learning rate**, is chosen by the programmer."} />

                    {/* alt text gives assistive technology a description instead of the file name */}
                    <img src="/assets/chapter2/gradient descent.png" width={500} className="centered" alt="Illustration of gradient descent on a loss surface"/>
                    <div className="image-caption">Illustration of gradient descent. The surface represents the value of the loss function for different states of the neural network model. The model starts somewhere and keeps moving in the direction with the steepest slope in small steps.</div>

                    <MarkdownTextView rawText="The value of the loss function for different model states is called the **loss landscape**. The direction of the slope at a point is called **gradient**, hence the name *gradient* descent." />

                    <MarkdownTextView rawText={"### The Danger of Local Minima and Relationship with Learning Rate\nA local minimum is a point that has a minimal value compared to its surrounding region (forming a pocket in the loss landscape), but not necessarily the smallest among all possible points. When doing gradient descent, our goal is always to find the **global minimum**, but sometimes, unavoidably, we end up in a local minimum. This is a limitation of gradient descent. Increasing the learning rate can reduce the chance of getting stuck in small local minima pockets, but at the risk of reduced precision when narrowing down on a global minimum. If the only global minimum of a neural network is in a tiny pocket, then with a large learning rate, the neural network could take forever to land inside it."} />

                    <img src="/assets/chapter2/local minimum.png" width={500} className="centered" alt="Illustration of local minima in a loss landscape" />
                    <div className="image-caption">While a larger learning rate can reduce the risk of getting stuck in a local minimum, it increases the risk of not being able to land precisely at a global minimum.</div>
                </Fragment>

            // Page 4: link to the quiz taken after reading.
            case 4:
                return <Fragment>
                    <MarkdownTextView rawText={"### Post-completion Quiz\nOnce you have finished reading the text, please complete the [quiz](https://docs.google.com/forms/d/e/1FAIpQLSc8QSIrAlAYVzxpHb4d6aaJ_fMmNV5gLOwlmw9qeC7jyQ06eQ/viewform?usp=sf_link). It contains 10 multiple choice questions, which are the same as those on the diagnostic test."}/>
                </Fragment>

            // Any other index has no content.
            default:
                return <Fragment />
        }
    }
}

// Default export: the lesson component defined above.
export default C2_ML2;