mirror of https://forge.s1gm4.eu/GENIOMHE/multivariate-statistics.git (synced 2023-09-27 19:01:42 +02:00)
cm1: Base introduction and some elements of linear algebra
This commit is contained in: parent 14928631ec, commit a92a13d354
Makefile (Normal file, 6 lines)
@@ -0,0 +1,6 @@
options=-shell-escape -file-line-error

all: main.pdf

%.pdf: %.tex
	lualatex $(options) $<
@@ -0,0 +1 @@
\part{}
@@ -0,0 +1,112 @@
\section{Generalized Linear Model}

\[
g(\EE(Y)) = X \beta
\]
with $g$ the link function, for example:
\begin{itemize}
\item Logistic regression: $g(v) = \log \left(\frac{v}{1-v}\right)$, for instance for boolean responses;
\item Poisson regression: $g(v) = \log(v)$, for instance for discrete (count) responses.
\end{itemize}
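
The ordinary linear model corresponds to the identity link $g(v) = v$, that is, $\EE(Y) = X\beta$.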

\subsection{Penalized Regression}

When the number of explanatory variables is large, in particular when it exceeds the number of observations, i.e.\ $p \gg n$ (with $p$ the number of explanatory variables and $n$ the number of observations), we cannot estimate the parameters by ordinary least squares.
In order to estimate the parameters, we can add penalty terms to the criterion being minimized.

Examples: Lasso regression, Elastic Net, etc.
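
For instance, the Lasso adds an $\ell_1$ penalty to the least-squares criterion, while the Elastic Net combines $\ell_1$ and $\ell_2$ penalties:
\[
\hat{\beta}_{\text{lasso}} = \arg\min_{\beta}\left\{ \norm{\Y - \X\beta}^2 + \lambda \sum_{j=1}^{p} |\beta_j| \right\},
\qquad
\hat{\beta}_{\text{EN}} = \arg\min_{\beta}\left\{ \norm{\Y - \X\beta}^2 + \lambda_1 \sum_{j=1}^{p} |\beta_j| + \lambda_2 \sum_{j=1}^{p} \beta_j^2 \right\},
\]
where the tuning parameters $\lambda, \lambda_1, \lambda_2 \geq 0$ control the strength of the penalty.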

\subsection{Simple Linear Model}

\begin{align*}
\underset{n \times 1}{\Y} &= \underset{n \times 2}{\X}\ \underset{2 \times 1}{\beta} + \underset{n \times 1}{\varepsilon}\\
\begin{pmatrix}
Y_1 \\
Y_2 \\
\vdots \\
Y_n
\end{pmatrix}
&= \begin{pmatrix}
1 & X_1 \\
1 & X_2 \\
\vdots & \vdots \\
1 & X_n
\end{pmatrix}
\begin{pmatrix}
\beta_0 \\
\beta_1
\end{pmatrix}
+
\begin{pmatrix}
\varepsilon_1 \\
\varepsilon_2 \\
\vdots \\
\varepsilon_n
\end{pmatrix}
\end{align*}
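
Equivalently, for each observation:
\[
Y_i = \beta_0 + \beta_1 X_i + \varepsilon_i, \qquad i = 1, \ldots, n.
\]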

\subsection{Assumptions}

\begin{itemize}
\item the errors $\varepsilon_1, \ldots, \varepsilon_n$ are independent;
\item $\EE(\varepsilon_i) = 0$;
\item $\mathrm{Var}(\varepsilon_i) = \sigma^2$ (homoscedasticity);
\item $\varepsilon_i \sim \mathcal{N}(0, \sigma^2)$ (Gaussian errors, needed for tests and confidence intervals).
\end{itemize}


\subsection{Statistical Analysis Workflow}

\begin{enumerate}[label={\bfseries\color{primary}Step \arabic*.}]
\item Graphical representation;
\item ...
\end{enumerate}


\section{Parameter Estimation}

\subsection{Simple Linear Regression}

\subsection{General Case}

If $\X^\T\X$ is invertible, the OLS estimator is:
\begin{equation}
\hat{\beta} = (\X^\T\X)^{-1} \X^\T \Y
\end{equation}


\subsection{Ordinary Least Squares Algorithm}

We want to minimize the distance between $\X\beta$ and $\Y$:
\[
\min_{\beta} \norm{\Y - \X\beta}^2
\]
(see \autoref{ch:elements-of-linear-algebra}).

\begin{align*}
\Rightarrow& \X \hat{\beta} = proj^{w}(\Y) \qquad \text{where $w$ is the subspace spanned by the columns of $\X$}\\
\Rightarrow& \forall v \in w,\, \scalar{v, \Y} = \scalar{v, proj^{w}(\Y)}\\
\Rightarrow& \forall i: \\
& \X_i^\T \Y = \X_i^\T \X\hat{\beta} \qquad \text{where $\X_i$ is the $i$-th column of $\X$ and $\hat{\beta}$ the estimator of $\beta$} \\
\Rightarrow& \X^\T \Y = \X^\T \X \hat{\beta} \\
\Rightarrow& {\color{red}(\X^\T \X)^{-1}} \X^\T \Y = {\color{red}(\X^\T \X)^{-1}} (\X^\T\X) \hat{\beta} \\
\Rightarrow& \hat{\beta} = (\X^\T\X)^{-1} \X^\T \Y
\end{align*}


This formula comes from the orthogonal projection of $\Y$ onto the subspace defined by the explanatory variables $\X$.


$\X \hat{\beta}$ is the closest point to $\Y$ in the subspace generated by $\X$.


If $H$ is the projection matrix onto the subspace generated by $\X$, then $H\Y$ is the projection of $\Y$ onto this subspace, which corresponds to $\X\hat{\beta}$.
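
Explicitly, combining this with the formula for $\hat{\beta}$ above:
\[
H = \X(\X^\T\X)^{-1}\X^\T, \qquad \X\hat{\beta} = \X(\X^\T\X)^{-1}\X^\T\Y = H\Y.
\]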

\section{Coefficient of Determination: $R^2$}
\begin{definition}[$R^2$]
\[
0 \leq R^2 = \frac{\norm{\X\hat{\beta} - \bar{\Y}\One}^2}{\norm{\Y - \bar{\Y}\One}^2} = 1 - \frac{\norm{\Y - \X\hat{\beta}}^2}{\norm{\Y - \bar{\Y}\One}^2} \leq 1
\]
$R^2$ is the proportion of the variation of $\Y$ explained by the model.
\end{definition}
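
In particular, $R^2 = 1$ when $\Y = \X\hat{\beta}$ (the model reproduces the observations exactly), and $R^2 = 0$ when $\X\hat{\beta} = \bar{\Y}\One$ (the model explains no more than the mean).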
content/chapters/part1/2.tex (Normal file, 212 lines)
@@ -0,0 +1,212 @@
\chapter{Elements of Linear Algebra}
\label{ch:elements-of-linear-algebra}

\begin{remark}[vector]
Let $u$ be a vector; we will use the notations $u$ and $\vec{u}$ interchangeably.
\end{remark}

Let $u = \begin{pmatrix}
u_1 \\
\vdots \\
u_n
\end{pmatrix}$ and $v = \begin{pmatrix}
v_1 \\
\vdots \\
v_n
\end{pmatrix}$.

\begin{align*}
\langle u, v\rangle & = \left(u_1, \ldots, u_n\right) \begin{pmatrix}
v_1 \\
\vdots \\
v_n
\end{pmatrix} \\
& = u_1 v_1 + u_2 v_2 + \ldots + u_n v_n
\end{align*}
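
For instance, in $\RR[2]$:
\[
\scalar{\begin{pmatrix} 1 \\ 2 \end{pmatrix}, \begin{pmatrix} 3 \\ 4 \end{pmatrix}} = 1 \times 3 + 2 \times 4 = 11.
\]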


\begin{definition}[Norm]
The norm of a vector is its length:
\[
\norm{u} = \sqrt{\scalar{u, u}}
\]

$\norm{u} \geq 0$.
\end{definition}


\begin{definition}[Distance]
\[
dist(u, v) = \norm{u-v}
\]
\end{definition}


\begin{definition}[Orthogonality]
\[
u \perp v \Leftrightarrow \scalar{u, v} = 0
\]
\end{definition}


\begin{remark}
\[
(dist(u, v))^2 = \norm{u - v}^2 = \scalar{v-u, v-u}.
\]
\end{remark}

Scalar product properties:
\begin{itemize}
\item $\scalar{u, v} = \scalar{v, u}$;
\item $\scalar{(u+v), w} = \scalar{u, w} + \scalar{v, w}$;
\item $\scalar{\lambda u, v} = \lambda \scalar{u, v}$ for any scalar $\lambda$;
\item $\scalar{\vec{u}, \vec{v}} = \norm{\vec{u}} \times \norm{\vec{v}} \times \cos(\widehat{\vec{u}, \vec{v}})$.
\end{itemize}

\begin{align*}
\scalar{v-u, v-u} & = \scalar{v, v} + \scalar{u, u} - 2 \scalar{u, v} \\
& = \norm{v}^2 + \norm{u}^2 - 2 \scalar{u, v}
\end{align*}

\begin{align*}
\norm{u - v}^2 & = \norm{u}^2 + \norm{v}^2 - 2 \scalar{u,v} \\
\norm{u + v}^2 & = \norm{u}^2 + \norm{v}^2 + 2 \scalar{u,v}
\end{align*}


$u \perp v$, i.e.\ $\scalar{u, v} = 0$, exactly when $\norm{u-v}^2 = \norm{u+v}^2$.
\begin{proof}[Indeed]
$\norm{u-v}^2 = \norm{u+v}^2$
\begin{align*}
\Leftrightarrow & -2 \scalar{u, v} = 2 \scalar{u, v} \\
\Leftrightarrow & 4 \scalar{u, v} = 0 \\
\Leftrightarrow & \scalar{u, v} = 0
\end{align*}
\end{proof}


\begin{theorem}[Pythagorean theorem]
If $u \perp v$, then $\norm{u+v}^2 = \norm{u}^2 + \norm{v}^2$.
\end{theorem}
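
This follows from the expansion above: if $\scalar{u, v} = 0$, then $\norm{u+v}^2 = \norm{u}^2 + \norm{v}^2 + 2\scalar{u, v} = \norm{u}^2 + \norm{v}^2$.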

\begin{definition}[Orthogonal Projection]
Let $y = \begin{pmatrix}
y_1 \\
\vdots \\
y_n
\end{pmatrix} \in \RR[n]$ and $w$ a subspace of $\RR[n]$.
$y$ can be written as the sum of its orthogonal projection on $w$ and a vector orthogonal to $w$:
\[
y = proj^w(y) + z,
\]
where
\[
\begin{cases}
z \in w^\perp \\
proj^w(y) \in w
\end{cases}
\]
\end{definition}
This decomposition is unique: there is only one such vector $proj^w(y)$.

The scalar product between $z$ and any vector of $w$ is zero.


\begin{property}
$proj^w(y)$ is the closest vector to $y$ that belongs to $w$.
\end{property}
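
In matrix form (standard result; here $W$ denotes any matrix whose columns form a basis of $w$):
\[
proj^w(y) = W (W^\T W)^{-1} W^\T y,
\]
which is exactly the form of the OLS fitted values $\X\hat{\beta} = \X(\X^\T\X)^{-1}\X^\T\Y$.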


\begin{definition}[Matrix]
A matrix represents a linear map, that is, a linear function that transforms one vector into another.
\end{definition}

\begin{example}[Matrix application]

Let $A$ be a matrix:
\[
A = \begin{pmatrix}
a & b \\
c & d
\end{pmatrix}
\] and
\[
x = \begin{pmatrix}
x_1 \\
x_2
\end{pmatrix}
\]
Then,
\begin{align*}
Ax & = \begin{pmatrix}
a & b \\
c & d
\end{pmatrix}
\begin{pmatrix}
x_1 \\
x_2
\end{pmatrix} \\
& = \begin{pmatrix}
a x_1 + b x_2 \\
c x_1 + d x_2
\end{pmatrix}
\end{align*}

Similarly,
\begin{align*}
\begin{pmatrix}
a & b & c & d \\
e & f & g & h \\
i & j & k & l
\end{pmatrix}
\begin{pmatrix}
x_1 \\
x_2 \\
x_3 \\
x_4
\end{pmatrix}
& = \begin{pmatrix}
a x_1 + b x_2 + c x_3 + d x_4 \\
e x_1 + f x_2 + g x_3 + h x_4 \\
i x_1 + j x_2 + k x_3 + l x_4
\end{pmatrix}
\end{align*}
\end{example}


The number of columns of the matrix has to equal the dimension of the vector to which the matrix is applied.


\begin{definition}[Transpose of a Matrix]
Let $A = \begin{pmatrix}
a & b \\
c & d
\end{pmatrix}$, then $A^\T = \begin{pmatrix}
a & c \\
b & d
\end{pmatrix}$.
\end{definition}
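
Two useful properties of the transpose: $(A^\T)^\T = A$ and $(AB)^\T = B^\T A^\T$.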


\begin{example}
\begin{align*}
Y & = X \beta + \varepsilon \\
\begin{pmatrix}
y_1 \\
y_2 \\
y_3 \\
y_4
\end{pmatrix}
& = \begin{pmatrix}
1 & x_{11} & x_{12} \\
1 & x_{21} & x_{22} \\
1 & x_{31} & x_{32} \\
1 & x_{41} & x_{42}
\end{pmatrix}
\begin{pmatrix}
\beta_0 \\
\beta_1 \\
\beta_2
\end{pmatrix} +
\begin{pmatrix}
\varepsilon_1 \\
\varepsilon_2 \\
\varepsilon_3 \\
\varepsilon_4
\end{pmatrix}
\end{align*}
\end{example}
content/chapters/part1/3.tex (Normal file)
@@ -0,0 +1,25 @@
\chapter{Introduction}

\begin{definition}[Long-Term Nonprogressor (LTNP)]
A patient who remains in good health for a long time, even with a large viral load (cf.\ HIV).
\end{definition}

\begin{example}[Genotype: Qualitative or Quantitative?]
\[
\text{SNP}:
\begin{cases}
\text{AA} \\
\text{AB} \\
\text{BB}
\end{cases}
\rightarrow
\begin{pmatrix}
0 \\
1 \\
2
\end{pmatrix},
\]
thus we might consider the genotype either as a qualitative variable or as a quantitative variable.
\end{example}

When the variables are quantitative, we use regression, whereas for qualitative variables, we use an analysis of variance.
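
As a sketch of the two codings (the notation $x_i^{(\text{AB})}, x_i^{(\text{BB})}$ is ours): treating the genotype as quantitative uses a single regressor $x_i \in \{0, 1, 2\}$ counting the B alleles, whereas treating it as qualitative uses indicator variables such as
\[
x_i^{(\text{AB})} =
\begin{cases}
1 & \text{if individual $i$ has genotype AB} \\
0 & \text{otherwise}
\end{cases}
\qquad
x_i^{(\text{BB})} =
\begin{cases}
1 & \text{if individual $i$ has genotype BB} \\
0 & \text{otherwise.}
\end{cases}
\]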
figures/schemes/coordinates_systems.pdf (Normal file, BIN)
Binary file not shown.
figures/schemes/coordinates_systems.tex (Normal file, 12 lines)
@@ -0,0 +1,12 @@
\documentclass[tikz]{standalone}
\usepackage{tikz}
\usepackage{tkz-euclide}

\begin{document}
\begin{tikzpicture}
\tkzInit[xmax=5,ymax=5,xmin=-5,ymin=-5]
\tkzGrid
\tkzAxeXY
\draw[thick, latex-latex] (-1,4) -- (4,-6) node[anchor=south west] {$a$};
\end{tikzpicture}
\end{document}
main.tex (36 lines)
@@ -8,43 +8,43 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 \documentclass[
 a4paper,
 fontsize=10pt,
 fleqn,
 oneside
 ]{scrbook}
 
 \usepackage{mus}
 
 \titlehead{GENIOMHE}
-\title{Multivariate Statistics}
+\title{Multivariate\newline{}Statistics}
 \author{Samuel Ortion}
 \teacher{Cyril Dalmasso}
 \cursus{GENIOMHE}
 \university{Université Paris-Saclay, Université d'Évry val d'Essonne}
 \semester{M1 - S1}
-\date{}
+\date{Fall 2023}
 
+\definecolor{myblue}{HTML}{5654fa}
+\colorlet{primary}{myblue}
 
 \input{definitions}
+\input{preamble}
 
 \hypersetup{
-pdftitle={
-Course - Multivariate Statistics
-},
-pdfauthor={
-Samuel Ortion
-},
+pdftitle={Course - Multivariate Statistics},
+pdfauthor={Samuel Ortion},
 pdfsubject={},
 pdfkeywords={},
 pdfcreator={LaTeX}
 }
 
 \addbibresource{references}
 
 \usepackage[
 type={CC},
 modifier={by-sa},
 version={4.0},
 ]{doclicense}
 
 \input{preamble}
@@ -0,0 +1,3 @@
\usepackage{pgffor}
\usetikzlibrary{math}
\usepackage{standalone}