cm1: Base introduction and some elements of linear algebra

2023-09-27 19:01:42 +02:00 · 2023-09-22 17:32:56 +02:00 · 2023-09-22 17:32:56 +02:00 · a92a13d354
commit a92a13d354
parent 14928631ec
11 changed files with 389 additions and 18 deletions
--- a/6
+++ b/6
@ -0,0 +1,6 @@
+options=-shell-escape -file-line-error
+
+all: main.pdf
+
+%.pdf: %.tex
+	lualatex $(options) $<
--- a/content/chapters/part1/0.tex
+++ b/content/chapters/part1/0.tex
@ -0,0 +1 @@
+\part{}
--- a/content/chapters/part1/1.tex
+++ b/content/chapters/part1/1.tex
@ -0,0 +1,112 @@
+
+
+\section{Generalized Linear Model}
+
+\[
+    g(\EE(Y)) = X \beta
+\]
+with $g$ being
+\begin{itemize}
+    \item Logistic regression: $g(v) = \log \left(\frac{v}{1-v}\right)$, for instance for boolean values,
+    \item Poisson regression: $g(v) = \log(v)$, for instance for discrete (count) variables.
+\end{itemize}
+
+\subsection{Penalized Regression}
+
+When the number of variables is large, e.g., when the number of explanatory variables exceeds the number of observations ($p \gg n$, where $p$ is the number of explanatory variables and $n$ is the number of observations), we cannot estimate the parameters.
+In order to estimate the parameters, we can use penalties (additional terms).
+
+Lasso regression, Elastic Net, etc.
+
+\subsection{Simple Linear Model}
+
+\begin{align*}
+    \Y &= \X & \beta & + & \varepsilon.\\
+    n \times 1 & n \times 2 & 2 \times 1 & + & n \times 1 \\
+    \begin{pmatrix}
+        Y_1 \\
+        Y_2 \\
+        \vdots \\
+        Y_n
+    \end{pmatrix}
+    &= \begin{pmatrix}
+        1 & X_1 \\
+        1 & X_2 \\
+        \vdots & \vdots \\
+        1 & X_n
+    \end{pmatrix}
+    & \begin{pmatrix}
+        \beta_0 \\
+        \beta_1
+    \end{pmatrix}
+    & + & 
+    \begin{pmatrix}
+        \varepsilon_1 \\
+        \varepsilon_2 \\
+        \vdots \\
+        \varepsilon_n
+    \end{pmatrix}
+\end{align*}
+
+\subsection{Assumptions}
+
+\begin{itemize}
+    \item 
+\end{itemize}
+
+
+\subsection{Statistical Analysis Workflow}
+
+\begin{enumerate}[label={\bfseries\color{primary}Step \arabic*.}]
+    \item Graphical representation;
+    \item ...
+\end{enumerate}
+
+
+\section{Parameter Estimation}
+
+\subsection{Simple Linear Regression}
+
+\subsection{General Case}
+
+If $\X^\T\X$ is invertible, the OLS estimator is:
+\begin{equation}
+\hat{\beta} = (\X^\T\X)^{-1} \X^\T \Y
+\end{equation}
+
+\subsection{Ordinary Least Squares Algorithm}
+
+We want to minimize the distance between $\X\beta$ and $\Y$:
+\[
+    \min \norm{\Y - \X\beta}^2
+\]
+(See \autoref{ch:elements-of-linear-algebra}).
+\begin{align*}
+    \Rightarrow& \X \beta = proj^{(1, \X)} \Y\\
+    \Rightarrow& \forall v \in w,\, vy = v proj^w(y)\\
+    \Rightarrow& \forall i: \\
+    & \X_i \Y = \X_i \X\hat{\beta} \qquad \text{where $\hat{\beta}$ is the estimator of $\beta$} \\
+    \Rightarrow& \X^\T \Y = \X^\T \X \hat{\beta} \\
+    \Rightarrow& {\color{red}(\X^\T \X)^{-1}} \X^\T \Y = {\color{red}(\X^\T \X)^{-1}} (\X^\T\X) \hat{\beta} \\
+    \Rightarrow& \hat{\beta} = (\X^\T\X)^{-1} \X^\T \Y
+\end{align*}
+
+
+This formula comes from the orthogonal projection of $\Y$ on the subspace defined by the explanatory variables $\X$.
+
+
+
+
+$\X \hat{\beta}$ is the closest point to $\Y$ in the subspace generated by $\X$.
+
+
+
+If $H$ is the projection matrix onto the subspace generated by $\X$, $H\Y$ is the projection of $\Y$ on this subspace, which corresponds to $\X\hat{\beta}$.
+
+
+\section{Coefficient of Determination: $R^2$}
+\begin{definition}[$R^2$]
+    \[
+        0 \leq R^2 = \frac{\norm{\X\hat{\beta} - \bar{\Y}\One}^2}{\norm{\Y - \bar{\Y}\One}^2} = 1 - \frac{\norm{\Y - \X\hat{\beta}}^2}{\norm{\Y - \bar{\Y}\One}^2} \leq 1
+    \] proportion of variation of $\Y$ explained by the model.
+\end{definition}
--- a/content/chapters/part1/2.tex
+++ b/content/chapters/part1/2.tex
@ -0,0 +1,212 @@
+\chapter{Elements of Linear Algebra}
+\label{ch:elements-of-linear-algebra}
+
+\begin{remark}[vector]
+    Let $u$ be a vector; we will use the following notations interchangeably: $u$ and $\vec{u}$.
+\end{remark}
+
+Let $u = \begin{pmatrix}
+        u_1    \\
+        \vdots \\
+        u_n
+    \end{pmatrix}$ and $v = \begin{pmatrix}
+        v_1    \\
+        \vdots \\
+        v_n
+    \end{pmatrix}$
+
+\begin{align*}
+    \langle u, v\rangle & = \left(u_1, \ldots, u_n\right) \begin{pmatrix}
+                                                              v_1    \\
+                                                              \vdots \\
+                                                              v_n
+                                                          \end{pmatrix} \\
+                        & = u_1 v_1 + u_2 v_2 + \ldots + u_n v_n
+\end{align*}
+
+
+\begin{definition}[Norm]
+    Length of the vector.
+    \[
+        \norm{u} = \sqrt{\scalar{u, u}}
+    \]
+
+    $\norm{u} \geq 0$
+\end{definition}
+
+\begin{definition}[Distance]
+    \[
+        dist(u, v) = \norm{u-v}
+    \]
+\end{definition}
+
+\begin{definition}[Orthogonality]
+    \[
+        u \perp v \Leftrightarrow \scalar{u, v} = 0
+    \]
+\end{definition}
+
+\begin{remark}
+    \[
+        (dist(u, v))^2 = \norm{u - v}^2,
+    \] and
+    \[
+        \norm{u - v}^2 = \scalar{v-u, v-u}.
+    \]
+\end{remark}
+
+Scalar product properties:
+\begin{itemize}
+    \item $\scalar{u, v} = \scalar{v, u}$
+    \item $\scalar{(u+v), w} = \scalar{u, w} + \scalar{v, w}$
+    \item $\scalar{\lambda u, v} = \lambda \scalar{u, v}$ for any scalar $\lambda$
+    \item $\scalar{\vec{u}, \vec{v}} = \norm{\vec{u}} \times \norm{\vec{v}} \times \cos(\widehat{\vec{u}, \vec{v}})$
+\end{itemize}
+
+\begin{align*}
+    \scalar{v-u, v-u} & = \scalar{v, v} + \scalar{u, u} - 2 \scalar{u, v} \\
+                      & = \norm{v}^2 + \norm{u}^2 - 2 \scalar{u, v}
+\end{align*}
+
+\begin{align*}
+    \norm{u - v}^2 & = \norm{u}^2 + \norm{v}^2 - 2 \scalar{u,v} \\
+    \norm{u + v}^2 & = \norm{u}^2 + \norm{v}^2 + 2 \scalar{u,v}
+\end{align*}
+
+
+If $u \perp v$, then $\scalar{u, v} = 0$
+\begin{proof}[Indeed]
+    $\norm{u-v}^2 = \norm{u+v}^2$,
+    \begin{align*}
+        \Leftrightarrow & -2 \scalar{u, v} = 2 \scalar{u, v} \\
+        \Leftrightarrow & 4 \scalar{u, v} = 0                \\
+        \Leftrightarrow & \scalar{u, v} = 0
+    \end{align*}
+\end{proof}
+
+\begin{theorem}[Pythagorean theorem]
+    If $u \perp v$, then $\norm{u+v}^2 = \norm{u}^2 + \norm{v}^2$.
+\end{theorem}
+
+\begin{definition}[Orthogonal Projection]
+
+\end{definition}
+Let $y = \begin{pmatrix}
+        y_1 \\
+        \vdots \\
+        y_n
+    \end{pmatrix} \in \RR[n]$ and $w$ a subspace of $\RR[n]$.
+$y$ can be written as the sum of its orthogonal projection on $w$ and a vector $z$ orthogonal to $w$:
+\[
+    y = proj^w(y) + z,
+\]
+\]
+where
+\[
+    \begin{cases}
+        z \in w^\perp \\
+        proj^w(y) \in w
+    \end{cases}
+\]
+There is only one such decomposition: the vectors $proj^w(y)$ and $z$ are unique.
+
+The scalar product between $z$ and any vector of $w$ is zero.
+
+\begin{property}
+    $proj^w(y)$ is the closest vector to $y$ that belongs to $w$.
+\end{property}
+
+\begin{definition}[Matrix]
+    A matrix is a map (application), that is, a function that transforms one thing into another; it is a linear function.
+\end{definition}
+
+\begin{example}[Matrix application]
+
+    Let $A$ be a matrix:
+    \[
+        A = \begin{pmatrix}
+            a & b \\
+            c & d
+        \end{pmatrix}
+    \] and
+    \[
+        x = \begin{pmatrix}
+            x_1 \\
+            x_2
+        \end{pmatrix}
+    \]
+    Then,
+    \begin{align*}
+        Ax & = \begin{pmatrix}
+                   a & b \\
+                   c & d
+               \end{pmatrix}
+        \begin{pmatrix}
+            x_1 \\
+            x_2
+        \end{pmatrix}        \\
+           & = \begin{pmatrix}
+                   a x_1 + b x_2 \\
+                   c x_1 + d x_2
+               \end{pmatrix}
+    \end{align*}
+
+    Similarly,
+    \begin{align*}
+        \begin{pmatrix}
+            a & b & c & d \\
+            e & f & g & h \\
+            i & j & k & l
+        \end{pmatrix}
+        \begin{pmatrix}
+            x_1 \\
+            x_2 \\
+            x_3 \\
+            x_4
+        \end{pmatrix}
+         & = \begin{pmatrix}
+                 a x_1 + b x_2 + c x_3 + d x_4 \\
+                 e x_1 + f x_2 + g x_3 + h x_4 \\
+                 i x_1 + j x_2 + k x_3 + l x_4
+             \end{pmatrix}
+    \end{align*}
+\end{example}
+
+The number of columns has to be the same as the dimension of the vector to which the matrix is applied.
+
+\begin{definition}[Transpose of a Matrix]
+    Let $A = \begin{pmatrix}
+        a & b \\
+        c & d
+    \end{pmatrix}$, then $A^\T =  \begin{pmatrix}
+        a & c \\
+        b & d
+    \end{pmatrix}$
+\end{definition}
+
+\begin{example}
+    \begin{align*}
+        Y & = X \beta + \varepsilon \\
+        \begin{pmatrix}
+            y_1 \\
+            y_2 \\
+            y_3 \\
+            y_4
+        \end{pmatrix}
+          & = \begin{pmatrix}
+                  1 & x_{11} & x_{12} \\
+                  1 & x_{21} & x_{22} \\
+                  1 & x_{31} & x_{32} \\
+                  1 & x_{41} & x_{42}
+              \end{pmatrix}
+        \begin{pmatrix}
+            \beta_0 \\
+            \beta_1 \\
+            \beta_2
+        \end{pmatrix} +
+        \begin{pmatrix}
+            \varepsilon_1 \\
+            \varepsilon_2 \\
+            \varepsilon_3 \\
+            \varepsilon_4
+        \end{pmatrix}
+    \end{align*}
+\end{example}
--- a/content/chapters/part1/3.tex
+++ b/content/chapters/part1/3.tex
--- a/content/introduction.tex
+++ b/content/introduction.tex
@ -0,0 +1,25 @@
+\chapter{Introduction}
+
+\begin{definition}[Long-Term Nonprogressor (LTNP)]
+    Patient who will remain a long time in good health condition, even with a large viral load (cf. HIV).
+\end{definition}
+
+\begin{example}[Genotype: Qualitative or Quantitative?]
+    \[
+        \text{SNP}:
+        \begin{cases}
+            \text{AA} \\
+            \text{AB} \\
+            \text{BB} 
+        \end{cases}
+        \rightarrow
+        \begin{pmatrix}
+            0 \\
+            1 \\
+            2
+        \end{pmatrix},
+    \] 
+    thus we might consider genotype either as a qualitative variable or quantitative variable.
+\end{example} 
+
+When the variables are quantitative, we use regression, whereas for qualitative variables, we use an analysis of variance.
--- a/figures/schemes/coordinates_systems.pdf
+++ b/figures/schemes/coordinates_systems.pdf
--- a/figures/schemes/coordinates_systems.tex
+++ b/figures/schemes/coordinates_systems.tex
@ -0,0 +1,12 @@
+\documentclass[tikz]{standalone}
+\usepackage{tikz}
+\usepackage{tkz-euclide}
+
+\begin{document}
+\begin{tikzpicture}
+    \tkzInit[xmax=5,ymax=5,xmin=-5,ymin=-5]
+    \tkzGrid
+    \tkzAxeXY
+    \draw[thick, latex-latex] (-1,4) -- (4,-6) node[anchor=south west] {$a$};
+\end{tikzpicture}
+\end{document}
--- a/main.pdf
+++ b/main.pdf
--- a/main.tex
+++ b/main.tex
@ -8,43 +8,43 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

 \documentclass[
-    a4paper,
-    fontsize=10pt,
-    fleqn,
-    oneside
+  a4paper,
+  fontsize=10pt,
+  fleqn,
+  oneside
 ]{scrbook}

 \usepackage{mus}

 \titlehead{GENIOMHE}
-\title{Multivariate Statistics}
+\title{Multivariate\newline{}Statistics}
 \author{Samuel Ortion}
 \teacher{Cyril Dalmasso}
 \cursus{GENIOMHE}
 \university{Université Paris-Saclay, Université d'Évry val d'Essonne}
 \semester{M1 - S1}
-\date{}
+\date{Fall 2023}
+
+\definecolor{myblue}{HTML}{5654fa}
+\colorlet{primary}{myblue}

 \input{definitions}
+\input{preamble}

 \hypersetup{
-    pdftitle={
-        Course - Multivariate Statistics
-    },
-    pdfauthor={
-            Samuel Ortion
-        },
-    pdfsubject={},
-    pdfkeywords={},
-    pdfcreator={LaTeX}
+  pdftitle={Course - Multivariate Statistics},
+  pdfauthor={Samuel Ortion},
+  pdfsubject={},
+  pdfkeywords={},
+  pdfcreator={LaTeX}
 }

 \addbibresource{references}

 \usepackage[
-    type={CC},
-    modifier={by-sa},
-    version={4.0},
+  type={CC},
+  modifier={by-sa},
+  version={4.0},
 ]{doclicense}

 \input{preamble}
--- a/preamble.tex
+++ b/preamble.tex
@ -0,0 +1,3 @@
+\usepackage{pgffor}
+\usetikzlibrary{math}
+\usepackage{standalone}