mirror of https://forge.s1gm4.eu/GENIOMHE/multivariate-statistics.git (synced 2023-09-27 19:01:42 +02:00)
cm1: Base introduction and some elements of linear algebra
This commit is contained in: parent 14928631ec, commit a92a13d354
Makefile (Normal file, 6 lines)
@@ -0,0 +1,6 @@
options=-shell-escape -file-line-error

all: main.pdf

%.pdf: %.tex
	lualatex $(options) $<
@@ -0,0 +1 @@
\part{}
@@ -0,0 +1,112 @@
\section{Generalized Linear Model}

\[
g(\EE(Y)) = X \beta
\]
with $g$ the link function, for example:
\begin{itemize}
\item Logistic regression: $g(v) = \log \left(\frac{v}{1-v}\right)$, for instance for boolean responses;
\item Poisson regression: $g(v) = \log(v)$, for instance for discrete (count) responses.
\end{itemize}
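
The ordinary linear model corresponds to the identity link $g(v) = v$, that is, $\EE(Y) = X\beta$.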

\subsection{Penalized Regression}

When the number of explanatory variables is large, in particular when it exceeds the number of observations, i.e.\ $p \gg n$ (with $p$ the number of explanatory variables and $n$ the number of observations), we cannot estimate the parameters by ordinary least squares.
In order to estimate the parameters, we can add penalty terms to the criterion being minimized.

Examples: Lasso regression, Elastic Net, etc.
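
For instance, the Lasso adds an $\ell_1$ penalty to the least-squares criterion, while the Elastic Net combines $\ell_1$ and $\ell_2$ penalties:
\[
\hat{\beta}_{\text{lasso}} = \arg\min_{\beta}\left\{ \norm{\Y - \X\beta}^2 + \lambda \sum_{j=1}^{p} |\beta_j| \right\},
\qquad
\hat{\beta}_{\text{EN}} = \arg\min_{\beta}\left\{ \norm{\Y - \X\beta}^2 + \lambda_1 \sum_{j=1}^{p} |\beta_j| + \lambda_2 \sum_{j=1}^{p} \beta_j^2 \right\},
\]
where the tuning parameters $\lambda, \lambda_1, \lambda_2 \geq 0$ control the strength of the penalty.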

\subsection{Simple Linear Model}

\begin{align*}
\underset{n \times 1}{\Y} &= \underset{n \times 2}{\X}\ \underset{2 \times 1}{\beta} + \underset{n \times 1}{\varepsilon}\\
\begin{pmatrix}
Y_1 \\
Y_2 \\
\vdots \\
Y_n
\end{pmatrix}
&= \begin{pmatrix}
1 & X_1 \\
1 & X_2 \\
\vdots & \vdots \\
1 & X_n
\end{pmatrix}
\begin{pmatrix}
\beta_0 \\
\beta_1
\end{pmatrix}
+
\begin{pmatrix}
\varepsilon_1 \\
\varepsilon_2 \\
\vdots \\
\varepsilon_n
\end{pmatrix}
\end{align*}
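
Equivalently, for each observation:
\[
Y_i = \beta_0 + \beta_1 X_i + \varepsilon_i, \qquad i = 1, \ldots, n.
\]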

\subsection{Assumptions}

\begin{itemize}
\item the errors $\varepsilon_1, \ldots, \varepsilon_n$ are independent;
\item $\EE(\varepsilon_i) = 0$;
\item $\mathrm{Var}(\varepsilon_i) = \sigma^2$ (homoscedasticity);
\item $\varepsilon_i \sim \mathcal{N}(0, \sigma^2)$ (Gaussian errors, needed for tests and confidence intervals).
\end{itemize}


\subsection{Statistical Analysis Workflow}

\begin{enumerate}[label={\bfseries\color{primary}Step \arabic*.}]
\item Graphical representation;
\item ...
\end{enumerate}


\section{Parameter Estimation}

\subsection{Simple Linear Regression}

\subsection{General Case}

If $\X^\T\X$ is invertible, the OLS estimator is:
\begin{equation}
\hat{\beta} = (\X^\T\X)^{-1} \X^\T \Y
\end{equation}


\subsection{Ordinary Least Squares Algorithm}

We want to minimize the distance between $\X\beta$ and $\Y$:
\[
\min_{\beta} \norm{\Y - \X\beta}^2
\]
(see \autoref{ch:elements-of-linear-algebra}).

\begin{align*}
\Rightarrow& \X \hat{\beta} = proj^{w}(\Y) \qquad \text{where $w$ is the subspace spanned by the columns of $\X$}\\
\Rightarrow& \forall v \in w,\, \scalar{v, \Y} = \scalar{v, proj^{w}(\Y)}\\
\Rightarrow& \forall i: \\
& \X_i^\T \Y = \X_i^\T \X\hat{\beta} \qquad \text{where $\X_i$ is the $i$-th column of $\X$ and $\hat{\beta}$ the estimator of $\beta$} \\
\Rightarrow& \X^\T \Y = \X^\T \X \hat{\beta} \\
\Rightarrow& {\color{red}(\X^\T \X)^{-1}} \X^\T \Y = {\color{red}(\X^\T \X)^{-1}} (\X^\T\X) \hat{\beta} \\
\Rightarrow& \hat{\beta} = (\X^\T\X)^{-1} \X^\T \Y
\end{align*}


This formula comes from the orthogonal projection of $\Y$ onto the subspace defined by the explanatory variables $\X$.


$\X \hat{\beta}$ is the closest point to $\Y$ in the subspace generated by $\X$.


If $H$ is the projection matrix onto the subspace generated by $\X$, then $H\Y$ is the projection of $\Y$ onto this subspace, which corresponds to $\X\hat{\beta}$.
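
Explicitly, combining this with the formula for $\hat{\beta}$ above:
\[
H = \X(\X^\T\X)^{-1}\X^\T, \qquad \X\hat{\beta} = \X(\X^\T\X)^{-1}\X^\T\Y = H\Y.
\]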

\section{Coefficient of Determination: $R^2$}
\begin{definition}[$R^2$]
\[
0 \leq R^2 = \frac{\norm{\X\hat{\beta} - \bar{\Y}\One}^2}{\norm{\Y - \bar{\Y}\One}^2} = 1 - \frac{\norm{\Y - \X\hat{\beta}}^2}{\norm{\Y - \bar{\Y}\One}^2} \leq 1
\]
$R^2$ is the proportion of the variation of $\Y$ explained by the model.
\end{definition}
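
In particular, $R^2 = 1$ when $\Y = \X\hat{\beta}$ (the model reproduces the observations exactly), and $R^2 = 0$ when $\X\hat{\beta} = \bar{\Y}\One$ (the model explains no more than the mean).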
content/chapters/part1/2.tex (Normal file, 212 lines)
@@ -0,0 +1,212 @@
\chapter{Elements of Linear Algebra}
\label{ch:elements-of-linear-algebra}

\begin{remark}[vector]
Let $u$ be a vector; we will use the notations $u$ and $\vec{u}$ interchangeably.
\end{remark}

Let $u = \begin{pmatrix}
u_1 \\
\vdots \\
u_n
\end{pmatrix}$ and $v = \begin{pmatrix}
v_1 \\
\vdots \\
v_n
\end{pmatrix}$.

\begin{align*}
\langle u, v\rangle & = \left(u_1, \ldots, u_n\right) \begin{pmatrix}
v_1 \\
\vdots \\
v_n
\end{pmatrix} \\
& = u_1 v_1 + u_2 v_2 + \ldots + u_n v_n
\end{align*}
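
For instance, in $\RR[2]$:
\[
\scalar{\begin{pmatrix} 1 \\ 2 \end{pmatrix}, \begin{pmatrix} 3 \\ 4 \end{pmatrix}} = 1 \times 3 + 2 \times 4 = 11.
\]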


\begin{definition}[Norm]
The norm of a vector is its length:
\[
\norm{u} = \sqrt{\scalar{u, u}}
\]

$\norm{u} \geq 0$.
\end{definition}


\begin{definition}[Distance]
\[
dist(u, v) = \norm{u-v}
\]
\end{definition}


\begin{definition}[Orthogonality]
\[
u \perp v \Leftrightarrow \scalar{u, v} = 0
\]
\end{definition}


\begin{remark}
\[
(dist(u, v))^2 = \norm{u - v}^2 = \scalar{v-u, v-u}.
\]
\end{remark}

Scalar product properties:
\begin{itemize}
\item $\scalar{u, v} = \scalar{v, u}$;
\item $\scalar{(u+v), w} = \scalar{u, w} + \scalar{v, w}$;
\item $\scalar{\lambda u, v} = \lambda \scalar{u, v}$ for any scalar $\lambda$;
\item $\scalar{\vec{u}, \vec{v}} = \norm{\vec{u}} \times \norm{\vec{v}} \times \cos(\widehat{\vec{u}, \vec{v}})$.
\end{itemize}

\begin{align*}
\scalar{v-u, v-u} & = \scalar{v, v} + \scalar{u, u} - 2 \scalar{u, v} \\
& = \norm{v}^2 + \norm{u}^2 - 2 \scalar{u, v}
\end{align*}

\begin{align*}
\norm{u - v}^2 & = \norm{u}^2 + \norm{v}^2 - 2 \scalar{u,v} \\
\norm{u + v}^2 & = \norm{u}^2 + \norm{v}^2 + 2 \scalar{u,v}
\end{align*}


$u \perp v$, i.e.\ $\scalar{u, v} = 0$, exactly when $\norm{u-v}^2 = \norm{u+v}^2$.
\begin{proof}[Indeed]
$\norm{u-v}^2 = \norm{u+v}^2$
\begin{align*}
\Leftrightarrow & -2 \scalar{u, v} = 2 \scalar{u, v} \\
\Leftrightarrow & 4 \scalar{u, v} = 0 \\
\Leftrightarrow & \scalar{u, v} = 0
\end{align*}
\end{proof}


\begin{theorem}[Pythagorean theorem]
If $u \perp v$, then $\norm{u+v}^2 = \norm{u}^2 + \norm{v}^2$.
\end{theorem}
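
This follows from the expansion above: if $\scalar{u, v} = 0$, then $\norm{u+v}^2 = \norm{u}^2 + \norm{v}^2 + 2\scalar{u, v} = \norm{u}^2 + \norm{v}^2$.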

\begin{definition}[Orthogonal Projection]
Let $y = \begin{pmatrix}
y_1 \\
\vdots \\
y_n
\end{pmatrix} \in \RR[n]$ and $w$ a subspace of $\RR[n]$.
$y$ can be written as the sum of its orthogonal projection on $w$ and a vector orthogonal to $w$:
\[
y = proj^w(y) + z,
\]
where
\[
\begin{cases}
z \in w^\perp \\
proj^w(y) \in w
\end{cases}
\]
\end{definition}
This decomposition is unique: there is only one such vector $proj^w(y)$.

The scalar product between $z$ and any vector of $w$ is zero.


\begin{property}
$proj^w(y)$ is the closest vector to $y$ that belongs to $w$.
\end{property}
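
In matrix form (standard result; here $W$ denotes any matrix whose columns form a basis of $w$):
\[
proj^w(y) = W (W^\T W)^{-1} W^\T y,
\]
which is exactly the form of the OLS fitted values $\X\hat{\beta} = \X(\X^\T\X)^{-1}\X^\T\Y$.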


\begin{definition}[Matrix]
A matrix represents a linear map, that is, a linear function that transforms one vector into another.
\end{definition}

\begin{example}[Matrix application]

Let $A$ be a matrix:
\[
A = \begin{pmatrix}
a & b \\
c & d
\end{pmatrix}
\] and
\[
x = \begin{pmatrix}
x_1 \\
x_2
\end{pmatrix}
\]
Then,
\begin{align*}
Ax & = \begin{pmatrix}
a & b \\
c & d
\end{pmatrix}
\begin{pmatrix}
x_1 \\
x_2
\end{pmatrix} \\
& = \begin{pmatrix}
a x_1 + b x_2 \\
c x_1 + d x_2
\end{pmatrix}
\end{align*}

Similarly,
\begin{align*}
\begin{pmatrix}
a & b & c & d \\
e & f & g & h \\
i & j & k & l
\end{pmatrix}
\begin{pmatrix}
x_1 \\
x_2 \\
x_3 \\
x_4
\end{pmatrix}
& = \begin{pmatrix}
a x_1 + b x_2 + c x_3 + d x_4 \\
e x_1 + f x_2 + g x_3 + h x_4 \\
i x_1 + j x_2 + k x_3 + l x_4
\end{pmatrix}
\end{align*}
\end{example}


The number of columns of the matrix has to equal the dimension of the vector to which the matrix is applied.


\begin{definition}[Transpose of a Matrix]
Let $A = \begin{pmatrix}
a & b \\
c & d
\end{pmatrix}$, then $A^\T = \begin{pmatrix}
a & c \\
b & d
\end{pmatrix}$.
\end{definition}
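
Two useful properties of the transpose: $(A^\T)^\T = A$ and $(AB)^\T = B^\T A^\T$.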


\begin{example}
\begin{align*}
Y & = X \beta + \varepsilon \\
\begin{pmatrix}
y_1 \\
y_2 \\
y_3 \\
y_4
\end{pmatrix}
& = \begin{pmatrix}
1 & x_{11} & x_{12} \\
1 & x_{21} & x_{22} \\
1 & x_{31} & x_{32} \\
1 & x_{41} & x_{42}
\end{pmatrix}
\begin{pmatrix}
\beta_0 \\
\beta_1 \\
\beta_2
\end{pmatrix} +
\begin{pmatrix}
\varepsilon_1 \\
\varepsilon_2 \\
\varepsilon_3 \\
\varepsilon_4
\end{pmatrix}
\end{align*}
\end{example}
content/chapters/part1/3.tex (Normal file)
@@ -0,0 +1,25 @@
\chapter{Introduction}

\begin{definition}[Long-Term Nonprogressor (LTNP)]
A patient who remains in good health for a long time, even with a large viral load (cf.\ HIV).
\end{definition}

\begin{example}[Genotype: Qualitative or Quantitative?]
\[
\text{SNP}:
\begin{cases}
\text{AA} \\
\text{AB} \\
\text{BB}
\end{cases}
\rightarrow
\begin{pmatrix}
0 \\
1 \\
2
\end{pmatrix},
\]
thus we might consider the genotype either as a qualitative variable or as a quantitative variable.
\end{example}

When the variables are quantitative, we use regression, whereas for qualitative variables, we use an analysis of variance.
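
As a sketch of the two codings (the notation $x_i^{(\text{AB})}, x_i^{(\text{BB})}$ is ours): treating the genotype as quantitative uses a single regressor $x_i \in \{0, 1, 2\}$ counting the B alleles, whereas treating it as qualitative uses indicator variables such as
\[
x_i^{(\text{AB})} =
\begin{cases}
1 & \text{if individual $i$ has genotype AB} \\
0 & \text{otherwise}
\end{cases}
\qquad
x_i^{(\text{BB})} =
\begin{cases}
1 & \text{if individual $i$ has genotype BB} \\
0 & \text{otherwise.}
\end{cases}
\]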
figures/schemes/coordinates_systems.pdf (Normal file, BIN)
Binary file not shown.
figures/schemes/coordinates_systems.tex (Normal file, 12 lines)
@@ -0,0 +1,12 @@
\documentclass[tikz]{standalone}
\usepackage{tikz}
\usepackage{tkz-euclide}

\begin{document}
\begin{tikzpicture}
\tkzInit[xmax=5,ymax=5,xmin=-5,ymin=-5]
\tkzGrid
\tkzAxeXY
\draw[thick, latex-latex] (-1,4) -- (4,-6) node[anchor=south west] {$a$};
\end{tikzpicture}
\end{document}
main.tex (36 lines)
@@ -8,43 +8,43 @@
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 \documentclass[
 a4paper,
 fontsize=10pt,
 fleqn,
 oneside
 ]{scrbook}
 
 \usepackage{mus}
 
 \titlehead{GENIOMHE}
-\title{Multivariate Statistics}
+\title{Multivariate\newline{}Statistics}
 \author{Samuel Ortion}
 \teacher{Cyril Dalmasso}
 \cursus{GENIOMHE}
 \university{Université Paris-Saclay, Université d'Évry val d'Essonne}
 \semester{M1 - S1}
-\date{}
+\date{Fall 2023}
 
+\definecolor{myblue}{HTML}{5654fa}
+\colorlet{primary}{myblue}
 
 \input{definitions}
+\input{preamble}
 
 \hypersetup{
-pdftitle={
-Course - Multivariate Statistics
-},
-pdfauthor={
-Samuel Ortion
-},
+pdftitle={Course - Multivariate Statistics},
+pdfauthor={Samuel Ortion},
 pdfsubject={},
 pdfkeywords={},
 pdfcreator={LaTeX}
 }
 
 \addbibresource{references}
 
 \usepackage[
 type={CC},
 modifier={by-sa},
 version={4.0},
 ]{doclicense}
 
 \input{preamble}
@@ -0,0 +1,3 @@
\usepackage{pgffor}
\usetikzlibrary{math}
\usepackage{standalone}