diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..defcde4
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1 @@
+main.pdf filter=lfs diff=lfs merge=lfs -text
diff --git a/content/chapters/part1/1.tex b/content/chapters/part1/1.tex
index ceb60eb..e31f254 100644
--- a/content/chapters/part1/1.tex
+++ b/content/chapters/part1/1.tex
@@ -1,4 +1,46 @@
+\chapter{Linear Model}
+\section{Simple Linear Regression}
+
+\[
+    Y_i = \beta_0 + \beta_1 X_i + \varepsilon_i
+\]
+\[
+    \Y = \X \beta + \varepsilon.
+\]
+\[
+    \begin{pmatrix}
+        Y_1 \\
+        Y_2 \\
+        \vdots \\
+        Y_n
+    \end{pmatrix}
+    =
+    \begin{pmatrix}
+        1 & X_1 \\
+        1 & X_2 \\
+        \vdots & \vdots \\
+        1 & X_n
+    \end{pmatrix}
+    \begin{pmatrix}
+        \beta_0 \\
+        \beta_1
+    \end{pmatrix}
+    +
+    \begin{pmatrix}
+        \varepsilon_1 \\
+        \varepsilon_2 \\
+        \vdots \\
+        \varepsilon_n
+    \end{pmatrix}
+\]
+
+\paragraph*{Assumptions}
+\begin{enumerate}[label={\color{primary}{($A_\arabic*$)}}]
+    \item $\varepsilon_i$ are independent;
+    \item $\varepsilon_i$ are identically distributed;
+    \item $\varepsilon_i \sim \Norm(0, \sigma^2)$ (homoscedasticity).
+\end{enumerate}
 
 \section{Generalized Linear Model}
 
 
@@ -8,7 +50,7 @@
 with $g$ being
 \begin{itemize}
 	\item Logistic regression: $g(v) = \log \left(\frac{v}{1-v}\right)$, for instance for boolean values,
-	\item Poission regression: $g(v) = \log(v)$, for instance for discrete variables.
+	\item Poisson regression: $g(v) = \log(v)$, for instance for discrete variables.
 \end{itemize}
 
 \subsection{Penalized Regression}
@@ -18,42 +60,6 @@ In order to estimate the parameters, we can use penalties (additional terms).
 
 Lasso regression, Elastic Net, etc.
 
-\subsection{Simple Linear Model}
-
-\begin{align*}
-    \Y &= \X \beta + \varepsilon \\
-    \begin{pmatrix}
-        Y_1 \\
-        Y_2 \\
-        \vdots \\
-        Y_n
-    \end{pmatrix}
-    &= \begin{pmatrix}
-        1 & X_1 \\
-        1 & X_2 \\
-        \vdots & \vdots \\
-        1 & X_n
-    \end{pmatrix}
-    \begin{pmatrix}
-        \beta_0 \\
-        \beta_1
-    \end{pmatrix}
-    +
-    \begin{pmatrix}
-        \varepsilon_1 \\
-        \varepsilon_2 \\
-        \vdots \\
-        \varepsilon_n
-    \end{pmatrix}
-\end{align*}
-
-\subsection{Assumptions}
-
-\begin{itemize}
-    \item
-\end{itemize}
-
-
 \subsection{Statistical Analysis Workflow}
 
 \begin{enumerate}[label={\bfseries\color{primary}Step \arabic*.}]
@@ -95,9 +101,9 @@ is noted equivalently as
 
 \subsection{General Case}
 
-If $\X^\T\X$ is invertible, the OLS estimator is:
+If $\X^T\X$ is invertible, the OLS estimator is:
 \begin{equation}
-\hat{\beta} = (\X^\T\X)^{-1} \X^\T \Y
+\hat{\beta} = (\X^T\X)^{-1} \X^T \Y
 \end{equation}
 
 \subsection{Ordinary Least Squares Algorithm}
@@ -112,12 +118,12 @@ We want to minimize the distance between $\X\beta$ and $\Y$:
     \Rightarrow& \forall v \in w,\, \scalar{v, y} = \scalar{v, proj^w(y)}\\
     \Rightarrow& \forall i: \\
     & \X_i \Y = \X_i \X\hat{\beta} \qquad \text{where $\hat{\beta}$ is the estimator of $\beta$} \\
-    \Rightarrow& \X^\T \Y = \X^\T \X \hat{\beta} \\
-    \Rightarrow& {\color{gray}(\X^\T \X)^{-1}} \X^\T \Y = {\color{gray}(\X^\T \X)^{-1}} (\X^\T\X) \hat{\beta} \\
-    \Rightarrow& \hat{\beta} = (X^\T\X)^{-1} \X^\T \Y
+    \Rightarrow& \X^T \Y = \X^T \X \hat{\beta} \\
+    \Rightarrow& {\color{gray}(\X^T \X)^{-1}} \X^T \Y = {\color{gray}(\X^T \X)^{-1}} (\X^T\X) \hat{\beta} \\
+    \Rightarrow& \hat{\beta} = (\X^T\X)^{-1} \X^T \Y
 \end{align*}
 
-This formula comes from the orthogonal projection of $\Y$ on the subspace define by the explanatory variables $\X$
+This formula comes from the orthogonal projection of $\Y$ on the vector subspace defined by the explanatory variables $\X$.
 $\X \hat{\beta}$ is the closest point to $\Y$ in the subspace generated by $\X$.
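
[Reviewer note, not part of the patch] For the two-column design matrix above, the estimator $\hat{\beta} = (\X^T\X)^{-1} \X^T \Y$ reduces to the usual scalar formulas $\hat{\beta}_1 = \sum_i (X_i - \bar{X})(Y_i - \bar{Y}) / \sum_i (X_i - \bar{X})^2$ and $\hat{\beta}_0 = \bar{Y} - \hat{\beta}_1 \bar{X}$. A minimal Lua sketch for sanity-checking the formula (Lua to match scripts/matrix_product.lua; ols_fit is a hypothetical name, not something in this repo):

local function ols_fit(x, y)
    assert(#x == #y and #x > 1, "x and y must have the same length")
    local n = #x
    -- sample means of X and Y
    local mx, my = 0, 0
    for i = 1, n do
        mx = mx + x[i] / n
        my = my + y[i] / n
    end
    -- centered cross-product and centered sum of squares
    local sxy, sxx = 0, 0
    for i = 1, n do
        sxy = sxy + (x[i] - mx) * (y[i] - my)
        sxx = sxx + (x[i] - mx) ^ 2
    end
    local beta1 = sxy / sxx
    return my - beta1 * mx, beta1   -- beta0, beta1
end

-- y = 1 + 2x exactly, so the fit should recover beta0 = 1, beta1 = 2
print(ols_fit({1, 2, 3, 4}, {3, 5, 7, 9}))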
diff --git a/content/chapters/part1/2.tex b/content/chapters/part1/2.tex
index 0c78cd7..0e5f075 100644
--- a/content/chapters/part1/2.tex
+++ b/content/chapters/part1/2.tex
@@ -31,12 +31,12 @@ Let $u = \begin{pmatrix}
 We may use $\scalar{u, v}$ or $u \cdot v$ notations.
 \end{definition}
 \paragraph{Dot product properties}
-\begin{itemize}
-    \item $\scalar{u, v} = \scalar{v, u}$
-    \item $\scalar{(u+v), w} = \scalar{u, w} + \scalar{v, w}$
-    \item $\scalar{u, v}$
-    \item $\scalar{\vec{u}, \vec{v}} = \norm{\vec{u}} \times \norm{\vec{v}} \times \cos(\widehat{\vec{u}, \vec{v}})$
-\end{itemize}
+\begin{description}
+    \item[Commutative] $\scalar{u, v} = \scalar{v, u}$
+    \item[Distributive] $\scalar{(u+v), w} = \scalar{u, w} + \scalar{v, w}$
+    \item $\scalar{u, v} = \norm{u} \times \norm{v} \times \cos(\widehat{u, v})$
+    \item $\scalar{a, a} = \norm{a}^2$
+\end{description}
 
 \begin{definition}[Norm]
 Length of the vector.
@@ -99,7 +99,7 @@ Let $u = \begin{pmatrix}
 \end{align*}
 \end{proof}
 
-\begin{theorem}{Pythagorean theorem}
+\begin{theorem}[Pythagorean theorem]
 If $u \perp v$, then $\norm{u+v}^2 = \norm{u}^2 + \norm{v}^2$.
 \end{theorem}
 
@@ -110,7 +110,7 @@ Let $y = \begin{pmatrix}
     y_1 \\
     \vdots \\
     y_n
-    \end{pmatrix} \in \RR[n]$ and $w$ a subspace of $\RR[n]$
+    \end{pmatrix} \in \RR[n]$ and $w$ a subspace of $\RR[n]$.
 $\mathcal{Y}$ can be written as the orthogonal projection of $y$ on $w$:
 \[
     \mathcal{Y} = proj^w(y) + z,
@@ -178,9 +178,26 @@ The scalar product between $z$ and (?) is zero.
     x_3 \\
     x_4
     \end{pmatrix}
-    & = \begin{pmatrix}
-    a x_1 + b x_2 + c x_3 \ldots
-    \end{pmatrix}
+    =
+    \begin{pmatrix}
+        \luadirect{
+            local matrix_product = require("scripts.matrix_product")
+            local m1 = {
+                {"a", "b", "c", "d"},
+                {"e", "f", "g", "h"},
+                {"i", "j", "k", "l"}
+            }
+            local m2 = {
+                {"x_1"},
+                {"x_2"},
+                {"x_3"},
+                {"x_4"}
+            }
+            local product_matrix = matrix_product.matrix_product_repr(m1,m2)
+            local matrix_dump = matrix_product.dump_matrix(product_matrix)
+            tex.print(matrix_dump)
+        }
+    \end{pmatrix}
 \end{align*}
 \end{example}
 
@@ -190,7 +207,7 @@ The number of columns has to be the same as the dimension of the vector to which
 Let $A = \begin{pmatrix}
     a & b \\
     c & d
-    \end{pmatrix}$, then $A^\T = \begin{pmatrix}
+    \end{pmatrix}$, then $A^T = \begin{pmatrix}
     a & c \\
     b & d
     \end{pmatrix}$
diff --git a/figures/schemes/ordinary_least_squares.pdf b/figures/schemes/ordinary_least_squares.pdf
new file mode 100644
index 0000000..b0d0bc1
Binary files /dev/null and b/figures/schemes/ordinary_least_squares.pdf differ
diff --git a/figures/schemes/ordinary_least_squares.png b/figures/schemes/ordinary_least_squares.png
new file mode 100644
index 0000000..1dfc9d6
Binary files /dev/null and b/figures/schemes/ordinary_least_squares.png differ
diff --git a/figures/schemes/ordinary_least_squares.svg b/figures/schemes/ordinary_least_squares.svg
new file mode 100644
index 0000000..b60649e
--- /dev/null
+++ b/figures/schemes/ordinary_least_squares.svg
@@ -0,0 +1,988 @@
[988 lines of SVG markup for the ordinary_least_squares figure; tag content lost in extraction]
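
[Reviewer note, not part of the patch] The rewritten dot-product properties in content/chapters/part1/2.tex are easy to check numerically; a small Lua sketch (dot and norm are hypothetical helpers, not defined in this repo):

local function dot(u, v)
    assert(#u == #v, "vectors must have the same dimension")
    local s = 0
    for i = 1, #u do
        s = s + u[i] * v[i]
    end
    return s
end

-- since <a, a> = ||a||^2, the norm is the square root of the self product
local function norm(u)
    return math.sqrt(dot(u, u))
end

local u, v, w = {1, 2, 3}, {4, 5, 6}, {7, 8, 9}
assert(dot(u, v) == dot(v, u))                  -- commutative
assert(dot({u[1] + v[1], u[2] + v[2], u[3] + v[3]}, w)
       == dot(u, w) + dot(v, w))                -- distributive
print(dot(u, v), norm(u))                       -- 32      sqrt(14)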
diff --git a/figures/schemes/regression_plan_3D.pdf b/figures/schemes/regression_plan_3D.pdf
new file mode 100644
index 0000000..16f34d6
Binary files /dev/null and b/figures/schemes/regression_plan_3D.pdf differ
diff --git a/figures/schemes/regression_plan_3D.tex b/figures/schemes/regression_plan_3D.tex
new file mode 100644
index 0000000..0ef61bf
--- /dev/null
+++ b/figures/schemes/regression_plan_3D.tex
@@ -0,0 +1,26 @@
+\documentclass[tikz,border=3.14mm]{standalone}
+\usepackage{tikz-3dplot}
+\begin{document}
+\tdplotsetmaincoords{105}{-30}
+\usetikzlibrary{patterns}
+\begin{tikzpicture}[tdplot_main_coords,font=\sffamily]
+    \tdplotsetrotatedcoords{00}{30}{0}
+    \begin{scope}[tdplot_rotated_coords]
+        \begin{scope}[canvas is xy plane at z=0]
+            \draw[fill opacity=0,pattern=north west lines,pattern color=gray] (-2,-3) rectangle (2,3);
+            \draw[gray,fill=lightgray,fill opacity=0.75] (-2,-3) rectangle (2,3);
+            \draw[very thick] (-2,0) -- (2,0);
+            \path (-150:2) coordinate (H) (-1.5,0) coordinate(X);
+            \pgflowlevelsynccm
+            \draw[very thick,-stealth,gray] (0,0) -- (-30:1.5);
+        \end{scope}
+        \draw[stealth-] (H) -- ++ (-1,0,0.2) node[pos=1.3]{$H$};
+        \draw[stealth-] (X) -- ++ (0,1,0.2) node[pos=1.3]{$X$};
+        \draw[very thick,-stealth] (0,0,0) coordinate (O) -- (0,0,3) node[right]{$p$};
+    \end{scope}
+    \pgfmathsetmacro{\Radius}{1.5}
+    \draw[-stealth] (O)-- (2.5*\Radius,0,0) node[pos=1.15] {$x$};
+    \draw[-stealth] (O) -- (0,3.5*\Radius,0) node[pos=1.15] {$z$};
+    \draw[-stealth] (O) -- (0,0,2.5*\Radius) node[pos=1.05] {$y$};
+\end{tikzpicture}
+\end{document}
diff --git a/main.pdf b/main.pdf
index f922198..23ad1f1 100644
Binary files a/main.pdf and b/main.pdf differ
diff --git a/preamble.tex b/preamble.tex
index fbe8eca..d416ea0 100644
--- a/preamble.tex
+++ b/preamble.tex
@@ -3,4 +3,4 @@
 \usepackage{standalone}
 \usepackage{tikz-3dplot}
 \usepackage{tkz-euclide}
-\usepackage{mathtools}
\ No newline at end of file
+\usepackage{nicematrix}
\ No newline at end of file
diff --git a/scripts/matrix_product.lua b/scripts/matrix_product.lua
new file mode 100644
index 0000000..21ba5ed
--- /dev/null
+++ b/scripts/matrix_product.lua
@@ -0,0 +1,57 @@
+local function matrix_product_repr(m1, m2)
+    if #m1[1] ~= #m2 then -- inner matrix-dimensions must agree
+        return nil
+    end
+
+    local res = {}
+
+    for i = 1, #m1 do
+        res[i] = {}
+        for j = 1, #m2[1] do
+            res[i][j] = " "
+            for k = 1, #m2 do
+                if k ~= 1 then
+                    res[i][j] = res[i][j] .. " + "
+                end
+                res[i][j] = res[i][j] .. m1[i][k] .. " " .. m2[k][j]
+            end
+        end
+    end
+    return res
+end
+
+local function dump_matrix(matrix)
+    local repr = ""
+    for i, row in ipairs(matrix) do
+        for j, cell in ipairs(row) do
+            repr = repr .. " " .. cell
+            if j ~= #row then
+                repr = repr .. " & "
+            end
+        end
+        if i ~= #matrix then
+            repr = repr .. [[ \\ ]]
+        end
+        repr = repr .. "\n"
+    end
+    return repr
+end
+
+local m1 = {
+    {"a", "b", "c", "d"},
+    {"e", "f", "g", "h"},
+    {"i", "j", "k", "l"}
+}
+local m2 = {
+    {"x_1"},
+    {"x_2"},
+    {"x_3"},
+    {"x_4"}
+}
+
+print(dump_matrix(matrix_product_repr(m1, m2)))
+
+return {
+    matrix_product_repr = matrix_product_repr,
+    dump_matrix = dump_matrix
+}
\ No newline at end of file
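
[Reviewer note, not part of the patch] scripts/matrix_product.lua builds the product symbolically, as LaTeX strings for tex.print. For comparison, a numeric counterpart with the same dimension check and triple loop (matrix_product here is a hypothetical sketch, not part of the module):

local function matrix_product(m1, m2)
    if #m1[1] ~= #m2 then -- inner matrix dimensions must agree
        return nil
    end
    local res = {}
    for i = 1, #m1 do
        res[i] = {}
        for j = 1, #m2[1] do
            res[i][j] = 0
            for k = 1, #m2 do
                res[i][j] = res[i][j] + m1[i][k] * m2[k][j]
            end
        end
    end
    return res
end

-- (2x2) * (2x1): expect 1*5 + 2*6 = 17 and 3*5 + 4*6 = 39
local p = matrix_product({{1, 2}, {3, 4}}, {{5}, {6}})
print(p[1][1], p[2][1])   -- 17    39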