mirror of
https://forge.s1gm4.eu/GENIOMHE/machine-learning.git
synced 2023-09-27 18:56:25 +02:00
Merge branch 'dev'
This commit is contained in:
commit
4bf97cb0fd
1
.gitattributes
vendored
Normal file
1
.gitattributes
vendored
Normal file
@ -0,0 +1 @@
|
|||||||
|
main.pdf filter=lfs diff=lfs merge=lfs -text
|
6
Makefile
Normal file
6
Makefile
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
options=-shell-escape -file-line-error
|
||||||
|
|
||||||
|
all: main.pdf
|
||||||
|
|
||||||
|
%.pdf: %.tex
|
||||||
|
lualatex $(options) $<
|
168
content/chapters/1.tex
Normal file
168
content/chapters/1.tex
Normal file
@ -0,0 +1,168 @@
|
|||||||
|
\chapter{Unsupervised Learning}
|
||||||
|
|
||||||
|
\begin{definition}[Precision Medicine]
|
||||||
|
Design of treatment for a given patient, based on genomic data.
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\begin{definition}[Hierarchical clustering]
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
Gene expression time series: look for genes with similar expression footprint.
|
||||||
|
|
||||||
|
\paragraph{Representation of data}
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item Tables;
|
||||||
|
\item Trees / Graphs;
|
||||||
|
\item Time series...
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\includestandalone{figures/plots/genes_expression_timeseries}
|
||||||
|
\caption{Example of gene expression time series}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
\section{Distances and Similarities}
|
||||||
|
|
||||||
|
\begin{property}[Distance]
|
||||||
|
\begin{description}
|
||||||
|
\item[non-negativity] $d(i, j) \geq 0$
|
||||||
|
\item[isolation] $d(i, i) = 0$
|
||||||
|
\item[symmetry] $d(i, j) = d(j, i)$
|
||||||
|
\item[triangular inequality] $d(i, j) \leq d(i, h) + d(h, j)$
|
||||||
|
\end{description}
|
||||||
|
\end{property}
|
||||||
|
|
||||||
|
\begin{definition}[Dissimilarity]
|
||||||
|
Distance without triangular inequality.
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\begin{definition}[Similarity]
|
||||||
|
Function $s$ from $X \times X$ to $\RR_+$ such that:
|
||||||
|
\begin{enumerate}
|
||||||
|
\item $s$ is symmetric: $\forall (x, y) \in X \times X, \; s(x, y) = s(y, x)$;
|
||||||
|
\item $s$ is maximal on identical elements: $\forall (x, y) \in X \times X$ with $x \neq y$, $s(x, x) = s(y, y) > s(x, y)$.
|
||||||
|
\end{enumerate}
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\begin{exercise}
|
||||||
|
|
||||||
|
Let $d(x, y)$ be the distance, $d(x, y) \in [0, +\infty[$.
|
||||||
|
|
||||||
|
What should be the similarity measure $S(x, y) = f(d(x, y))$ that satisfies the following property:
|
||||||
|
\[
|
||||||
|
\forall (x, y) \in X \times X, \: x \neq y \: | \: S(x, x) > S(x, y)
|
||||||
|
\]
|
||||||
|
having $S(x, y) \leq M$, $S(x, y) \in ]0, M]$.
|
||||||
|
\end{exercise}
|
||||||
|
$d(x, y) \geq 0 \: \forall (x, y)$
|
||||||
|
\begin{equation}
|
||||||
|
S(x, y) = \frac{M}{d(x, y) + 1}
|
||||||
|
\label{eq:similarity-first}
|
||||||
|
\end{equation}
|
||||||
|
In \cref{eq:similarity-first}, $S(x, y)$ takes its values in $]0, M]$: it equals $M$ when $d(x, y) = 0$ and tends to $0$ as $d(x, y) \to +\infty$.
|
||||||
|
\begin{eqnarray}
|
||||||
|
\lim_{n \to \infty} \frac{M}{n + 1} = 0 && \lim_{n \to 0} \frac{M}{n + 1} = M
|
||||||
|
\end{eqnarray}
|
||||||
|
|
||||||
|
|
||||||
|
\section{Data Representation}
|
||||||
|
|
||||||
|
\paragraph{Data matrix}
|
||||||
|
|
||||||
|
|
||||||
|
\paragraph{Distance matrix}
|
||||||
|
|
||||||
|
\[
|
||||||
|
\begin{bmatrix}
|
||||||
|
0 \\
|
||||||
|
d(2, 1) & 0 \\
|
||||||
|
d(3, 1) & d(3, 2) & 0 \\
|
||||||
|
\vdots & \vdots & \ddots \\
|
||||||
|
d(n, 1) & d(n,2) & \dots & \dots & 0
|
||||||
|
\end{bmatrix}
|
||||||
|
\]
|
||||||
|
|
||||||
|
|
||||||
|
\begin{table}
|
||||||
|
\centering
|
||||||
|
\begin{tabular}{c|cc}
|
||||||
|
&$s_{1}$ & $s_{2}$ \\
|
||||||
|
\hline
|
||||||
|
$p_{1}$ & 0 & 1 \\
|
||||||
|
$p_{2}$ & 1 & 0 \\
|
||||||
|
$p_{3}$ & 3 & 2 \\
|
||||||
|
\end{tabular}
|
||||||
|
\caption{Example data matrix: 2 symptoms for 3 patients.}
|
||||||
|
\end{table}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\begin{definition}[Minkowski distance]
|
||||||
|
\[
|
||||||
|
L_p (x, y) = \left(\abs{x_1 - y_1}^p + \abs{x_2 - y_2}^p + \ldots + \abs{x_d - y_d}^p\right)^{\sfrac{1}{p}} = \left(\sum_{i=1}^d \abs{x_i - y_i}^p\right)^{\sfrac{1}{p}}
|
||||||
|
\]
|
||||||
|
where $p$ is a positive integer.
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\begin{definition}[Manhattan distance]
|
||||||
|
\[
|
||||||
|
L_1(x, y) = \sum_{i=1}^d \abs{x_i - y_i}
|
||||||
|
\]
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
\begin{definition}[Euclidean distance]
|
||||||
|
Let $A$ and $B$ be two points, with $(x_{A}, y_{A})$ and $(x_{B}, y_{B})$ their respective coordinates; the Euclidean distance between them is $d(A, B) = \sqrt{(x_{B} - x_{A})^2 + (y_{B} - y_{A})^2}$.
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
If $p = 2$, $L_2$ is the Euclidean distance:
|
||||||
|
\begin{definition}[Euclidean distance]
|
||||||
|
\[
|
||||||
|
d(x, y) = \sqrt{\abs{x_1 - y_1}^2 + \abs{x_2 - y_2}^2 + \ldots + \abs{x_d - y_d}^2}
|
||||||
|
\]
|
||||||
|
\end{definition}
|
||||||
|
|
||||||
|
We can add weights
|
||||||
|
|
||||||
|
\subsection{K-means}
|
||||||
|
|
||||||
|
The cost function is minimized:
|
||||||
|
\[
|
||||||
|
\mathrm{Cost}(C) = \sum_{i=1}^{k} \sum_{x \in C_i} d(x, m_i)^2 \quad \text{($C_i$: $i$-th cluster, $m_i$: its mean)}
|
||||||
|
\]
|
||||||
|
|
||||||
|
\begin{algorithm}[H]
|
||||||
|
Choose the number of clusters $k$.
|
||||||
|
|
||||||
|
Choose randomly $k$ means.
|
||||||
|
|
||||||
|
For each point, compute the distance between the point and each mean.
|
||||||
|
We allocate the point to the cluster represented by the closest mean.
|
||||||
|
|
||||||
|
We then set each mean to the center of its cluster, and repeat until the means no longer change.
|
||||||
|
\caption{$K$-means algorithm}
|
||||||
|
\end{algorithm}
|
||||||
|
|
||||||
|
|
||||||
|
\begin{exercise}
|
||||||
|
We have six genes:
|
||||||
|
\begin{table}[H]
|
||||||
|
\centering
|
||||||
|
\begin{tabular}{ccccccc}
|
||||||
|
\toprule
|
||||||
|
& $g_{1}$ & $g_{2}$ & $g_{3}$ & $g_{4}$ & $g_{5}$ & $g_{6}$ \\
|
||||||
|
\midrule
|
||||||
|
$\times 10^{-2}$ & 10 & 12 & 9 & 15 & 17 & 18 \\
|
||||||
|
\bottomrule
|
||||||
|
\end{tabular}
|
||||||
|
\caption{Sample values for six gene expressions.}
|
||||||
|
\end{table}
|
||||||
|
|
||||||
|
With $k=2$ and $m_{1} = 10 \cdot 10^{-2}$ and $m_{2} = 9 \cdot 10^{-2}$ the two initial randomly chosen means, run the $k$-means algorithm.
|
||||||
|
\end{exercise}
|
||||||
|
|
||||||
|
\begin{figure}
|
||||||
|
\centering
|
||||||
|
\includegraphics[scale=1]{figures/plots/kmeans.pdf}
|
||||||
|
\caption{$k$-means states at each of the 3 steps}
|
||||||
|
\end{figure}
|
@ -11,9 +11,7 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
\includechapters{}{2}
|
||||||
\includechapters{part1}{2}
|
|
||||||
|
|
||||||
|
|
||||||
% \includechapters{part2}{2}
|
% \includechapters{part2}{2}
|
||||||
|
|
||||||
|
19
content/genes_expression_timeseries.tex
Normal file
19
content/genes_expression_timeseries.tex
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
\documentclass[tikz,a4paper]{standalone}
|
||||||
|
|
||||||
|
\usepackage{tikz}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\usetikzlibrary{datavisualization}
|
||||||
|
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\datavisualization[visualize as smooth line]
|
||||||
|
data {
|
||||||
|
x, y
|
||||||
|
2, 1,
|
||||||
|
3, 2,
|
||||||
|
4, 1.5
|
||||||
|
};
|
||||||
|
\end{tikzpicture}
|
||||||
|
|
||||||
|
\end{document}
|
19
figures/euclidian_distance.tex
Normal file
19
figures/euclidian_distance.tex
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
\documentclass[tikz]{standalone}
|
||||||
|
|
||||||
|
\usepackage{tikz}
|
||||||
|
\usepackage{tkz-euclide}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\begin{tikzpicture}[scale=1]
|
||||||
|
\tkzInit[xmax=5,ymax=5]
|
||||||
|
\tkzDrawX[>=latex]
|
||||||
|
\tkzDraw[>=latex]
|
||||||
|
\tkzDefPoints()
|
||||||
|
|
||||||
|
\end{tikzpicture}
|
||||||
|
|
||||||
|
\end{document}
|
2
figures/plots/.gitattributes
vendored
Normal file
2
figures/plots/.gitattributes
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
genes_expression_timeseries.pdf filter=lfs diff=lfs merge=lfs -text
|
||||||
|
kmeans.pdf filter=lfs diff=lfs merge=lfs -text
|
BIN
figures/plots/genes_expression_timeseries.pdf
(Stored with Git LFS)
Normal file
BIN
figures/plots/genes_expression_timeseries.pdf
(Stored with Git LFS)
Normal file
Binary file not shown.
45
figures/plots/genes_expression_timeseries.tex
Normal file
45
figures/plots/genes_expression_timeseries.tex
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
\documentclass[tikz]{standalone}
|
||||||
|
\usepackage{tikz}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
\usetikzlibrary{datavisualization}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
|
||||||
|
\datavisualization data group {genes} = {
|
||||||
|
data[set=gene1] {
|
||||||
|
x, y
|
||||||
|
0, 1,
|
||||||
|
1, 2,
|
||||||
|
2, 1.5
|
||||||
|
}
|
||||||
|
data[set=gene2] {
|
||||||
|
x, y
|
||||||
|
0, 1.5,
|
||||||
|
1, 2.25,
|
||||||
|
2, 1.75
|
||||||
|
}
|
||||||
|
data[set=gene3] {
|
||||||
|
x, y
|
||||||
|
0, 0.25,
|
||||||
|
1, 0.26,
|
||||||
|
2, 0.7
|
||||||
|
}
|
||||||
|
data[set=gene4] {
|
||||||
|
x, y
|
||||||
|
0, 0.5,
|
||||||
|
1, 0.25,
|
||||||
|
2, 1
|
||||||
|
}
|
||||||
|
};
|
||||||
|
\datavisualization [
|
||||||
|
school book axes, all axes={unit length=7.5mm},
|
||||||
|
visualize as smooth line/.list={gene1, gene2, gene3, gene4},
|
||||||
|
style sheet=strong colors,
|
||||||
|
x axis={label=$t$},
|
||||||
|
y axis={label={expression}}]
|
||||||
|
data group {genes};
|
||||||
|
|
||||||
|
\end{tikzpicture}
|
||||||
|
|
||||||
|
\end{document}
|
BIN
figures/plots/kmeans.pdf
(Stored with Git LFS)
Normal file
BIN
figures/plots/kmeans.pdf
(Stored with Git LFS)
Normal file
Binary file not shown.
54
figures/plots/kmeans.tex
Normal file
54
figures/plots/kmeans.tex
Normal file
@ -0,0 +1,54 @@
|
|||||||
|
\documentclass[margin=0.5cm]{standalone}
|
||||||
|
\usepackage{tikz}
|
||||||
|
\usepackage{pyluatex}
|
||||||
|
\usepackage{pgf}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
\begin{python}
|
||||||
|
# %%
|
||||||
|
import io
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sklearn.cluster import KMeans
|
||||||
|
|
||||||
|
# %%
|
||||||
|
|
||||||
|
|
||||||
|
data = """g1,10
|
||||||
|
g2,12
|
||||||
|
g3,9
|
||||||
|
g4,15
|
||||||
|
g5,17
|
||||||
|
g6,18"""
|
||||||
|
|
||||||
|
points =[int(row.split(",")[1]) for row in data.split("\n")]
|
||||||
|
X = np.array([[point] for point in points])
|
||||||
|
initial_means = [[10], [9]]
|
||||||
|
points
|
||||||
|
|
||||||
|
# %%
|
||||||
|
kmeans_values = []
|
||||||
|
for i in range(1,4):
|
||||||
|
kmeans = KMeans(n_clusters=2, random_state=42, max_iter=i, init=initial_means, n_init=1)
|
||||||
|
kmeans.fit(X)
|
||||||
|
kmeans_values.append(kmeans.cluster_centers_)
|
||||||
|
|
||||||
|
|
||||||
|
# %%
|
||||||
|
fig, axs = plt.subplots(len(kmeans_values), 1, sharex=True)
|
||||||
|
for i, centroids in enumerate(kmeans_values):
|
||||||
|
ax = axs[i]
|
||||||
|
ax.scatter(centroids, [i]*len(centroids), marker='x')
|
||||||
|
ax.scatter(points, [i]*len(points), s=2, color="black")
|
||||||
|
ax.axis('off')
|
||||||
|
|
||||||
|
|
||||||
|
with io.StringIO() as file:
|
||||||
|
fig.savefig(file, format="pgf", bbox_inches="tight", pad_inches=0.1)
|
||||||
|
print(file.getvalue())
|
||||||
|
\end{python}
|
||||||
|
\begin{tikzpicture}
|
||||||
|
|
||||||
|
\end{tikzpicture}
|
||||||
|
\end{document}
|
69
main.tex
69
main.tex
@ -1,46 +1,69 @@
|
|||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
% Course of None
|
% Course on "Data-mining and Machine Learning" - GENIOMHE - M1-S1
|
||||||
%
|
%
|
||||||
% Author: Samuel ORTION <samuel@ortion.fr>
|
% Author: Samuel Ortion <samuel@ortion.fr>
|
||||||
% Version: 0.0.1
|
% Version: 0.1.0
|
||||||
% Date: 2023
|
% Date: 2023
|
||||||
% Licence: CC-By-SA 4.0+ International
|
% Licence: CC-By-SA 4.0+ International
|
||||||
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
||||||
|
|
||||||
\documentclass[
|
\documentclass[twoside=false,fontsize=10pt,fleqn]{scrbook}
|
||||||
a4paper,
|
\usepackage{mus}
|
||||||
10pt,
|
\usepackage{standalone}
|
||||||
fleqn,
|
\titlehead{GENIOMHE}
|
||||||
oneside
|
\title{Data-mining and\newline{}Machine Learning}
|
||||||
]{talpa}
|
\subtitle{}
|
||||||
|
\author{Samuel Ortion}
|
||||||
|
\date{Fall 2023}
|
||||||
|
\teacher{Farida Zerhaoui}
|
||||||
|
\cursus{GENIOMHE}
|
||||||
|
\university{Université d'Évry val d'Essonne -- Université Paris-Saclay}
|
||||||
|
\semester{M1 - S1}
|
||||||
|
|
||||||
\input{colors.tex}
|
\input{definitions}
|
||||||
\input{meta.tex}
|
\input{preamble}
|
||||||
\input{definitions.tex}
|
|
||||||
|
|
||||||
\hypersetup{
|
\hypersetup{
|
||||||
pdftitle={
|
pdftitle={Course - Data-mining and Machine Learning},
|
||||||
Course - None
|
pdfauthor={Samuel Ortion},
|
||||||
},
|
|
||||||
pdfauthor={
|
|
||||||
Samuel Ortion
|
|
||||||
},
|
|
||||||
pdfsubject={},
|
pdfsubject={},
|
||||||
pdfkeywords={},
|
pdfkeywords={GENIOMHE, Master, bioinformatics, machine learning, statistics, data},
|
||||||
pdfcreator={LaTeX}
|
pdfcreator={LaTeX}
|
||||||
}
|
}
|
||||||
|
|
||||||
% \addbibressource{bibliography.bib}
|
\usepackage{ccicons}
|
||||||
|
\usepackage[
|
||||||
|
type={CC},
|
||||||
|
modifier={by-sa},
|
||||||
|
version={4.0},
|
||||||
|
]{doclicense}
|
||||||
|
|
||||||
|
\addbibresource{references.bib}
|
||||||
\makeindex
|
\makeindex
|
||||||
|
|
||||||
\begin{document}
|
\begin{document}
|
||||||
|
|
||||||
|
\setkomafont{fullpagetitle}{\fontsize{1.5cm}{3em}\fontseries{b}\selectfont}
|
||||||
|
\maketitlefullpage
|
||||||
|
|
||||||
|
{
|
||||||
|
\hypersetup{
|
||||||
|
linkcolor=black
|
||||||
|
}
|
||||||
\tableofcontents
|
\tableofcontents
|
||||||
|
}
|
||||||
|
|
||||||
% \input{content/introduction.tex}
|
\doclicenseThis%
|
||||||
|
|
||||||
\input{content/chapters/include.tex}
|
% \input{content/introduction}
|
||||||
|
|
||||||
% \input{content/conclusion.tex}
|
\input{content/chapters/include}
|
||||||
|
|
||||||
|
% \input{content/conclusion}
|
||||||
|
|
||||||
|
\nocite{*}
|
||||||
|
|
||||||
|
\printbibliography%
|
||||||
|
% \printglossary%
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
109
notebooks/kmeans1d.ipynb
Normal file
109
notebooks/kmeans1d.ipynb
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import io\n",
|
||||||
|
"\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"from sklearn.cluster import KMeans"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 64,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[10, 12, 9, 15, 17, 18]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 64,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"data = \"\"\"g1,10\n",
|
||||||
|
"g2,12\n",
|
||||||
|
"g3,9\n",
|
||||||
|
"g4,15\n",
|
||||||
|
"g5,17\n",
|
||||||
|
"g6,18\"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
"points =[int(row.split(\",\")[1]) for row in data.split(\"\\n\")]\n",
|
||||||
|
"X = np.array([[point] for point in points])\n",
|
||||||
|
"initial_means = [[10], [9]]\n",
|
||||||
|
"points"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 65,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"kmeans_values = []\n",
|
||||||
|
"for i in range(1,4): \n",
|
||||||
|
" kmeans = KMeans(n_clusters=2, random_state=42, max_iter=i, init=initial_means, n_init=1)\n",
|
||||||
|
" kmeans.fit(X)\n",
|
||||||
|
" kmeans_values.append(kmeans.cluster_centers_)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 68,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAgMAAAGFCAYAAABg2vAPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAMpklEQVR4nO3dMU9Va77H8f+54cgRjBhsRpPDJDsZKS0GM+z2vgUL7Ow8mel4JSbTGC2spZj7Fm41kRAtKGUSEr0JTCMBIm6QnXALMxxRi3N0b9Zi/T6fbkPifh6f9Tz5Zu219YeTk5OTAgBi/VfTAwAAmiUGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgXCMxsH94XNt7g6/+bntvUPuHx+c8IqDtnBswPuceA/uHx3X/6VotPV6trd2zG3trd1BLj1fr/tM1Gxs45dyA8Tr3GDg4Gtbbdx/qzc77uvfk1429tTuoe09W683O+3r77kMdHA3Pe2hASzk3YLzOPQZuzFyuZw8Wa252qt7svK+lR/+s//nftVp69M96s/O+5man6tmDxboxc/m8h/ZdhsNhbWxs1HDoMGoD69Etn58b956s1svXO6chcF7nhuuqXbqyHm2YRyPPDNy89nFj/zwzWWt//1vd/e+/1Nrf/1Y/z0zWsweLdfPaxQuBfr9f8/Pz1e/3L/yFedFZj276z7nxnyC4++j5mRAY97nhumqXrqxHW+bR2LcJbl67XMuLV+vDv/9VVVUf/v2vWl68euFCoKpqc3OzXrx4UVVVL168qM3NzYZHlM16dNfNa5fr4dLtMz97uHT7XM4N11W7dGU92jKPxmJga3dQD1f369If/lRVVZf+8Kd6uLr/xcNBF0Gv16uFhYWqqrpz5071er2GR5TNenTX1u6gllfWz/xseWX9XM4N11W7dGU92jKPH05OTk7O+00/fejn55nJWl68Wg9X9+v/9o7O7ZbfqA2Hw9rc3Kxer1cTExNNDyee9eieT8+Nudmperh0u5ZX1s/9owLXVXt0ZT3aMI9zj4HtvY9fA/p8A3++0Vd+uXgPEQLj4dyA8Tr3jwmmJyfq+pVLX5T8pw8HXb9yqaYnL27lAaPl3IDxauRjgv3D4zo4Gn614Lf3BjU9OVFXf/rxvIcFtJhzA8ankRgAANrDf1QEAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhB
MDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhBMDABBODABAODEAAOHEAACEEwMAEE4MAEA4MQAA4cQAAIQTAwAQTgwAQDgxAADhxAAAhOtsDOwfHtf23uCrv9veG9T+4fE5jwjg65xXNK2TMbB/eFz3n67V0uPV2to9u8G2dge19Hi17j9ds8GAxjmvaINOxsDB0bDevvtQb3be170nv26wrd1B3XuyWm923tfbdx/q4GjY8EiBdM4r2qDxGBgOh7WxsVHD4egu9Bszl+vZg8Wam5063WAvX++cbqy52al69mCxbsxcHtl7jmMefDvrwTh05bzqiq7s8zbMo9EYGA6H1e/3a35+vvr9/kj/Im5eO7vB7j56fmZj3bw22hAY1zz4/awH49CV86orurLP2zKPH05OTk4aeeeq2tjYqPn5+dPXr169qlu3bo30PV6+3qm7j56fvv7HX/v15z/OjvQ9zmMe/HbWg3HoynnVFV3Z522ZR6N3Bnq9Xi0sLFRV1Z07d6rX6430z9/aHdTyyvqZny2vrH/xkM73Gvc8+H2sB+PQlfOqK7qyz9syj0bvDFR9vEWyublZvV6vJiYmRvbnfvrwzdzsVD1cul3LK+tj/ahgHPPg21gPxqEr51VXdGWft2EejcfAOGzvffw6zucb6fMNt/KLh3KAZjmvaIPGv00wDtOTE3X9yqUvivrTh3SuX7lU05MXtySBbnBe0QadvDNQ9fEf8jg4Gn61pLf3BjU9OVFXf/qxgZEBnOW8ommdjQEA4Lfp5McEAMBvJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGKgQfuHx7W9N/jq77b3BrV/eHzOIwIYL+deO4mBhuwfHtf9p2u19Hi1tnbPboyt3UEtPV6t+0/XbAygM5x77SUGGnJwNKy37z7Um533de/Jrxtja3dQ956s1pud9/X23Yc6OBo2PFKA0XDutVfjMTAcDmtjY6OGw4u9+L93HjdmLtezB4s1Nzt1ujFevt453RBzs1P17MFi3Zi5POaRd1NXrivaxXX1fUZ97nVlPdowj0ZjYDgcVr/fr/n5+er3+xd2Qb91Hjevnd0Ydx89P7Mhbl4TAt+iK9cV7eK6Go1RnXtdWY+2zOOHk5OTk0beuao2NjZqfn7+9PWrV6/q1q1bTQ3nm33vPF6+3qm7j56fvv7HX/v15z/OjnSMSbpyXdEurqvR+t5zryvr0ZZ5NHpnoNfr1cLCQlVV3blzp3q9XpPD+WbfM4+t3UEtr6yf+dnyyvoXD9fw23XluqJdXFejM4pzryvr0ZZ5NHpnoOrjLZLNzc3q9Xo1MTHR5FC+y7
fM49OHZuZmp+rh0u1aXln3UcEIdOW6ol1cV99vlOdeV9ajDfNoPAZSbe99/BrN5xvg842y8ouHCIFucO61V+PfJkg1PTlR169c+qKEP3245vqVSzU9eXFrF+BTzr32cmegQfuHx3VwNPxqAW/vDWp6cqKu/vRjAyMDGA/nXjuJAQAI52MCAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAIJwYAIJwYAIBwYgAAwokBAAgnBgAgnBgAgHBiAADCiQEACCcGACCcGACAcGIAAMKJAQAI9/8lWKkDA/pOhQAAAABJRU5ErkJggg==",
|
||||||
|
"text/plain": [
|
||||||
|
"<Figure size 640x480 with 3 Axes>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "display_data"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"fig, axs = plt.subplots(len(kmeans_values), 1, sharex=True)\n",
|
||||||
|
"for i, centroids in enumerate(kmeans_values):\n",
|
||||||
|
" ax = axs[i]\n",
|
||||||
|
" ax.scatter(centroids, [i]*len(centroids), marker='x')\n",
|
||||||
|
" ax.scatter(points, [i]*len(points), s=2, color=\"black\")\n",
|
||||||
|
" ax.axis('off')"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "geniomhe-ml",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.11.5"
|
||||||
|
},
|
||||||
|
"orig_nbformat": 4
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
@ -0,0 +1,2 @@
|
|||||||
|
\usepackage{mus-learn}
|
||||||
|
\usepackage{xfrac}
|
25
references.bib
Normal file
25
references.bib
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
|
||||||
|
@book{geron_hands-machine_2019,
|
||||||
|
edition = {2},
|
||||||
|
title = {Hands-On Machine Learning with Scikit-Learn, Keras, and {TensorFlow}},
|
||||||
|
abstract = {Through a recent series of breakthroughs, deep learning has boosted the entire field of machine learning. Now, even programmers who know close to nothing about this technology can use simple, … - Selection from Hands-On Machine Learning with Scikit-Learn, Keras, and {TensorFlow}, 2nd Edition [Book]},
|
||||||
|
publisher = {O'{REILLY}},
|
||||||
|
author = {Géron, Aurélien},
|
||||||
|
date = {2019},
|
||||||
|
langid = {english},
|
||||||
|
note = {{ISBN}: 9781098125974}
|
||||||
|
}
|
||||||
|
|
||||||
|
@collection{witten_data_2011,
|
||||||
|
location = {Boston},
|
||||||
|
edition = {4},
|
||||||
|
title = {Data Mining - Practical Machine Learning Tools and Techniques},
|
||||||
|
isbn = {978-0-12-374856-0},
|
||||||
|
series = {The Morgan Kaufmann Series in Data Management Systems},
|
||||||
|
publisher = {Morgan Kaufmann},
|
||||||
|
editor = {Witten, Ian H. and Frank, Eibe and Hall, Mark A.},
|
||||||
|
urldate = {2023-06-16},
|
||||||
|
date = {2011-01-01},
|
||||||
|
langid = {english},
|
||||||
|
doi = {10.1016/B978-0-12-374856-0.00018-3}
|
||||||
|
}
|
2
requirements.txt
Normal file
2
requirements.txt
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
scikit-learn
|
||||||
|
numpy
|
Loading…
Reference in New Issue
Block a user