# mes-scripts-de-ml/fondamentaux/03-descente_gradient_stochastique.py

import numpy as np
import matplotlib.pyplot as plt
import time

###############################################################################
# 03-descente_gradient_stochastique.py
# @title: Apprentissage par descente de gradient stochastique
# @project: Mes scripts de ML
# @lang: fr
# @authors: Philippe Roy <philippe.roy@ac-grenoble.fr>
# @copyright: Copyright (C) 2023 Philippe Roy
# @license: GNU GPL
###############################################################################

###
# Commandes NumPy :
# - np.array : créer un tableau à partir d'une liste de listes
# - np.c_ : concatène les colonnes des tableaux
# - np.ones : créer un tableau de 1
# - np.linalg.inv : inversion de matrice
# - .T : transposée de matrice
# - .dot : produit matriciel
###

# Start the wall-clock timer used for the run-time report at the end of the script
t_debut = time.time()

# Training observations
m = 1000 # Number of observations
bg = 1 # Gaussian noise scale (standard deviation)
x = 2*np.random.rand(m, 1) # Feature x1, drawn uniformly from [0, 2)
# Targets follow y = 4 + 3*x plus noise. The noise is drawn with randn
# (zero-mean standard normal): rand is uniform on [0, 1) with mean 0.5, which
# would shift the effective intercept to 4.5 and is not Gaussian noise.
y = 4 + 3*x + bg * np.random.randn(m, 1)
X = np.c_[np.ones((m, 1)), x] # Observation matrix, with the bias column x0=1
plt.plot(x, y, 'b.')

# New observations (the two ends of the x range), used to draw the fitted line
x_new=np.array([[0], [2]])
X_new = np.c_[np.ones((2, 1)), x_new] # Observation matrix, with the bias column x0=1

# Phase d'apprentissage par descente de gradient stochastique
# - theta : vecteur des paramètres du modèle
# - gradients : gradient du coût en fonction de theta
# - eta : taux d'apprentissage, ici dégressif selon l'échéancier d'apprentissage (ech_app)

# Training length (alternative setting: n_epoq = 50)
n_epoq = 2  # Number of epochs

# Learning-schedule hyperparameters
t0 = 5  # Numerator of the schedule
t1 = 50  # Offset added to the step in the denominator


def ech_app(t):
    """Return the learning rate for step ``t``, computed as ``t0 / (t + t1)``.

    ``t0`` and ``t1`` are read from module level at call time.
    """
    denominateur = t + t1
    return t0 / denominateur

theta = np.random.randn(2, 1) # Random initialisation of the parameter vector

# Defined up front so the final plot still works if no training step runs
y_predict = X_new.dot(theta)

for epoq in range(n_epoq):
    for i in range(m):

        # Step computation: pick one observation at random. The random index
        # has its own name so it does not overwrite the loop counter i, which
        # the learning schedule needs below.
        idx_alea = np.random.randint(m)
        xi = X[idx_alea:idx_alea+1]
        yi = y[idx_alea:idx_alea+1]
        # Gradient of the squared error on a single observation (batch size 1)
        gradients = 2 * xi.T.dot(xi.dot(theta) - yi)
        # epoq * m + i is the global step count, so eta shrinks at every step
        eta = ech_app(epoq * m + i)
        theta = theta - eta * gradients

        # Prediction after this step, drawn as a thin cyan line
        y_predict = X_new.dot(theta)
        plt.plot(x_new, y_predict, 'c-', linewidth=0.5)

# Inference phase (last step): the final model is drawn in red
plt.plot(x_new, y_predict, 'r-')

# Read the elapsed time before plt.show(): with an interactive backend show()
# blocks until the window is closed, which would be counted as run time.
duree = time.time() - t_debut
plt.show()

# Performance
# Index down to scalars explicitly: float() on a 1-element array with ndim > 0
# is deprecated since NumPy 1.25.
theta0 = float(theta[0, 0])
theta1 = float(theta[1, 0])
print("Theta th : theta0 : 4     ; theta1 : 3")
print(f"Theta    : theta0 : {round(theta0, 3)} ; theta1 : {round(theta1, 3)}")
print(f"Erreurs  : theta0 : {round(theta0 - 4, 3)} ; theta1 : {round(theta1 - 3, 3)}")
print("Temps : " + str(duree))