Fondamentaux : descente de gradient stochastique

2024-01-27 11:30:36 +01:00 · 2023-06-18 10:16:13 +02:00 · 2023-06-18 10:16:13 +02:00 · c835260441
commit c835260441
parent 82c244c029
3 changed files with 76 additions and 25 deletions
--- a/fondamentaux/01-regression_lineaire.py
+++ b/fondamentaux/01-regression_lineaire.py
@ -1,5 +1,6 @@
 import numpy as np
 import matplotlib.pyplot as plt
+import time

 ###############################################################################
 # 01-regression_lineaire.py
@ -21,24 +22,34 @@ import matplotlib.pyplot as plt
 # - .dot : produit de matrice
 ###

+# Init du temps
+t_debut = time.time()
+
 # Observations d'apprentisage
-x = 2*np.random.rand(100, 1) # Liste des observations x1
-y = 4 + 3*x + np.random.rand(100, 1) # Liste des cibles y
-X = np.c_[np.ones((100, 1)), x] # Matrice des observations, avec x0=1
+m = 1000 # Nombre d'observations
+bg = 1 # Bruit gaussien
+x = 2*np.random.rand(m, 1) # Liste des observations x1
+y = 4 + 3*x + bg * np.random.randn(m, 1) # Targets y: zero-mean Gaussian noise scaled by bg, so the reference intercept stays 4
+X = np.c_[np.ones((m, 1)), x] # Matrice des observations, avec x0=1
+plt.plot(x, y, 'b.')

 # Phase d'apprentissage par régression linéaire avec l'équation normale
+# - theta : vecteur paramètres du modèle
+# - theta_best : vecteur paramètres pour le coût mini
 theta_best= np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
+theta = theta_best

 # Nouvelles observations
 x_new=np.array([[0], [2]])
 X_new = np.c_[np.ones((2, 1)), x_new] # Matrice des observations, avec x0=1

 # Phase d'inférence
-y_predict=X_new.dot(theta_best)
-
-# Plot
-plt.plot(x, y, 'b.')
+y_predict=X_new.dot(theta_best) # Liste des prédictions y_predict
 plt.plot(x_new, y_predict, 'r-')
 plt.show()

-
+# Performance report: reference parameters, fitted parameters, their differences, elapsed wall-clock time
+print ("Theta th : theta0 : "+str(4)+"     ; theta1 : "+str(3))
+print ("Theta    : theta0 : "+str(round(float(theta[0, 0]),3))+" ; theta1 : "+str(round(float(theta[1, 0]),3))) # theta is 2x1: index the scalar explicitly instead of converting a 1-element array
+print ("Erreurs  : theta0 : "+str(round(float(theta[0, 0]-4),3))+" ; theta1 : "+str(round(float(theta[1, 0]-3),3)))
+print ("Temps : "+str(time.time()-t_debut)) # NOTE(review): t_debut is taken before plt.show(); if show() blocks, this includes the time the window stayed open - confirm intent
--- a/fondamentaux/02-descente_gradient.py
+++ b/fondamentaux/02-descente_gradient.py
@ -1,5 +1,6 @@
 import numpy as np
 import matplotlib.pyplot as plt
+import time

 ###############################################################################
 # 02-descente_gradient.py
@ -21,10 +22,14 @@ import matplotlib.pyplot as plt
 # - .dot : produit de matrice
 ###

+# Init du temps
+t_debut = time.time()
+
 # Observations d'apprentisage
-m = 100 # Nombre d'observations
+m = 1000 # Nombre d'observations
+bg = 1 # Bruit gaussien
 x = 2*np.random.rand(m, 1) # Liste des observations x1
-y = 4 + 3*x + np.random.rand(m, 1) # Liste des cibles y
+y = 4 + 3*x + bg * np.random.randn(m, 1) # Targets y: zero-mean Gaussian noise scaled by bg, so the reference intercept stays 4
 X = np.c_[np.ones((m, 1)), x] # Matrice des observations, avec x0=1
 plt.plot(x, y, 'b.')

@ -33,8 +38,12 @@ x_new=np.array([[0], [2]])
 X_new = np.c_[np.ones((2, 1)), x_new] # Matrice des observations, avec x0=1

 # Phase d'apprentissage par descente de gradient
-eta = 0.001 # Taux d'appentissage (valeur par défaut : 0.1)
-n = 10000 # Nombre d'itérations (valeur par défaut : 1000)
+# - theta : vecteur paramètres du modèle
+# - gradient : gradient du coût en fonction de theta
+# - eta : taux d'apprentissage
+
+eta = 0.01 # Taux d'appentissage (valeur par défaut : 0.1)
+n = 5000 # Nombre d'itérations (valeur par défaut : 1000)
 theta= np.random.randn(2,1) # Initialisation aléatoire

 for i in range(n):
@ -45,8 +54,14 @@ for i in range(n):

    # Prédiction du pas
    y_predict=X_new.dot(theta)
-    plt.plot(x_new, y_predict, 'y-')
+    plt.plot(x_new, y_predict, 'c-', linewidth=0.5)

 # Phase d'inférence (dernier pas)
 plt.plot(x_new, y_predict, 'r-')
 plt.show()
+
+# Performance report: reference parameters, fitted parameters, their differences, elapsed wall-clock time
+print ("Theta th : theta0 : "+str(4)+"     ; theta1 : "+str(3))
+print ("Theta    : theta0 : "+str(round(float(theta[0, 0]),3))+" ; theta1 : "+str(round(float(theta[1, 0]),3))) # theta is 2x1: index the scalar explicitly instead of converting a 1-element array
+print ("Erreurs  : theta0 : "+str(round(float(theta[0, 0]-4),3))+" ; theta1 : "+str(round(float(theta[1, 0]-3),3)))
+print ("Temps : "+str(time.time()-t_debut)) # NOTE(review): t_debut is taken before plt.show(); if show() blocks, this includes the time the window stayed open - confirm intent
--- a/fondamentaux/03-descente_gradient_stochastique.py
+++ b/fondamentaux/03-descente_gradient_stochastique.py
@ -1,5 +1,6 @@
 import numpy as np
 import matplotlib.pyplot as plt
+import time

 ###############################################################################
 # 03-descente_gradient_stochastique.py
@ -21,10 +22,14 @@ import matplotlib.pyplot as plt
 # - .dot : produit de matrice
 ###

+# Init du temps
+t_debut = time.time()
+
 # Observations d'apprentisage
-m = 100 # Nombre d'observations
+m = 1000 # Nombre d'observations
+bg = 1 # Bruit gaussien
 x = 2*np.random.rand(m, 1) # Liste des observations x1
-y = 4 + 3*x + np.random.rand(m, 1) # Liste des cibles y
+y = 4 + 3*x + bg * np.random.randn(m, 1) # Targets y: zero-mean Gaussian noise scaled by bg, so the reference intercept stays 4
 X = np.c_[np.ones((m, 1)), x] # Matrice des observations, avec x0=1
 plt.plot(x, y, 'b.')

@ -32,21 +37,41 @@ plt.plot(x, y, 'b.')
 x_new=np.array([[0], [2]])
 X_new = np.c_[np.ones((2, 1)), x_new] # Matrice des observations, avec x0=1

-# Phase d'apprentissage par descente de gradient
-eta = 0.001 # Taux d'appentissage
-n = 10000 # Nombre d'itération
+# Phase d'apprentissage par descente de gradient stochastique
+# - theta : vecteur paramètres du modèle
+# - gradient : gradient du coût en fonction de theta
+# - eta : taux d'apprentissage, ici dégressif selon un échéancier d'apprentissage (ech_app)
+
+# n_epoq = 50  # Nombre d'époques
+n_epoq = 2  # Nombre d'époques
+t0, t1 = 5, 50 # Hyperparamètres de l'échéancier d'apprentissage
+
+def ech_app (t): # Learning schedule: maps the step count t to the learning rate used for that update
+    return t0/ (t + t1) # Reads the module-level hyperparameters t0 and t1; with the values set above the result shrinks as t grows
+    
 theta= np.random.randn(2,1) # Initialisation aléatoire

-for i in range(n):
+for epoq in range (n_epoq):
+    for i in range(m):

-    # Calcul du pas
-    gradients = 2/m * X.T.dot(X.dot(theta) - y)
-    theta = theta - eta *  gradients
+        # Step computation: one randomly drawn observation per update
+        idx = np.random.randint(m) # Random sample index, kept separate from the loop counter i
+        xi = X[idx:idx+1] # Slicing (not indexing) keeps a 1x2 row so the matrix products below stay 2-D
+        yi = y[idx:idx+1]
+        gradients = 2/1 * xi.T.dot(xi.dot(theta) - yi) # Gradient of the squared error on this single observation
+        eta = ech_app (epoq * m  + i) # i is the loop counter again, so the schedule argument grows by one per update and eta decays steadily
+        theta = theta - eta *  gradients

-    # Prédiction du pas
-    y_predict=X_new.dot(theta)
-    plt.plot(x_new, y_predict, 'y-')
+        # Prédiction du pas
+        y_predict=X_new.dot(theta)
+        plt.plot(x_new, y_predict, 'c-', linewidth=0.5)

 # Phase d'inférence (dernier pas)
 plt.plot(x_new, y_predict, 'r-')
 plt.show()
+
+# Performance report: reference parameters, fitted parameters, their differences, elapsed wall-clock time
+print ("Theta th : theta0 : "+str(4)+"     ; theta1 : "+str(3))
+print ("Theta    : theta0 : "+str(round(float(theta[0, 0]),3))+" ; theta1 : "+str(round(float(theta[1, 0]),3))) # theta is 2x1: index the scalar explicitly instead of converting a 1-element array
+print ("Erreurs  : theta0 : "+str(round(float(theta[0, 0]-4),3))+" ; theta1 : "+str(round(float(theta[1, 0]-3),3)))
+print ("Temps : "+str(time.time()-t_debut)) # NOTE(review): t_debut is taken before plt.show(); if show() blocks, this includes the time the window stayed open - confirm intent