import plotly.express as px
import torch
import random
import plotly.graph_objects as go
from ipywidgets import interact, interactive, fixed, interact_manual
import math
from IPython.display import display, HTML
# Inject a CSS rule so the notebook's `.container` element takes 90% of the page width.
display(HTML("<style>.container { width:90% !important; }</style>"))

# Full dataset: a linear trend with a sine wave superimposed
X = torch.arange(start=-5, end=5, step=0.05)
Y = X + 3 * torch.sin(5 * X)
fig = px.scatter(y=Y, x=X)
fig.show()

# Picking the test points at random is too easy a test...
X = torch.arange(-5, 5, 0.05)
Y = X + 3 * torch.sin(5 * X)
# Draw 90% of the indices WITHOUT replacement: random.choices samples with
# replacement, so duplicated indices would leave far fewer than 90% of the
# points in the train set.
index = torch.IntTensor(random.sample(range(len(X)), k=int(len(X) / 10.0 * 9.0)))
# Mask convention: True = test point, False = train point.
index_bool = torch.ones(X.shape[0], dtype=bool)
index_bool[index] = False
fig = go.Figure(
    data=[
        go.Scatter(x=X[index_bool], y=Y[index_bool], mode='markers', marker_color="red", name="test"),
        go.Scatter(x=X[~index_bool], y=Y[~index_bool], mode='markers', marker_color="blue", name="train")
    ]
)
fig.show()

# An ordered ("temporal") split is a better check of how well the model generalises:
# the first 80% of the points are used for training, the remainder for testing.
X = torch.arange(start=-5, end=5, step=0.05)
Y = X + 3 * torch.sin(5 * X)
index = torch.arange(int(len(X) / 10.0 * 8.0), dtype=torch.int32)
# Mask convention: True = test point, False = train point.
index_bool = torch.ones(len(X), dtype=bool)
index_bool[index] = False
fig = go.Figure()
fig.add_trace(
    go.Scatter(x=X[index_bool], y=Y[index_bool], mode='markers', marker=dict(color="red"), name="test")
)
fig.add_trace(
    go.Scatter(x=X[~index_bool], y=Y[~index_bool], mode='markers', marker=dict(color="blue"), name="train")
)
fig.show()

# ROC-style area plot where TPR equals FPR (the diagonal)
fig = px.area(x=[0, 1], y=[0, 1]).update_layout(
    title="Classificateur aléatoire", xaxis_title='FPR', yaxis_title='TPR')
fig.show()

# ROC-style area plot where TPR is 1 for every FPR
fig = px.area(x=[0, 1], y=[1, 1]).update_layout(
    title="Classificateur parfait", xaxis_title='FPR', yaxis_title='TPR')
fig.show()

# ROC-style area plot for hand-picked intermediate values
fig = px.area(
    x=[0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
    y=[0.1, 0.32, 0.40, 0.45, 0.7, 0.81, 0.89, 0.93, 0.94, 0.98, 1.0],
).update_layout(title="Cas général", xaxis_title='FPR', yaxis_title='TPR')
fig.show()

# Overfitting illustration: the "model" line is drawn through the noisy samples themselves.
X = torch.arange(start=-2, end=2, step=0.1)
Y = X * X
X_test = torch.tensor([-1, -0.8, -0.4, 0.1, 0.2, 0.3, 0.7, 1.1])
Y_test = X_test * X_test + (torch.rand(len(X_test)) - 0.5) / 2  # add noise to the quadratic
fig = go.Figure()
fig.add_trace(go.Scatter(x=X_test, y=Y_test, mode="markers", marker=dict(color="red"), name="dataset"))
fig.add_trace(go.Scatter(x=X_test, y=Y_test, mode="lines", line=dict(color="blue"), name="Modèle"))
fig.update_layout(title="Overfit pour une régression")
fig.show()

# Same noisy samples, but the "model" line is the noise-free parabola y = x^2.
X = torch.arange(start=-2, end=2, step=0.1)
Y = X * X
X_test = torch.tensor([-1, -0.8, -0.4, 0.1, 0.2, 0.3, 0.7, 1.1])
Y_test = X_test * X_test + (torch.rand(len(X_test)) - 0.5) / 2  # add noise to the quadratic
fig = go.Figure()
fig.add_trace(go.Scatter(x=X_test, y=Y_test, mode="markers", marker=dict(color="red"), name="dataset"))
fig.add_trace(go.Scatter(x=X, y=Y, mode="lines", line=dict(color="blue"), name="Modèle"))
fig.update_layout(title="Modèle plus généralisable")
fig.show()

# Very steep sigmoid (slope factor 100) plotted against two clusters of labelled points.
X = torch.arange(-1, 1, 0.01)
Y = torch.sigmoid(100*X)
X_test = [-0.9, -0.8, -0.75, -0.7, -0.68, -0.61, -0.59, -0.54, 0.53, 0.60, 0.69, 0.71, 0.74, 0.76, 0.79, 0.84]
Y_test = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
fig = go.Figure([
    go.Scatter(x=X_test, y=Y_test, mode="markers", marker_color="red", name="dataset"),
    go.Scatter(x=X, y=Y, mode="lines", line_color="blue", name="Modèle")
])
# Highlight the band [-0.5, 0.5], which contains no sample of X_test.
# Bounds are passed as numbers (not strings) so they are compared numerically
# when the annotation is positioned on this numeric axis.
fig.add_vrect(x0=-0.5, x1=0.5,
              annotation_text="Pas de données", annotation_position="top left",
              fillcolor="red", opacity=0.25, line_width=0)
fig.update_layout(title="Modèle de classification trop confiant dans une zone sans donnée")
fig.show()

# Gentler sigmoid (slope factor 10) plotted against the same two clusters of labelled points.
X = torch.arange(-1, 1, 0.01)
Y = torch.sigmoid(10*X)
X_test = [-0.9, -0.8, -0.75, -0.7, -0.68, -0.61, -0.59, -0.54, 0.53, 0.60, 0.69, 0.71, 0.74, 0.76, 0.79, 0.84]
Y_test = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]
fig = go.Figure([
    go.Scatter(x=X_test, y=Y_test, mode="markers", marker_color="red", name="dataset"),
    go.Scatter(x=X, y=Y, mode="lines", line_color="blue", name="Modèle")
])
# Highlight the band [-0.5, 0.5], which contains no sample of X_test.
# Bounds are passed as numbers (not strings) so they are compared numerically
# when the annotation is positioned on this numeric axis.
fig.add_vrect(x0=-0.5, x1=0.5,
              annotation_text="Pas de données", annotation_position="top left",
              fillcolor="red", opacity=0.25, line_width=0)
fig.update_layout(title="Modèle de classification surement plus généralisable")
fig.show()

# Underfitting illustration: a constant "model" (y = 0.5) against noisy quadratic samples.
X = torch.arange(start=-2, end=2, step=0.1)
Y = 0.5 + X * 0
X_test = torch.tensor([-1, -0.8, -0.4, 0.1, 0.2, 0.3, 0.7, 1.1])
Y_test = X_test * X_test + (torch.rand(len(X_test)) - 0.5) / 2  # add noise to the quadratic
fig = go.Figure()
fig.add_trace(go.Scatter(x=X_test, y=Y_test, mode="markers", marker=dict(color="red"), name="dataset"))
fig.add_trace(go.Scatter(x=X, y=Y, mode="lines", line=dict(color="blue"), name="Modèle"))
fig.update_layout(title="Modèle linéaire pour des données quadratiques = sous-apprentissage")
fig.show()

def show_sgd(lr=0.095, steps=10, start=(-1.5, -1.5)):
    """Plot plain gradient descent on the bowl f(x, y) = 10 * x**2 + y**2.

    The surface is drawn over [-2, 2) x [-2, 2) and the descent trajectory
    is overlaid as a 3D scatter trace.

    Args:
        lr: Learning rate applied to both coordinates.
        steps: Number of descent updates to draw.
        start: Initial (x, y) point of the trajectory.
    """
    X = torch.arange(-2, 2, 0.1)
    Y = torch.arange(-2, 2, 0.1)
    # Broadcasting builds the whole grid at once: Z[i, j] = f(X[i], Y[j]).
    Z = 10 * X[:, None] * X[:, None] + Y[None, :] * Y[None, :]

    x, y = start
    X_descent, Y_descent, Z_descent = [x], [y], [10 * x * x + y * y]
    for _ in range(steps):
        # The gradient of f is (2 * 10 * x, 2 * y).
        x = x - lr * 2 * 10 * x
        y = y - lr * 2 * y
        X_descent.append(x)
        Y_descent.append(y)
        Z_descent.append(10 * x * x + y * y)

    fig = go.Figure(data=[
        # Z is indexed [x, y]; it is transposed before being handed to Surface.
        go.Surface(z=Z.T, x=X, y=Y)
    ])
    fig.update_traces(contours_z=dict(show=True, usecolormap=True,
                                      highlightcolor="limegreen", project_z=True))
    fig.add_trace(
        go.Scatter3d(x=X_descent, y=Y_descent, z=Z_descent, marker_color="red"))
    fig.show()
show_sgd()

def show_local_minimum(lr=0.2, steps=10, start=-2):
    """Plot gradient descent on a 1D quartic polynomial.

    The function is f(t) = 2 - t - 2 t^2 + 0.16 t^3 + 0.5 t^4, drawn on
    [-2, 2), with the successive iterates overlaid. Judging by the function
    name, the demo is meant to show the iterates settling in a local minimum.

    Args:
        lr: Learning rate.
        steps: Number of descent updates to draw.
        start: Initial value of t.
    """
    def poly(t):
        # Works on Python numbers and on tensors alike.
        return 2.0 - 1 * t - 2 * t ** 2 + 0.16 * t ** 3 + 0.5 * t**4

    def poly_grad(t):
        # Derivative of poly with respect to t.
        return -1 - 4 * t + 0.16 * 3 * t ** 2 + 2 * t ** 3

    x = torch.arange(-2, 2, 0.01)
    t_descent = [start]
    y_descent = [poly(start)]
    for _ in range(steps):
        t = t_descent[-1]
        t = t - lr * poly_grad(t)
        t_descent.append(t)
        y_descent.append(poly(t))
    fig = go.Figure([
            go.Scatter(x=x, y=poly(x), name="fonction"),
            go.Scatter(x=t_descent, y=y_descent, mode="lines+markers", marker=dict(size=10, color="red"), name="valeur")
    ])
    fig.show()
show_local_minimum()

def show_momentum(lr=0.095, rho=0.5, steps=10, start=(-1.5, -1.5)):
    """Plot gradient descent with momentum on f(x, y) = 10 * x**2 + y**2.

    The velocity is updated as v = rho * v + grad and the position as
    p = p - lr * v, independently for each coordinate.

    Args:
        lr: Learning rate applied to the velocity.
        rho: Momentum coefficient (fraction of the previous velocity kept).
        steps: Number of updates to draw.
        start: Initial (x, y) point of the trajectory.
    """
    X = torch.arange(-2, 2, 0.1)
    Y = torch.arange(-2, 2, 0.1)
    # Broadcasting builds the whole grid at once: Z[i, j] = f(X[i], Y[j]).
    Z = 10 * X[:, None] * X[:, None] + Y[None, :] * Y[None, :]

    x, y = start
    X_descent, Y_descent, Z_descent = [x], [y], [10 * x * x + y * y]
    v_x, v_y = 0, 0
    for _ in range(steps):
        # The gradient of f is (2 * 10 * x, 2 * y).
        v_x = rho * v_x + 2 * 10 * x
        v_y = rho * v_y + 2 * y
        x = x - lr * v_x
        y = y - lr * v_y
        X_descent.append(x)
        Y_descent.append(y)
        Z_descent.append(10 * x * x + y * y)

    fig = go.Figure(data=[
        # Z is indexed [x, y]; it is transposed before being handed to Surface.
        go.Surface(z=Z.T, x=X, y=Y)
    ])
    fig.update_traces(contours_z=dict(show=True, usecolormap=True,
                                      highlightcolor="limegreen", project_z=True))
    fig.add_trace(
        go.Scatter3d(x=X_descent, y=Y_descent, z=Z_descent, marker_color="red"))
    fig.show()
show_momentum()

# NOTE: the function name is misspelled ("mimumum"); it is kept as-is so
# existing calls keep working.
def show_momentum_local_mimumum(lr=0.2, rho=0.5, steps=10, start=-2):
    """Plot gradient descent with momentum on a 1D quartic polynomial.

    The function is f(t) = 2 - t - 2 t^2 + 0.16 t^3 + 0.5 t^4, drawn on
    [-2, 2), with the successive iterates overlaid. The velocity is updated
    as v = rho * v + grad and the position as t = t - lr * v.

    Args:
        lr: Learning rate applied to the velocity.
        rho: Momentum coefficient (fraction of the previous velocity kept).
        steps: Number of updates to draw.
        start: Initial value of t.
    """
    def poly(t):
        # Works on Python numbers and on tensors alike.
        return 2.0 - 1 * t - 2 * t ** 2 + 0.16 * t ** 3 + 0.5 * t**4

    def poly_grad(t):
        # Derivative of poly with respect to t.
        return -1 - 4 * t + 0.16 * 3 * t ** 2 + 2 * t ** 3

    x = torch.arange(-2, 2, 0.01)
    t_descent = [start]
    y_descent = [poly(start)]
    v = 0
    for _ in range(steps):
        t = t_descent[-1]
        v = rho * v + poly_grad(t)
        t = t - lr * v
        t_descent.append(t)
        y_descent.append(poly(t))
    fig = go.Figure([
            go.Scatter(x=x, y=poly(x), name="fonction"),
            go.Scatter(x=t_descent, y=y_descent, mode="lines+markers", marker=dict(size=10, color="red"), name="valeur")
    ])
    fig.show()
show_momentum_local_mimumum()

def show_adagrad(lr=0.4, steps=10, start=(-1.5, -1.5), eps=1e-7):
    """Plot an Adagrad trajectory on f(x, y) = 10 * x**2 + y**2.

    Each coordinate accumulates the sum of its squared gradients and its
    step is divided by the square root of that sum.

    Args:
        lr: Learning rate.
        steps: Number of updates to draw.
        start: Initial (x, y) point of the trajectory.
        eps: Small constant added to the denominator to avoid dividing by zero.
    """
    X = torch.arange(-2, 2, 0.1)
    Y = torch.arange(-2, 2, 0.1)
    # Broadcasting builds the whole grid at once: Z[i, j] = f(X[i], Y[j]).
    Z = 10 * X[:, None] * X[:, None] + Y[None, :] * Y[None, :]

    x, y = start
    X_descent, Y_descent, Z_descent = [x], [y], [10 * x * x + y * y]
    g_x, g_y = 0, 0  # running sums of squared gradients
    for _ in range(steps):
        grad_x = 2 * 10 * x
        grad_y = 2 * y
        g_x = g_x + grad_x * grad_x
        g_y = g_y + grad_y * grad_y
        x = x - lr * grad_x / (math.sqrt(g_x) + eps)
        y = y - lr * grad_y / (math.sqrt(g_y) + eps)
        X_descent.append(x)
        Y_descent.append(y)
        Z_descent.append(10 * x * x + y * y)

    fig = go.Figure(data=[
        # Z is indexed [x, y]; it is transposed before being handed to Surface.
        go.Surface(z=Z.T, x=X, y=Y)
    ])
    fig.update_traces(contours_z=dict(show=True, usecolormap=True,
                                      highlightcolor="limegreen", project_z=True))
    fig.add_trace(
        go.Scatter3d(x=X_descent, y=Y_descent, z=Z_descent, marker_color="red"))
    fig.show()
show_adagrad()

def show_rmsprop(lr=0.4, delta=0.1, steps=10, start=(-1.5, -1.5), eps=1e-7):
    """Plot an RMSProp trajectory on f(x, y) = 10 * x**2 + y**2.

    Each coordinate keeps an exponential moving average of its squared
    gradients, g = delta * g + (1 - delta) * grad**2, and its step is
    divided by the square root of that average.

    Args:
        lr: Learning rate.
        delta: Weight given to the previous average (1 - delta goes to the
            newest squared gradient).
        steps: Number of updates to draw.
        start: Initial (x, y) point of the trajectory.
        eps: Small constant added to the denominator to avoid dividing by zero.
    """
    X = torch.arange(-2, 2, 0.1)
    Y = torch.arange(-2, 2, 0.1)
    # Broadcasting builds the whole grid at once: Z[i, j] = f(X[i], Y[j]).
    Z = 10 * X[:, None] * X[:, None] + Y[None, :] * Y[None, :]

    x, y = start
    X_descent, Y_descent, Z_descent = [x], [y], [10 * x * x + y * y]
    g_x, g_y = 0, 0  # moving averages of squared gradients
    for _ in range(steps):
        grad_x = 2 * 10 * x
        grad_y = 2 * y
        g_x = delta * g_x + (1 - delta) * grad_x * grad_x
        g_y = delta * g_y + (1 - delta) * grad_y * grad_y
        x = x - lr * grad_x / (math.sqrt(g_x) + eps)
        y = y - lr * grad_y / (math.sqrt(g_y) + eps)
        X_descent.append(x)
        Y_descent.append(y)
        Z_descent.append(10 * x * x + y * y)

    fig = go.Figure(data=[
        # Z is indexed [x, y]; it is transposed before being handed to Surface.
        go.Surface(z=Z.T, x=X, y=Y)
    ])
    fig.update_traces(contours_z=dict(show=True, usecolormap=True,
                                      highlightcolor="limegreen", project_z=True))
    fig.add_trace(
        go.Scatter3d(x=X_descent, y=Y_descent, z=Z_descent, marker_color="red"))
    fig.show()
show_rmsprop()

def show_adam(lr=0.1, beta1=0.9, beta2=0.999, steps=10, start=(-1.5, -1.5), eps=1e-7):
    """Plot an Adam trajectory on f(x, y) = 10 * x**2 + y**2.

    Each coordinate keeps a moving average of its gradient (first moment)
    and of its squared gradient (second moment). Both are divided by
    1 - beta**t before use, and the step is the corrected first moment
    divided by the square root of the corrected second moment.

    Args:
        lr: Learning rate.
        beta1: Decay rate of the first-moment average.
        beta2: Decay rate of the second-moment average.
        steps: Number of updates to draw.
        start: Initial (x, y) point of the trajectory.
        eps: Small constant added to the denominator to avoid dividing by zero.
    """
    X = torch.arange(-2, 2, 0.1)
    Y = torch.arange(-2, 2, 0.1)
    # Broadcasting builds the whole grid at once: Z[i, j] = f(X[i], Y[j]).
    Z = 10 * X[:, None] * X[:, None] + Y[None, :] * Y[None, :]

    x, y = start
    X_descent, Y_descent, Z_descent = [x], [y], [10 * x * x + y * y]
    m1_x, m1_y, m2_x, m2_y = 0, 0, 0, 0
    # t starts at 1 so that 1 - beta**t is never zero.
    for t in range(1, steps + 1):
        grad_x = 2 * 10 * x
        grad_y = 2 * y
        m1_x = beta1 * m1_x + (1-beta1) * grad_x
        m1_y = beta1 * m1_y + (1-beta1) * grad_y
        m2_x = beta2 * m2_x + (1-beta2) * grad_x * grad_x
        m2_y = beta2 * m2_y + (1-beta2) * grad_y * grad_y
        # Bias correction: the averages start at 0 and are rescaled accordingly.
        m1_x_hat = m1_x / (1 - beta1 ** t)
        m1_y_hat = m1_y / (1 - beta1 ** t)
        m2_x_hat = m2_x / (1 - beta2 ** t)
        m2_y_hat = m2_y / (1 - beta2 ** t)
        x = x - lr * m1_x_hat / (math.sqrt(m2_x_hat) + eps)
        y = y - lr * m1_y_hat / (math.sqrt(m2_y_hat) + eps)
        X_descent.append(x)
        Y_descent.append(y)
        Z_descent.append(10 * x * x + y * y)

    fig = go.Figure(data=[
        # Z is indexed [x, y]; it is transposed before being handed to Surface.
        go.Surface(z=Z.T, x=X, y=Y)
    ])
    fig.update_traces(contours_z=dict(show=True, usecolormap=True,
                                      highlightcolor="limegreen", project_z=True))
    fig.add_trace(
        go.Scatter3d(x=X_descent, y=Y_descent, z=Z_descent, marker_color="red"))
    fig.show()
show_adam()

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from torch import nn
import torch.nn.functional as f
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Load the Iris dataset as feature / label arrays
iris = load_iris()
x, y = iris.data, iris.target

# Dataset split
# scikit-learn cannot split three ways directly, so split twice:
# 20% held out for test, then 25% of the remainder for validation.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
x_train, x_val, y_train, y_val = train_test_split(x_train, y_train, test_size=0.25)

# Build tensors: float features, long (integer) class labels
x_train, x_val, x_test = (torch.FloatTensor(part) for part in (x_train, x_val, x_test))
y_train, y_val, y_test = (torch.LongTensor(part) for part in (y_train, y_val, y_test))

class LinearModel(nn.Module):
    """Two-layer network: Linear -> ReLU -> Linear.

    Args:
        in_dim: Size of each input vector.
        hidden_dim: Width of the hidden layer.
        out_dim: Size of each output vector.
    """

    def __init__(self, in_dim=4, hidden_dim=10, out_dim=3):
        super().__init__()
        self.linear1 = nn.Linear(in_features=in_dim, out_features=hidden_dim)
        self.linear2 = nn.Linear(in_features=hidden_dim, out_features=out_dim)

    def forward(self, x):
        """Return the output of the second linear layer for input `x` (no softmax applied)."""
        hidden = self.linear1(x)
        activated = f.relu(hidden)
        return self.linear2(activated)

    
# Per-epoch history, filled in by the loop below
losses_train, losses_val, accuracies = [], [], []

model = LinearModel()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters())

# Training loop
for epoch in range(5000):
    # No mini-batches here: each step uses the whole training set
    optimizer.zero_grad()  # reset the gradients to 0
    outputs = model(x_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()  # applies the parameter update for us
    losses_train.append(loss.item())

    # Validation (no gradient tracking needed)
    with torch.no_grad():
        val_outputs = model(x_val)
        loss_val = criterion(val_outputs, y_val)
        losses_val.append(loss_val.item())
        # Predicted class = index of the highest output value
        _, predicted = val_outputs.max(dim=1)
        accuracy = accuracy_score(y_val, predicted)
        accuracies.append(accuracy)

# Final evaluation on the test set
with torch.no_grad():
    test_outputs = model(x_test)
    # Predicted class = index of the highest output value
    _, predicted = torch.max(test_outputs, 1)

    # Accuracy must be computed on the test predictions: the `accuracy`
    # variable left over from the training loop is the last VALIDATION accuracy.
    test_accuracy = accuracy_score(y_test, predicted)

    # precision / recall / f1 score (weighted average over the classes)
    precision = precision_score(y_test, predicted, average='weighted')
    recall = recall_score(y_test, predicted, average='weighted')
    f1 = f1_score(y_test, predicted, average='weighted')

print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

Test Accuracy: 100.00%

# Training and validation loss, one point per epoch
fig = go.Figure()
fig.add_trace(go.Scatter(x=list(range(len(losses_train))), y=losses_train, name="train"))
fig.add_trace(go.Scatter(x=list(range(len(losses_val))), y=losses_val, name="val"))
fig.show()

# Validation accuracy, one point per epoch
fig = go.Figure()
fig.add_trace(go.Scatter(x=list(range(len(accuracies))), y=accuracies, name="Val accuracy"))
fig.show()

Régularisation

La dernière fois...

Notions

Schéma

Est-ce que la fonction de coût est une garantie de qualité ?

Procédure d'entrainement et d'évaluation classique

Train/Val/Test

Découpage temporel

Découpage contextuel

Découpage contextuel

Évaluation

Matrice de confusion

Matrice de confusion - Exemple

Métrique : Accuracy

Métrique : Précision

Métrique : Rappel

Métrique : F1

Métrique : Courbe ROC

Évaluation humaine

Choix des hyperparamètres

Surapprentissage

Comment détecter un surapprentissage ?

Sous-apprentissage / Underfit

Comment détecter un sous-apprentissage ?

Réduire le sur-apprentissage avec la régularisation

Qu'est-ce que la régularisation ?

Contraindre les poids par la fonction de coût

Fonctions de régularisation classiques

Exemples

Gradient de la régularisation L2

Weight decay

Optimisation avancée

Problèmes avec la descente de gradient stochastique

Ajouter de l'inertie avec le Momentum

Normaliser les gradients avec Adagrad

Normaliser en donnant plus d'importance aux points récents : RMSProp

Combiner l'inertie avec la normalisation : ADAM

En pratique

En résumé

Ressources additionnelles