from notebook.services.config import ConfigManager
cm = ConfigManager()
cm.update('livereveal', {
        'width': 1024,
        'height': 768,
        'scroll': True,
})
import torch
import plotly.express as px
import plotly.graph_objects as go
import torch.nn as nn
import torch.nn.functional as F
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
import plotly.figure_factory as ff
import graphviz
# Wrap the DOT statements in `s` inside a left-to-right (rankdir="LR") digraph
# and return the result as a graphviz.Source object.
def gv(s): return graphviz.Source('digraph G{ rankdir="LR"' + s + '; }')
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

import torch

# A 4-entry input vector and a 1x4 weight matrix.
X = torch.Tensor([1, 2, 3, 4])
W = torch.Tensor([[0.1, -0.5, 0.3, 0.4]])

W@X  # Matrix/tensor multiplication

tensor([1.6000])

X = torch.Tensor([1, 2, 3, 4])
W = torch.Tensor([[0.1, -0.5, 0.3, 0.4]])

1.0 / (1.0 + torch.exp(-W@X))

tensor([0.8320])

# Let's plot the sigmoid function
X = torch.arange(-10, 10, 0.1)
Y = 1.0 / (1.0 + torch.exp(-X))
px.line(x=X, y=Y)

# Plot the sigmoid together with its derivative on [-10, 10).
X = torch.arange(-10, 10, 0.1)
Y = 1.0 / (1.0 + torch.exp(-X))
# or Y = torch.sigmoid(X)
# The derivative is expressed from the sigmoid value itself: s * (1 - s).
dev_Y = Y * (1.0 - Y) 
fig = go.Figure([go.Scatter(x=X, y=Y, name="Sigmoid"), go.Scatter(x=X, y=dev_Y, name="Dérivée")])
fig.show()

X = torch.arange(-10, 10, 0.1)
Y = torch.tanh(X)
dev_Y = 1.0 - Y * Y
fig = go.Figure([go.Scatter(x=X, y=Y, name="tanh"), go.Scatter(x=X, y=dev_Y, name="Dérivée")])
fig.show()

X = torch.arange(-5, 5, 0.01)
Y = torch.relu(X)
dev_Y = torch.Tensor([0 if x < 0 else 1 for x in X])
fig = go.Figure([go.Scatter(x=X, y=Y), go.Scatter(x=X, y=dev_Y)])
fig.show()

X = torch.arange(-10, 10, 0.1, )
Y = -1.64 - 0.18 * X
fig = go.Figure(
    [go.Scatter(x=list(X) + [min(X)], y=list(Y) + [min(Y)], fill="toself", text=["" for _ in range(len(X))] + ["Negative"], mode="markers+text", textposition="top right", name="Négative"),
    go.Scatter(x=list(X) + [max(X)], y=list(Y) + [max(Y)], fill="toself", text=["" for _ in range(len(X))] + ["Positive"], mode="markers+text", textposition="bottom left", name="Positive")])
fig.show()

X = torch.arange(-10, 10, 0.1, )
Y = -1.64 - 0.18 * X
fig = go.Figure(
    [go.Scatter(x=list(X) + [min(X)], y=list(Y) + [min(Y)], fill="toself", text=["" for _ in range(len(X))] + ["Negative"],mode="markers+text", textposition="top right", name="Négative"),
    go.Scatter(x=list(X) + [max(X)], y=list(Y) + [max(Y)], fill="toself", text=["" for _ in range(len(X))] + ["Positive"],mode="markers+text", textposition="bottom left", name="Positive"),
    go.Scatter(x=[4.7], y=[-1.1], text=["X"], mode="markers+text", textposition="top center",marker=dict(size=10), name="X")])
fig.show()

torch.sigmoid(torch.Tensor([1, 4.7, -1.1]) @ torch.Tensor([4.1, 0.4, 2.2]))

tensor(0.9723)

# Two-output perceptron written by hand: each row of W holds the weights of one output.
X = torch.Tensor([1, 4.7, -1.1])
# The input can also be transposed directly: X = torch.Tensor([1, 4.7, -1.1]).reshape(1, -1)
W = torch.Tensor([[-1.2,  1.4,  1.1],
                  [ 0.3,  0.5, -0.9]])
print("X :", X)
print("W :", W)
# Z is the linear part, Y applies the sigmoid to it.
print("Z :", X@W.T)
print("Y :", torch.sigmoid(X@W.T))

X : tensor([ 1.0000,  4.7000, -1.1000])
W : tensor([[-1.2000,  1.4000,  1.1000],
        [ 0.3000,  0.5000, -0.9000]])
Z : tensor([4.1700, 3.6400])
Y : tensor([0.9848, 0.9744])

# With a PyTorch module
class Perceptron(nn.Module):
    """Single-layer perceptron computing ``sigmoid(x @ W.T + b)``.

    ``W`` has shape (output_size, input_size) and ``b`` has shape
    (output_size,); both are filled with ``torch.rand``. They are stored as
    plain tensor attributes (not ``nn.Parameter``), so they are not trainable
    and the output carries no ``grad_fn``.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        self.W = torch.rand((output_size, input_size))
        self.b = torch.rand((output_size))

    def forward(self, x):
        """Return the sigmoid of the affine map applied to ``x``."""
        # Use the module's own weights: referring to a bare `W` here would
        # pick up whatever global `W` happens to exist in the notebook.
        return F.sigmoid(x @ self.W.T + self.b)

perceptron = Perceptron(3, 2)
X = torch.Tensor([1.4, 4.7, -1.1])
print(perceptron(X))

tensor([0.9808, 0.9842])

# Pytorch fournit une fonction pour la partie linéaire

class Perceptron2(nn.Module):
    """Perceptron whose affine part is delegated to an ``nn.Linear`` layer."""

    def __init__(self, input_size, output_size):
        super().__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        """Apply the linear layer to ``x`` and squash the result with a sigmoid."""
        pre_activation = self.linear(x)
        return F.sigmoid(pre_activation)

perceptron = Perceptron2(3, 2)
X = torch.Tensor([1.4, 4.7, -1.1])
print(perceptron(X))

tensor([0.6476, 0.4706], grad_fn=<SigmoidBackward0>)

X = torch.Tensor([[1.4, 4.7, -1.1],
                  [2.1, 1.8, 3.1]])
print(perceptron(X))

tensor([[0.6476, 0.4706],
        [0.6648, 0.6628]], grad_fn=<SigmoidBackward0>)

class MultiPerceptron(nn.Module):
    """Two-layer perceptron: linear -> sigmoid -> linear -> sigmoid."""

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.linear1 = nn.Linear(input_size, hidden_size)
        self.linear2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Run ``x`` through both layers, applying a sigmoid after each one."""
        activations = x
        for layer in (self.linear1, self.linear2):
            activations = F.sigmoid(layer(activations))
        return activations

perceptron = MultiPerceptron(3, 6, 2)
X = torch.Tensor([1, 4.7, -1.1])
print(perceptron(X))

tensor([0.3522, 0.3938], grad_fn=<SigmoidBackward0>)

positif_x = torch.Tensor([1, 2, 4, 4, 5, 6, 7])
positif_y = torch.Tensor([6.5, 4.6, 6.1, 2, 5.5, 1.9, 3.2])
negatif_x = torch.Tensor([0, 0, 1, 4])
negatif_y = torch.Tensor([0, 1.5, 0.5, 5])

test_x = torch.Tensor([3])
test_y = torch.Tensor([5.1])

fig = go.Figure([
    go.Scatter(x=positif_x, y=positif_y, mode="markers", name="Positive", marker=dict(size=10, color="green")),
    go.Scatter(x=negatif_x, y=negatif_y, mode="markers", name="Négative", marker=dict(size=10, color="red")),
    go.Scatter(x=test_x, y=test_y, mode="markers+text", text=["?"],  textposition="top center",
              name="?", marker=dict(size=10, color="blue"))])
fig.show()

# Let's try to make a prediction with an MLP...
torch.manual_seed(3)  # For reproducibility
perceptron = MultiPerceptron(2, 4, 1)
perceptron(torch.Tensor([3, 5.1]))

tensor([0.4053], grad_fn=<SigmoidBackward0>)

@interact(w_1=(-10.0, 10.0), w_2=(-10.0, 10.0), b=(-10.0, 10.0))
def simple_regression(w_1, w_2, b):
    """Plot the data set, the test point and the decision boundary for the given weights.

    Prints the three parameters and the model output
    ``sigmoid(b + w_1 * x + w_2 * y)`` for the test point, then draws the
    positive/negative samples, the test point and the separating line.
    """
    print(w_1, w_2, b)
    prediction = torch.sigmoid(b + w_1 * test_x[0] + w_2 * test_y[0])
    print("Prédiction", prediction.item())
    r = torch.arange(0, 7, 0.1)
    if w_2 == 0.0:
        # Avoid a division by zero when solving the boundary equation for y.
        w_2 = 1e-7
    # The prediction equals 0.5 exactly where the sigmoid's argument is 0,
    # i.e. b + w_1 * x + w_2 * y = 0, hence y = -b / w_2 - w_1 / w_2 * x.
    boundary = -b / w_2 - w_1 / w_2 * r
    fig = go.Figure([
        go.Scatter(x=positif_x, y=positif_y, mode="markers", marker=dict(size=10, color="green")),
        go.Scatter(x=negatif_x, y=negatif_y, mode="markers", marker=dict(size=10, color="red")),
        go.Scatter(x=test_x, y=test_y, mode="markers+text", text=["?"], textposition="top center",
                   marker=dict(size=10, color="blue")),
        go.Scatter(x=r, y=boundary)])
    fig.show()

interactive(children=(FloatSlider(value=0.0, description='w_1', max=10.0, min=-10.0), FloatSlider(value=0.0, d…

@interact(w_1=(-10.0, 10.0), w_2=(-10.0, 10.0), b=(-10.0, 10.0))
def simple_regression(w_1=0.0, w_2=1.0, b=0.0):
    """Plot the data and decision boundary, and print the mean error for the given weights.

    The error is the mean over all samples of ``-log(p)`` for positive samples
    and ``-log(1 - p)`` for negative ones, where
    ``p = sigmoid(b + w_1 * x + w_2 * y)``.
    """
    print(w_1, w_2, b)
    all_pos = torch.sigmoid(b + w_1 * positif_x + w_2 * positif_y)
    all_neg = torch.sigmoid(b + w_1 * negatif_x + w_2 * negatif_y)
    total_error = sum(-torch.log(all_pos)) + sum(-torch.log(1 - all_neg))
    n_samples = len(all_pos) + len(all_neg)
    print("Erreur moyenne", (total_error / n_samples).item())
    prediction = torch.sigmoid(b + w_1 * test_x + w_2 * test_y)
    print("Prediction test", prediction.item())
    r = torch.arange(0, 7, 0.1)
    if w_2 == 0.0:
        # Avoid a division by zero when solving the boundary equation for y.
        w_2 = 1e-7
    # The prediction equals 0.5 exactly where the sigmoid's argument is 0,
    # i.e. b + w_1 * x + w_2 * y = 0, hence y = -b / w_2 - w_1 / w_2 * x.
    boundary = -b / w_2 - w_1 / w_2 * r
    fig = go.Figure([
        go.Scatter(x=positif_x, y=positif_y, mode="markers", marker=dict(size=10, color="green")),
        go.Scatter(x=negatif_x, y=negatif_y, mode="markers", marker=dict(size=10, color="red")),
        go.Scatter(x=test_x, y=test_y, mode="markers+text", text=["?"], textposition="top center",
                   marker=dict(size=10, color="blue")),
        go.Scatter(x=r, y=boundary)])
    fig.show()

interactive(children=(FloatSlider(value=0.0, description='w_1', max=10.0, min=-10.0), FloatSlider(value=1.0, d…

def display_3D():
    """Show the mean cross-entropy cost as a 3D surface over the weights (w_1, w_2)."""
    # b is held fixed so the cost can be drawn as a function of the two weights only
    b = -8.5
    x = torch.arange(1, 3, 0.01)
    y = torch.arange(0, 2, 0.01)

    def mean_cost(w_1, w_2):
        # Average of -log(p) over positive samples and -log(1 - p) over negative ones.
        all_pos = torch.sigmoid(b + w_1 * positif_x + w_2 * positif_y)
        all_neg = torch.sigmoid(b + w_1 * negatif_x + w_2 * negatif_y)
        return (sum(-torch.log(all_pos)) + sum(-torch.log(1 - all_neg))) / (len(all_pos) + len(all_neg))

    # One row per w_1 value, one column per w_2 value.
    z = torch.Tensor([[mean_cost(w_1, w_2) for w_2 in y] for w_1 in x])
    surface = go.Surface(z=z.T, x=x, y=y)
    fig = go.Figure(data=[surface])
    contour_options = dict(show=True, usecolormap=True,
                           highlightcolor="limegreen", project_z=True)
    fig.update_traces(contours_z=contour_options)
    fig.show()
display_3D()

def display_3D():
    """Show the loss of a MultiPerceptron(2, 10, 1) as two of its first-layer weights vary.

    The weights ``linear1.weight[0, 0]`` and ``linear1.weight[0, 1]`` are swept
    over [-10, 10) in steps of 0.05 while every other parameter keeps its
    initial value; the binary cross-entropy on the data set is plotted as a
    surface.
    """
    perceptron = MultiPerceptron(2, 10, 1)
    x, y, z = torch.arange(-10, 10, 0.05), torch.arange(-10, 10, 0.05), []
    X = torch.Tensor([list(positif_x) + list(negatif_x), list(positif_y) + list(negatif_y)]).T
    Y = torch.Tensor([1 for _ in range(len(positif_x))] + [0 for _ in range(len(negatif_x))]).reshape(-1, 1)
    # Only the loss values are needed here. Disabling autograd and storing
    # plain floats avoids keeping one computation graph alive per grid point.
    with torch.no_grad():
        for w_1 in x:
            temp = []
            perceptron.linear1.weight.data[0, 0] = w_1.item()
            for w_2 in y:
                perceptron.linear1.weight.data[0, 1] = w_2.item()
                y_pred = perceptron(X)
                loss = F.binary_cross_entropy(y_pred, Y)
                temp.append(loss.item())
            z.append(temp)
    z = torch.Tensor(z)
    # z is indexed [w_1, w_2]; Surface expects rows to follow y, hence the transpose.
    fig = go.Figure(data=[go.Surface(z=z.T, x=x, y=y)])
    fig.update_traces(contours_z=dict(show=True, usecolormap=True,
                                      highlightcolor="limegreen", project_z=True))
    fig.show()
torch.manual_seed(42)
display_3D()

@interact(t=(-2.0, 2.0, 0.1))
def search_minimum(t=1.4):
    """Draw y = x**2 on [-2, 2) and mark the point (t, t*t) in red."""
    x = torch.arange(-2, 2, 0.1)
    curve = go.Scatter(x=x, y=x**2)
    current_point = go.Scatter(x=[t], y=[t*t], mode="markers",
                               marker=dict(size=10, color="red"))
    fig = go.Figure([curve, current_point])
    fig.show()

interactive(children=(FloatSlider(value=1.4, description='t', max=2.0, min=-2.0), Output()), _dom_classes=('wi…

@interact(t=(-2.0, 2.0, 0.1))
def search_minimum(t=1.4):
    """Draw y = x**2 and its derivative 2x, mark (t, t*t) and print the derivative at t."""
    x = torch.arange(-2, 2, 0.1)
    print("La dérivée vaut :", 2 * t)
    traces = [
        go.Scatter(x=x, y=x**2, name="fonction"),
        go.Scatter(x=x, y=2*x, name="dérivée"),
        go.Scatter(x=[t], y=[t*t], mode="markers",
                   marker=dict(size=10, color="red"), name="valeur"),
    ]
    fig = go.Figure(traces)
    fig.show()

interactive(children=(FloatSlider(value=1.4, description='t', max=2.0, min=-2.0), Output()), _dom_classes=('wi…

@interact(t=(-2.0, 2.0, 0.01))
def search_minimum(t=-1.8):
    """Draw a quartic polynomial and its derivative, mark the point at t and print the derivative there."""

    def f(v):
        # Polynomial being explored.
        return 2.0 - 1 * v - 2 * v ** 2 + 0.16 * v ** 3 + 0.5 * v**4

    def df(v):
        # Its derivative, written out term by term.
        return -1 - 4 * v + 0.16 * 3 * v ** 2 + 2 * v ** 3

    x = torch.arange(-2, 2, 0.01)
    print("La dérivée vaut :", df(t))
    traces = [
        go.Scatter(x=x, y=f(x), name="fonction"),
        go.Scatter(x=x, y=df(x), name="dérivée"),
        go.Scatter(x=[t], y=[f(t)], mode="markers", marker=dict(size=10, color="red"), name="valeur"),
    ]
    fig = go.Figure(traces)
    fig.show()

interactive(children=(FloatSlider(value=-1.8, description='t', max=2.0, min=-2.0, step=0.01), Output()), _dom_…

def visualisation_gradient():
    """Draw a quiver plot of the negated, scaled cost gradient over a grid of (w_1, w_2).

    The cost is the mean of ``-log(p)`` over positive samples and
    ``-log(1 - p)`` over negative ones, evaluated for w_1 in [1, 3) and
    w_2 in [0, 2) with a step of 0.1.
    """
    b = -8.5  # b is fixed so that the cost depends on two weights only
    x, y, z = [], [], []

    for w_1 in torch.arange(1, 3, 0.1):
        temp_x, temp_y, temp_z = [], [], []
        for w_2 in torch.arange(0, 2, 0.1):
            all_pos = torch.sigmoid(b + w_1 * positif_x + w_2 * positif_y)
            all_neg = torch.sigmoid(b + w_1 * negatif_x + w_2 * negatif_y)
            temp_x.append(w_1)
            temp_y.append(w_2)
            temp_z.append((sum(-torch.log(all_pos)) + sum(-torch.log(1 - all_neg))).item() / (len(all_pos) + len(all_neg)))
        x.append(temp_x)
        y.append(temp_y)
        z.append(temp_z)
    x = torch.Tensor(x)
    y = torch.Tensor(y)
    # Negated so the arrows follow the descent direction.
    # Appending .clamp(max=0.1) would remove the very long arrows.
    z = -torch.Tensor(z) / 10
    # Rows of z (dim 0) follow w_1, plotted on the x axis, and columns (dim 1)
    # follow w_2, plotted on the y axis. torch.gradient returns one tensor per
    # dimension in order, so the first is the x component and the second the y
    # component of each arrow.
    u, v = torch.gradient(z)
    fig = ff.create_quiver(x, y, u, v, scale=10, arrow_scale=1, line_width=1)
    fig.show()
visualisation_gradient()

@interact(lr=(0.01, 1.3, 0.01))
def search_minimum(lr=0.01):
    """Run 10 gradient-descent steps on y = x**2 from t = 1.5 and plot the visited points."""
    t = 1.5
    all_t = [t]
    for _ in range(10):
        t = t - lr * 2 * t  # Update step: the derivative of x**2 is 2x
        all_t.append(t)
    all_t2 = [value * value for value in all_t]
    # Widen the plotted range when the iterates leave [-2, 2].
    lower = min(-2, min(all_t))
    upper = max(2, max(all_t))
    x = torch.arange(lower, upper, 0.01)
    curve = go.Scatter(x=x, y=x**2, name="fonction")
    path = go.Scatter(x=all_t, y=all_t2, mode="lines+markers+text", text=list(range(len(all_t))),
                      marker=dict(size=5, color="red"), name="valeur")
    fig = go.Figure([curve, path])
    fig.show()

interactive(children=(FloatSlider(value=0.01, description='lr', max=1.3, min=0.01, step=0.01), Output()), _dom…

def descente_gradient():
    """Train a Perceptron2(2, 1) on the data set with plain gradient descent.

    Runs 5000 full-batch iterations with a learning rate of 0.05, prints the
    last computed loss and the learned parameters, plots the data with the
    learned decision boundary, and returns the list of loss values (one per
    iteration).
    """
    perceptron = Perceptron2(2, 1)
    lr = 0.05
    X = torch.Tensor([list(positif_x) + list(negatif_x), list(positif_y) + list(negatif_y)]).T
    Y = torch.Tensor([1 for _ in range(len(positif_x))] + [0 for _ in range(len(negatif_x))]).reshape(-1, 1)
    all_losses = []
    for _ in range(5000):
        y_pred = perceptron(X)
        loss = F.binary_cross_entropy(y_pred, Y)  # Compute the cost
        all_losses.append(loss.item())
        loss.backward()  # Autograd computes the gradients
        # Parameter update, done outside of autograd tracking
        with torch.no_grad():
            for param in (perceptron.linear.weight, perceptron.linear.bias):
                param -= lr * param.grad
                param.grad.zero_()  # Old gradients must be cleared, otherwise they accumulate
    print("Loss final :", loss.item())
    print("Parameters :")
    print("W :", perceptron.linear.weight.data)
    print("Bias :", perceptron.linear.bias.data)
    w_1 = perceptron.linear.weight.data[0][0]
    w_2 = perceptron.linear.weight.data[0][1]
    b = perceptron.linear.bias.data[0]
    r = torch.arange(0, 7, 0.1)
    # The prediction equals 0.5 exactly where the sigmoid's argument is 0,
    # i.e. b + w_1 * x + w_2 * y = 0, hence y = -b / w_2 - w_1 / w_2 * x.
    boundary = -b / w_2 - w_1 / w_2 * r
    fig = go.Figure([
            go.Scatter(x=positif_x, y=positif_y, mode="markers", marker=dict(size=10, color="green")),
            go.Scatter(x=negatif_x, y=negatif_y, mode="markers", marker=dict(size=10, color="red")),
            go.Scatter(x=test_x, y=test_y, mode="markers+text", text=["?"], textposition="top center",
                       marker=dict(size=10, color="blue")),
            go.Scatter(x=r, y=boundary)])
    fig.show()
    return all_losses
all_losses = descente_gradient()

Loss final : 0.36282074451446533
Parameters :
W : tensor([[0.6369, 0.5167]])
Bias : tensor([-2.9017])

# Il est souvent utile de visualiser l'évolution du loss pour vérifier que l'on converge
px.line(x=range(len(all_losses)), y=all_losses)

def descente_gradient_divergent():
    """Train a Perceptron2(2, 1) with a deliberately large learning rate.

    Runs 100 full-batch gradient-descent iterations with a learning rate of 5,
    prints the last computed loss and the parameters, plots the data with the
    resulting decision boundary, and returns the list of loss values (one per
    iteration).
    """
    perceptron = Perceptron2(2, 1)
    lr = 5
    X = torch.Tensor([list(positif_x) + list(negatif_x), list(positif_y) + list(negatif_y)]).T
    Y = torch.Tensor([1 for _ in range(len(positif_x))] + [0 for _ in range(len(negatif_x))]).reshape(-1, 1)
    all_losses = []
    for _ in range(100):
        y_pred = perceptron(X)
        loss = F.binary_cross_entropy(y_pred, Y)  # Compute the cost
        all_losses.append(loss.item())
        loss.backward()  # Autograd computes the gradients
        # Parameter update, done outside of autograd tracking
        with torch.no_grad():
            for param in (perceptron.linear.weight, perceptron.linear.bias):
                param -= lr * param.grad
                param.grad.zero_()  # Old gradients must be cleared, otherwise they accumulate
    print("Loss final :", loss.item())
    print("Parameters :")
    print("W :", perceptron.linear.weight.data)
    print("Bias :", perceptron.linear.bias.data)
    w_1 = perceptron.linear.weight.data[0][0]
    w_2 = perceptron.linear.weight.data[0][1]
    b = perceptron.linear.bias.data[0]
    r = torch.arange(0, 7, 0.1)
    # The prediction equals 0.5 exactly where the sigmoid's argument is 0,
    # i.e. b + w_1 * x + w_2 * y = 0, hence y = -b / w_2 - w_1 / w_2 * x.
    boundary = -b / w_2 - w_1 / w_2 * r
    fig = go.Figure([
            go.Scatter(x=positif_x, y=positif_y, mode="markers", marker=dict(size=10, color="green")),
            go.Scatter(x=negatif_x, y=negatif_y, mode="markers", marker=dict(size=10, color="red")),
            go.Scatter(x=test_x, y=test_y, mode="markers+text", text=["?"], textposition="top center",
                       marker=dict(size=10, color="blue")),
            go.Scatter(x=r, y=boundary)])
    fig.show()
    return all_losses
all_losses = descente_gradient_divergent()

Loss final : 9.09238052368164
Parameters :
W : tensor([[6.1928, 3.7071]])
Bias : tensor([-12.2596])

# Il est souvent utile de visualiser l'évolution du loss pour vérifier que l'on converge
px.line(x=range(len(all_losses)), y=all_losses)

# We do obtain the same results
# Manual chain-rule computation for the network
#   z = sigmoid(x * w_1), y_hat = sigmoid(z * w_2), J = -log(1 - y_hat)
x = torch.Tensor([1])
y = torch.Tensor([0])
w_1 = torch.Tensor([2])
w_2 = torch.Tensor([3])
# Forward pass
z = torch.sigmoid(x * w_1)
print("z :", z)
y_hat = torch.sigmoid(z * w_2)
print("y_hat :", y_hat)
J = -torch.log(1 - y_hat)
print("J :", J)
# Local derivatives; each sigmoid derivative is written as s * (1 - s)
dJ_dy_hat = 1.0 / (1.0 - y_hat)
print("dJ_dy_hat :", dJ_dy_hat)
dy_hat_dz = w_2 * (torch.sigmoid(z * w_2) * (1 - torch.sigmoid(z * w_2)))
print("dy_hat_dz: ", dy_hat_dz)
dy_hat_dw_2 = z * (torch.sigmoid(z * w_2) * (1 - torch.sigmoid(z * w_2)))
print("dy_hat_dw_2: ", dy_hat_dw_2)
dz_dw_1 = x * (torch.sigmoid(x * w_1) * (1 - torch.sigmoid(x * w_1)))
print("dz_dw_1: ", dz_dw_1)

# Combine everything with the chain rule
d_J_dw_1 = dJ_dy_hat * dy_hat_dz * dz_dw_1
print("d_J_dw_1 :", d_J_dw_1)
d_J_dw_2 = dJ_dy_hat * dy_hat_dw_2
print("d_J_dw_2 :", d_J_dw_2)

z : tensor([0.8808])
y_hat : tensor([0.9335])
J : tensor([2.7112])
dJ_dy_hat : tensor([15.0467])
dy_hat_dz:  tensor([0.1861])
dy_hat_dw_2:  tensor([0.0546])
dz_dw_1:  tensor([0.1050])
d_J_dw_1 : tensor([0.2940])
d_J_dw_2 : tensor([0.8223])

##### The gradients can be computed directly with Torch
# NOTE: x is not assigned in this cell; it must already be defined by an earlier cell.
y = torch.Tensor([0])
w_1 = torch.Tensor([2])
w_1.requires_grad_()
w_2 = torch.Tensor([3])
w_2.requires_grad_()
z = torch.sigmoid(x * w_1)
y_hat = torch.sigmoid(z * w_2)
J = -torch.log(1 - y_hat)
# backward() fills w_1.grad and w_2.grad
J.backward()
print("d_J_dw_1 :", w_1.grad)
print("d_J_dw_2 :", w_2.grad)

d_J_dw_1 : tensor([0.2940])
d_J_dw_2 : tensor([0.8223])

x = torch.Tensor([1, 2, -2])
x.requires_grad_()
y = x.sum()
z = 1.0 / (1 + torch.exp(-y))
z.backward()
x.grad

tensor([0.1966, 0.1966, 0.1966])

# In Torch, gates implement torch.autograd.Function
class Multiply(torch.autograd.Function):
    """Element-wise product ``z = x * y`` with a hand-written backward pass."""

    @staticmethod
    def forward(ctx, x, y):
        """Return ``x * y`` and save both inputs for the backward pass."""
        ctx.save_for_backward(x, y)
        z = x * y
        return z

    # backward must be a static method too, like forward: autograd calls it
    # on the class with the context object as first argument.
    @staticmethod
    def backward(ctx, grad_z):
        """Return the gradients with respect to ``x`` and ``y``.

        Since z = x * y, dz/dx = y and dz/dy = x; each is multiplied by the
        incoming gradient ``grad_z``.
        """
        x, y = ctx.saved_tensors
        grad_x = y * grad_z
        grad_y = x * grad_z
        return grad_x, grad_y
x, y = torch.Tensor([1]), torch.Tensor([2])
x.requires_grad_()
y.requires_grad_()
z = Multiply.apply(x, y)
print("Output", z)
z.backward(torch.Tensor([3]))
print("Gradients :", x.grad, "et", y.grad)

Output tensor([2.], grad_fn=<MultiplyBackward>)
Gradients : tensor([6.]) et tensor([3.])

# Behaviour of the max function in Torch
# y stays fixed at 0; x is recreated for each case so that x.grad starts empty.
x, y = torch.Tensor([1]), torch.Tensor([0])
x.requires_grad_()
torch.max(x, y).backward()
print("Pour x > y :", x.grad)
x = torch.Tensor([-1])
x.requires_grad_()
torch.max(x, y).backward()
print("Pour x < y :", x.grad)
x = torch.Tensor([0])
x.requires_grad_()
torch.max(x, y).backward()
print("Pour x = y :", x.grad)

Pour x > y : tensor([1.])
Pour x < y : tensor([0.])
Pour x = y : tensor([0.5000])

# The inputs are passed through the function twice
x, y = torch.Tensor([1]), torch.Tensor([2])
x.requires_grad_()
y.requires_grad_()
z = Multiply.apply(x, y)
print("Output :", z)
z.backward(torch.Tensor([3]))
print("Gradients :", x.grad, "et", y.grad)
# x.grad and y.grad are not reset here, so the second backward pass adds to them.
z = Multiply.apply(x, y)
print("Output 2 :", z)
z.backward(torch.Tensor([3]))
print("Gradients 2 :", x.grad, "et", y.grad)

Output : tensor([2.], grad_fn=<MultiplyBackward>)
Gradients : tensor([6.]) et tensor([3.])
Output 2 : tensor([2.], grad_fn=<MultiplyBackward>)
Gradients 2 : tensor([12.]) et tensor([6.])

Introduction au deep learning¶

Qu'est-ce qu'un réseau de neurones ?¶

Rappels de machine learning¶

Rappel : Régression linéaire¶

Rappel : Régression logistique¶

Représentation graphique¶

Le Perceptron¶

Le perceptron et la forward propagation¶

Pourquoi g doit-elle être non linéaire ?¶

N'oublions pas le biais...¶

Expression sous forme d'une multiplication de matrice¶

Fonctions d'activation usuelles¶

Fonctions d'activation usuelles - Sigmoid¶

Fonctions d'activation usuelles - Tangente hyperbolique¶

Fonctions d'activation usuelles - Rectified Linear Unit¶

Perceptron : Exemple¶

Perceptron : Exemple¶

Perceptron : Exemple¶

Du perceptron au réseau de neurones¶

Notre perceptron à une seule sortie¶

Nous pouvons généraliser à plusieurs sorties¶

On se débarrasse de la somme...¶

Un autre avantage de la notation en matrice¶

Réseau de neurones à une couche¶

On peut aussi écrire l'équation avec des matrices¶

Non linéarité et couche de sortie¶

La fonction softmax¶

Théorème d'approximation universelle¶

Fin du cours¶

Ce n'est pas si simple...¶

La suite du cours¶

Réseau de neurones profond¶

Exemple¶

Exemple¶

Notre perceptron prédit un résultat négatif ?¶

Fonction de coût¶

Quantifier l'erreur¶

Le coût empirique¶

Exemples de fonctions de coût¶

Entropie croisée binaire¶

L'erreur quadratique¶

Choisir une fonction de coût¶

La descente de gradient¶

On veut optimiser la fonction de coût automatiquement...¶

Tracé impossible en pratique¶

Si je considère un point donné, comment l'améliorer ?¶

Comment connaitre la "bonne direction" ?¶

Minimum local vs minimum global¶

En dimension n¶

Gradient - Exemple¶

De combien dois-je me déplacer ?¶

Convergence et divergence¶

La descente de gradient¶

Descente de gradient et taille du jeu de données¶

La descente de gradient stochastique¶

Backpropagation¶

Calculer les gradients¶

Exemple simple¶

Exemple¶

Exemple - La dérivation en chaîne¶

Exemple - La dérivation en chaîne¶

Exemple - La dérivation en chaîne¶

Exemple - La dérivation en chaîne¶

Exemple - La dérivation en chaîne¶

Exemple - De quoi a-t-on besoin pour calculer la valeur des dérivées ?¶

Exemple - Forward propagation¶

Exemple - Forward propagation¶

Exemple - Forward propagation¶

Exemple - Forward propagation¶

Exemple - Back propagation¶

Exemple - Back propagation¶

Exemple - Back propagation¶

Graphe de calculs¶

Graphe de calculs / Computational Graph¶

Graphe de calculs / Computational Graph - Définition¶

Forward propagation dans un graphe de calcul¶

Backward propagation dans un graphe de calcul¶

Exemple¶

Exemple¶

Zoom sur une porte à une entrée, une sortie¶