“Backpropagation is just the chain rule, efficiently applied in reverse.”
Reverse-mode (backprop) is ideal when you have one output, many inputs — like loss gradients in deep learning.
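As a concrete instance, take the expression we'll build below, $d = \tanh(ab + a^2)$. The chain rule gives both partials, and reverse mode computes the shared factor $1 - \tanh^2 c$ once:

$$
\frac{\partial d}{\partial a} = (1 - \tanh^2 c)\,(b + 2a), \qquad
\frac{\partial d}{\partial b} = (1 - \tanh^2 c)\,a, \qquad c = ab + a^2.
$$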
We’ll define a `Value` class to represent numbers in the computation graph, storing both a value and its gradient. Each operation links nodes together.
```python
# calc-15-backprop-from-scratch/minimal_autodiff.py
import numpy as np

class Value:
    """A scalar node in the computation graph: stores data and its gradient."""

    def __init__(self, data, _children=(), _op=""):
        self.data = float(data)
        self.grad = 0.0                 # d(output)/d(this node), set by backward()
        self._backward = lambda: None   # propagates out.grad to this node's children
        self._prev = set(_children)
        self._op = _op

    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), "+")

        def _backward():
            # sum rule: both inputs receive the output gradient unchanged
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward
        return out

    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), "*")

        def _backward():
            # product rule: each input's gradient is scaled by the other's value
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward
        return out

    def tanh(self):
        x = self.data
        t = (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))
        out = Value(t, (self,), "tanh")

        def _backward():
            # d(tanh x)/dx = 1 - tanh^2(x)
            self.grad += (1 - t ** 2) * out.grad
        out._backward = _backward
        return out

    def __pow__(self, power):
        out = Value(self.data ** power, (self,), f"**{power}")

        def _backward():
            # power rule: d(x^n)/dx = n * x^(n-1)
            self.grad += power * (self.data ** (power - 1)) * out.grad
        out._backward = _backward
        return out

    def backward(self):
        # Topologically sort the graph so each node's gradient is complete
        # before it is propagated to its children.
        topo, visited = [], set()

        def build(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build(child)
                topo.append(v)
        build(self)

        self.grad = 1.0   # seed: d(output)/d(output) = 1
        for node in reversed(topo):
            node._backward()
```
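One detail worth noticing: every `_backward` accumulates with `+=` rather than assigning. A node that feeds into several operations (like `a` in `a * b + a ** 2` below) receives a gradient contribution from each path, and the topological sort in `backward()` guarantees a node's gradient is complete before it is pushed to its children.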
Usage example (assuming the class above is saved as minimal_autodiff.py):

```python
from minimal_autodiff import Value

a = Value(2.0)
b = Value(-3.0)
c = a * b + a ** 2   # c = ab + a^2 = -6 + 4 = -2
d = c.tanh()
d.backward()
print(f"d = {d.data:.5f}, ∂d/∂a = {a.grad:.5f}, ∂d/∂b = {b.grad:.5f}")
# d = -0.96403, ∂d/∂a = 0.07065, ∂d/∂b = 0.14130
```
```python
import torch

a = torch.tensor(2.0, requires_grad=True)
b = torch.tensor(-3.0, requires_grad=True)
c = a * b + a ** 2
d = torch.tanh(c)
d.backward()
print(f"d = {d.item():.5f}, ∂d/∂a = {a.grad.item():.5f}, ∂d/∂b = {b.grad.item():.5f}")
```
Exercises: extend the `Value` class (for example, with more operations over `Value`s) and backprop through a small neural network built from `Value` nodes. Put solutions in calc-15-backprop-from-scratch/ and tag v0.1.
Next: Calculus 16 — Automatic Differentiation, Modern ML, and You.