Welcome to Module 3: Neural Networks—Building Blocks and Training! Today you’ll go back to the very roots of deep learning: the perceptron. Invented by Frank Rosenblatt in the late 1950s, the perceptron is the simplest neural network: an algorithm that classifies points with a weighted sum and a threshold. Every modern neural-net layer (including those used in deep RL) is a direct descendant of this little algorithm!
You will:
- Implement the perceptron update rule from scratch in PyTorch
- Train it on linearly separable 2D data and visualize the decision boundary as it evolves
- See what happens when the data is not linearly separable
Let’s awaken the ancestor of all modern AI.
The perceptron is a binary classifier for input $x \in \mathbb{R}^d$. Given weights $w \in \mathbb{R}^d$ and bias $b$, it predicts

$$\hat{y} = \begin{cases} 1 & \text{if } w^\top x + b > 0 \\ 0 & \text{otherwise.} \end{cases}$$

Learning rule:

For each data point $(x_i, y_i)$, if the prediction $\hat{y}_i$ is incorrect, the parameters are updated after that sample:

$$w \leftarrow w + \eta\,(y_i - \hat{y}_i)\,x_i, \qquad b \leftarrow b + \eta\,(y_i - \hat{y}_i),$$

where $\eta$ is the learning rate.
Key property: If the data is linearly separable, this algorithm finds a separating hyperplane in finite steps.
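For example, starting from $w = (0, 0)$, $b = 0$ and seeing the point $x = (1, -1)$ with label $y = 1$: the score is $w^\top x + b = 0$, so the prediction is $\hat{y} = 0 \neq y$, and with $\eta = 1$ the update gives $w = (1, -1)$ and $b = 1$. This is exactly what the “simple test” at the end of the first code listing below prints.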
The perceptron models the simplest neural network: a single weighted sum followed by a step function. At each pass through the data, the perceptron checks whether its prediction matches the true label and, if not, nudges the weights and bias toward the misclassified point.

Training loop: sweep over the dataset for a fixed number of epochs, applying the update rule to each sample in turn, and record the weights and bias after every epoch so you can watch the decision boundary move.
Visualizing the boundary: Since the perceptron produces a linear separator (a line in 2D), you can plot this line after each epoch. The boundary is the set of points where $w_1 x_1 + w_2 x_2 + b = 0$, which rearranges to $x_2 = -(w_1 x_1 + b) / w_2$; that is the line the plotting code below draws. On non-separable data, the perceptron will not converge, highlighting an important limitation compared to modern neural nets.
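To make the line formula concrete, here is a tiny standalone check with made-up numbers (the weights and bias are hypothetical, purely for illustration):

import numpy as np

# Hypothetical weights and bias, only to illustrate the boundary-line formula.
w1, w2, b0 = 1.0, 1.0, -0.5
x1 = np.linspace(-3.0, 3.0, 5)
x2 = -(w1 * x1 + b0) / w2  # points satisfying w1*x1 + w2*x2 + b0 = 0
print(list(zip(x1.round(2), x2.round(2))))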
import torch

def perceptron_step(
    x: torch.Tensor,
    y: float,
    w: torch.Tensor,
    b: float,
    lr: float = 1.0
) -> tuple[torch.Tensor, float]:
    """
    x: input vector (d,)
    y: true label (0 or 1)
    w: weight vector (d,)
    b: bias (scalar)
    Returns updated w, b
    """
    # Prediction: if w^T x + b > 0: 1 else 0
    z: float = torch.dot(w, x).item() + b
    y_pred: float = 1.0 if z > 0 else 0.0
    if y != y_pred:
        # Update rule
        w = w + lr * (y - y_pred) * x
        b = b + lr * (y - y_pred)
    return w, b

# Simple test
w: torch.Tensor = torch.zeros(2)
b: float = 0.0
x: torch.Tensor = torch.tensor([1.0, -1.0])
y: float = 1.0
w, b = perceptron_step(x, y, w, b)
print("Updated w/b:", w, b)
import numpy as np
import matplotlib.pyplot as plt

# Generate simple separable data
np.random.seed(0)
N: int = 40
X0: np.ndarray = np.random.randn(N, 2) + np.array([2, 2])
X1: np.ndarray = np.random.randn(N, 2) + np.array([-2, -2])
X: np.ndarray = np.concatenate([X0, X1])
y: np.ndarray = np.concatenate([np.ones(N), np.zeros(N)])

X_t: torch.Tensor = torch.tensor(X, dtype=torch.float32)
y_t: torch.Tensor = torch.tensor(y, dtype=torch.float32)

w: torch.Tensor = torch.zeros(2)
b: float = 0.0
epochs: int = 12
boundary_history: list[tuple[torch.Tensor, float]] = []

for epoch in range(epochs):
    for i in range(len(X)):
        w, b = perceptron_step(X_t[i], y_t[i].item(), w, b, lr=0.7)
    boundary_history.append((w.clone(), b))

print("Final weights:", w, "Final bias:", b)
def plot_perceptron_decision(
    X: np.ndarray,
    y: np.ndarray,
    boundary_history: list[tuple[torch.Tensor, float]]
) -> None:
    plt.figure(figsize=(8, 5))
    plt.scatter(X[y == 0, 0], X[y == 0, 1], color="orange", label="Class 0")
    plt.scatter(X[y == 1, 0], X[y == 1, 1], color="blue", label="Class 1")
    x_vals: np.ndarray = np.array(plt.gca().get_xlim())
    for i, (w, b) in enumerate(boundary_history):
        # Line: w1*x + w2*y + b = 0 => y = (-w1*x - b)/w2
        if w[1].abs() > 1e-6:
            y_vals = (-w[0].item() * x_vals - b) / w[1].item()
            plt.plot(x_vals, y_vals,
                     alpha=0.3 + 0.7 * i / len(boundary_history),
                     label=f"Epoch {i+1}" if i == len(boundary_history) - 1 else None)
    plt.legend(); plt.title("Perceptron Decision Boundary Evolution")
    plt.xlabel("x1"); plt.ylabel("x2")
    plt.show()

plot_perceptron_decision(X, y, boundary_history)
# Make non-separable data (add noise and overlap)
np.random.seed(3)
N: int = 40
X0_ns: np.ndarray = np.random.randn(N, 2) + np.array([1, 1])
X1_ns: np.ndarray = np.random.randn(N, 2) + np.array([2, 2])
X_ns: np.ndarray = np.concatenate([X0_ns, X1_ns])
y_ns: np.ndarray = np.concatenate([np.zeros(N), np.ones(N)])

X_t_ns: torch.Tensor = torch.tensor(X_ns, dtype=torch.float32)
y_t_ns: torch.Tensor = torch.tensor(y_ns, dtype=torch.float32)

w_ns: torch.Tensor = torch.zeros(2)
b_ns: float = 0.0
boundary_history_ns: list[tuple[torch.Tensor, float]] = []

for epoch in range(12):
    for i in range(len(X_ns)):
        w_ns, b_ns = perceptron_step(X_t_ns[i], y_t_ns[i].item(), w_ns, b_ns, lr=0.7)
    boundary_history_ns.append((w_ns.clone(), b_ns))

plot_perceptron_decision(X_ns, y_ns, boundary_history_ns)
print("Final weights (non-separable):", w_ns, "Final bias:", b_ns)
Reference solutions for the exercises follow, starting from the same perceptron_step implementation used in the demo.

import torch
import numpy as np
import matplotlib.pyplot as plt

def perceptron_step(
    x: torch.Tensor,
    y: float,
    w: torch.Tensor,
    b: float,
    lr: float = 1.0
) -> tuple[torch.Tensor, float]:
    z: float = torch.dot(w, x).item() + b
    y_pred: float = 1.0 if z > 0 else 0.0
    if y != y_pred:
        w = w + lr * (y - y_pred) * x
        b = b + lr * (y - y_pred)
    return w, b
# EXERCISE 1: Already shown in demo

# EXERCISE 2: Linearly separable data
np.random.seed(1)
N: int = 50
X0: np.ndarray = np.random.randn(N, 2) + np.array([3, 3])
X1: np.ndarray = np.random.randn(N, 2) + np.array([-3, -3])
X: np.ndarray = np.vstack([X0, X1])
y: np.ndarray = np.hstack([np.ones(N), np.zeros(N)])
X_t: torch.Tensor = torch.tensor(X, dtype=torch.float32)
y_t: torch.Tensor = torch.tensor(y, dtype=torch.float32)

w: torch.Tensor = torch.zeros(2)
b: float = 0.0
history: list[tuple[torch.Tensor, float]] = []

for epoch in range(15):
    for i in range(len(X)):
        w, b = perceptron_step(X_t[i], y_t[i].item(), w, b, lr=0.9)
    history.append((w.clone(), b))
# EXERCISE 3
def plot_perceptron(
    X: np.ndarray,
    y: np.ndarray,
    history: list[tuple[torch.Tensor, float]]
) -> None:
    plt.scatter(X[y == 0, 0], X[y == 0, 1], c='red', label='Class 0')
    plt.scatter(X[y == 1, 0], X[y == 1, 1], c='blue', label='Class 1')
    x_vals: np.ndarray = np.linspace(X[:, 0].min(), X[:, 0].max(), 100)
    for i, (w_, b_) in enumerate(history):
        if abs(w_[1]) > 1e-4:
            y_vals = (-w_[0].item() * x_vals - b_) / w_[1].item()
            plt.plot(x_vals, y_vals, alpha=(i + 1) / len(history),
                     label=f'Epoch {i+1}' if i == len(history) - 1 else None)
    plt.xlabel('x1'); plt.ylabel('x2'); plt.legend(); plt.show()

plot_perceptron(X, y, history)
# EXERCISE 4: Non-separable case
np.random.seed(11)
X0_ns: np.ndarray = np.random.randn(N, 2) + np.array([1, 1])
X1_ns: np.ndarray = np.random.randn(N, 2) + np.array([2, 2])
X_ns: np.ndarray = np.vstack([X0_ns, X1_ns])
y_ns: np.ndarray = np.hstack([np.zeros(N), np.ones(N)])
X_t_ns: torch.Tensor = torch.tensor(X_ns, dtype=torch.float32)
y_t_ns: torch.Tensor = torch.tensor(y_ns, dtype=torch.float32)

w_ns: torch.Tensor = torch.zeros(2)
b_ns: float = 0.0
history_ns: list[tuple[torch.Tensor, float]] = []

for epoch in range(15):
    for i in range(len(X_ns)):
        w_ns, b_ns = perceptron_step(X_t_ns[i], y_t_ns[i].item(), w_ns, b_ns, lr=0.9)
    history_ns.append((w_ns.clone(), b_ns))

plot_perceptron(X_ns, y_ns, history_ns)
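As a quick check on Exercises 2 and 4, you can compare training accuracy on the two datasets. The helper below is a small sketch (not part of the exercises) and assumes the tensors and final parameters defined above:

def train_accuracy(X_data: torch.Tensor, y_data: torch.Tensor,
                   w_fit: torch.Tensor, b_fit: float) -> float:
    # Fraction of points on the correct side of the learned hyperplane
    preds = ((X_data @ w_fit + b_fit) > 0).float()
    return (preds == y_data).float().mean().item()

print("Training accuracy (separable):", train_accuracy(X_t, y_t, w, b))
print("Training accuracy (non-separable):", train_accuracy(X_t_ns, y_t_ns, w_ns, b_ns))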
You’ve brought the world’s first artificial neuron to life! Today, you:
- Implemented the perceptron update rule from scratch in PyTorch
- Trained it on linearly separable 2D data and visualized the decision boundary epoch by epoch
- Saw the perceptron fail to converge on non-separable data
Up next: We’ll move beyond perceptrons to networks of neurons—deep learning’s true power—and you’ll implement neural nets by hand and with PyTorch’s tools.
The foundation is set—now let’s go deep! See you in Part 3.2.