peczenyj · May 23, 2026 06:21
diff --git a/neuron.py b/neuron.py
 """
 Train 3 neurons to compute XOR, from scratch, in pure NumPy.

 Architecture (the one from our diagram):
    2 inputs  ->  hidden layer of 2 neurons (A, B)  ->  1 output neuron (C)
    9 parameters total: each neuron has 2 weights + 1 bias.

 Everything is visible: the forward pass, the loss, the gradients, and the
 weight update. No PyTorch, nothing hidden. Run with:  python neuron.py
 """

 import numpy as np

 # Pin NumPy's global random generator so every run draws the same starting
 # weights; change or remove the seed to explore other random starts.
 np.random.seed(1)

 # ---------------------------------------------------------------------------
 # 1) The data: the XOR truth table.
 #    Each row of X is one example (two input bits); the matching row of y is
 #    the single bit the network should output for that example.
 # ---------------------------------------------------------------------------
 X = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
 y = np.array([[0.0], [1.0], [1.0], [0.0]])   # 0^0=0, 0^1=1, 1^0=1, 1^1=0

 # ---------------------------------------------------------------------------
 # 2) The activation function (the "squashing" we discussed) and its slope.
 #    sigmoid maps any number into (0,1). Its derivative is needed for learning.
 # ---------------------------------------------------------------------------
 def sigmoid(z):
    """Squash ``z`` element-wise into the range 0..1 (the logistic function).

    Mathematically this is 1 / (1 + exp(-z)). It is evaluated as
    exp(-log(1 + exp(-z))) so that a large negative ``z`` cannot overflow
    ``np.exp``; the textbook form emits a RuntimeWarning in that case.

    Args:
        z: A scalar or NumPy array of weighted sums.

    Returns:
        The logistic of ``z``, with the same shape as ``z``. Extreme inputs
        saturate to exactly 0.0 or 1.0.
    """
    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)), computed
    # without ever forming a huge intermediate exp(-z).
    return np.exp(-np.logaddexp(0.0, -z))

 def sigmoid_deriv(a):
    """Return the sigmoid's slope, expressed through its own output.

    Args:
        a: A value (scalar or array) that is already ``sigmoid(z)``, not the
            raw ``z``.

    Returns:
        ``a * (1 - a)``, element-wise.
    """
    complement = 1.0 - a
    return a * complement

 # ---------------------------------------------------------------------------
 # 3) The parameters = all the w's and b's. Weights start RANDOM, biases start at zero (network is dumb).
 #    W1: weights from 2 inputs into 2 hidden neurons  -> shape (2 inputs, 2 neurons)
 #    b1: one bias per hidden neuron                   -> shape (1, 2)
 #    W2: weights from 2 hidden neurons into 1 output  -> shape (2, 1)
 #    b2: one bias for the output neuron               -> shape (1, 1)
 #    Count: 4 + 2 + 2 + 1 = 9 parameters, exactly as in the diagram.
 # ---------------------------------------------------------------------------
 # Weights are drawn from a standard normal distribution (W1 first, then W2,
 # so the random stream is consumed in the same order); biases start at zero.
 W1 = np.random.standard_normal((2, 2))   # 2 inputs  -> 2 hidden neurons
 b1 = np.zeros(shape=(1, 2))              # one bias per hidden neuron
 W2 = np.random.standard_normal((2, 1))   # 2 hidden  -> 1 output neuron
 b2 = np.zeros(shape=(1, 1))              # the output neuron's bias

 # Hyper-parameters of plain gradient descent.
 lr = 1.0          # step size applied to every gradient
 epochs = 20_000   # number of full passes over the 4 examples

 # ---------------------------------------------------------------------------
 # 4) The training loop. Each pass: forward (predict) -> measure error ->
 #    backward (find which way to nudge each parameter) -> update.
 # ---------------------------------------------------------------------------
 for epoch in range(epochs):
    # ----- FORWARD PASS: all 4 examples go through both layers at once -----
    z1 = X @ W1 + b1                      # weighted sums into hidden neurons A and B
    a1 = sigmoid(z1)                      # what A and B output
    z2 = a1 @ W2 + b2                     # weighted sum into output neuron C
    a2 = sigmoid(z2)                      # the network's guess, one row per example

    # ----- ERROR AND LOSS -----
    # The raw error is computed once and reused: squared and averaged it is
    # the reported loss, and as-is it seeds the backward pass below.
    d_a2 = a2 - y
    loss = np.mean(d_a2 ** 2)

    # ----- BACKWARD PASS: chain rule, output layer first ("backpropagation") -----
    # Using the plain error as d_a2 leaves out the constant 2/N factor of the
    # exact mean-squared-error derivative; that only rescales the step size.
    d_z2 = d_a2 * sigmoid_deriv(a2)       # through C's activation
    d_W2 = a1.T @ d_z2                    # gradient for the output weights
    d_b2 = d_z2.sum(axis=0, keepdims=True)

    d_a1 = d_z2 @ W2.T                    # uses W2 as it was BEFORE this step's update
    d_z1 = d_a1 * sigmoid_deriv(a1)       # through A's and B's activations
    d_W1 = X.T @ d_z1                     # gradient for the hidden weights
    d_b1 = d_z1.sum(axis=0, keepdims=True)

    # ----- UPDATE: step every parameter against its gradient -----
    # `-=` on a NumPy array works in place, so the module-level arrays change.
    for param, grad in ((W2, d_W2), (b2, d_b2), (W1, d_W1), (b1, d_b1)):
        param -= lr * grad

    # ----- progress report on every 2000th epoch -----
    if not epoch % 2000:
        print(f"epoch {epoch:5d}   loss = {loss:.5f}")

 # ---------------------------------------------------------------------------
 # 5) Results: the discovered parameters and a truth-table check.
 # ---------------------------------------------------------------------------
 # Print the nine learned numbers, one line per neuron.
 print("\n--- Discovered parameters (the 9 numbers training found) ---")
 for label, weights, bias in (
    ("Hidden neuron A:  w =", W1[:, 0], b1[0, 0]),
    ("Hidden neuron B:  w =", W1[:, 1], b1[0, 1]),
    ("Output neuron C:  w =", W2[:, 0], b2[0, 0]),
 ):
    print(label, np.round(weights, 2), " b =", round(float(bias), 2))

 # Run one more forward pass with the final parameters and compare each
 # prediction (raw and rounded to the nearest integer) against the target bit.
 print("\n--- Truth table check ---")
 z1 = X @ W1 + b1
 a1 = sigmoid(z1)
 a2 = sigmoid(a1 @ W2 + b2)
 for bits, prediction, target in zip(X, a2[:, 0], y[:, 0]):
    raw = float(prediction)
    print(f"  input {bits.astype(int)}  ->  {raw:.3f}  ->  rounded {round(raw)}   (want {int(target)})")
	"""
	Train 3 neurons to compute XOR, from scratch, in pure NumPy.

	Architecture (the one from our diagram):
	2 inputs -> hidden layer of 2 neurons (A, B) -> 1 output neuron (C)
	9 parameters total: each neuron has 2 weights + 1 bias.

	Everything is visible: the forward pass, the loss, the gradients, and the
	weight update. No PyTorch, nothing hidden. Run with: python neuron.py
	"""

	import numpy as np

	# Pin NumPy's global random generator so every run draws the same starting
	# weights; change or remove the seed to explore other random starts.
	np.random.seed(1)

	# ---------------------------------------------------------------------------
	# 1) The data: the XOR truth table.
	# Each row of X is one example (two input bits); the matching row of y is
	# the single bit the network should output for that example.
	# ---------------------------------------------------------------------------
	X = np.array([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
	y = np.array([[0.0], [1.0], [1.0], [0.0]]) # 0^0=0, 0^1=1, 1^0=1, 1^1=0

	# ---------------------------------------------------------------------------
	# 2) The activation function (the "squashing" we discussed) and its slope.
	# sigmoid maps any number into (0,1). Its derivative is needed for learning.
	# ---------------------------------------------------------------------------
	def sigmoid(z):
	    """Squash ``z`` element-wise into the range 0..1 (the logistic function).

	    Mathematically this is 1 / (1 + exp(-z)). It is evaluated as
	    exp(-log(1 + exp(-z))) so that a large negative ``z`` cannot overflow
	    ``np.exp``; the textbook form emits a RuntimeWarning in that case.

	    Args:
	        z: A scalar or NumPy array of weighted sums.

	    Returns:
	        The logistic of ``z``, with the same shape as ``z``. Extreme inputs
	        saturate to exactly 0.0 or 1.0.
	    """
	    # logaddexp(0, -z) == log(exp(0) + exp(-z)) == log(1 + exp(-z)), computed
	    # without ever forming a huge intermediate exp(-z).
	    return np.exp(-np.logaddexp(0.0, -z))

	def sigmoid_deriv(a):
	    """Return the sigmoid's slope, expressed through its own output.

	    Args:
	        a: A value (scalar or array) that is already ``sigmoid(z)``, not the
	            raw ``z``.

	    Returns:
	        ``a * (1 - a)``, element-wise.
	    """
	    return a * (1.0 - a)

	# ---------------------------------------------------------------------------
	# 3) The parameters = all the w's and b's. Weights start RANDOM, biases start at zero (network is dumb).
	# W1: weights from 2 inputs into 2 hidden neurons -> shape (2 inputs, 2 neurons)
	# b1: one bias per hidden neuron -> shape (1, 2)
	# W2: weights from 2 hidden neurons into 1 output -> shape (2, 1)
	# b2: one bias for the output neuron -> shape (1, 1)
	# Count: 4 + 2 + 2 + 1 = 9 parameters, exactly as in the diagram.
	# ---------------------------------------------------------------------------
	# Weights are drawn from a standard normal distribution (W1 first, then W2,
	# so the random stream is consumed in the same order); biases start at zero.
	W1 = np.random.standard_normal((2, 2)) # 2 inputs -> 2 hidden neurons
	b1 = np.zeros(shape=(1, 2)) # one bias per hidden neuron
	W2 = np.random.standard_normal((2, 1)) # 2 hidden -> 1 output neuron
	b2 = np.zeros(shape=(1, 1)) # the output neuron's bias

	# Hyper-parameters of plain gradient descent.
	lr = 1.0 # step size applied to every gradient
	epochs = 20_000 # number of full passes over the 4 examples

	# ---------------------------------------------------------------------------
	# 4) The training loop. Each pass: forward (predict) -> measure error ->
	# backward (find which way to nudge each parameter) -> update.
	# ---------------------------------------------------------------------------
	for epoch in range(epochs):
	    # ----- FORWARD PASS: run the 4 examples through the network -----
	    z1 = X @ W1 + b1 # weighted sums into hidden neurons A and B
	    a1 = sigmoid(z1) # hidden activations (outputs of A and B)
	    z2 = a1 @ W2 + b2 # weighted sum into output neuron C
	    a2 = sigmoid(z2) # final prediction, one number per example

	    # ----- LOSS: mean squared error between prediction and target -----
	    loss = np.mean((a2 - y) ** 2)

	    # ----- BACKWARD PASS: gradients (which direction reduces the loss) -----
	    # This is the chain rule applied layer by layer ("backpropagation").
	    # Using the plain error as d_a2 leaves out the constant 2/N factor of the
	    # exact mean-squared-error derivative; that only rescales the step size.
	    d_a2 = (a2 - y) # how wrong the output is
	    d_z2 = d_a2 * sigmoid_deriv(a2) # push error through C's activation
	    d_W2 = a1.T @ d_z2 # gradient for output weights
	    d_b2 = np.sum(d_z2, axis=0, keepdims=True)

	    d_a1 = d_z2 @ W2.T # send error back to hidden layer (W2 not yet updated)
	    d_z1 = d_a1 * sigmoid_deriv(a1) # push through A's and B's activations
	    d_W1 = X.T @ d_z1 # gradient for hidden weights
	    d_b1 = np.sum(d_z1, axis=0, keepdims=True)

	    # ----- UPDATE: nudge every parameter a little, opposite the gradient -----
	    W2 -= lr * d_W2
	    b2 -= lr * d_b2
	    W1 -= lr * d_W1
	    b1 -= lr * d_b1

	    # ----- progress report on every 2000th epoch -----
	    if epoch % 2000 == 0:
	        print(f"epoch {epoch:5d} loss = {loss:.5f}")

	# ---------------------------------------------------------------------------
	# 5) Results: the discovered parameters and a truth-table check.
	# ---------------------------------------------------------------------------
	# Print the nine learned numbers, one line per neuron.
	print("\n--- Discovered parameters (the 9 numbers training found) ---")
	print("Hidden neuron A: w =", np.round(W1[:, 0], 2), " b =", round(float(b1[0, 0]), 2))
	print("Hidden neuron B: w =", np.round(W1[:, 1], 2), " b =", round(float(b1[0, 1]), 2))
	print("Output neuron C: w =", np.round(W2[:, 0], 2), " b =", round(float(b2[0, 0]), 2))

	# Run one more forward pass with the final parameters and compare each
	# prediction (raw and rounded to the nearest integer) against the target bit.
	print("\n--- Truth table check ---")
	z1 = X @ W1 + b1
	a1 = sigmoid(z1)
	a2 = sigmoid(a1 @ W2 + b2)
	for i in range(4):
	    raw = float(a2[i, 0])
	    print(f" input {X[i].astype(int)} -> {raw:.3f} -> rounded {round(raw)} (want {int(y[i,0])})")
No results found