diff --git a/pocs/dbn_mnist_demo.py b/pocs/dbn_mnist_demo.py new file mode 100644 index 0000000..5f3020d --- /dev/null +++ b/pocs/dbn_mnist_demo.py @@ -0,0 +1,376 @@ +"""Deep Belief Network (DBN) Demonstration on MNIST Dataset + +This module implements a Deep Belief Network to learn hierarchical representations +of handwritten digits from the MNIST dataset. A DBN is a generative model composed +of multiple layers of Restricted Boltzmann Machines (RBMs) stacked together. + +Key Concepts: +------------ +1. Deep Belief Network (DBN): + - A deep learning architecture that learns to probabilistically reconstruct its inputs + - Composed of multiple RBM layers trained in a greedy layer-wise manner + - Each layer learns increasingly abstract features of the data + +2. Restricted Boltzmann Machine (RBM): + - A two-layer neural network that learns to reconstruct input data + - "Restricted" because there are no connections between nodes in the same layer + - Uses contrastive divergence for training (positive and negative phases) + +Experiment Overview: +------------------ +This experiment: +1. Loads MNIST handwritten digit data (28x28 pixel images) +2. Creates a 3-layer DBN with dimensions: 784 -> 256 -> 64 + - 784: Input layer (28x28 flattened pixels) + - 256: First hidden layer for low-level features + - 64: Second hidden layer for higher-level abstractions + +3. Generates comprehensive visualizations: + - Input reconstructions: Shows how well the model recreates input images + - Weight matrices: Visualizes learned features at each layer + - Activation patterns: Shows how different inputs activate network nodes + - Training metrics: Tracks reconstruction error over time + +Training Process: +--------------- +1. Layer-wise pretraining: + - Each RBM layer is trained independently + - Lower layers learn simple features (edges, corners) + - Higher layers learn complex feature combinations + +2. For each layer: + - Forward pass: Compute hidden unit activations + - Reconstruction: Generate visible unit reconstructions + - Update weights using contrastive divergence + - Track reconstruction error and visualize progress + +Output Structure: +--------------- +The experiment creates timestamped output directories containing: +- Reconstruction visualizations +- Weight matrix patterns +- Activation heatmaps +- Training metrics +- Configuration details + +Usage: +----- +Run this script directly to train the DBN and generate visualizations: + python dbn_mnist_demo.py + +Requirements: +----------- +- NumPy: Numerical computations +- Matplotlib: Visualization +- Scikit-learn: MNIST dataset loading +- Seaborn: Enhanced visualizations +- tqdm: Progress tracking +""" + +import numpy as np +import matplotlib.pyplot as plt +from sklearn.datasets import fetch_openml +from sklearn.preprocessing import MinMaxScaler +from typing import List, Tuple, Optional +import seaborn as sns +from tqdm import tqdm +import os +from datetime import datetime + + +class RBM: + """Enhanced Restricted Boltzmann Machine with visualization capabilities. + + An RBM is a two-layer neural network that learns to reconstruct input data through + unsupervised learning. It consists of: + - Visible layer: Represents the input data + - Hidden layer: Learns features from the input + - Weights: Bidirectional connections between layers + + The learning process involves: + 1. Positive phase: Computing hidden activations from input + 2. Negative phase: Reconstructing input from hidden activations + 3. 
Weight updates: Minimizing reconstruction error + + Args: + n_visible (int): Number of visible units (input dimensions) + n_hidden (int): Number of hidden units (learned features) + learning_rate (float): Learning rate for weight updates + """ + + def __init__(self, n_visible: int, n_hidden: int, learning_rate: float = 0.01): + self.weights = np.random.normal(0, 0.01, (n_visible, n_hidden)) + self.visible_bias = np.zeros(n_visible) + self.hidden_bias = np.zeros(n_hidden) + self.learning_rate = learning_rate + self.training_losses = [] + + def sigmoid(self, x: np.ndarray) -> np.ndarray: + return 1 / (1 + np.exp(-np.clip(x, -100, 100))) + + def free_energy(self, v: np.ndarray) -> float: + """Calculate the free energy of a visible vector.""" + wx_b = np.dot(v, self.weights) + self.hidden_bias + hidden_term = np.sum(np.log(1 + np.exp(wx_b))) + vbias_term = np.dot(v, self.visible_bias) + return -hidden_term - vbias_term + + def reconstruct(self, v: np.ndarray) -> np.ndarray: + """Reconstruct visible units through one hidden layer and back.""" + h_prob, _ = self.sample_hidden(v) + v_prob, _ = self.sample_visible(h_prob) + return v_prob + + def sample_hidden(self, visible: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + """Sample hidden units given visible units.""" + hidden_probs = self.sigmoid(np.dot(visible, self.weights) + self.hidden_bias) + hidden_states = (hidden_probs > np.random.random(hidden_probs.shape)).astype( + float + ) + return hidden_probs, hidden_states + + def sample_visible(self, hidden: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + """Sample visible units given hidden units.""" + visible_probs = self.sigmoid(np.dot(hidden, self.weights.T) + self.visible_bias) + visible_states = (visible_probs > np.random.random(visible_probs.shape)).astype( + float + ) + return visible_probs, visible_states + + +class EnhancedDBN: + """Enhanced Deep Belief Network with visualization and analysis capabilities. + + A DBN is created by stacking multiple RBMs, where each layer learns to represent + features of increasing abstraction. 
This implementation includes: + - Layer-wise pretraining + - Comprehensive visualization tools + - Progress tracking and metrics + - Organized output management + + The network architecture is specified through layer_sizes, where: + - First element is input dimension + - Last element is final hidden layer size + - Intermediate elements define hidden layer sizes + + Args: + layer_sizes (List[int]): Dimensions of each layer + learning_rate (float): Learning rate for all RBM layers + """ + + def __init__(self, layer_sizes: List[int], learning_rate: float = 0.01): + """Initialize DBN with output directory creation.""" + self.rbm_layers = [] + self.layer_sizes = layer_sizes + self.rbm_layers.extend( + RBM(layer_sizes[i], layer_sizes[i + 1], learning_rate) + for i in range(len(layer_sizes) - 1) + ) + # Create output directory with timestamp + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + self.output_dir = os.path.join("output", f"dbn_run_{timestamp}") + os.makedirs(self.output_dir, exist_ok=True) + + # Create subdirectories for different visualization types + self.viz_dirs = { + "reconstructions": os.path.join(self.output_dir, "reconstructions"), + "weights": os.path.join(self.output_dir, "weights"), + "activations": os.path.join(self.output_dir, "activations"), + "metrics": os.path.join(self.output_dir, "metrics"), + } + + for dir_path in self.viz_dirs.values(): + os.makedirs(dir_path, exist_ok=True) + + def pretrain( + self, + data: np.ndarray, + epochs: int = 10, + batch_size: int = 32, + visualize: bool = True, + ) -> None: + """Enhanced pretraining with visualization and monitoring.""" + current_input = data + + for layer_idx, rbm in enumerate(self.rbm_layers): + print(f"\nPretraining layer {layer_idx + 1}") + + for epoch in tqdm(range(epochs), desc=f"Layer {layer_idx + 1}"): + reconstruction_errors = [] + + # Mini-batch training with progress tracking + for batch_start in range(0, len(data), batch_size): + batch = current_input[batch_start : batch_start + batch_size] + reconstruction_error = self._train_batch(rbm, batch) + reconstruction_errors.append(reconstruction_error) + + avg_error = np.mean(reconstruction_errors) + rbm.training_losses.append(avg_error) + + if epoch % 2 == 0 and visualize: + self._visualize_training(rbm, layer_idx, epoch, batch) + + # Transform data for next layer + current_input, _ = rbm.sample_hidden(current_input) + + def _train_batch(self, rbm: RBM, batch: np.ndarray) -> float: + """Train RBM on a single batch and return reconstruction error.""" + # Positive phase + pos_hidden_probs, pos_hidden_states = rbm.sample_hidden(batch) + pos_associations = np.dot(batch.T, pos_hidden_probs) + + # Negative phase + neg_visible_probs, _ = rbm.sample_visible(pos_hidden_states) + neg_hidden_probs, _ = rbm.sample_hidden(neg_visible_probs) + neg_associations = np.dot(neg_visible_probs.T, neg_hidden_probs) + + # Update weights and biases + rbm.weights += rbm.learning_rate * ( + (pos_associations - neg_associations) / len(batch) + ) + rbm.visible_bias += rbm.learning_rate * np.mean( + batch - neg_visible_probs, axis=0 + ) + rbm.hidden_bias += rbm.learning_rate * np.mean( + pos_hidden_probs - neg_hidden_probs, axis=0 + ) + + return np.mean((batch - neg_visible_probs) ** 2) + + def _visualize_training( + self, rbm: RBM, layer_idx: int, epoch: int, sample_batch: np.ndarray + ) -> None: + """Visualize training progress with multiple plots.""" + + # Only show image reconstructions for the first layer + if layer_idx == 0: + plt.figure(figsize=(15, 5)) + + # Plot 1: Sample 
reconstructions + n_samples = 5 + samples = sample_batch[:n_samples] + reconstructed = rbm.reconstruct(samples) + + for i in range(n_samples): + plt.subplot(n_samples, 2, 2 * i + 1) + plt.imshow(samples[i].reshape(28, 28), cmap="gray") + plt.axis("off") + if i == 0: + plt.title("Original") + + plt.subplot(n_samples, 2, 2 * i + 2) + plt.imshow(reconstructed[i].reshape(28, 28), cmap="gray") + plt.axis("off") + if i == 0: + plt.title("Reconstructed") + + self.save_visualization("reconstructions", layer_idx, epoch, "_reconstruction.png") + # For all layers, show weight patterns + plt.figure(figsize=(10, 10)) + n_hidden = min(100, rbm.weights.shape[1]) + # Only show weights as images for first layer + if layer_idx == 0: + n_grid = int(np.ceil(np.sqrt(n_hidden))) + + for i in range(n_hidden): + plt.subplot(n_grid, n_grid, i + 1) + plt.imshow(rbm.weights[:, i].reshape(28, 28), cmap="gray") + plt.axis("off") + else: + # For higher layers, show weights as heatmaps + plt.subplot(1, 1, 1) + sns.heatmap(rbm.weights, cmap="viridis", center=0) + plt.title(f"Layer {layer_idx + 1} Weight Matrix") + + plt.suptitle(f"Layer {layer_idx + 1} Features (Epoch {epoch})") + self.save_visualization("weights", layer_idx, epoch, "_weights.png") + # Add activation patterns visualization + plt.figure(figsize=(8, 4)) + plt.subplot(1, 1, 1) + sns.heatmap(sample_batch[:10], cmap="viridis") + plt.title(f"Layer {layer_idx + 1} Activation Patterns") + self.save_visualization("activations", layer_idx, epoch, "_activations.png") + # Save training metrics + if hasattr(rbm, "training_losses"): + plt.figure(figsize=(8, 4)) + plt.plot(rbm.training_losses) + plt.title(f"Layer {layer_idx + 1} Training Loss") + plt.xlabel("Epoch") + plt.ylabel("Reconstruction Error") + self.save_visualization("metrics", layer_idx, epoch, "_training_loss.png") + plt.close() + + def save_visualization( + self, viz_type: str, layer_idx: int, epoch: int, file_suffix: str + ) -> str: + plt.tight_layout() + result = os.path.join( + self.viz_dirs[viz_type], f"layer{layer_idx}_epoch{epoch}{file_suffix}" + ) + plt.savefig(result) + plt.close() + + return result + + +def load_mnist(n_samples: int = 10000) -> np.ndarray: + """Load and preprocess MNIST dataset.""" + print("Loading MNIST dataset...") + X, _ = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False) + X = X[:n_samples] + scaler = MinMaxScaler() + return scaler.fit_transform(X) + + +def analyze_representations(dbn: EnhancedDBN, data: np.ndarray) -> None: + """Analyze and visualize the learned representations.""" + # Get activations for each layer + activations = [] + current_input = data + + for rbm in dbn.rbm_layers: + hidden_probs, _ = rbm.sample_hidden(current_input) + activations.append(hidden_probs) + current_input = hidden_probs + + # Visualize activation patterns + plt.figure(figsize=(15, 5)) + for i, activation in enumerate(activations): + plt.subplot(1, len(activations), i + 1) + plt.title(f"Layer {i + 1} Activations") + sns.heatmap(activation[:10].T, cmap="viridis") + plt.tight_layout() + plt.savefig("layer_activations.png") + plt.close() + + +def main(): + """Run DBN training with organized output.""" + # Load and prepare MNIST data + data = load_mnist() + + # Create and train DBN + dbn = EnhancedDBN([784, 256, 64], learning_rate=0.01) + + # Save configuration + config = { + "layer_sizes": dbn.layer_sizes, + "learning_rate": 0.01, + "epochs": 10, + "batch_size": 32, + "timestamp": datetime.now().isoformat(), + } + + with open(os.path.join(dbn.output_dir, "config.txt"), "w") 
as f: + for key, value in config.items(): + f.write(f"{key}: {value}\n") + + # Train and generate visualizations + dbn.pretrain(data, epochs=10, batch_size=32) + + # Analyze learned representations + analyze_representations(dbn, data) + + +if __name__ == "__main__": + main() diff --git a/pocs/deep_belief_network.py b/pocs/deep_belief_network.py new file mode 100644 index 0000000..43458bb --- /dev/null +++ b/pocs/deep_belief_network.py @@ -0,0 +1,202 @@ +import numpy as np +from typing import List, Optional + +"""Deep Belief Network (DBN) Implementation and Experimentation + +This module implements a Deep Belief Network, which is a generative probabilistic model +composed of multiple layers of Restricted Boltzmann Machines (RBMs). DBNs are particularly +effective for unsupervised feature learning and dimensionality reduction. + +Key Concepts: +- DBNs learn to probabilistically reconstruct their inputs through multiple layers +- Training occurs layer by layer (greedy layer-wise training) +- Each layer captures increasingly abstract features of the data +- The network learns a joint probability distribution over visible and hidden units + +Architecture: +- Multiple RBM layers stacked together +- Each RBM learns to encode its input layer into a hidden representation +- Bottom layers capture low-level features (e.g., edges in images) +- Higher layers capture increasingly abstract concepts + +Training Process: +1. Train first RBM on raw input data +2. Use first RBM's hidden layer activations as input for second RBM +3. Repeat for all subsequent layers +4. This greedy layer-wise training builds increasingly abstract representations + +Typical Applications: +- Dimensionality reduction +- Feature learning +- Image recognition +- Pattern recognition +- Anomaly detection + +Example Usage: + # For MNIST-like data (28x28 pixel images = 784 dimensions) + dbn = DBN([784, 256, 64]) # Reducing dimensionality: 784 -> 256 -> 64 + dbn.pretrain(data, epochs=5) # Unsupervised pretraining +""" + +class RBM: + """Restricted Boltzmann Machine implementation. + + An RBM is a two-layer neural network that learns a probability distribution + over its inputs. It consists of: + - A visible layer representing the input data + - A hidden layer learning features from the input + - Bidirectional connections between layers (weights) + - No connections within each layer (hence "restricted") + + The learning process involves: + 1. Forward pass (visible to hidden) - encode input + 2. Backward pass (hidden to visible) - reconstruct input + 3. Update weights to minimize reconstruction error + + Key Properties: + - Stochastic binary units (neurons) + - Symmetric connections between layers + - No connections within layers + - Uses contrastive divergence for training + """ + + def __init__(self, n_visible: int, n_hidden: int, learning_rate: float = 0.1): + """Initialize RBM parameters. 
+ + Args: + n_visible: Number of visible units + n_hidden: Number of hidden units + learning_rate: Learning rate for weight updates + """ + self.weights = np.random.normal(0, 0.1, (n_visible, n_hidden)) + self.visible_bias = np.zeros(n_visible) + self.hidden_bias = np.zeros(n_hidden) + self.learning_rate = learning_rate + + def sigmoid(self, x: np.ndarray) -> np.ndarray: + """Compute sigmoid activation.""" + return 1 / (1 + np.exp(-x)) + + def sample_hidden(self, visible: np.ndarray) -> tuple[np.ndarray, np.ndarray]: + """Sample hidden units given visible units.""" + hidden_probs = self.sigmoid(np.dot(visible, self.weights) + self.hidden_bias) + hidden_states = (hidden_probs > np.random.random(hidden_probs.shape)).astype(float) + return hidden_probs, hidden_states + + def sample_visible(self, hidden: np.ndarray) -> tuple[np.ndarray, np.ndarray]: + """Sample visible units given hidden units.""" + visible_probs = self.sigmoid(np.dot(hidden, self.weights.T) + self.visible_bias) + visible_states = (visible_probs > np.random.random(visible_probs.shape)).astype(float) + return visible_probs, visible_states + +class DBN: + """Deep Belief Network implementation. + + A DBN is a stack of RBMs trained layer by layer from bottom to top. + Each layer learns to represent features of increasing abstraction. + + Training Process: + 1. Train first RBM on raw input + 2. Fix its weights and generate hidden layer activations + 3. Use these activations as training data for next RBM + 4. Repeat for all layers + + Architecture Benefits: + - Unsupervised feature learning + - Hierarchical representation learning + - Effective initialization for deep networks + - Handles unlabeled data well + + Common Applications: + - Dimensionality reduction + - Feature extraction + - Transfer learning + - Initialization for deep neural networks + """ + + def __init__(self, layer_sizes: List[int], learning_rate: float = 0.1): + """Initialize DBN with specified layer sizes. + + Args: + layer_sizes: List of integers specifying size of each layer + learning_rate: Learning rate for RBM training + """ + self.rbm_layers = [] + for i in range(len(layer_sizes) - 1): + self.rbm_layers.append( + RBM(layer_sizes[i], layer_sizes[i + 1], learning_rate) + ) + + def pretrain(self, data: np.ndarray, epochs: int = 10, batch_size: int = 32) -> None: + """Greedy layer-wise pretraining of the DBN. 
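+
+        Example (mirrors the synthetic data and architecture used in main()
+        below; shown only as an illustrative sketch):
+
+            data = np.random.binomial(1, 0.5, (1000, 784))
+            dbn = DBN([784, 256, 64])
+            dbn.pretrain(data, epochs=5, batch_size=32)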
+ + Args: + data: Training data + epochs: Number of training epochs + batch_size: Size of mini-batches + """ + current_input = data + + # Train each RBM layer + for i, rbm in enumerate(self.rbm_layers): + print(f"Pretraining layer {i+1}...") + + for epoch in range(epochs): + reconstruction_error = 0 + + # Mini-batch training + for batch_start in range(0, len(data), batch_size): + batch = current_input[batch_start:batch_start + batch_size] + + # Positive phase + pos_hidden_probs, pos_hidden_states = rbm.sample_hidden(batch) + pos_associations = np.dot(batch.T, pos_hidden_probs) + + # Negative phase + neg_visible_probs, neg_visible_states = rbm.sample_visible(pos_hidden_states) + neg_hidden_probs, neg_hidden_states = rbm.sample_hidden(neg_visible_states) + neg_associations = np.dot(neg_visible_states.T, neg_hidden_probs) + + # Update weights and biases + rbm.weights += rbm.learning_rate * ( + (pos_associations - neg_associations) / batch_size + ) + rbm.visible_bias += rbm.learning_rate * np.mean( + batch - neg_visible_states, axis=0 + ) + rbm.hidden_bias += rbm.learning_rate * np.mean( + pos_hidden_probs - neg_hidden_probs, axis=0 + ) + + reconstruction_error += np.mean((batch - neg_visible_states) ** 2) + + print(f"Epoch {epoch+1}, Reconstruction error: {reconstruction_error}") + + # Transform data for next layer + current_input, _ = rbm.sample_hidden(current_input) + +def main(): + """Demonstration experiment with a Deep Belief Network. + + This experiment: + 1. Generates synthetic binary data (simulating MNIST-like dimensions) + 2. Creates a DBN with progressive dimension reduction (784 -> 256 -> 64) + 3. Performs unsupervised pretraining to learn hierarchical features + + The architecture (784 -> 256 -> 64) demonstrates: + - Input layer (784): Matches MNIST image dimensions (28x28 pixels) + - Hidden layer 1 (256): Learns low-level features (edges, corners) + - Hidden layer 2 (64): Learns high-level abstract features + + This progressive reduction in dimensionality forces the network to learn + increasingly compact and abstract representations of the input data. + """ + # Generate some dummy data + data = np.random.binomial(1, 0.5, (1000, 784)) # Example: MNIST-like dimensions + + # Create and train DBN + dbn = DBN([784, 256, 64]) # 784 -> 256 -> 64 architecture + dbn.pretrain(data, epochs=5) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/maml_model_agnostic_meta_learning.py b/pocs/maml_model_agnostic_meta_learning.py similarity index 100% rename from src/maml_model_agnostic_meta_learning.py rename to pocs/maml_model_agnostic_meta_learning.py diff --git a/pocs/poc_hopfield_memory.py b/pocs/poc_hopfield_memory.py new file mode 100644 index 0000000..b5f52e8 --- /dev/null +++ b/pocs/poc_hopfield_memory.py @@ -0,0 +1,327 @@ +"""Hopfield Network Pattern Recognition and Memory Demonstration. + +This module implements a Hopfield Network to demonstrate associative memory and pattern completion, +showing how neural networks can store and recover patterns even in the presence of noise. + +Experiment Overview: +------------------- +The experiment demonstrates a simple pattern recognition system using a Hopfield Network +to store and retrieve 5x5 pixel representations of letters. It showcases three key +capabilities of associative memory: + +1. Pattern Storage: + - Two letter patterns ('H' and 'T') are stored in the network + - Storage uses Hebbian learning to create a weight matrix + - Each pattern becomes an attractor in the network's state space + +2. 
Pattern Corruption: + - The letter 'H' pattern is corrupted with random noise + - 8 random pixels are flipped from their original state + - This simulates real-world noise or partial information + +3. Pattern Recovery: + - The network processes the noisy pattern + - Through iterative updates, it converges to the nearest stored pattern + - Demonstrates the network's ability to perform error correction + +Key Concepts Demonstrated: +------------------------- +- Associative Memory: Patterns are recovered by association, not address +- Attractor Dynamics: Network converges to stable states (stored patterns) +- Error Correction: Ability to clean up noisy or corrupted inputs +- Content-Addressable Memory: Retrieval based on partial or similar content + +Technical Implementation: +----------------------- +- Uses binary threshold neurons (-1/1 states) +- Implements asynchronous updates +- Demonstrates both classical Hopfield dynamics and modern attention-like mechanisms +- Visualizes the process through matplotlib plots + +The experiment shows how simple neural architectures can exhibit complex +cognitive-like behaviors such as pattern completion and error correction, +fundamental properties of biological memory systems. + +Example Usage: +------------- + python poc_hopfield_memory.py + +This will run the demonstration and display three plots: +1. Original 'H' pattern +2. Noisy version of the pattern +3. Recovered pattern after network processing + +Historical Context: +------------------ +Hopfield Networks, introduced by John Hopfield in 1982, were among the first +neural architectures to demonstrate how distributed representations could serve +as content-addressable memory. Their principles influenced modern deep learning +and attention mechanisms. + +References: +----------- +Hopfield, J. J. (1982). Neural networks and physical systems with emergent +collective computational abilities. Proceedings of the National Academy of +Sciences, 79(8), 2554-2558. +""" + +import numpy as np +import matplotlib.pyplot as plt +from typing import List, Tuple +import random + + +class HopfieldNetwork: + """A Hopfield Network implementation for pattern storage and retrieval. + + A Hopfield network is a form of recurrent artificial neural network that serves as a content-addressable memory system. + Key characteristics: + - Symmetric weights (w_ij = w_ji) + - Binary threshold neurons (-1 or 1) + - No self-connections (diagonal weights = 0) + - Asynchronous updates + + The network can: + 1. Store patterns through Hebbian learning + 2. Recover patterns from noisy or partial inputs + 3. Converge to stable states (attractors) + + Theoretical capacity (number of patterns) ≈ 0.15N, where N is network size. + + Based on Hopfield (1982) - Neural networks and physical systems with + emergent collective computational abilities. + """ + + def __init__(self, size: int): + """Initialize the Hopfield Network. + + Args: + size: Number of neurons in the network + """ + self.size = size + self.weights = np.zeros((size, size)) + + def train(self, patterns: List[np.ndarray]) -> None: + """Store patterns in the network using Hebbian learning. + + The training process: + 1. For each pattern, compute outer product of pattern with itself + 2. Sum these products to create weight matrix + 3. Zero diagonal to prevent self-connections + 4. 
Normalize by number of patterns + + Hebbian Rule: "Neurons that fire together, wire together" + w_ij += x_i * x_j where x_i, x_j are neuron states + + Args: + patterns: List of binary patterns to store (each element should be -1 or 1) + """ + for pattern in patterns: + # Hebbian learning rule: strengthen connections between co-active neurons + pattern = pattern.reshape(-1, 1) + self.weights += np.outer(pattern, pattern) + + # Zero out diagonal (no self-connections) and normalize + np.fill_diagonal(self.weights, 0) + self.weights /= len(patterns) + + def update(self, state: np.ndarray, max_iterations: int = 100) -> np.ndarray: + """Update network state until convergence or max iterations reached. + + The update process: + 1. Randomly select neurons for asynchronous update + 2. For each neuron: + - Calculate local field (weighted sum of inputs) + - Update state based on sign of local field + 3. Continue until convergence or max iterations + + Energy function: E = -1/2 ∑_ij w_ij s_i s_j + Network always evolves toward local energy minima. + + Args: + state: Initial state of the network (-1/1 values) + max_iterations: Maximum number of iterations to run + + Returns: + Final state of the network (a stored memory pattern or local minimum) + """ + prev_state = state.copy() + + for _ in range(max_iterations): + # Asynchronous update: update neurons in random order + update_order = list(range(self.size)) + random.shuffle(update_order) + + for i in update_order: + # Calculate local field + h = np.dot(self.weights[i], state) + # Update neuron state using sign activation function + state[i] = 1 if h >= 0 else -1 + + # Check for convergence + if np.array_equal(state, prev_state): + break + prev_state = state.copy() + + return state + + def update_modern(self, state: np.ndarray, beta: float = 1.0) -> np.ndarray: + """Modern continuous Hopfield update with attention-like mechanism. + + Args: + state: Input state + beta: Temperature parameter (controls softmax sharpness) + """ + # Compute attention-like scores + scores = np.exp(beta * np.dot(self.weights, state)) + # Softmax normalization + return scores / np.sum(scores) + + +def create_letter_patterns() -> List[np.ndarray]: + """Create binary patterns for letters 'H' and 'T'. + + Patterns are represented as 5x5 grids flattened to 25-element vectors. + Values are binary (-1 for black, 1 for white). + + Returns: + List containing two patterns: [H_pattern, T_pattern] + """ + H = np.array( + [ + 1, + -1, + -1, + -1, + 1, + 1, + -1, + -1, + -1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + -1, + -1, + -1, + 1, + 1, + -1, + -1, + -1, + 1, + ] + ) + + T = np.array( + [ + 1, + 1, + 1, + 1, + 1, + -1, + -1, + 1, + -1, + -1, + -1, + -1, + 1, + -1, + -1, + -1, + -1, + 1, + -1, + -1, + -1, + -1, + 1, + -1, + -1, + ] + ) + + return [H, T] + + +def plot_pattern(pattern: np.ndarray, title: str) -> None: + """Visualize a pattern as a 5x5 grid.""" + plt.imshow(pattern.reshape(5, 5), cmap="binary") + plt.title(title) + plt.axis("off") + + +def compare_mechanisms(): + """Compare traditional Hopfield dynamics with modern attention mechanisms. + + Traditional Hopfield: + - Uses discrete states (-1/1) + - Binary threshold activation + - Energy minimization dynamics + + Modern Attention (Transformer-like): + - Continuous states + - Softmax activation + - Query-Key-Value computation + + Both approaches implement associative memory through + different mathematical frameworks. + """ + + +def main(): + """Run Hopfield Network demonstration. + + This experiment demonstrates: + 1. 
Pattern Storage: Training network on 'H' and 'T' patterns + 2. Pattern Completion: Recovering full pattern from noisy input + 3. Attractor Dynamics: Network converges to stored memory + + The visualization shows: + - Original clean pattern + - Noisy pattern (corrupted with random flips) + - Recovered pattern after network convergence + + This illustrates the network's ability to perform: + - Content-addressable memory + - Pattern completion + - Error correction + """ + # Create and train network + patterns = create_letter_patterns() + network = HopfieldNetwork(25) + network.train(patterns) + + # Create noisy version of 'H' + noisy_pattern = patterns[0].copy() + noise_positions = random.sample(range(25), 8) # Add noise to 8 positions + for pos in noise_positions: + noisy_pattern[pos] *= -1 + + # Recover pattern + recovered_pattern = network.update(noisy_pattern.copy()) + + # Visualize results + plt.figure(figsize=(15, 5)) + + plt.subplot(131) + plot_pattern(patterns[0], "Original Pattern 'H'") + + plt.subplot(132) + plot_pattern(noisy_pattern, "Noisy Pattern") + + plt.subplot(133) + plot_pattern(recovered_pattern, "Recovered Pattern") + + plt.tight_layout() + plt.show() + + +if __name__ == "__main__": + main()
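+
+
+def hopfield_energy(network: HopfieldNetwork, state: np.ndarray) -> float:
+    """Hopfield energy E = -1/2 * s^T W s (the form documented in update()).
+
+    Optional helper, offered as an illustrative sketch and not called by
+    main(): asynchronous recall with symmetric, zero-diagonal weights can
+    only lower this value, so comparing the energy of the noisy input with
+    the energy of the recovered pattern gives a quick convergence check.
+
+    Example:
+        network = HopfieldNetwork(25)
+        network.train(create_letter_patterns())
+        noisy = create_letter_patterns()[0].copy()
+        noisy[:8] *= -1  # flip a few pixels
+        recovered = network.update(noisy.copy())
+        assert hopfield_energy(network, recovered) <= hopfield_energy(network, noisy)
+    """
+    return -0.5 * float(np.dot(state, np.dot(network.weights, state)))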