Fundamentals

Neural Network Basics

# Basic neural network layer
import tensorflow as tf
from tensorflow.keras.layers import Dense

layer = Dense(units=64, activation='relu')

# Forward pass (inputs: a batch of feature vectors)
inputs = tf.random.normal((32, 16))
output = layer(inputs)

# Loss function
loss = tf.keras.losses.BinaryCrossentropy()

# Optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
Note: Neural networks consist of layers of neurons that transform input data through weighted connections and activation functions.
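
A minimal NumPy sketch of what a single dense layer computes (a weighted sum plus a bias, passed through an activation); the shapes and values are illustrative only.

import numpy as np

x = np.array([0.5, -1.2, 3.0])        # input vector (3 features)
W = np.random.randn(64, 3) * 0.1      # weight matrix: 64 units x 3 inputs
b = np.zeros(64)                      # bias vector

z = W @ x + b                         # weighted sum (pre-activation)
output = np.maximum(0, z)             # ReLU, as in Dense(..., activation='relu')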

Activation Functions

import numpy as np

# Sigmoid
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# ReLU (Rectified Linear Unit)
def relu(x):
    return np.maximum(0, x)

# Softmax (for multi-class classification)
def softmax(x):
    exp_x = np.exp(x - np.max(x))   # subtract the max for numerical stability
    return exp_x / np.sum(exp_x, axis=0)
Note: Activation functions introduce non-linearity, allowing neural networks to learn complex patterns.
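
A quick sanity check on a sample vector (the values are arbitrary); note that the softmax outputs are non-negative and sum to 1, which is why it is used to produce class probabilities.

x = np.array([-2.0, 0.0, 3.0])
print(sigmoid(x))   # each value squashed into (0, 1)
print(relu(x))      # negatives clipped to 0 -> [0. 0. 3.]
print(softmax(x))   # non-negative values that sum to 1.0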

Architectures

Convolutional Neural Networks (CNN)

# Basic CNN architecture
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
Note: CNNs are particularly effective for image processing tasks due to their ability to capture spatial hierarchies.
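
A minimal training sketch for the CNN above on the built-in MNIST digits dataset; the epoch count and batch size are arbitrary choices, not tuned values.

import tensorflow as tf

(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train[..., None] / 255.0   # add a channel dimension, scale to [0, 1]
x_test = x_test[..., None] / 255.0

model.fit(x_train, y_train, epochs=5, batch_size=64,
          validation_data=(x_test, y_test))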

Recurrent Neural Networks (RNN)

# Simple RNN
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, LSTM, Dense

model = Sequential([
    SimpleRNN(50, return_sequences=True, input_shape=(None, 1)),
    SimpleRNN(50, return_sequences=True),
    SimpleRNN(50),
    Dense(1)
])

# LSTM (Long Short-Term Memory)
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(None, 1)),
    LSTM(50, return_sequences=True),
    LSTM(50),
    Dense(1)
])
Note: RNNs are designed for sequential data, with LSTMs addressing the vanishing gradient problem in traditional RNNs.
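
A hedged sketch of fitting the LSTM above on synthetic sine-wave windows; the data generation and hyperparameters are purely illustrative.

import numpy as np

series = np.sin(np.linspace(0, 100, 10000))

window = 50
X = np.array([series[i:i + window] for i in range(len(series) - window)])
y = series[window:]
X = X[..., None]   # shape: (samples, timesteps, 1)

model.compile(optimizer='adam', loss='mse')
model.fit(X, y, epochs=5, batch_size=32)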

Transformers

# Transformer encoder block (simplified)
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import (Dense, Dropout, LayerNormalization,
                                     MultiHeadAttention)

class TransformerBlock(tf.keras.layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = Sequential([
            Dense(ff_dim, activation='relu'),
            Dense(embed_dim),
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=False):
        # Self-attention: queries, keys, and values all come from the same input
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)   # residual connection + norm
        # Position-wise feed-forward network
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)      # second residual + norm
Note: Transformers use self-attention mechanisms and have become the standard for NLP tasks, powering models like BERT and GPT.
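
A quick usage sketch for the TransformerBlock defined above; the embedding size, head count, and sequence length are arbitrary illustrative values.

block = TransformerBlock(embed_dim=64, num_heads=4, ff_dim=128)
dummy = tf.random.normal((8, 20, 64))   # (batch, sequence length, embed_dim)
out = block(dummy, training=False)
print(out.shape)                        # (8, 20, 64) -- the shape is preserved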

Generative Adversarial Networks (GANs)

# Basic GAN structure: the generator maps a 100-dim noise vector to a 28x28 image
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU, BatchNormalization, Reshape

def build_generator():
    model = Sequential()
    model.add(Dense(256, input_dim=100))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(512))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(1024))
    model.add(LeakyReLU(alpha=0.2))
    model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(784, activation='tanh'))   # 784 = 28 * 28 pixels
    model.add(Reshape((28, 28, 1)))
    return model
Note: GANs consist of a generator and discriminator that compete against each other, enabling the generation of realistic synthetic data.
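
The note above also mentions a discriminator; a matching sketch is shown below (the layer sizes mirror the generator and are illustrative, not a canonical design).

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LeakyReLU, Flatten

def build_discriminator():
    model = Sequential()
    model.add(Flatten(input_shape=(28, 28, 1)))   # flatten the image to a vector
    model.add(Dense(512))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(256))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Dense(1, activation='sigmoid'))     # probability that the input is real
    return model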

Frameworks

TensorFlow/Keras

# Basic model creation
import tensorflow as tf
from tensorflow.keras import layers

# Sequential API
model = tf.keras.Sequential([
    layers.Dense(64, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])

# Functional API
inputs = tf.keras.Input(shape=(32,))
x = layers.Dense(64, activation='relu')(inputs)
x = layers.Dense(64, activation='relu')(x)
outputs = layers.Dense(10, activation='softmax')(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
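
A minimal compile-and-fit sketch for either model above, using random placeholder data (the shapes match the Functional API example; the data is purely illustrative).

import numpy as np

x = np.random.rand(1000, 32).astype('float32')
y = np.random.randint(0, 10, size=(1000,))

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
model.fit(x, y, epochs=3, batch_size=32, validation_split=0.1)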

PyTorch

# Basic model creation
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        # Two convolutional layers for 1-channel (grayscale) 28x28 inputs
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        # 9216 = 64 channels * 12 * 12 spatial size after the convs and pooling
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = torch.flatten(x, 1)          # flatten everything except the batch dim
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)   # log-probabilities over 10 classes
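
A single training-step sketch for the Net above, using a random batch as a stand-in for real data (the batch size and learning rate are arbitrary).

model = Net()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

x = torch.randn(8, 1, 28, 28)        # dummy batch of 28x28 grayscale images
target = torch.randint(0, 10, (8,))  # dummy integer class labels

optimizer.zero_grad()
output = model(x)                    # log-probabilities from log_softmax
loss = F.nll_loss(output, target)    # negative log-likelihood loss
loss.backward()
optimizer.step()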

Optimization

Optimizers

# Common optimizers in TensorFlow
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9)
optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.01)

# Learning rate scheduling
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-2,
    decay_steps=10000,
    decay_rate=0.9)
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule)
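
The schedule object is also callable with a step number, which is a quick way to inspect the decay curve before training; the values below follow from the settings above (staircase decay is off by default).

for step in [0, 10000, 20000]:
    print(step, float(lr_schedule(step)))   # 0.01, 0.009, 0.0081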

Regularization

# L1/L2 Regularization
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization

model.add(Dense(64, kernel_regularizer=tf.keras.regularizers.l2(0.01)))
model.add(Dense(64, kernel_regularizer=tf.keras.regularizers.l1_l2(l1=0.01, l2=0.01)))

# Dropout
model.add(Dropout(0.5))

# Batch Normalization
model.add(BatchNormalization())

# Early Stopping
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True)
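
Callbacks only take effect when passed to fit; a brief usage sketch (x_train and y_train are placeholders for real training data).

history = model.fit(x_train, y_train,
                    validation_split=0.2,
                    epochs=100,
                    callbacks=[early_stopping])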

Quick Reference

Loss Functions

# Regression
mean_squared_error # MSE
mean_absolute_error # MAE
huber_loss # Combines MSE and MAE

# Classification
binary_crossentropy # Binary classification
categorical_crossentropy # Multi-class, one-hot encoded
sparse_categorical_crossentropy # Multi-class, integer labels

# Specialized
hinge # SVM-like loss
kl_divergence # For probabilistic models
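
In Keras, these can be passed to compile either as strings or as class instances (instances expose extra options such as from_logits); a brief sketch:

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
# Equivalent, with configurable behaviour:
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False))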

Evaluation Metrics

# Classification metrics
accuracy
precision
recall
f1_score
auc # Area Under ROC Curve

# Regression metrics
mean_squared_error
mean_absolute_error
r2_score # Coefficient of determination

# Custom metrics
def custom_metric(y_true, y_pred):
    # Example: mean absolute error, implemented with TensorFlow ops
    return tf.reduce_mean(tf.abs(y_true - y_pred))
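
Custom metrics are passed to compile alongside the built-in ones; a brief usage sketch (the model is assumed to be any Keras model awaiting compilation).

model.compile(optimizer='adam',
              loss='mse',
              metrics=['mae', custom_metric])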