Deep Learning Cheatsheet

Fundamentals

Neural Network Basics

# Basic neural network layer: a fully connected (Dense) layer with 64 units
# and ReLU activation.

layer = Dense(units=64, activation='relu')

# Forward pass: calling the layer on an input returns the layer's output.
# NOTE(review): `input` is a placeholder for your input tensor; as written it
# resolves to Python's built-in `input` function, so bind a real tensor first.

output = layer(input)

# Loss function: a binary cross-entropy loss object.

loss = tf.keras.losses.BinaryCrossentropy()

# Optimizer: Adam configured with a learning rate of 0.001.

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

Note: Neural networks consist of layers of neurons that transform input data through weighted connections and activation functions.

Activation Functions

# Sigmoid

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    Uses an overflow-free formulation, so very negative inputs do not
    trigger floating-point overflow in ``exp``.

    Args:
        x: A real scalar or array-like of real numbers.

    Returns:
        Values in [0, 1] with the same shape as ``x``; a NumPy scalar when
        ``x`` is a scalar.
    """
    x = np.asarray(x)
    # exp(-|x|) is always <= 1, so it cannot overflow (exp(-x) can for x << 0).
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x);  x < 0: e^x / (1 + e^x) -- algebraically equal.
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as-is.
    return result[()]

# ReLU (Rectified Linear Unit)

def relu(x):
    """Apply the rectified linear unit: the element-wise maximum of 0 and ``x``."""
    lower_bound = 0
    rectified = np.maximum(lower_bound, x)
    return rectified

# Softmax (for multi-class classification)

def softmax(x, axis=0):
    """Return the softmax of ``x`` along ``axis``.

    The maximum is subtracted per slice along ``axis`` before exponentiating,
    so every slice contains at least one ``exp(0) == 1`` term and its
    normalizing sum can never underflow to zero (which would yield NaN).

    Args:
        x: Array-like of scores (logits).
        axis: Axis along which the outputs sum to 1. Defaults to 0; pass
            ``axis=-1`` for inputs laid out as (batch, classes).

    Returns:
        A NumPy array shaped like ``x`` whose entries along ``axis`` are
        non-negative and sum to 1.
    """
    x = np.asarray(x)
    # keepdims=True keeps the reduced axis so the result broadcasts against x
    # for any choice of axis.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exp_x = np.exp(shifted)
    return exp_x / np.sum(exp_x, axis=axis, keepdims=True)

Note: Activation functions introduce non-linearity, allowing neural networks to learn complex patterns.

Architectures

Convolutional Neural Networks (CNN)

# Basic CNN architecture

# Stack the layers one at a time: two conv/pool stages, then a dense classifier.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(10, activation='softmax'))

# Compile the model
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

Note: CNNs are particularly effective for image processing tasks due to their ability to capture spatial hierarchies.

Recurrent Neural Networks (RNN)

# Simple RNN

# Three stacked SimpleRNN layers (the first two emit full sequences) and a
# single-unit Dense head.
model = Sequential()
model.add(SimpleRNN(50, return_sequences=True, input_shape=(None, 1)))
model.add(SimpleRNN(50, return_sequences=True))
model.add(SimpleRNN(50))
model.add(Dense(1))

# LSTM (Long Short-Term Memory)

# Three stacked LSTM layers (the first two emit full sequences) and a
# single-unit Dense head.
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(None, 1)))
model.add(LSTM(50, return_sequences=True))
model.add(LSTM(50))
model.add(Dense(1))

Note: RNNs are designed for sequential data. LSTMs add gated memory cells that mitigate the vanishing gradient problem of simple RNNs.

Transformers

# Transformer implementation (simplified)

class TransformerBlock(tf.keras.layers.Layer):
    """Transformer block: self-attention and a feed-forward network.

    Each sub-layer is followed by dropout, a residual connection, and layer
    normalization.

    Args:
        embed_dim: Output width of the feed-forward network; also passed as
            ``key_dim`` to the attention layer.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward network.
        rate: Dropout rate applied after each sub-layer.
        **kwargs: Forwarded to the base layer constructor.
    """

    def __init__(self, embed_dim, num_heads, ff_dim, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        self.att = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)
        self.ffn = Sequential([
            Dense(ff_dim, activation='relu'),
            Dense(embed_dim),
        ])
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

    def call(self, inputs, training=None):
        """Apply the block to ``inputs``.

        Args:
            inputs: Input tensor; used as both query and value for attention.
            training: Forwarded to the dropout layers. Optional, so the block
                can be called without specifying a mode.

        Returns:
            The layer-normalized output of the second residual connection.
        """
        # Self-attention sub-layer with residual connection.
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        # Feed-forward sub-layer with residual connection.
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

Note: Transformers use self-attention mechanisms and have become the standard for NLP tasks, powering models like BERT and GPT.

Generative Adversarial Networks (GANs)

# Basic GAN structure

def build_generator():
    """Build the generator: three Dense/LeakyReLU/BatchNormalization stages
    fed by a 100-dimensional input, then a tanh Dense layer of 784 units
    reshaped to (28, 28, 1).

    Returns:
        The assembled Sequential model.
    """
    model = Sequential()
    # Only the first Dense layer declares the input dimension.
    stages = ((256, {'input_dim': 100}), (512, {}), (1024, {}))
    for width, dense_options in stages:
        model.add(Dense(width, **dense_options))
        model.add(LeakyReLU(alpha=0.2))
        model.add(BatchNormalization(momentum=0.8))
    model.add(Dense(784, activation='tanh'))
    model.add(Reshape((28, 28, 1)))
    return model

Note: GANs consist of a generator and discriminator that compete against each other, enabling the generation of realistic synthetic data.

Frameworks

TensorFlow/Keras

# Basic model creation

import tensorflow as tf

from tensorflow.keras import layers

# Sequential API

# Sequential API: pass the layers as a list, in the order they are applied.
# Here: two 64-unit ReLU layers followed by a 10-unit softmax output layer.
model = tf.keras.Sequential([

    layers.Dense(64, activation='relu'),

    layers.Dense(64, activation='relu'),

    layers.Dense(10, activation='softmax')

])

# Functional API

# Functional API: declare a symbolic input, then call each layer on the
# previous result to wire up the graph explicitly.
inputs = tf.keras.Input(shape=(32,))

# Each call returns a new tensor; `x` is rebound to the latest one.
x = layers.Dense(64, activation='relu')(inputs)

x = layers.Dense(64, activation='relu')(x)

outputs = layers.Dense(10, activation='softmax')(x)

# The model is defined by its input and output tensors.
model = tf.keras.Model(inputs=inputs, outputs=outputs)

PyTorch

# Basic model creation

import torch

import torch.nn as nn

import torch.nn.functional as F

class Net(nn.Module):
    """Small convolutional classifier: two 3x3 convolutions, one max-pool,
    and two linear layers producing log-probabilities over 10 classes."""

    def __init__(self):
        super().__init__()
        # Convolutions: 1 -> 32 -> 64 channels, 3x3 kernels, stride 1.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        # Classifier head: 9216 flattened features -> 128 -> 10.
        self.fc1 = nn.Linear(in_features=9216, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return per-class log-probabilities for the batch ``x``."""
        features = F.relu(self.conv2(F.relu(self.conv1(x))))
        pooled = F.max_pool2d(features, kernel_size=2)
        # Keep the batch dimension, flatten everything else.
        flat = torch.flatten(pooled, start_dim=1)
        logits = self.fc2(F.relu(self.fc1(flat)))
        return F.log_softmax(logits, dim=1)

Optimization

Optimizers

# Common optimizers in TensorFlow
# Each line below is an alternative: every assignment rebinds `optimizer`,
# so keep only the one you need.

# SGD with momentum 0.9.
optimizer = tf.keras.optimizers.SGD(learning_rate=0.01, momentum=0.9)

# Adam with explicit beta_1 / beta_2 values.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999)

# RMSprop with rho=0.9.
optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001, rho=0.9)

# Adagrad.
optimizer = tf.keras.optimizers.Adagrad(learning_rate=0.01)

# Learning rate scheduling: an exponential-decay schedule starting at 1e-2,
# configured with decay_steps=10000 and decay_rate=0.9.

lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(

    initial_learning_rate=1e-2,

    decay_steps=10000,

    decay_rate=0.9)

# Pass the schedule object in place of a fixed learning rate.
optimizer = tf.keras.optimizers.SGD(learning_rate=lr_schedule)

Regularization

# L1/L2 Regularization: attach a penalty to a layer's kernel weights via
# `kernel_regularizer`.

# L2 penalty with factor 0.01.
model.add(Dense(64, kernel_regularizer=tf.keras.regularizers.l2(0.01)))

# Combined L1 + L2 penalty, each with factor 0.01.
model.add(Dense(64, kernel_regularizer=tf.keras.regularizers.l1_l2(l1=0.01, l2=0.01)))

# Dropout with rate 0.5.

model.add(Dropout(0.5))

# Batch Normalization with default settings.

model.add(BatchNormalization())

# Early Stopping: callback that monitors 'val_loss' with patience=10 and
# restore_best_weights=True.

early_stopping = tf.keras.callbacks.EarlyStopping(

    monitor='val_loss',

    patience=10,

    restore_best_weights=True)

Quick Reference

Loss Functions

# NOTE: the names below are a reference list of loss identifiers, not
# runnable statements.

# Regression

mean_squared_error  # MSE: mean of squared errors

mean_absolute_error # MAE: mean of absolute errors

huber_loss          # Combines MSE and MAE

# Classification

binary_crossentropy # Binary classification

categorical_crossentropy # Multi-class, one-hot encoded labels

sparse_categorical_crossentropy # Multi-class, integer labels

# Specialized

hinge               # SVM-like loss

kl_divergence       # Kullback-Leibler divergence, for probabilistic models

Evaluation Metrics

# NOTE: the names below are a reference list of metric identifiers, not
# runnable statements.

# Classification metrics

accuracy

precision

recall

f1_score

auc # Area Under the ROC Curve

# Regression metrics

mean_squared_error

mean_absolute_error

r2_score # Coefficient of determination (R^2)

# Custom metrics

def custom_metric(y_true, y_pred):
    """Return the mean of the signed difference ``y_true - y_pred``.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values.

    Returns:
        The result of ``tf.reduce_mean`` over the element-wise difference.
    """
    signed_error = y_true - y_pred
    return tf.reduce_mean(signed_error)

Quick reference guide

Comprehensive Deep Learning Cheatsheet Reference

This Deep Learning cheatsheet on Nikhil Learn Hub collects syntax, commands, and practical snippets for quick revision. Understand neural networks, backpropagation, CNNs, RNNs, and deep learning model concepts with practical examples.

Use the reference cards and examples above during coding sessions; return here instead of running scattered searches when you need dependable reminders. Follow the Deep Learning roadmap when you want structured lessons beyond one-page lookups.

Quick lookup coverage

Syntax, commands, and API signatures
Copy-ready examples and common patterns
Terminology for coursework and interviews
Cross-links to the matching learning roadmap

How to study with this sheet

Production debugging and tuning reminders
Security, performance, or scale cautions
Integration with adjacent stacks on this site
Deeper study through tutorials and roadmaps

Who Should Use This Cheatsheet

Students, self-taught developers, and professionals who need fast Deep Learning lookups during labs, debugging, or interview revision should keep this page bookmarked.

Related Resources on Nikhil Learn Hub

Deep Learning roadmap — structured learning path for the same technology
Cheatsheets hub — browse all quick-reference sheets
Technology hub — tutorials, roadmaps, and practice hubs