PyTorch: Define-by-Run Framework for Deep Learning
PyTorch provides imperative, Pythonic computation with dynamic neural networks. Master tensors, automatic differentiation, modular design, and production deployment — all with clean, debug-friendly code.
Tensors
ND-Arrays + GPU
Autograd
Automatic diff
nn.Module
Neural networks
DataLoader
Parallel loading
PyTorch Tensors – NumPy on Steroids
Tensors are multidimensional arrays that can run on GPU. They can track gradients for autograd and, on CPU, share memory with NumPy arrays.
Tensor creation & GPU
import torch

# From list, NumPy, or random
x = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32)
# Normal distribution. NOTE: shape must be (k, 2) so that x @ y.T is a
# valid (2, 2) @ (2, k) product — the original (3, 5) raised a RuntimeError.
y = torch.randn(3, 2)
# Move to GPU
if torch.cuda.is_available():
    x = x.cuda()
    y = y.cuda()
    print(f"On GPU: {x.device}")
# Reshape, math, broadcasting
z = x @ y.T  # (2, 2) @ (2, 3) -> (2, 3) matrix multiplication
NumPy bridge
import numpy as np

# Tensor -> NumPy: the tensor must live on the CPU first
arr = x.cpu().numpy()
# NumPy -> Tensor: zero-copy, both objects share one buffer
tensor_from_np = torch.from_numpy(arr)
# Trailing underscore marks an in-place operation
x.add_(5)
print(x)
torch.from_numpy() shares memory with NumPy. Modifying one modifies the other.
Autograd – Automatic Differentiation
Every tensor with requires_grad=True records operations for gradient computation.
import torch

# Leaf tensor flagged to record every operation for autodiff
x = torch.ones(2, 2, requires_grad=True)
y = x + 2
z = 3 * y * y
out = z.mean()

# Reverse-mode pass: populates .grad on every leaf tensor
out.backward()
print(x.grad)  # d(out)/dx = 6 * (x + 2) / 4 = 4.5 at x = 1

# Disable tracking for inference-only work
with torch.no_grad():
    w = x * 2  # not recorded in the graph
Use out.backward(retain_graph=True) when you need to call backward more than once on the same graph.
nn.Module – Neural Network Building Blocks
All models inherit from torch.nn.Module. Define layers in __init__ and forward pass in forward.
Custom CNN Classifier
import torch.nn as nn
import torch.nn.functional as F


class ConvNet(nn.Module):
    """Small CNN classifier for 32x32 RGB images (e.g. CIFAR-10-sized input)."""

    def __init__(self, num_classes=10):
        super().__init__()
        # Registration order is preserved in print(model) and state_dict.
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # 32x32 input halved by two pooling stages -> 8x8 spatial grid
        self.fc1 = nn.Linear(64 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Map a (N, 3, 32, 32) batch to (N, num_classes) logits."""
        h = self.pool(F.relu(self.bn1(self.conv1(x))))
        h = self.pool(F.relu(self.conv2(h)))
        h = h.view(h.size(0), -1)  # keep batch dim, flatten the rest
        h = F.relu(self.fc1(h))
        return self.fc2(h)


model = ConvNet()
print(model)
nn.Sequential for fast prototyping
# Quick prototype: layers applied strictly in order, no custom class needed
model = nn.Sequential(
    nn.Conv2d(3, 16, 3),   # 32x32 -> 30x30 (no padding)
    nn.ReLU(),
    nn.MaxPool2d(2),       # 30x30 -> 15x15
    nn.Flatten(),
    nn.Linear(16 * 15 * 15, 10),
)

# Walk every trainable parameter
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    print(name, param.shape)
Count parameters with: sum(p.numel() for p in model.parameters())
Complete Training Pipeline
import torch.optim as optim

# Model, loss, optimizer.
# Pick the device ONCE and use .to(device) everywhere — the original called
# .cuda() unconditionally on every batch, which crashes on CPU-only machines.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ConvNet().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)

# Training
for epoch in range(10):
    model.train()  # enable dropout / batchnorm running-stat updates
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()        # clear grads accumulated by the last step
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()              # backprop
        optimizer.step()             # parameter update
        running_loss += loss.item()  # .item() detaches a Python float

    # Validation
    model.eval()  # switch batchnorm/dropout to inference behavior
    correct = 0
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
    accuracy = correct / len(val_loader.dataset)
    print(f"Epoch {epoch}: loss {running_loss:.3f}, acc {accuracy:.3f}")
Datasets, DataLoaders & Transforms
torchvision datasets
from torchvision import datasets, transforms
from torch.utils.data import DataLoader  # was missing: DataLoader is used below

# Standard ImageNet-style preprocessing pipeline
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    # Per-channel ImageNet mean / std
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225]),
])
dataset = datasets.ImageFolder('path/to/data', transform=transform)
loader = DataLoader(dataset, batch_size=32, shuffle=True, num_workers=4)
Custom Dataset
from torch.utils.data import Dataset


class CustomDataset(Dataset):
    """Wrap a two-column DataFrame of (image, label) rows as a map-style Dataset."""

    def __init__(self, df, transform=None):
        self.data = df              # row i of the DataFrame -> sample i
        self.transform = transform  # optional callable applied to the image

    def __len__(self):
        """Number of samples (rows in the DataFrame)."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return (image, label) for row ``idx``, applying the transform if set."""
        row = self.data.iloc[idx]
        img, label = row
        if self.transform:
            img = self.transform(img)
        return img, label
Samplers & distributed
WeightedRandomSampler for imbalance, DistributedSampler for multi-GPU.
GPU Acceleration & Mixed Precision
Automatic Mixed Precision (AMP)
from torch.cuda.amp import autocast, GradScaler

scaler = GradScaler()  # scales the loss so fp16 gradients do not underflow
model = model.cuda()   # autocast here targets CUDA ops, so model lives on GPU

for images, targets in loader:
    images, targets = images.cuda(), targets.cuda()
    optimizer.zero_grad()
    # Forward pass runs in float16 where it is numerically safe
    with autocast():
        outputs = model(images)
        loss = criterion(outputs, targets)
    # Scaled backward, then unscale + step + refresh the scale factor
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()
AMP uses float16 where it is numerically safe while preserving float32 master weights, typically giving a 2–3x speedup on Volta and newer GPUs.
Use torch.utils.checkpoint (gradient checkpointing) to trade recomputation for activation memory.
Serialization & Production
Save / Load weights
# Save only the learned parameters (recommended over pickling the whole model)
torch.save(model.state_dict(), 'model.pth')

# Load (define the model architecture first).
# map_location='cpu' lets a GPU-trained checkpoint load on a CPU-only
# machine; without it torch.load fails when CUDA is unavailable.
model = ConvNet()
model.load_state_dict(torch.load('model.pth', map_location='cpu'))
model.eval()  # switch batchnorm/dropout to inference behavior

# Save the entire model (not recommended: pickles class definitions and paths)
torch.save(model, 'full_model.pth')
TorchScript & ONNX
# TorchScript: compile model code + weights into a self-contained graph
ts_model = torch.jit.script(model)
ts_model.save('model.pt')

# ONNX: export by running the model on an example input
example = torch.randn(1, 3, 32, 32)
torch.onnx.export(model, example, "model.onnx")
PyTorch Ecosystem
Lightning
Boilerplate removal, multi-GPU, TPU.
Hub
Pretrained models: torch.hub.load('pytorch/vision', 'resnet18')
FX
Functional transformations, graph manipulation.
TorchVision
Datasets, models, transforms, ops.
Optimizers & Schedulers in PyTorch
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts, StepLR

# AdamW: Adam with decoupled weight decay
optimizer = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=0.01)
# SGD with momentum (reassignment is intentional — these are alternatives)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4)

# Cosine annealing with warm restarts: restart after T_0 epochs, then T_0*T_mult
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)
# StepLR: multiply the LR by gamma every step_size epochs
scheduler = StepLR(optimizer, step_size=30, gamma=0.1)

# In training loop: step the scheduler once per epoch, after the optimizer steps
for epoch in range(epochs):
    train(...)
    scheduler.step()  # or scheduler.step(epoch) for some schedulers
Debugging & Profiling
from torch.utils.tensorboard import SummaryWriter

# One log subdirectory per experiment run
writer = SummaryWriter('runs/experiment')
# Scalar curve — `loss` and `epoch` come from the surrounding training loop
writer.add_scalar('Loss/train', loss, epoch)
# Histogram of conv1 weights over time (for spotting drift/saturation)
writer.add_histogram('conv1.weight', model.conv1.weight, epoch)