Installation & Setup
Installation
# Default install; visit https://pytorch.org for the latest commands for your platform
pip install torch torchvision torchaudio
# CPU-only version (wheels come from the "cpu" package index)
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
# With a specific CUDA version (the index suffix selects it; "cu118" here)
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# Install with conda (pytorch-cuda pins the CUDA version, 11.8 here)
# NOTE(review): confirm on pytorch.org that conda packages are still published for the release you need
conda install pytorch torchvision torchaudio pytorch-cuda=11.8 -c pytorch -c nvidia
import torch  # required: everything below calls torch.*, which was never imported here
import torchvision

# Check PyTorch version
print(torch.__version__)

# Check CUDA availability
print(torch.cuda.is_available())
print(torch.cuda.device_count())
# Querying a device name fails when no CUDA device is present, so only ask when one exists.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
Basic Setup
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import torchvision.transforms as transforms
# Set device: use the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

# Set random seed for reproducibility
torch.manual_seed(42)
if torch.cuda.is_available():
    # Only touch the CUDA generator when a GPU is actually present
    torch.cuda.manual_seed(42)
Tensors
Tensor Creation
# From (nested) Python lists
t1 = torch.tensor([1, 2, 3])
t2 = torch.tensor([[1, 2], [3, 4]])

# Special tensors
zeros = torch.zeros(2, 3)
ones = torch.ones(2, 3)
eye = torch.eye(3)          # 3x3 identity matrix
rand = torch.rand(2, 3)     # uniform samples from [0, 1)
randn = torch.randn(2, 3)   # samples from the standard normal distribution

# With specific data type
t_float32 = torch.tensor([1, 2, 3], dtype=torch.float32)
t_int64 = torch.tensor([1, 2, 3], dtype=torch.int64)

# On specific device
# Asking for 'cuda' on a machine without a usable GPU raises, so fall back to the CPU there.
t_gpu = torch.tensor([1, 2, 3], device='cuda' if torch.cuda.is_available() else 'cpu')
torch.float32 - 32-bit floating point
torch.float64 - 64-bit floating point
torch.int32 - 32-bit integer
torch.int64 - 64-bit integer
torch.bool - Boolean
Tensor Operations
a = torch.tensor([1, 2, 3])
b = torch.tensor([4, 5, 6])

# Element-wise operations
add = a + b
sub = a - b
mul = a * b
div = a / b  # true division: the result is floating point even for integer inputs

# Matrix multiplication
mat1 = torch.randn(2, 3)
mat2 = torch.randn(3, 4)
matmul = torch.matmul(mat1, mat2)  # (2, 3) @ (3, 4) -> (2, 4)

# Reduction operations
x = torch.tensor([[1, 2], [3, 4]])
sum_all = x.sum()
sum_dim0 = x.sum(dim=0)  # sums over rows, leaving one value per column
# mean() is only defined for floating-point/complex tensors and x is int64,
# so convert before averaging (calling x.mean() directly raises a RuntimeError).
mean_all = x.float().mean()
max_val = x.max()
x = torch.randn(2, 3, 4)  # shape (2, 3, 4), 24 elements

# Reshape: the same 24 elements laid out as (6, 4)
reshaped = torch.reshape(x, (6, 4))

# Transpose: swap dims 0 and 1 -> (3, 2, 4)
transposed = torch.transpose(x, 0, 1)

# Squeeze and unsqueeze
squeezed = torch.squeeze(x)         # drops size-1 dims; x has none, so still (2, 3, 4)
unsqueezed = torch.unsqueeze(x, 0)  # inserts a new leading dim -> (1, 2, 3, 4)

# Concatenation along dim 0 -> (4, 3, 4)
cat = torch.cat((x, x), dim=0)
Neural Networks
Model Definition
class SimpleNN(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Number of units in the hidden layer.
        num_classes: Number of output values produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the output of the second linear layer for ``x`` (no final activation)."""
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out
# Create model instance and place its parameters on the selected device
# (Module.to returns the module itself, so the calls can be chained)
model = SimpleNN(784, 128, 10).to(device)
# Feed-forward stack built without a custom class:
# 784 -> 128 -> 64 -> 10, with a ReLU after each hidden linear layer.
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
)
Layers & Activations
nn.Linear - Fully connected layer
nn.Conv2d - 2D convolutional layer
nn.LSTM - Long Short-Term Memory
nn.BatchNorm2d - Batch normalization
nn.Dropout - Dropout layer
nn.ReLU() - Rectified Linear Unit
nn.Sigmoid() - Sigmoid function
nn.Tanh() - Hyperbolic tangent
nn.Softmax() - Softmax function
nn.LeakyReLU() - Leaky ReLU
class CNN(nn.Module):
    """Small convolutional classifier: two 3x3 convs, 2x2 max-pool, dropout, two linear layers.

    ``fc1`` expects 9216 flattened features. With the layers defined here that
    corresponds to single-channel 28x28 inputs: 28 -> 26 -> 24 after the two
    unpadded 3x3 convolutions, 12 after pooling, and 64 * 12 * 12 = 9216.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)   # in_channels, out_channels, kernel_size, stride
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return per-class log-probabilities (log-softmax over dim 1) for a batch ``x``.

        Because the output is already log-softmaxed, it pairs with
        ``nn.NLLLoss``; ``nn.CrossEntropyLoss`` expects raw scores instead.
        """
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)  # keep the batch dim, flatten everything else
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)
Training
Training Loop
# Classification loss, plus an Adam optimizer over every parameter of the model
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
# Training loop
def train(model, device, train_loader, optimizer, epoch):
    """Run one training pass over ``train_loader``.

    The loss is computed with the module-level ``criterion``.

    Args:
        model: Network to train; put into training mode before the loop.
        device: Device every batch is moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches; ``len()`` and
            ``.dataset`` are used for the progress line.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number, used only in the progress output.
    """
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        # Forward pass
        output = model(data)
        loss = criterion(output, target)

        # Backward pass: clear old gradients, backpropagate, then update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print progress every 100 batches
        if batch_idx % 100 == 0:
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} '
                  f'({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}')
Loss & Optimizers
nn.MSELoss() - Mean Squared Error (Regression)
nn.CrossEntropyLoss() - Cross Entropy (Classification)
nn.BCELoss() - Binary Cross Entropy
nn.L1Loss() - Mean Absolute Error
nn.NLLLoss() - Negative Log Likelihood
optim.SGD - Stochastic Gradient Descent
optim.Adam - Adaptive Moment Estimation
optim.RMSprop - Root Mean Square Propagation
optim.Adagrad - Adaptive Gradient Algorithm
# Multiply the learning rate by gamma (0.1) every step_size (10) calls to scheduler.step()
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
# In training loop:
# scheduler.step() after optimizer.step()
# (typically once per epoch, after that epoch's optimizer updates — not once per batch)
Data Loading
from torchvision import datasets, transforms

# Define transforms: convert each image to a tensor, then normalize with the given mean / std
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Load datasets (only the training split is created with download=True)
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform)

# Create data loaders: shuffled batches of 64 for training, fixed-order batches of 1000 for testing
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=1000, shuffle=False)
Custom Dataset
class CustomDataset(Dataset):
    """Map-style dataset over parallel ``data`` and ``labels`` sequences.

    Args:
        data: Indexable collection of samples; its length is the dataset length.
        labels: Indexable collection of labels, looked up with the same index as ``data``.
        transform: Optional callable applied to each sample (not to the label)
            when it is fetched. ``None`` returns samples unchanged.
    """

    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair at ``idx``, with the transform applied to the sample."""
        sample = self.data[idx]
        label = self.labels[idx]
        # Compare against None explicitly so a callable that happens to be falsy
        # (e.g. an empty container-like transform) is still applied.
        if self.transform is not None:
            sample = self.transform(sample)
        return sample, label
Advanced Features
GPU Acceleration
# Pick the device once and report which one is in use
if torch.cuda.is_available():
    device = torch.device('cuda')
    print(f'Using GPU: {torch.cuda.get_device_name(0)}')
else:
    device = torch.device('cpu')
    print('Using CPU')

# Move model to device
model = model.to(device)

# Move tensors to device (Tensor.to is not in-place, so rebind the names)
x = x.to(device)
y = y.to(device)

# Multiple GPUs: wrap the model in DataParallel when more than one GPU is visible
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)
# Gradient accumulation: take one optimizer step per `accumulation_steps` batches
accumulation_steps = 4
for i, (inputs, labels) in enumerate(dataloader):
    outputs = model(inputs)
    loss = criterion(outputs, labels)
    # Scale each batch loss so the gradients summed over the group are an average
    loss = loss / accumulation_steps
    loss.backward()  # gradients keep adding up in .grad until zero_grad() is called
    if (i + 1) % accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()
# NOTE(review): when the number of batches is not a multiple of accumulation_steps,
# the gradients of the trailing batches are never applied by this loop — consider a
# final optimizer.step() / optimizer.zero_grad() after it.
Model Saving & Loading
# Save only the learned parameters (the state dict)
torch.save(model.state_dict(), 'model.pth')

# Load model: rebuild the architecture, then restore the saved parameters
model = SimpleNN(784, 128, 10)
model.load_state_dict(torch.load('model.pth'))
model.eval()

# Save entire model
torch.save(model, 'model_complete.pth')
# A whole pickled module cannot be read by the weights-only loader (the default in
# recent PyTorch releases), so opt out explicitly.
# SECURITY: this unpickles arbitrary Python objects — only load files you trust.
model = torch.load('model_complete.pth', weights_only=False)

# Save checkpoint
checkpoint = {
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss,
}
torch.save(checkpoint, 'checkpoint.pth')

# Load checkpoint and restore model, optimizer and bookkeeping values
checkpoint = torch.load('checkpoint.pth')
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

model.eval()
# or model.train()
Additional Resources
Learning Resources
- Official Tutorials: PyTorch Official Tutorials
- Courses: Deep Learning with PyTorch, Fast.ai
- Books: "Deep Learning with PyTorch", "PyTorch Pocket Reference"
- Documentation: PyTorch Docs, Torchvision Docs
- Communities: PyTorch Forums, Stack Overflow, GitHub
Useful Tools
- Visualization: TensorBoard, Matplotlib
- Debugging: Python Debugger (pdb), TorchScript
- Deployment: TorchServe, ONNX, LibTorch
- Libraries: Torchvision, Torchaudio, Torchtext
- Extensions: PyTorch Lightning, Fast.ai, Hugging Face
Comprehensive PyTorch Deep Learning Cheatsheet Reference
This PyTorch Deep Learning cheatsheet on Nikhil Learn Hub collects syntax, commands, and practical snippets for quick revision. Discover PyTorch tensors, neural networks, training workflows, and model-building techniques with practical code examples.
Use the reference cards and examples above during coding sessions; return here instead of scattered searches when you need dependable reminders. Follow the Deep Learning roadmap when you want structured lessons beyond one-page lookups.
Quick lookup coverage
- Syntax, commands, and API signatures
- Copy-ready examples and common patterns
- Terminology for coursework and interviews
- Cross-links to the matching learning roadmap
How to study with this sheet
- Production debugging and tuning reminders
- Security, performance, or scale cautions
- Integration with adjacent stacks on this site
- Deeper study through tutorials and roadmaps
Who Should Use This Cheatsheet
Students, self-taught developers, and professionals who need fast PyTorch Deep Learning lookups during labs, debugging, or interview revision should keep this page bookmarked.
Related Resources on Nikhil Learn Hub
- Deep Learning roadmap: structured learning path for the same technology
- Cheatsheets hub: browse all quick-reference sheets
- Technology hub: tutorials, roadmaps, and practice hubs