Installation & Setup
Installation
# Install OpenCV (main modules only)
pip install opencv-python
# Install with contrib modules
# NOTE: opencv-python and opencv-contrib-python both provide the `cv2` module;
# the opencv-python project documentation says to install only ONE of them
# in a given environment.
pip install opencv-contrib-python
# Install computer vision libraries
pip install opencv-python matplotlib numpy scikit-image pillow
# Install deep learning frameworks
pip install torch torchvision tensorflow keras
# Install with conda
conda install opencv matplotlib numpy scikit-image pillow
# Verify installation
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Check OpenCV version
print(cv2.__version__)
# Check if image loads correctly
# cv2.imread does not raise on failure: it returns None when the file is
# missing or cannot be decoded, so check before touching the result.
img = cv2.imread('test.jpg')
if img is None:
    raise FileNotFoundError("Could not read 'test.jpg' (missing or unreadable file)")
print(f'Image shape: {img.shape}')
Basic Setup
import cv2
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision.transforms as transforms
# Pick the GPU when CUDA is available, otherwise fall back to the CPU
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(f'Using device: {device}')
# Seed both NumPy and PyTorch with the same value for reproducibility
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
Image Basics
Image Loading & Display
# Read image (OpenCV loads color images in BGR channel order)
img = cv2.imread('image.jpg')
# Read image as grayscale
img_gray = cv2.imread('image.jpg', cv2.IMREAD_GRAYSCALE)
# cv2.imread returns None instead of raising when the file cannot be read
if img is None or img_gray is None:
    raise FileNotFoundError("Could not read 'image.jpg' (missing or unreadable file)")
# Display image with OpenCV
cv2.imshow('Image', img)
cv2.waitKey(0)  # 0 = wait indefinitely for a key press
cv2.destroyAllWindows()
# Display with Matplotlib (convert BGR to RGB)
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img_rgb)
plt.axis('off')
plt.show()
# Get image properties
print(f'Shape: {img.shape}')  # (height, width, channels)
print(f'Size: {img.size}')  # total number of array elements (height * width * channels)
print(f'Data type: {img.dtype}')
print(f'Min value: {img.min()}')
print(f'Max value: {img.max()}')
Basic Operations
# Convert color spaces (the source image is BGR)
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Resize image to an explicit size; cv2.resize takes (width, height)
new_width, new_height = 640, 480  # example target size -- adjust as needed
resized = cv2.resize(img, (new_width, new_height))
# Resize by scale factors instead of an explicit size
resized_fx = cv2.resize(img, None, fx=0.5, fy=0.5)
# Crop image (NumPy slicing: rows/y first, then columns/x)
x1, y1, x2, y2 = 50, 50, 200, 200  # example region of interest -- adjust as needed
cropped = img[y1:y2, x1:x2]
# Rotate image by 45 degrees around its center, keeping the original scale
(h, w) = img.shape[:2]
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, 45, 1.0)
rotated = cv2.warpAffine(img, M, (w, h))
# The drawing functions below modify img in place and take BGR colors;
# draw on img.copy() instead if the untouched image is still needed.
# Draw line
cv2.line(img, (0, 0), (100, 100), (255, 0, 0), 5)
# Draw rectangle
cv2.rectangle(img, (50, 50), (200, 200), (0, 255, 0), 3)
# Draw circle (thickness -1 fills the shape)
cv2.circle(img, (100, 100), 50, (0, 0, 255), -1)
# Add text
cv2.putText(img, 'Hello', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
Image Processing
Filters & Transformations
# Gaussian blur with a 5x5 kernel (sigmaX=0: sigma is derived from the kernel size)
blurred = cv2.GaussianBlur(img, ksize=(5, 5), sigmaX=0)
# Median blur with aperture size 5
median = cv2.medianBlur(img, ksize=5)
# Bilateral filter (smooths while keeping edges)
bilateral = cv2.bilateralFilter(img, d=9, sigmaColor=75, sigmaSpace=75)
# Canny edge detection with hysteresis thresholds 100 / 200
edges = cv2.Canny(img, 100, 200)
# Sobel derivatives along x and along y, computed in 64-bit float
sobelx = cv2.Sobel(img, ddepth=cv2.CV_64F, dx=1, dy=0, ksize=5)
sobely = cv2.Sobel(img, ddepth=cv2.CV_64F, dx=0, dy=1, ksize=5)
# Morphological operations share one 5x5 structuring element of ones
kernel = np.ones((5, 5), dtype=np.uint8)
# Erosion
erosion = cv2.erode(img, kernel, iterations=1)
# Dilation
dilation = cv2.dilate(img, kernel, iterations=1)
# Opening = erosion, then dilation
opening = cv2.morphologyEx(img, op=cv2.MORPH_OPEN, kernel=kernel)
# Closing = dilation, then erosion
closing = cv2.morphologyEx(img, op=cv2.MORPH_CLOSE, kernel=kernel)
Thresholding & Segmentation
# Simple thresholding: fixed threshold of 127, output values 0 or 255
MAX_VALUE = 255
ret, thresh = cv2.threshold(img_gray, 127, MAX_VALUE, cv2.THRESH_BINARY)
# Adaptive thresholding: per-pixel threshold from a Gaussian-weighted 11x11
# neighbourhood, minus the constant 2
thresh_adapt = cv2.adaptiveThreshold(
    img_gray,
    maxValue=MAX_VALUE,
    adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    thresholdType=cv2.THRESH_BINARY,
    blockSize=11,
    C=2,
)
# Otsu's thresholding: the threshold is chosen automatically and returned in ret
otsu_flags = cv2.THRESH_BINARY | cv2.THRESH_OTSU
ret, thresh_otsu = cv2.threshold(img_gray, 0, MAX_VALUE, otsu_flags)
# Find contours in the binary image. The [-2:] slice keeps this working on
# OpenCV 3.x, where findContours returns (image, contours, hierarchy)
# instead of (contours, hierarchy).
contours, hierarchy = cv2.findContours(
    thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]
# Draw contours on a copy: drawContours paints onto the array it is given,
# so passing img itself would modify img and leave contour_img aliased to it.
contour_img = cv2.drawContours(img.copy(), contours, -1, (0, 255, 0), 3)
# Get contour properties
for cnt in contours:
    area = cv2.contourArea(cnt)
    perimeter = cv2.arcLength(cnt, True)  # True: treat the contour as closed
    # Polygonal approximation with a tolerance of 2% of the perimeter
    approx = cv2.approxPolyDP(cnt, 0.02 * perimeter, True)
# Define the lower and upper HSV bounds used to select blue pixels
lower_blue, upper_blue = np.array([100, 50, 50]), np.array([130, 255, 255])
# Create mask: non-zero where the HSV pixel lies inside [lower_blue, upper_blue]
mask = cv2.inRange(img_hsv, lowerb=lower_blue, upperb=upper_blue)
# Apply mask: keep the original pixels where the mask is set
result = cv2.bitwise_and(img, img, mask=mask)
Deep Learning for CV
CNN Architectures
class SimpleCNN(nn.Module):
    """Small two-block CNN classifier for 3-channel 32x32 inputs.

    Each block is conv(3x3, padding=1) -> ReLU -> 2x2 max-pool, so a 32x32
    input becomes 64 feature maps of 8x8 before the fully connected head
    (hence the ``64 * 8 * 8`` input size of ``fc1``).

    Args:
        num_classes: Number of output logits produced by the final layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes).

        Args:
            x: Input batch of shape (batch, 3, 32, 32).
        """
        # ReLU comes from nn.functional (via the already-imported `nn`), so the
        # snippet does not depend on a separate `F` alias being imported.
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, 64 * 8 * 8), this cannot silently fold a wrong-sized input
        # into the batch dimension; fc1 raises a shape error instead.
        x = x.flatten(1)
        x = nn.functional.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x
ResNet - Residual Networks with skip connections
VGG - Very Deep Convolutional Networks
Inception - Multiple filter sizes in parallel
EfficientNet - Compound scaling for efficiency
Vision Transformer (ViT) - Transformer-based architecture
Transfer Learning
import torchvision.models as models
# Load pre-trained model (ImageNet weights).
# `pretrained=True` is deprecated since torchvision 0.13; the explicit weights
# enum below selects the same checkpoint that `pretrained=True` used to load.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
# Freeze all layers so only the new head is trained
for param in model.parameters():
    param.requires_grad = False
# Replace the last layer; a freshly created layer has requires_grad=True
num_classes = 10  # number of target classes -- set this for your dataset
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)
# Move to device
model = model.to(device)
from torchvision import transforms
# Per-channel mean / std applied by the Normalize step
NORMALIZE_MEAN = [0.485, 0.456, 0.406]
NORMALIZE_STD = [0.229, 0.224, 0.225]
# Deterministic preprocessing: resize, center-crop to 224, convert to tensor, normalize
preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORMALIZE_MEAN, std=NORMALIZE_STD),
]
transform = transforms.Compose(preprocessing_steps)
Object Detection
Traditional Methods
# Load Haar cascade.
# The opencv-python wheels ship the cascade XML files in the directory given by
# cv2.data.haarcascades; a bare filename only resolves from the current working
# directory. Fall back to the bare filename on builds without cv2.data.
cascade_dir = getattr(getattr(cv2, 'data', None), 'haarcascades', '')
face_cascade = cv2.CascadeClassifier(cascade_dir + 'haarcascade_frontalface_default.xml')
# A failed load does not raise: it yields an empty classifier, so check explicitly
if face_cascade.empty():
    raise FileNotFoundError('Could not load haarcascade_frontalface_default.xml')
# Detect faces (scaleFactor=1.1, minNeighbors=4)
faces = face_cascade.detectMultiScale(img_gray, 1.1, 4)
# Draw bounding boxes (modifies img in place; color is BGR)
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)
from skimage import exposure
from skimage.feature import hog
# HOG settings: 8 orientation bins, 16x16-pixel cells, 1x1-cell blocks;
# visualize=True also returns an image of the descriptor, and
# channel_axis=None marks the input as single-channel (grayscale).
hog_params = dict(
    orientations=8,
    pixels_per_cell=(16, 16),
    cells_per_block=(1, 1),
    visualize=True,
    channel_axis=None,
)
# Compute the feature vector and the visualization image
fd, hog_image = hog(img_gray, **hog_params)
# Stretch the intensity range (0, 10) so the visualization is easier to see
hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))
Deep Learning Methods
YOLO (You Only Look Once) - Real-time object detection
SSD (Single Shot Detector) - Balance of speed and accuracy
Faster R-CNN - Region-based with high accuracy
RetinaNet - Focal loss for class imbalance
EfficientDet - Efficient object detection
# Load YOLO (Darknet weights + config)
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
# Get output layer names.
# getUnconnectedOutLayers() returns nested [[i], ...] arrays on older OpenCV
# but a flat 1-D array on OpenCV >= 4.5.4, where the common `i[0] - 1` idiom
# raises an IndexError. getUnconnectedOutLayersNames() returns the names
# directly and works on both.
layer_names = net.getLayerNames()
output_layers = net.getUnconnectedOutLayersNames()
# Create blob from image: scale pixels by 0.00392 (~1/255), resize to 416x416,
# subtract no mean, swap BGR -> RGB, and do not crop
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
# One output array per unconnected (detection) layer
outs = net.forward(output_layers)
Advanced Features
Image Augmentation
from torchvision import transforms
# Per-channel mean / std applied by the final Normalize step
TRAIN_MEAN = [0.485, 0.456, 0.406]
TRAIN_STD = [0.229, 0.224, 0.225]
# Random geometric augmentations applied after resizing to 256x256
geometric_augmentations = [
    transforms.Resize((256, 256)),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
]
# Random photometric augmentation
color_jitter = transforms.ColorJitter(
    brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1)
# Conversion to a normalized tensor always comes last
tensor_conversion = [
    transforms.ToTensor(),
    transforms.Normalize(mean=TRAIN_MEAN, std=TRAIN_STD),
]
train_transform = transforms.Compose(
    geometric_augmentations + [color_jitter] + tensor_conversion)
import albumentations as A
# Albumentations pipeline: each `p` is the probability that the step is applied
# NOTE(review): newer Albumentations releases appear to replace GaussNoise's
# `var_limit` with `std_range` -- confirm against the installed version.
augmentation_steps = [
    A.RandomCrop(width=256, height=256),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),
    A.Rotate(limit=25, p=0.5),
]
transform = A.Compose(augmentation_steps)
Model Deployment
# Export to ONNX
model.eval()  # export inference behavior (dropout off, batch-norm running stats)
dummy_input = torch.randn(1, 3, 224, 224, device=device)
torch.onnx.export(model, dummy_input, "model.onnx",
                  input_names=['input'], output_names=['output'],
                  dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}})
# Load ONNX model with OpenCV
net = cv2.dnn.readNetFromONNX('model.onnx')
# Reproduce the preprocessing the model was trained with: RGB channel order,
# pixels scaled to [0, 1], then ImageNet mean/std normalization. Subtracting
# Caffe-style BGR means from unscaled pixels gives the network inputs it has
# never seen and therefore meaningless predictions.
blob = cv2.dnn.blobFromImage(img, 1.0 / 255, (224, 224), swapRB=True, crop=False)
mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(1, 3, 1, 1)
std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(1, 3, 1, 1)
blob = (blob - mean) / std  # blob is NCHW, so the stats broadcast over channels
net.setInput(blob)
output = net.forward()
# Get prediction
logits = output[0]
class_id = np.argmax(logits)
# The network emits raw logits; softmax (shifted by the max for numerical
# stability) turns them into probabilities so `confidence` lies in [0, 1]
exp_logits = np.exp(logits - logits.max())
confidence = exp_logits[class_id] / exp_logits.sum()
Additional Resources
Learning Resources
- Books: "Computer Vision: Algorithms and Applications", "Deep Learning for Computer Vision"
- Courses: CS231n (Stanford), Fast.ai Computer Vision
- Tutorials: OpenCV Official Tutorials, PyImageSearch
- Documentation: OpenCV Docs, PyTorch Vision Docs, TensorFlow Object Detection API
- Communities: OpenCV Forum, Stack Overflow, Reddit r/computervision
Useful Tools & Libraries
- Image Processing: OpenCV, Scikit-image, Pillow
- Deep Learning: PyTorch, TensorFlow, Keras
- Augmentation: Albumentations, Imgaug, Torchvision Transforms
- Visualization: Matplotlib, Seaborn, Plotly
- Deployment: ONNX, TensorRT, OpenVINO, TorchServe
Comprehensive Computer Vision Cheatsheet Reference
This Computer Vision cheatsheet on Nikhil Learn Hub collects syntax, commands, and practical snippets for quick revision. Learn computer vision concepts, image processing, object detection, OpenCV, and deep learning techniques with examples.
Use the reference cards and examples above during coding sessions; return here instead of scattered searches when you need dependable reminders. Follow the AI learning roadmap when you want structured lessons beyond one-page lookups.
Quick lookup coverage
- Syntax, commands, and API signatures
- Copy-ready examples and common patterns
- Terminology for coursework and interviews
- Cross-links to the matching learning roadmap
How to study with this sheet
- Production debugging and tuning reminders
- Security, performance, or scale cautions
- Integration with adjacent stacks on this site
- Deeper study through tutorials and roadmaps
Who Should Use This Cheatsheet
Students, self-taught developers, and professionals who need fast Computer Vision lookups during labs, debugging, or interview revision should keep this page bookmarked.
Related Resources on Nikhil Learn Hub
- AI learning roadmap: structured learning path for the same technology
- Cheatsheets hub: browse all quick-reference sheets
- Technology hub: tutorials, roadmaps, and practice hubs