Computer Vision Cheatsheet

Installation & Setup

Installation

# Install OpenCV

pip install opencv-python

# Install with contrib modules
# NOTE(review): presumably an alternative to the command above rather than an
# extra step; confirm against the opencv-python package README before
# installing both packages into the same environment.

pip install opencv-contrib-python

# Install computer vision libraries

pip install opencv-python matplotlib numpy scikit-image pillow

# Install deep learning frameworks

pip install torch torchvision tensorflow keras

# Install with conda

conda install opencv matplotlib numpy scikit-image pillow

Verification

import cv2

import numpy as np

import matplotlib.pyplot as plt

# Check OpenCV version
print(cv2.__version__)

# Check if image loads correctly.
# cv2.imread does not raise when the file is missing or unreadable; it
# returns None, so test for that explicitly instead of letting `.shape`
# fail with an unrelated AttributeError.
img = cv2.imread('test.jpg')
if img is None:
    print('Image failed to load: test.jpg (missing file or unsupported format)')
else:
    print(f'Image shape: {img.shape}')

Basic Setup

# Common imports

import cv2

import numpy as np

import matplotlib.pyplot as plt

import torch

import torch.nn as nn

import torchvision.transforms as transforms

# Choose the compute device: the GPU when CUDA is usable, the CPU otherwise
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(f'Using device: {device}')

# Fix the PyTorch and NumPy random seeds so runs are repeatable
torch.manual_seed(42)
np.random.seed(42)

Best Practice: Always check image dimensions and color channels when working with OpenCV (BGR format) vs Matplotlib (RGB format).

Image Basics

Image Loading & Display

# Read image (colour images come back in BGR channel order)
img = cv2.imread('image.jpg')

# cv2.imread reports failure by returning None rather than raising, so stop
# here with a clear error instead of failing later inside imshow/cvtColor.
if img is None:
    raise FileNotFoundError("Could not read image file 'image.jpg'")

# Read image as grayscale (single channel)
img_gray = cv2.imread('image.jpg', cv2.IMREAD_GRAYSCALE)

# Display image with OpenCV
cv2.imshow('Image', img)
cv2.waitKey(0)           # 0 = wait indefinitely for a key press
cv2.destroyAllWindows()

# Display with Matplotlib (convert BGR to RGB, which Matplotlib expects)
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img_rgb)
plt.axis('off')
plt.show()

Image Properties

# Get image properties

print(f'Shape: {img.shape}')  # (height, width, channels)

print(f'Size: {img.size}')    # total number of array elements (height * width * channels), not the pixel count

print(f'Data type: {img.dtype}')

print(f'Min value: {img.min()}')

print(f'Max value: {img.max()}')

Basic Operations

# Color space conversions (source image is in OpenCV's BGR order)
img_rgb = cv2.cvtColor(img, code=cv2.COLOR_BGR2RGB)
img_hsv = cv2.cvtColor(img, code=cv2.COLOR_BGR2HSV)
img_gray = cv2.cvtColor(img, code=cv2.COLOR_BGR2GRAY)

# Resize image: either to an explicit (width, height) or by scale factors
resized = cv2.resize(img, dsize=(new_width, new_height))
resized_fx = cv2.resize(img, dsize=None, fx=0.5, fy=0.5)

# Crop image (NumPy slicing: rows are y, columns are x)
cropped = img[y1:y2, x1:x2]

# Rotate image by 45 degrees around its centre, keeping the original size
h, w = img.shape[:2]
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center=center, angle=45, scale=1.0)
rotated = cv2.warpAffine(img, M, dsize=(w, h))

# Drawing functions (colours are BGR tuples)

# Draw line
cv2.line(img, pt1=(0, 0), pt2=(100, 100), color=(255, 0, 0), thickness=5)

# Draw rectangle
cv2.rectangle(img, pt1=(50, 50), pt2=(200, 200), color=(0, 255, 0), thickness=3)

# Draw circle (thickness -1)
cv2.circle(img, center=(100, 100), radius=50, color=(0, 0, 255), thickness=-1)

# Add text
cv2.putText(img, text='Hello', org=(50, 50), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
            fontScale=1, color=(255, 255, 255), thickness=2)

Image Processing

Filters & Transformations

# Gaussian blur with a 5x5 kernel
blurred = cv2.GaussianBlur(img, ksize=(5, 5), sigmaX=0)

# Median blur
median = cv2.medianBlur(img, ksize=5)

# Bilateral filter
bilateral = cv2.bilateralFilter(img, d=9, sigmaColor=75, sigmaSpace=75)

# Edge detection
edges = cv2.Canny(img, threshold1=100, threshold2=200)

# Sobel derivatives along x and y
sobelx = cv2.Sobel(img, ddepth=cv2.CV_64F, dx=1, dy=0, ksize=5)
sobely = cv2.Sobel(img, ddepth=cv2.CV_64F, dx=0, dy=1, ksize=5)

# Morphological operations share one 5x5 structuring element
kernel = np.ones((5, 5), dtype=np.uint8)

# Erosion
erosion = cv2.erode(img, kernel=kernel, iterations=1)

# Dilation
dilation = cv2.dilate(img, kernel=kernel, iterations=1)

# Opening (erosion followed by dilation)
opening = cv2.morphologyEx(img, op=cv2.MORPH_OPEN, kernel=kernel)

# Closing (dilation followed by erosion)
closing = cv2.morphologyEx(img, op=cv2.MORPH_CLOSE, kernel=kernel)

Thresholding & Segmentation

# Simple thresholding with a fixed cut-off of 127
ret, thresh = cv2.threshold(img_gray, thresh=127, maxval=255,
                            type=cv2.THRESH_BINARY)

# Adaptive thresholding
thresh_adapt = cv2.adaptiveThreshold(
    img_gray,
    maxValue=255,
    adaptiveMethod=cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    thresholdType=cv2.THRESH_BINARY,
    blockSize=11,
    C=2,
)

# Otsu's thresholding
ret, thresh_otsu = cv2.threshold(img_gray, thresh=0, maxval=255,
                                 type=cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Contour detection on the binary image
contours, hierarchy = cv2.findContours(thresh, mode=cv2.RETR_TREE,
                                       method=cv2.CHAIN_APPROX_SIMPLE)

# Draw contours (index -1, green, thickness 3)
contour_img = cv2.drawContours(img, contours, contourIdx=-1,
                               color=(0, 255, 0), thickness=3)

# Get contour properties
for cnt in contours:
    area = cv2.contourArea(cnt)
    perimeter = cv2.arcLength(cnt, closed=True)
    # Polygon approximation with a tolerance of 2% of the perimeter
    approx = cv2.approxPolyDP(cnt, epsilon=0.02 * perimeter, closed=True)

Color-based Segmentation

# Define color range in HSV
lower_blue = np.array([100, 50, 50])
upper_blue = np.array([130, 255, 255])

# Create mask
mask = cv2.inRange(img_hsv, lowerb=lower_blue, upperb=upper_blue)

# Apply mask to the image
result = cv2.bitwise_and(src1=img, src2=img, mask=mask)

Deep Learning for CV

CNN Architectures

# Simple CNN in PyTorch

class SimpleCNN(nn.Module):
    """Small two-block convolutional classifier.

    Each block is a 3x3 convolution (padding 1), ReLU, and 2x2 max-pooling,
    so the spatial size is halved twice. The first fully connected layer
    expects 64 * 8 * 8 features, i.e. inputs of shape (N, 3, 32, 32).

    Args:
        num_classes: Number of output logits produced by the final layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(64 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return unnormalised class scores of shape (N, num_classes)."""
        # The file only imports `torch.nn as nn`, so reach the functional API
        # through `nn.functional` rather than the undefined alias `F`.
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike view(-1, ...),
        # a wrongly sized input now fails in fc1 instead of silently changing
        # the batch size.
        x = x.flatten(1)
        x = nn.functional.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

Popular Architectures

ResNet - Residual Networks with skip connections

VGG - Very Deep Convolutional Networks

Inception - Multiple filter sizes in parallel

EfficientNet - Compound scaling for efficiency

Vision Transformer (ViT) - Transformer-based architecture

Transfer Learning

# Using pre-trained models in PyTorch
import torchvision.models as models

# Load pre-trained model.
# `pretrained=True` is deprecated in torchvision >= 0.13 in favour of the
# `weights` enum. IMAGENET1K_V1 is the checkpoint `pretrained=True` loaded.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Freeze all layers so only the new head is trained
for param in model.parameters():
    param.requires_grad = False

# Replace the last layer. A freshly created layer has requires_grad=True.
# `num_classes` must be defined by the caller (number of target classes).
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)

# Move to device
model = model.to(device)

# Data transforms for pre-trained models
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    # ImageNet channel statistics (RGB order, inputs scaled to [0, 1])
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

Note: Pre-trained models expect specific input sizes and normalization. Always check the documentation for the correct preprocessing.

Object Detection

Traditional Methods

# Haar Cascades for face detection
cascade_file = 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(cascade_file)

# A missing cascade file leaves the classifier empty rather than raising at
# construction time. Fall back to the cascades bundled with the pip wheels
# (exposed as cv2.data.haarcascades), then fail with a clear message.
if face_cascade.empty() and hasattr(cv2, 'data'):
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + cascade_file)
if face_cascade.empty():
    raise FileNotFoundError(f'Could not load Haar cascade: {cascade_file}')

# Detect faces (scaleFactor=1.1, minNeighbors=4)
faces = face_cascade.detectMultiScale(img_gray, 1.1, 4)

# Draw bounding boxes; each detection is (x, y, width, height)
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)

# HOG (Histogram of Oriented Gradients)
from skimage.feature import hog
from skimage import exposure

# Compute HOG features (channel_axis=None: input is a single-channel image)
fd, hog_image = hog(img_gray, orientations=8, pixels_per_cell=(16, 16),
    cells_per_block=(1, 1), visualize=True, channel_axis=None)

# Rescale histogram for better display
hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))

Deep Learning Methods

Popular Detection Models

YOLO (You Only Look Once) - Real-time object detection

SSD (Single Shot Detector) - Balance of speed and accuracy

Faster R-CNN - Region-based with high accuracy

RetinaNet - Focal loss for class imbalance

EfficientDet - Efficient object detection

# Using YOLO with OpenCV
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')

# Get output layer names.
# getUnconnectedOutLayers() returns nested indices on older OpenCV builds and
# a flat array on 4.5.4+, so `i[0] - 1` indexing breaks on current versions.
# getUnconnectedOutLayersNames() returns the names directly on both.
layer_names = net.getLayerNames()
output_layers = list(net.getUnconnectedOutLayersNames())

# Create blob from image: scale pixels by ~1/255, resize to 416x416,
# no mean subtraction, swap BGR -> RGB, do not crop.
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)

# One output array per YOLO detection layer
outs = net.forward(output_layers)

Note: Deep learning-based object detection requires pre-trained models which can be downloaded from model zoos or trained on custom datasets.

Advanced Features

Image Augmentation

# Using torchvision transforms
from torchvision import transforms

# Training pipeline: resize, random geometric/colour jitter, then tensor
# conversion and per-channel normalisation.
_train_steps = [
    transforms.Resize(size=(256, 256)),
    transforms.RandomCrop(size=224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(
        brightness=0.2,
        contrast=0.2,
        saturation=0.2,
        hue=0.1,
    ),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
train_transform = transforms.Compose(_train_steps)

# Using Albumentations library
import albumentations as A

_albumentations_steps = [
    A.RandomCrop(width=256, height=256),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),
    A.Rotate(limit=25, p=0.5),
]
transform = A.Compose(_albumentations_steps)

Model Deployment

# Convert PyTorch model to ONNX.
# Switch to inference mode first so dropout/batch-norm layers are exported
# with their evaluation behaviour.
model.eval()
dummy_input = torch.randn(1, 3, 224, 224, device=device)
torch.onnx.export(model, dummy_input, "model.onnx",
    input_names=['input'], output_names=['output'],
    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}})

# Load ONNX model with OpenCV
net = cv2.dnn.readNetFromONNX('model.onnx')

# Inference with OpenCV DNN.
# Preprocessing must match what the model was trained with: RGB input scaled
# to [0, 1], then normalised with the ImageNet mean/std. blobFromImage can
# only subtract a mean and scale, so the std division is applied afterwards.
blob = cv2.dnn.blobFromImage(img, 1.0 / 255, (224, 224), swapRB=True, crop=False)
imagenet_mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(1, 3, 1, 1)
imagenet_std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(1, 3, 1, 1)
blob = (blob - imagenet_mean) / imagenet_std
net.setInput(blob)
output = net.forward()

# Get prediction.
# The network emits raw logits; apply a numerically stable softmax so that
# `confidence` is a probability in [0, 1].
logits = output[0]
class_id = int(np.argmax(logits))
exp_logits = np.exp(logits - logits.max())
confidence = float(exp_logits[class_id] / exp_logits.sum())

Note: ONNX provides interoperability between different deep learning frameworks, making deployment easier across platforms.

Additional Resources

Learning Resources

Books: "Computer Vision: Algorithms and Applications", "Deep Learning for Computer Vision"
Courses: CS231n (Stanford), Fast.ai Computer Vision
Tutorials: OpenCV Official Tutorials, PyImageSearch
Documentation: OpenCV Docs, PyTorch Vision Docs, TensorFlow Object Detection API
Communities: OpenCV Forum, Stack Overflow, Reddit r/computervision

Useful Tools & Libraries

Image Processing: OpenCV, Scikit-image, Pillow
Deep Learning: PyTorch, TensorFlow, Keras
Augmentation: Albumentations, Imgaug, Torchvision Transforms
Visualization: Matplotlib, Seaborn, Plotly
Deployment: ONNX, TensorRT, OpenVINO, TorchServe

Quick reference guide

Comprehensive Computer Vision Cheatsheet Reference

This Computer Vision cheatsheet on Nikhil Learn Hub collects syntax, commands, and practical snippets for quick revision. Learn computer vision concepts, image processing, object detection, OpenCV, and deep learning techniques with examples.

Use the reference cards and examples above during coding sessions; return here instead of scattered searches when you need dependable reminders. Follow the AI learning roadmap when you want structured lessons beyond one-page lookups.

Quick lookup coverage

Syntax, commands, and API signatures
Copy-ready examples and common patterns
Terminology for coursework and interviews
Cross-links to the matching learning roadmap

How to study with this sheet

Production debugging and tuning reminders
Security, performance, or scale cautions
Integration with adjacent stacks on this site
Deeper study through tutorials and roadmaps

Who Should Use This Cheatsheet

Students, self-taught developers, and professionals who need fast Computer Vision lookups during labs, debugging, or interview revision should keep this page bookmarked.

Related Resources on Nikhil Learn Hub

AI learning roadmap — structured learning path for the same technology
Cheatsheets hub — browse all quick-reference sheets
Technology hub — tutorials, roadmaps, and practice hubs