Installation & Setup

Installation

# Install OpenCV
pip install opencv-python

# Install with contrib modules
pip install opencv-contrib-python

# Install computer vision libraries
pip install opencv-python matplotlib numpy scikit-image pillow

# Install deep learning frameworks
pip install torch torchvision tensorflow keras

# Install with conda
conda install opencv matplotlib numpy scikit-image pillow
Verification
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Check OpenCV version
print(cv2.__version__)

# Check if image loads correctly
# (cv2.imread does NOT raise on a missing/corrupt file — it returns None)
img = cv2.imread('test.jpg')
if img is None:
    raise FileNotFoundError('test.jpg could not be read')
print(f'Image shape: {img.shape}')

Basic Setup

# Common imports
import cv2
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision.transforms as transforms

# Set device for PyTorch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

# Set random seed for reproducibility
np.random.seed(42)
torch.manual_seed(42)
Best Practice: Always check image dimensions and color channels when working with OpenCV (BGR format) vs Matplotlib (RGB format).

Image Basics

Image Loading & Display

# Read image
img = cv2.imread('image.jpg')

# Read image as grayscale
img_gray = cv2.imread('image.jpg', cv2.IMREAD_GRAYSCALE)

# Display image with OpenCV
cv2.imshow('Image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Display with Matplotlib (convert BGR to RGB)
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img_rgb)
plt.axis('off')
plt.show()
Image Properties
# Get image properties
print(f'Shape: {img.shape}') # (height, width, channels)
print(f'Size: {img.size}') # total pixels
print(f'Data type: {img.dtype}')
print(f'Min value: {img.min()}')
print(f'Max value: {img.max()}')

Basic Operations

# Color space conversions
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Resize image
resized = cv2.resize(img, (new_width, new_height))
resized_fx = cv2.resize(img, None, fx=0.5, fy=0.5)

# Crop image
cropped = img[y1:y2, x1:x2]

# Rotate image
(h, w) = img.shape[:2]
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, 45, 1.0)
rotated = cv2.warpAffine(img, M, (w, h))
# Drawing functions
# Draw line
cv2.line(img, (0, 0), (100, 100), (255, 0, 0), 5)

# Draw rectangle
cv2.rectangle(img, (50, 50), (200, 200), (0, 255, 0), 3)

# Draw circle
cv2.circle(img, (100, 100), 50, (0, 0, 255), -1)

# Add text
cv2.putText(img, 'Hello', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

Image Processing

Filters & Transformations

# Gaussian blur
blurred = cv2.GaussianBlur(img, (5, 5), 0)

# Median blur
median = cv2.medianBlur(img, 5)

# Bilateral filter
bilateral = cv2.bilateralFilter(img, 9, 75, 75)

# Edge detection
edges = cv2.Canny(img, 100, 200)

# Sobel derivatives
sobelx = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=5)
sobely = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=5)
# Morphological operations
kernel = np.ones((5,5), np.uint8)

# Erosion
erosion = cv2.erode(img, kernel, iterations=1)

# Dilation
dilation = cv2.dilate(img, kernel, iterations=1)

# Opening (erosion followed by dilation)
opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)

# Closing (dilation followed by erosion)
closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)

Thresholding & Segmentation

# Simple thresholding
ret, thresh = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY)

# Adaptive thresholding
thresh_adapt = cv2.adaptiveThreshold(img_gray, 255,
    cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)

# Otsu's thresholding
ret, thresh_otsu = cv2.threshold(img_gray, 0, 255,
    cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# Contour detection
contours, hierarchy = cv2.findContours(thresh,
    cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# Draw contours
contour_img = cv2.drawContours(img, contours, -1, (0, 255, 0), 3)

# Get contour properties
for cnt in contours:
    area = cv2.contourArea(cnt)
    perimeter = cv2.arcLength(cnt, True)
    approx = cv2.approxPolyDP(cnt, 0.02 * perimeter, True)
Color-based Segmentation
# Define color range in HSV
lower_blue = np.array([100, 50, 50])
upper_blue = np.array([130, 255, 255])

# Create mask
mask = cv2.inRange(img_hsv, lower_blue, upper_blue)

# Apply mask
result = cv2.bitwise_and(img, img, mask=mask)

Deep Learning for CV

CNN Architectures

# Simple CNN in PyTorch
class SimpleCNN(nn.Module):
    """Minimal two-block convolutional classifier for 3-channel 32x32 images.

    Architecture: Conv(3->32) -> pool -> Conv(32->64) -> pool, reducing a
    32x32 input to 64 x 8 x 8 features, followed by two fully connected
    layers with dropout.

    Args:
        num_classes: Size of the output layer (default 10, e.g. CIFAR-10).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # 64 channels * 8 * 8 spatial — assumes a 32x32 input halved by
        # each of the two 2x2 pooling steps.
        self.fc1 = nn.Linear(64 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes)."""
        # torch.relu is used instead of F.relu: the original snippet
        # referenced F, but torch.nn.functional is never imported in
        # this document's setup section, so F.relu raised a NameError.
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # flatten(x, 1) keeps the batch dimension explicit, unlike
        # view(-1, ...), which silently reshapes on a size mismatch.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x
Popular Architectures

ResNet - Residual Networks with skip connections

VGG - Very Deep Convolutional Networks

Inception - Multiple filter sizes in parallel

EfficientNet - Compound scaling for efficiency

Vision Transformer (ViT) - Transformer-based architecture

Transfer Learning

# Using pre-trained models in PyTorch
import torchvision.models as models

# Load pre-trained model (torchvision >= 0.13 replaced pretrained=True
# with the weights API)
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

# Freeze all layers
for param in model.parameters():
    param.requires_grad = False

# Replace the last layer
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)

# Move to device
model = model.to(device)
# Data transforms for pre-trained models
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),
])
Note: Pre-trained models expect specific input sizes and normalization. Always check the documentation for the correct preprocessing.

Object Detection

Traditional Methods

# Haar Cascades for face detection
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

# Detect faces
faces = face_cascade.detectMultiScale(img_gray, 1.1, 4)

# Draw bounding boxes
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)
# HOG (Histogram of Oriented Gradients)
from skimage.feature import hog
from skimage import exposure

# Compute HOG features
fd, hog_image = hog(img_gray, orientations=8, pixels_per_cell=(16, 16),
    cells_per_block=(1, 1), visualize=True, channel_axis=None)

# Rescale histogram for better display
hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))

Deep Learning Methods

Popular Detection Models

YOLO (You Only Look Once) - Real-time object detection

SSD (Single Shot Detector) - Balance of speed and accuracy

Faster R-CNN - Region-based with high accuracy

RetinaNet - Focal loss for class imbalance

EfficientDet - Efficient object detection

# Using YOLO with OpenCV
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')

# Get output layer names
# (.flatten() works on both OpenCV 3.x, which returns an Nx1 array,
# and OpenCV 4.x, which returns a flat array of scalar indices)
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]

# Create blob from image
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
Note: Deep learning-based object detection requires pre-trained models which can be downloaded from model zoos or trained on custom datasets.

Advanced Features

Image Augmentation

# Using torchvision transforms
from torchvision import transforms

train_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2,
                        saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),
])
# Using Albumentations library
import albumentations as A

transform = A.Compose([
    A.RandomCrop(width=256, height=256),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),
    A.Rotate(limit=25, p=0.5),
])

Model Deployment

# Convert PyTorch model to ONNX
dummy_input = torch.randn(1, 3, 224, 224, device=device)
torch.onnx.export(model, dummy_input, "model.onnx",
    input_names=['input'], output_names=['output'],
    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}})

# Load ONNX model with OpenCV
net = cv2.dnn.readNetFromONNX('model.onnx')
# Inference with OpenCV DNN
blob = cv2.dnn.blobFromImage(img, 1.0, (224, 224), (104, 117, 123))
net.setInput(blob)
output = net.forward()

# Get prediction
class_id = np.argmax(output)
confidence = output[0][class_id]
Note: ONNX provides interoperability between different deep learning frameworks, making deployment easier across platforms.

Additional Resources

Learning Resources

  • Books: "Computer Vision: Algorithms and Applications", "Deep Learning for Computer Vision"
  • Courses: CS231n (Stanford), Fast.ai Computer Vision
  • Tutorials: OpenCV Official Tutorials, PyImageSearch
  • Documentation: OpenCV Docs, PyTorch Vision Docs, TensorFlow Object Detection API
  • Communities: OpenCV Forum, Stack Overflow, Reddit r/computervision

Useful Tools & Libraries

  • Image Processing: OpenCV, Scikit-image, Pillow
  • Deep Learning: PyTorch, TensorFlow, Keras
  • Augmentation: Albumentations, Imgaug, Torchvision Transforms
  • Visualization: Matplotlib, Seaborn, Plotly
  • Deployment: ONNX, TensorRT, OpenVINO, TorchServe