Installation & Setup

Installation

# Install OpenCV
pip install opencv-python

# Install with contrib modules
pip install opencv-contrib-python

# Install computer vision libraries
pip install opencv-python matplotlib numpy scikit-image pillow

# Install deep learning frameworks
pip install torch torchvision tensorflow keras

# Install with conda
conda install opencv matplotlib numpy scikit-image pillow
Verification
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Check OpenCV version
print(cv2.__version__)

# Check if image loads correctly
# (cv2.imread does NOT raise on a missing/corrupt file — it returns None)
img = cv2.imread('test.jpg')
if img is None:
    raise FileNotFoundError('test.jpg could not be read')
print(f'Image shape: {img.shape}')

Basic Setup

# Common imports
import cv2
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision.transforms as transforms

# Set device for PyTorch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

# Set random seed for reproducibility
np.random.seed(42)
torch.manual_seed(42)
Best Practice: Always check image dimensions and color channels when working with OpenCV (BGR format) vs Matplotlib (RGB format).

Image Basics

Image Loading & Display

# Read image
img = cv2.imread('image.jpg')

# Read image as grayscale
img_gray = cv2.imread('image.jpg', cv2.IMREAD_GRAYSCALE)

# Display image with OpenCV
cv2.imshow('Image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Display with Matplotlib (convert BGR to RGB)
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img_rgb)
plt.axis('off')
plt.show()
Image Properties
# Get image properties
print(f'Shape: {img.shape}') # (height, width, channels)
print(f'Size: {img.size}') # total pixels
print(f'Data type: {img.dtype}')
print(f'Min value: {img.min()}')
print(f'Max value: {img.max()}')

Basic Operations

# Color space conversions
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Resize image
resized = cv2.resize(img, (new_width, new_height))
resized_fx = cv2.resize(img, None, fx=0.5, fy=0.5)

# Crop image
cropped = img[y1:y2, x1:x2]

# Rotate image
(h, w) = img.shape[:2]
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, 45, 1.0)
rotated = cv2.warpAffine(img, M, (w, h))
# Drawing functions
# Draw line
cv2.line(img, (0, 0), (100, 100), (255, 0, 0), 5)

# Draw rectangle
cv2.rectangle(img, (50, 50), (200, 200), (0, 255, 0), 3)

# Draw circle
cv2.circle(img, (100, 100), 50, (0, 0, 255), -1)

# Add text
cv2.putText(img, 'Hello', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

Image Processing

Filters & Transformations

# Gaussian blur
blurred = cv2.GaussianBlur(img, (5, 5), 0)

# Median blur
median = cv2.medianBlur(img, 5)

# Bilateral filter
bilateral = cv2.bilateralFilter(img, 9, 75, 75)

# Edge detection
edges = cv2.Canny(img, 100, 200)

# Sobel derivatives
sobelx = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=5)
sobely = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=5)
# Morphological operations
kernel = np.ones((5,5), np.uint8)

# Erosion
erosion = cv2.erode(img, kernel, iterations=1)

# Dilation
dilation = cv2.dilate(img, kernel, iterations=1)

# Opening (erosion followed by dilation)
opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)

# Closing (dilation followed by erosion)
closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)

Thresholding & Segmentation

# Simple thresholding
ret, thresh = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY)

# Adaptive thresholding
thresh_adapt = cv2.adaptiveThreshold(img_gray, 255,
    cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)

# Otsu's thresholding
ret, thresh_otsu = cv2.threshold(img_gray, 0, 255,
    cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# Contour detection
contours, hierarchy = cv2.findContours(thresh,
    cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# Draw contours
contour_img = cv2.drawContours(img, contours, -1, (0, 255, 0), 3)

# Get contour properties
for cnt in contours:
    area = cv2.contourArea(cnt)
    perimeter = cv2.arcLength(cnt, True)
    approx = cv2.approxPolyDP(cnt, 0.02 * perimeter, True)
Color-based Segmentation
# Define color range in HSV
lower_blue = np.array([100, 50, 50])
upper_blue = np.array([130, 255, 255])

# Create mask
mask = cv2.inRange(img_hsv, lower_blue, upper_blue)

# Apply mask
result = cv2.bitwise_and(img, img, mask=mask)

Deep Learning for CV

CNN Architectures

# Simple CNN in PyTorch
class SimpleCNN(nn.Module):
    """Minimal two-block convolutional classifier for 3-channel 32x32 images.

    Architecture: Conv(3->32) -> pool -> Conv(32->64) -> pool, reducing a
    32x32 input to 64 x 8 x 8 features, followed by two fully connected
    layers with dropout.

    Args:
        num_classes: Size of the output layer (default 10, e.g. CIFAR-10).
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # 64 channels * 8 * 8 spatial — assumes a 32x32 input halved by
        # each of the two 2x2 pooling steps.
        self.fc1 = nn.Linear(64 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes)."""
        # torch.relu is used instead of F.relu: the original snippet
        # referenced F, but torch.nn.functional is never imported in
        # this document's setup section, so F.relu raised a NameError.
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        # flatten(x, 1) keeps the batch dimension explicit, unlike
        # view(-1, ...), which silently reshapes on a size mismatch.
        x = torch.flatten(x, 1)
        x = torch.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x
Popular Architectures

ResNet - Residual Networks with skip connections

VGG - Very Deep Convolutional Networks

Inception - Multiple filter sizes in parallel

EfficientNet - Compound scaling for efficiency

Vision Transformer (ViT) - Transformer-based architecture

Transfer Learning

# Using pre-trained models in PyTorch
import torchvision.models as models

# Load pre-trained model (torchvision >= 0.13 replaced pretrained=True
# with the weights API)
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

# Freeze all layers
for param in model.parameters():
    param.requires_grad = False

# Replace the last layer
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)

# Move to device
model = model.to(device)
# Data transforms for pre-trained models
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),
])
Note: Pre-trained models expect specific input sizes and normalization. Always check the documentation for the correct preprocessing.

Object Detection

Traditional Methods

# Haar Cascades for face detection
face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

# Detect faces
faces = face_cascade.detectMultiScale(img_gray, 1.1, 4)

# Draw bounding boxes
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)
# HOG (Histogram of Oriented Gradients)
from skimage.feature import hog
from skimage import exposure

# Compute HOG features
fd, hog_image = hog(img_gray, orientations=8, pixels_per_cell=(16, 16),
    cells_per_block=(1, 1), visualize=True, channel_axis=None)

# Rescale histogram for better display
hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))

Deep Learning Methods

Popular Detection Models

YOLO (You Only Look Once) - Real-time object detection

SSD (Single Shot Detector) - Balance of speed and accuracy

Faster R-CNN - Region-based with high accuracy

RetinaNet - Focal loss for class imbalance

EfficientDet - Efficient object detection

# Using YOLO with OpenCV
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')

# Get output layer names
# (.flatten() works on both OpenCV 3.x, which returns an Nx1 array,
# and OpenCV 4.x, which returns a flat array of scalar indices)
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers().flatten()]

# Create blob from image
blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
Note: Deep learning-based object detection requires pre-trained models which can be downloaded from model zoos or trained on custom datasets.

Advanced Features

Image Augmentation

# Using torchvision transforms
from torchvision import transforms

train_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2,
                        saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),
])
# Using Albumentations library
import albumentations as A

transform = A.Compose([
    A.RandomCrop(width=256, height=256),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),
    A.Rotate(limit=25, p=0.5),
])

Model Deployment

# Convert PyTorch model to ONNX
dummy_input = torch.randn(1, 3, 224, 224, device=device)
torch.onnx.export(model, dummy_input, "model.onnx",
    input_names=['input'], output_names=['output'],
    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}})

# Load ONNX model with OpenCV
net = cv2.dnn.readNetFromONNX('model.onnx')
# Inference with OpenCV DNN
blob = cv2.dnn.blobFromImage(img, 1.0, (224, 224), (104, 117, 123))
net.setInput(blob)
output = net.forward()

# Get prediction
class_id = np.argmax(output)
confidence = output[0][class_id]
Note: ONNX provides interoperability between different deep learning frameworks, making deployment easier across platforms.

Additional Resources

Learning Resources

  • Books: "Computer Vision: Algorithms and Applications", "Deep Learning for Computer Vision"
  • Courses: CS231n (Stanford), Fast.ai Computer Vision
  • Tutorials: OpenCV Official Tutorials, PyImageSearch
  • Documentation: OpenCV Docs, PyTorch Vision Docs, TensorFlow Object Detection API
  • Communities: OpenCV Forum, Stack Overflow, Reddit r/computervision

Useful Tools & Libraries

  • Image Processing: OpenCV, Scikit-image, Pillow
  • Deep Learning: PyTorch, TensorFlow, Keras
  • Augmentation: Albumentations, Imgaug, Torchvision Transforms
  • Visualization: Matplotlib, Seaborn, Plotly
  • Deployment: ONNX, TensorRT, OpenVINO, TorchServe