Installation & Setup
Installation
# Install OpenCV
pip install opencv-python
# Install with contrib modules
pip install opencv-contrib-python
# Install computer vision libraries
pip install opencv-python matplotlib numpy scikit-image pillow
# Install deep learning frameworks
pip install torch torchvision tensorflow keras
# Install with conda
conda install opencv matplotlib numpy scikit-image pillow
Verification
import cv2
import numpy as np
import matplotlib.pyplot as plt
# Check OpenCV version
print(cv2.__version__)
# Check if a test image loads correctly (cv2.imread returns None on failure)
img = cv2.imread('test.jpg')
if img is None:
    raise FileNotFoundError("test.jpg could not be read")
print(f'Image shape: {img.shape}')
Basic Setup
# Common imports
import cv2
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torchvision.transforms as transforms
# Set device for PyTorch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')
# Set random seed for reproducibility
np.random.seed(42)
torch.manual_seed(42)
Best Practice: OpenCV loads color images in BGR channel order, while Matplotlib (and most other libraries) expects RGB. Always check image dimensions and convert with cv2.cvtColor before displaying, or the red and blue channels will appear swapped.
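To see the gotcha in action, here is a small sketch that displays the same image raw and converted, side by side (assuming an 'image.jpg' file in the working directory):

import cv2
import matplotlib.pyplot as plt
img = cv2.imread('image.jpg')  # loaded in BGR order
fig, axes = plt.subplots(1, 2)
axes[0].imshow(img)  # channels interpreted as RGB: reds and blues swap
axes[0].set_title('Raw BGR (wrong colors)')
axes[1].imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
axes[1].set_title('Converted to RGB')
for ax in axes:
    ax.axis('off')
plt.show()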
Image Basics
Image Loading & Display
# Read image
img = cv2.imread('image.jpg')
# Read image as grayscale
img_gray = cv2.imread('image.jpg', cv2.IMREAD_GRAYSCALE)
# Display image with OpenCV
cv2.imshow('Image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
# Display with Matplotlib (convert BGR to RGB)
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(img_rgb)
plt.axis('off')
plt.show()
Image Properties
# Get image properties
print(f'Shape: {img.shape}') # (height, width, channels)
print(f'Size: {img.size}') # total pixels
print(f'Data type: {img.dtype}')
print(f'Min value: {img.min()}')
print(f'Max value: {img.max()}')
Basic Operations
# Color space conversions
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# Resize image (cv2.resize takes (width, height), not (height, width))
new_width, new_height = 640, 480
resized = cv2.resize(img, (new_width, new_height))
resized_fx = cv2.resize(img, None, fx=0.5, fy=0.5)
# Crop image via NumPy slicing: rows (y) first, then columns (x)
y1, y2, x1, x2 = 50, 200, 100, 300
cropped = img[y1:y2, x1:x2]
# Rotate image
(h, w) = img.shape[:2]
center = (w // 2, h // 2)
M = cv2.getRotationMatrix2D(center, 45, 1.0)
rotated = cv2.warpAffine(img, M, (w, h))
# Drawing functions (note: these modify img in place)
# Draw line
cv2.line(img, (0, 0), (100, 100), (255, 0, 0), 5)
# Draw rectangle
cv2.rectangle(img, (50, 50), (200, 200), (0, 255, 0), 3)
# Draw circle
cv2.circle(img, (100, 100), 50, (0, 0, 255), -1)
# Add text
cv2.putText(img, 'Hello', (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
Image Processing
Filters & Transformations
# Gaussian blur
blurred = cv2.GaussianBlur(img, (5, 5), 0)
# Median blur
median = cv2.medianBlur(img, 5)
# Bilateral filter
bilateral = cv2.bilateralFilter(img, 9, 75, 75)
# Edge detection
edges = cv2.Canny(img, 100, 200)
# Sobel derivatives
sobelx = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=5)
sobely = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=5)
# Morphological operations
kernel = np.ones((5,5), np.uint8)
# Erosion
erosion = cv2.erode(img, kernel, iterations=1)
# Dilation
dilation = cv2.dilate(img, kernel, iterations=1)
# Opening (erosion followed by dilation)
opening = cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)
# Closing (dilation followed by erosion)
closing = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel)
Thresholding & Segmentation
# Simple thresholding
ret, thresh = cv2.threshold(img_gray, 127, 255, cv2.THRESH_BINARY)
# Adaptive thresholding
thresh_adapt = cv2.adaptiveThreshold(img_gray, 255,
    cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
# Otsu's thresholding
ret, thresh_otsu = cv2.threshold(img_gray, 0, 255,
    cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# Contour detection
contours, hierarchy = cv2.findContours(thresh,
    cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# Draw contours
contour_img = cv2.drawContours(img, contours, -1, (0, 255, 0), 3)
# Get contour properties
for cnt in contours:
    area = cv2.contourArea(cnt)
    perimeter = cv2.arcLength(cnt, True)
    approx = cv2.approxPolyDP(cnt, 0.02 * perimeter, True)
Color-based Segmentation
# Define color range in HSV
lower_blue = np.array([100, 50, 50])
upper_blue = np.array([130, 255, 255])
# Create mask
mask = cv2.inRange(img_hsv, lower_blue, upper_blue)
# Apply mask
result = cv2.bitwise_and(img, img, mask=mask)
Deep Learning for CV
CNN Architectures
# Simple CNN in PyTorch
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    def __init__(self, num_classes=10):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # 64 * 8 * 8 assumes 32x32 inputs (e.g., CIFAR-10) after two 2x2 poolings
        self.fc1 = nn.Linear(64 * 8 * 8, 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 8 * 8)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x
Popular Architectures
ResNet - Residual Networks with skip connections
VGG - Very Deep Convolutional Networks
Inception - Multiple filter sizes in parallel
EfficientNet - Compound scaling for efficiency
Vision Transformer (ViT) - Transformer-based architecture
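Most of these architectures ship ready-made (with optional pre-trained weights) in torchvision's model zoo; a minimal sketch, assuming torchvision >= 0.13 for the weights enums:

import torchvision.models as models
# Instantiate a few popular architectures with ImageNet weights
resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
vgg = models.vgg16(weights=models.VGG16_Weights.DEFAULT)
efficientnet = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT)
vit = models.vit_b_16(weights=models.ViT_B_16_Weights.DEFAULT)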
Transfer Learning
# Using pre-trained models in PyTorch
import torchvision.models as models
# Load pre-trained model (pretrained=True is deprecated in
# torchvision >= 0.13 in favor of the weights enum)
model = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
# Freeze all layers
for param in model.parameters():
    param.requires_grad = False
# Replace the last layer (only this new head will be trained)
num_classes = 10  # set to the number of classes in your task
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, num_classes)
# Move to device
model = model.to(device)
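With the backbone frozen, only the new head needs training. A minimal fine-tuning loop sketch; the train_loader DataLoader yielding (images, labels) batches is a hypothetical stand-in for your own data pipeline:

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.fc.parameters(), lr=1e-3)  # optimize only the new head
model.train()
for epoch in range(5):
    for images, labels in train_loader:  # hypothetical DataLoader
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch + 1}: loss = {loss.item():.4f}')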
# Data transforms for pre-trained models
from torchvision import transforms

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
Note: Pre-trained models expect specific input sizes and normalization. Always check the documentation for the correct preprocessing.
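Putting the pieces together, a sketch of single-image inference with the transform and model defined above (the 'image.jpg' path is a placeholder):

from PIL import Image
model.eval()
pil_img = Image.open('image.jpg').convert('RGB')
batch = transform(pil_img).unsqueeze(0).to(device)  # add a batch dimension
with torch.no_grad():
    probs = torch.softmax(model(batch), dim=1)
    conf, pred = probs.max(dim=1)
print(f'Predicted class {pred.item()} with confidence {conf.item():.3f}')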
Object Detection
Traditional Methods
# Haar Cascades for face detection (the XML files ship with opencv-python)
face_cascade = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
# Detect faces (scaleFactor=1.1, minNeighbors=4)
faces = face_cascade.detectMultiScale(img_gray, 1.1, 4)
# Draw bounding boxes
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x+w, y+h), (255, 0, 0), 2)
# HOG (Histogram of Oriented Gradients)
from skimage.feature import hog
from skimage import exposure

# Compute HOG features
fd, hog_image = hog(img_gray, orientations=8, pixels_per_cell=(16, 16),
                    cells_per_block=(1, 1), visualize=True, channel_axis=None)
# Rescale histogram for better display
hog_image_rescaled = exposure.rescale_intensity(hog_image, in_range=(0, 10))
Deep Learning Methods
Popular Detection Models
YOLO (You Only Look Once) - Real-time object detection
SSD (Single Shot Detector) - Balance of speed and accuracy
Faster R-CNN - Region-based with high accuracy
RetinaNet - Focal loss for class imbalance
EfficientDet - Efficient object detection
# Using YOLO with OpenCV
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
# Get output layer names (OpenCV 4.x returns a flat array of 1-based indices)
layer_names = net.getLayerNames()
output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
# Create blob from image (scale by 1/255, swap BGR -> RGB)
blob = cv2.dnn.blobFromImage(img, 1/255.0, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(output_layers)
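The raw outputs still need decoding into boxes. A post-processing sketch under the standard YOLOv3 output layout (each detection row holds normalized box coordinates, objectness, then per-class scores); the thresholds are typical starting values:

h, w = img.shape[:2]
boxes, confidences, class_ids = [], [], []
for out in outs:
    for det in out:
        scores = det[5:]
        class_id = int(np.argmax(scores))
        conf = float(scores[class_id])
        if conf > 0.5:
            cx, cy, bw, bh = det[0] * w, det[1] * h, det[2] * w, det[3] * h
            boxes.append([int(cx - bw / 2), int(cy - bh / 2), int(bw), int(bh)])
            confidences.append(conf)
            class_ids.append(class_id)
# Non-maximum suppression removes overlapping duplicates
keep = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
for i in np.array(keep).flatten():
    x, y, bw, bh = boxes[i]
    cv2.rectangle(img, (x, y), (x + bw, y + bh), (0, 255, 0), 2)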
Note: Deep learning-based object detection requires pre-trained models which can be downloaded from model zoos or trained on custom datasets.
Advanced Features
Image Augmentation
# Using torchvision transforms
from torchvision import transforms

train_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2,
                           saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
# Using Albumentations library
import albumentations as A

transform = A.Compose([
    A.RandomCrop(width=256, height=256),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
    A.GaussNoise(var_limit=(10.0, 50.0), p=0.3),
    A.Rotate(limit=25, p=0.5),
])
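Unlike torchvision transforms, Albumentations operates on NumPy arrays (such as images loaded with OpenCV) and is called with keyword arguments:

augmented = transform(image=img)['image']  # img is a NumPy array in HWC layout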
Model Deployment
# Convert PyTorch model to ONNX
dummy_input = torch.randn(1, 3, 224, 224, device=device)
torch.onnx.export(model, dummy_input, "model.onnx",
                  input_names=['input'], output_names=['output'],
                  dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}})
# Load ONNX model with OpenCV
net = cv2.dnn.readNetFromONNX('model.onnx')
# Inference with OpenCV DNN
# (scale and mean values here are examples; use the exact preprocessing
# the model was trained with)
blob = cv2.dnn.blobFromImage(img, 1.0, (224, 224), (104, 117, 123))
net.setInput(blob)
output = net.forward()
# Get prediction
class_id = np.argmax(output)
confidence = output[0][class_id]
Note: ONNX provides interoperability between different deep learning frameworks, making deployment easier across platforms.
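A common sanity check before deployment is comparing ONNX Runtime output against the original PyTorch model; a minimal sketch, assuming onnxruntime is installed (pip install onnxruntime) and model/device come from the setup above:

import onnxruntime as ort
sess = ort.InferenceSession('model.onnx', providers=['CPUExecutionProvider'])
x = np.random.randn(1, 3, 224, 224).astype(np.float32)
onnx_out = sess.run(None, {'input': x})[0]
with torch.no_grad():
    torch_out = model(torch.from_numpy(x).to(device)).cpu().numpy()
print('Max difference:', np.abs(onnx_out - torch_out).max())  # should be tiny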
Additional Resources
Learning Resources
- Books: "Computer Vision: Algorithms and Applications", "Deep Learning for Computer Vision"
- Courses: CS231n (Stanford), Fast.ai Computer Vision
- Tutorials: OpenCV Official Tutorials, PyImageSearch
- Documentation: OpenCV Docs, PyTorch Vision Docs, TensorFlow Object Detection API
- Communities: OpenCV Forum, Stack Overflow, Reddit r/computervision
Useful Tools & Libraries
- Image Processing: OpenCV, Scikit-image, Pillow
- Deep Learning: PyTorch, TensorFlow, Keras
- Augmentation: Albumentations, Imgaug, Torchvision Transforms
- Visualization: Matplotlib, Seaborn, Plotly
- Deployment: ONNX, TensorRT, OpenVINO, TorchServe