
OpenCV

Overview

OpenCV (Open Source Computer Vision Library) is an open-source computer vision and machine learning library, optimized for real-time applications.

Philosophy

"Computer Vision for everyone - Rendre la vision par ordinateur accessible avec des outils performants et polyvalents."

Key advantages

  • Performance: C++ optimizations and parallelization
  • Versatility: images, video, deep learning
  • Cross-platform: Windows, Linux, macOS, mobile
  • Integrations: PyTorch, TensorFlow, MLflow

Installation and setup

# Python installation
pip install opencv-python opencv-contrib-python

# Verify the installation
python -c "import cv2; print(cv2.__version__)"

# Headless build (servers, no GUI dependencies)
pip install opencv-python-headless

# Note: the pip wheels are CPU-only; GPU (CUDA) support requires
# building OpenCV from source with CUDA enabled
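
To check whether the installed build was compiled with CUDA, the following one-liner should work (it prints 0 on the standard CPU-only wheels):

python -c "import cv2; print(cv2.cuda.getCudaEnabledDeviceCount())"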

Basic image processing

import cv2
import numpy as np
import matplotlib.pyplot as plt

# Read an image (imread returns None if the file cannot be read)
image = cv2.imread('image.jpg')
assert image is not None, "could not read image.jpg"
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads as BGR

# Resizing
resized = cv2.resize(image, (800, 600))

# Filtering
blurred = cv2.GaussianBlur(image, (15, 15), 0)
edges = cv2.Canny(image, 100, 200)

# Geometric transformations
rows, cols = image.shape[:2]
M = cv2.getRotationMatrix2D((cols/2, rows/2), 45, 1)
rotated = cv2.warpAffine(image, M, (cols, rows))

# Thresholding
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

# Morphology
kernel = np.ones((5,5), np.uint8)
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
closing = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
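
matplotlib is imported above but not yet used; a minimal display sketch for the results computed so far (matplotlib expects RGB order, hence image_rgb):

fig, axes = plt.subplots(1, 2, figsize=(10, 4))
axes[0].imshow(image_rgb)
axes[0].set_title('Original')
axes[1].imshow(edges, cmap='gray')
axes[1].set_title('Canny edges')
for ax in axes:
    ax.axis('off')
plt.show()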

Object detection

# Contour detection
def detect_contours(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150)

    contours, hierarchy = cv2.findContours(
        edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    # Filter by area
    min_area = 1000
    filtered_contours = [c for c in contours if cv2.contourArea(c) > min_area]

    # Draw the contours
    result = image.copy()
    cv2.drawContours(result, filtered_contours, -1, (0, 255, 0), 2)

    return result, filtered_contours

# Circle detection
def detect_circles(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    circles = cv2.HoughCircles(
        gray,
        cv2.HOUGH_GRADIENT,
        dp=1,
        minDist=30,
        param1=50,
        param2=30,
        minRadius=0,
        maxRadius=0
    )

    if circles is not None:
        circles = np.round(circles[0, :]).astype("int")
        result = image.copy()

        for (x, y, r) in circles:
            cv2.circle(result, (x, y), r, (0, 255, 0), 4)
            cv2.rectangle(result, (x - 5, y - 5), (x + 5, y + 5), (0, 128, 255), -1)

        return result, circles

    return image, []

# Geometric shape detection
def detect_shapes(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    thresh = cv2.threshold(blurred, 60, 255, cv2.THRESH_BINARY)[1]

    # OpenCV 4.x: findContours returns (contours, hierarchy)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    shapes = []
    for contour in contours:
        # Polygonal approximation
        epsilon = 0.02 * cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, epsilon, True)

        # Classify by vertex count (a coarse heuristic)
        vertices = len(approx)
        if vertices == 3:
            shape = "triangle"
        elif vertices == 4:
            shape = "rectangle"
        else:
            shape = "circle"  # any higher vertex count is treated as a circle

        shapes.append({
            'contour': contour,
            'shape': shape,
            'vertices': vertices,
            'area': cv2.contourArea(contour)
        })

    return shapes
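
A quick usage sketch tying the three detectors together (the file name is illustrative):

img = cv2.imread('shapes.jpg')
annotated, contours = detect_contours(img)
annotated, circles = detect_circles(annotated)
for s in detect_shapes(img):
    print(f"{s['shape']}: {s['vertices']} vertices, area {s['area']:.0f}")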

PyTorch integration

import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader

class OpenCVDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        # Read with OpenCV (BGR) and convert to RGB
        image = cv2.imread(self.image_paths[idx])
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # OpenCV preprocessing
        image = self.preprocess_opencv(image)

        # Convert to a PyTorch tensor
        if self.transform:
            image = self.transform(image)
        else:
            image = torch.from_numpy(image).float().permute(2, 0, 1) / 255.0

        return image, self.labels[idx]

    def preprocess_opencv(self, image):
        # Resize
        image = cv2.resize(image, (224, 224))

        # Histogram equalization on the L channel
        lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
        lab[:,:,0] = cv2.equalizeHist(lab[:,:,0])
        image = cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)

        # Denoising
        image = cv2.fastNlMeansDenoisingColored(image, None, 10, 10, 7, 21)

        return image

# Combined augmentation pipeline
class CVAugmentation:
    def __init__(self):
        self.opencv_transforms = [
            self.random_brightness,
            self.random_contrast,
            self.random_gaussian_noise,
            self.random_motion_blur
        ]

    def random_brightness(self, image, prob=0.5):
        if np.random.random() < prob:
            value = np.random.randint(-50, 50)
            # Work in int16 to avoid uint8 wrap-around, then clip back
            hsv = cv2.cvtColor(image, cv2.COLOR_RGB2HSV).astype(np.int16)
            hsv[:, :, 2] = np.clip(hsv[:, :, 2] + value, 0, 255)
            image = cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2RGB)
        return image

    def random_contrast(self, image, prob=0.5):
        if np.random.random() < prob:
            alpha = np.random.uniform(0.5, 1.5)
            image = cv2.convertScaleAbs(image, alpha=alpha, beta=0)
        return image

    def random_gaussian_noise(self, image, prob=0.3):
        if np.random.random() < prob:
            # Actual Gaussian noise, as the name implies, clipped back to uint8
            noise = np.random.normal(0, 25, image.shape)
            image = np.clip(image.astype(np.float32) + noise, 0, 255).astype(np.uint8)
        return image

    def random_motion_blur(self, image, prob=0.3):
        if np.random.random() < prob:
            size = np.random.randint(10, 20)
            kernel = np.zeros((size, size))
            kernel[int((size-1)/2), :] = np.ones(size)
            kernel = kernel / size
            image = cv2.filter2D(image, -1, kernel)
        return image

    def __call__(self, image):
        for transform in self.opencv_transforms:
            image = transform(image)
        return image

# Use with PyTorch
transform = transforms.Compose([
    CVAugmentation(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

dataset = OpenCVDataset(image_paths, labels, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)
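
A minimal iteration sketch over the resulting loader (model and training step omitted):

images, targets = next(iter(dataloader))
print(images.shape)  # e.g. torch.Size([32, 3, 224, 224])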

Real-time video processing

class VideoProcessor:
    def __init__(self, source=0):
        self.cap = cv2.VideoCapture(source)
        self.fps = self.cap.get(cv2.CAP_PROP_FPS) or 30.0  # webcams may report 0

        # Detectors (Haar cascades bundled with OpenCV)
        self.face_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )
        self.body_cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_fullbody.xml'
        )

    def detect_faces(self, frame):
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = self.face_cascade.detectMultiScale(
            gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30)
        )

        for (x, y, w, h) in faces:
            cv2.rectangle(frame, (x, y), (x+w, y+h), (255, 0, 0), 2)
            cv2.putText(frame, 'Face', (x, y-10), 
                       cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

        return frame, len(faces)

    def optical_flow_tracking(self, frame):
        if not hasattr(self, 'old_frame'):
            self.old_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            return frame

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Feature point detection
        corners = cv2.goodFeaturesToTrack(
            self.old_frame, maxCorners=100, qualityLevel=0.3, 
            minDistance=7, blockSize=7
        )

        if corners is not None:
            # Lucas-Kanade optical flow
            new_corners, status, error = cv2.calcOpticalFlowPyrLK(
                self.old_frame, gray, corners, None
            )

            # Draw the tracks
            for i, (new, old) in enumerate(zip(new_corners, corners)):
                if status[i] == 1:
                    a, b = new.ravel().astype(int)
                    c, d = old.ravel().astype(int)
                    cv2.line(frame, (a, b), (c, d), (0, 255, 0), 2)
                    cv2.circle(frame, (a, b), 5, (0, 0, 255), -1)

        self.old_frame = gray.copy()
        return frame

    def process_stream(self):
        while True:
            ret, frame = self.cap.read()
            if not ret:
                break

            # Process the frame
            frame, face_count = self.detect_faces(frame)
            frame = self.optical_flow_tracking(frame)

            # Overlay information
            cv2.putText(frame, f'Faces: {face_count}', (10, 30),
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(frame, f'FPS: {self.fps:.1f}', (10, 70),
                       cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            # Display
            cv2.imshow('Video Processing', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        self.cap.release()
        cv2.destroyAllWindows()

# Usage (source=0 opens the default webcam)
processor = VideoProcessor()
processor.process_stream()
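
To persist the annotated stream instead of only displaying it, a sketch using cv2.VideoWriter (the file names and codec are assumptions):

processor = VideoProcessor('input.mp4')
w = int(processor.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(processor.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
writer = cv2.VideoWriter('annotated.mp4', cv2.VideoWriter_fourcc(*'mp4v'),
                         processor.fps, (w, h))

while True:
    ret, frame = processor.cap.read()
    if not ret:
        break
    frame, _ = processor.detect_faces(frame)
    writer.write(frame)

writer.release()
processor.cap.release()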

MLflow integration

import time

import mlflow

class ComputerVisionExperiment:
    def __init__(self, experiment_name):
        mlflow.set_experiment(experiment_name)
        self.metrics = {}

    def preprocess_dataset(self, image_paths, params):
        with mlflow.start_run(run_name="preprocessing"):
            mlflow.log_params(params)

            processed_images = []
            processing_times = []

            for path in image_paths:
                start_time = time.time()

                # Read and preprocess
                image = cv2.imread(path)
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

                # Resize
                if params.get('resize'):
                    image = cv2.resize(image, params['target_size'])

                # Denoise
                if params.get('denoise'):
                    image = cv2.fastNlMeansDenoisingColored(image)

                # Equalize (L channel)
                if params.get('equalize'):
                    lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
                    lab[:,:,0] = cv2.equalizeHist(lab[:,:,0])
                    image = cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)

                processed_images.append(image)
                processing_times.append(time.time() - start_time)

            # Preprocessing metrics
            avg_time = np.mean(processing_times)
            mlflow.log_metrics({
                "avg_preprocessing_time": avg_time,
                "total_images": len(processed_images),
                "images_per_second": 1.0 / avg_time
            })

            # Save before/after samples (convert back to BGR for imwrite)
            sample_before = cv2.imread(image_paths[0])
            sample_after = cv2.cvtColor(processed_images[0], cv2.COLOR_RGB2BGR)

            cv2.imwrite("sample_before.jpg", sample_before)
            cv2.imwrite("sample_after.jpg", sample_after)

            mlflow.log_artifact("sample_before.jpg")
            mlflow.log_artifact("sample_after.jpg")

            return processed_images

    def evaluate_detection_algorithm(self, algorithm, test_images, ground_truth):
        with mlflow.start_run(run_name=f"detection_{algorithm.__name__}"):
            true_positives = 0
            false_positives = 0
            false_negatives = 0
            detection_times = []

            for image, gt in zip(test_images, ground_truth):
                start_time = time.time()
                detections = algorithm(image)
                detection_time = time.time() - start_time
                detection_times.append(detection_time)

                # Per-image detection metrics; calculate_detection_metrics is
                # assumed to implement IoU-based matching (not shown here)
                tp, fp, fn = self.calculate_detection_metrics(detections, gt)
                true_positives += tp
                false_positives += fp
                false_negatives += fn

            # Final metrics
            precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0
            recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0
            f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
            avg_detection_time = np.mean(detection_times)

            metrics = {
                "precision": precision,
                "recall": recall,
                "f1_score": f1_score,
                "avg_detection_time": avg_detection_time,
                "detections_per_second": 1.0 / avg_detection_time
            }

            mlflow.log_metrics(metrics)

            return metrics
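
A usage sketch (the experiment name and parameters are illustrative):

experiment = ComputerVisionExperiment("opencv-preprocessing")
processed = experiment.preprocess_dataset(
    image_paths,
    params={'resize': True, 'target_size': (224, 224),
            'denoise': True, 'equalize': True}
)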

Drone applications

# Drone vision - landing zone detection
class DroneVision:
    """Aerial-image analysis for drone landing."""

    def detect_landing_zones(self, aerial_image):
        """Détection de zones d'atterrissage sûres"""
        gray = cv2.cvtColor(aerial_image, cv2.COLOR_BGR2GRAY)

        # Flat areas have low gradient (Sobel magnitude)
        sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
        sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
        gradient_magnitude = np.sqrt(sobelx**2 + sobely**2)

        # Low gradient = flat terrain
        flat_mask = gradient_magnitude < 30

        # Morphological opening to clean the mask
        kernel = np.ones((20, 20), np.uint8)
        flat_mask = cv2.morphologyEx(flat_mask.astype(np.uint8), cv2.MORPH_OPEN, kernel)

        # Contours of the flat regions
        contours, _ = cv2.findContours(flat_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        landing_zones = []
        for contour in contours:
            area = cv2.contourArea(contour)
            if area > 10000:  # large enough zone
                # Compute the centroid
                M = cv2.moments(contour)
                if M["m00"] != 0:
                    cx = int(M["m10"] / M["m00"])
                    cy = int(M["m01"] / M["m00"])

                    landing_zones.append({
                        'center': (cx, cy),
                        'area': area,
                        'contour': contour,
                        'safety_score': self.calculate_safety_score(aerial_image, contour)
                    })

        return sorted(landing_zones, key=lambda x: x['safety_score'], reverse=True)

    def calculate_safety_score(self, image, contour):
        """Calcul du score de sécurité d'une zone d'atterrissage"""
        mask = np.zeros(image.shape[:2], dtype=np.uint8)
        cv2.fillPoly(mask, [contour], 255)

        # Analyze the region
        roi = cv2.bitwise_and(image, image, mask=mask)
        gray_roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)

        # Safety criteria (mask > 0 keeps all pixels inside the zone,
        # including genuinely dark ones)
        pixels = gray_roi[mask > 0]
        uniformity = 1.0 - (np.std(pixels) / 255.0)                 # uniformity
        area_score = min(cv2.contourArea(contour) / 50000.0, 1.0)   # size

        return (uniformity * 0.6 + area_score * 0.4) * 100

# Example usage
drone_vision = DroneVision()
aerial_img = cv2.imread('drone_view.jpg')
safe_zones = drone_vision.detect_landing_zones(aerial_img)

print(f"Zones d'atterrissage détectées: {len(safe_zones)}")
for i, zone in enumerate(safe_zones[:3]):  # Top 3
    print(f"Zone {i+1}: Score {zone['safety_score']:.1f}, Centre {zone['center']}")

Resources