PyTorch

Vue d'ensemble

PyTorch est un framework de deep learning open source, développé à l'origine par Meta (anciennement Facebook), qui privilégie la flexibilité et la facilité d'utilisation, aussi bien pour la recherche que pour la production.

Philosophie

"From research to production - Framework flexible permettant un prototypage rapide et un déploiement en production."

Avantages clés

Dynamic computation graphs : Graphes construits à la volée
Pythonic : API intuitive et naturelle
GPU acceleration : Support CUDA natif
Research-friendly : Debugging et expérimentation faciles

Concepts fondamentaux

Tensors

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

# Tensor creation: from a Python list, zero-filled, and random with gradient tracking
x = torch.as_tensor([1, 2, 3, 4], dtype=torch.float32)
y = torch.zeros((3, 4))
z = torch.randn((2, 3), requires_grad=True)

# Element-wise addition followed by a reduction to a scalar
a = torch.randn((3, 4), requires_grad=True)
b = torch.randn((3, 4), requires_grad=True)
c = torch.add(a, b)
d = torch.sum(c)

# Automatic differentiation: d = sum(a + b), so the gradient w.r.t. 'a' is all ones
d.backward()
print(a.grad)  # gradient of 'd' with respect to 'a'

# Device selection: CUDA when a GPU is available, CPU otherwise
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
x_gpu = x.to(device)

Réseaux de neurones

class MLP(nn.Module):
    """Multi-layer perceptron with two hidden layers.

    The forward path is ``fc1 -> ReLU -> dropout -> fc2 -> ReLU -> dropout
    -> fc3``; the output of ``fc3`` is returned without any activation.

    Args:
        input_size: Number of input features.
        hidden_size: Width of both hidden layers.
        num_classes: Number of output units.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=hidden_size)
        self.fc3 = nn.Linear(in_features=hidden_size, out_features=num_classes)
        # One activation module and one dropout module (p=0.2) are shared by
        # both hidden layers.
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)

    def forward(self, x):
        """Return the ``fc3`` output for the input batch ``x``."""
        hidden = self.dropout(self.relu(self.fc1(x)))
        hidden = self.dropout(self.relu(self.fc2(hidden)))
        return self.fc3(hidden)

# Instantiate the network and move it to the selected device in one step
model = MLP(input_size=784, hidden_size=512, num_classes=10).to(device)

# Printing a module lists its sub-modules
print(model)

Vision par ordinateur avec OpenCV

import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import cv2
import numpy as np
from torch.utils.data import Dataset, DataLoader

class OpenCVDataset(Dataset):
    """Image dataset that reads and preprocesses files with OpenCV.

    Each item is an ``(image, label)`` pair. The image is read from disk,
    converted from BGR to RGB, passed through :meth:`preprocess_opencv`,
    and returned as a float tensor in channel-first order divided by 255.
    The label is a scalar ``torch.long`` tensor.

    Args:
        image_paths: Sequence of image file paths.
        labels: Sequence of labels, indexed in parallel with ``image_paths``.
        transform: Optional callable applied to the image tensor.
    """

    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        """Return the number of image paths."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load, preprocess and return the sample at position ``idx``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        path = self.image_paths[idx]

        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than deep inside cvtColor.
        image = cv2.imread(path)
        if image is None:
            raise FileNotFoundError(f"Unable to read image: {path!r}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # OpenCV-side preprocessing (resize, contrast, denoise)
        image = self.preprocess_opencv(image)

        # HWC uint8 array -> CHW float tensor divided by 255
        image = torch.from_numpy(image).float().permute(2, 0, 1) / 255.0

        # Compare against None so that a falsy-but-valid callable (for
        # example an empty nn.Sequential) is still applied.
        if self.transform is not None:
            image = self.transform(image)

        return image, torch.tensor(self.labels[idx], dtype=torch.long)

    def preprocess_opencv(self, image):
        """Resize to 224x224, equalize contrast and denoise an RGB image."""
        image = cv2.resize(image, (224, 224))

        # Contrast enhancement: histogram-equalize only the first LAB
        # channel, then convert back to RGB.
        lab = cv2.cvtColor(image, cv2.COLOR_RGB2LAB)
        lab[:, :, 0] = cv2.equalizeHist(lab[:, :, 0])
        image = cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)

        # Denoising with a bilateral filter (d=9, sigmaColor=75, sigmaSpace=75)
        image = cv2.bilateralFilter(image, 9, 75, 75)

        return image

# CNN pour classification d'images
class ImageClassifier(nn.Module):
    """Convolutional image classifier with three conv stages and an MLP head.

    Every stage applies (3x3 conv -> batch norm -> ReLU) twice and then a
    2x2 max pool, so spatial size is halved per stage while the channel
    count goes 3 -> 32 -> 64 -> 128. The head expects ``128 * 28 * 28``
    flattened features, which corresponds to 224x224 inputs.

    Args:
        num_classes: Number of output units of the final linear layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        def conv_stage(in_channels, out_channels):
            # Same layer order and indices for every stage.
            return nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, 2),
            )

        # Convolutional feature extractor
        self.conv_block1 = conv_stage(3, 32)
        self.conv_block2 = conv_stage(32, 64)
        self.conv_block3 = conv_stage(64, 128)

        # Fully connected head with dropout before each linear layer
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(128 * 28 * 28, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return the classifier output for the image batch ``x``."""
        features = self.conv_block3(self.conv_block2(self.conv_block1(x)))
        # Collapse everything except the batch dimension
        flattened = features.view(features.size(0), -1)
        return self.classifier(flattened)

# Entraînement
def train_model(model, train_loader, val_loader, num_epochs=10):
    """Train ``model`` and print loss/accuracy after every epoch.

    Uses cross-entropy loss, Adam (lr=0.001, weight_decay=1e-4) and a StepLR
    schedule (step_size=7, gamma=0.1). The model is moved to CUDA when
    available, otherwise it stays on the CPU.

    Args:
        model: ``nn.Module`` whose output is fed to ``nn.CrossEntropyLoss``.
        train_loader: Sized iterable of ``(images, labels)`` training batches.
        val_loader: Sized iterable of ``(images, labels)`` validation batches.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        The trained model (on the selected device).
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)

    def ratio(numerator, denominator):
        # An empty loader yields zero batches and zero samples; report 0.0
        # instead of failing with ZeroDivisionError.
        return numerator / denominator if denominator else 0.0

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct_train = 0
        total_train = 0

        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # Predicted class = index of the largest output per sample
            predicted = outputs.argmax(dim=1)
            total_train += labels.size(0)
            correct_train += (predicted == labels).sum().item()

        # Validation phase: no gradients, dropout/batch-norm in eval mode
        model.eval()
        correct_val = 0
        total_val = 0
        val_loss = 0.0

        with torch.no_grad():
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                predicted = outputs.argmax(dim=1)
                total_val += labels.size(0)
                correct_val += (predicted == labels).sum().item()

        # The schedule advances once per epoch, after the optimizer updates
        scheduler.step()

        train_acc = ratio(100 * correct_train, total_train)
        val_acc = ratio(100 * correct_val, total_val)
        mean_train_loss = ratio(running_loss, len(train_loader))
        mean_val_loss = ratio(val_loss, len(val_loader))

        print(f'Epoch [{epoch+1}/{num_epochs}]')
        print(f'Train Loss: {mean_train_loss:.4f}, Train Acc: {train_acc:.2f}%')
        print(f'Val Loss: {mean_val_loss:.4f}, Val Acc: {val_acc:.2f}%')
        print('-' * 50)

    return model

Intégration avec MLflow

import mlflow
import mlflow.pytorch
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

class MLflowPyTorchTrainer:
    """Train a PyTorch classifier and record the run in MLflow.

    Creating an instance selects the MLflow experiment and picks CUDA when
    it is available, otherwise the CPU.

    Args:
        experiment_name: Name passed to ``mlflow.set_experiment``.
    """

    def __init__(self, experiment_name):
        mlflow.set_experiment(experiment_name)
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    @staticmethod
    def _ratio(numerator, denominator):
        """Return ``numerator / denominator``, or 0.0 for a zero denominator."""
        return numerator / denominator if denominator else 0.0

    def train_with_mlflow(self, model, train_loader, val_loader, config):
        """Train ``model`` inside a single MLflow run.

        Logs the hyperparameters, per-epoch metrics, the best checkpoint
        (by validation accuracy), the final evaluation artifacts and the
        final model.

        Args:
            model: ``nn.Module`` whose output is fed to ``nn.CrossEntropyLoss``.
            train_loader: Sized iterable of ``(images, labels)`` batches.
            val_loader: Sized iterable of ``(images, labels)`` batches.
            config: Mapping with the keys ``learning_rate``, ``weight_decay``,
                ``scheduler_step``, ``scheduler_gamma`` and ``num_epochs``;
                ``model_name`` is optional and defaults to
                ``'pytorch-classifier'``.

        Returns:
            The trained model, on ``self.device``.
        """
        with mlflow.start_run():
            # Hyperparameters and run context
            mlflow.log_params(config)
            mlflow.log_param("device", str(self.device))
            mlflow.log_param("model_params", sum(p.numel() for p in model.parameters()))

            # Training setup
            criterion = nn.CrossEntropyLoss()
            optimizer = optim.Adam(
                model.parameters(),
                lr=config['learning_rate'],
                weight_decay=config['weight_decay']
            )
            scheduler = optim.lr_scheduler.StepLR(
                optimizer,
                step_size=config['scheduler_step'],
                gamma=config['scheduler_gamma']
            )

            model = model.to(self.device)
            best_val_acc = 0.0

            for epoch in range(config['num_epochs']):
                # Read the rate before the scheduler advances so the logged
                # value is the one actually used during this epoch.
                epoch_lr = optimizer.param_groups[0]['lr']

                train_loss, train_acc = self._train_epoch(
                    model, train_loader, criterion, optimizer
                )
                val_loss, val_acc = self._validate_epoch(
                    model, val_loader, criterion
                )

                scheduler.step()

                mlflow.log_metrics({
                    "train_loss": train_loss,
                    "train_accuracy": train_acc,
                    "val_loss": val_loss,
                    "val_accuracy": val_acc,
                    "learning_rate": epoch_lr
                }, step=epoch)

                # Checkpoint whenever validation accuracy improves
                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    torch.save(model.state_dict(), "best_model.pth")
                    mlflow.log_artifact("best_model.pth")

                print(f'Epoch {epoch+1}: Train Acc: {train_acc:.2f}%, Val Acc: {val_acc:.2f}%')

            # Final evaluation on the validation loader
            self._final_evaluation(model, val_loader)

            # Log (and register) the final model
            mlflow.pytorch.log_model(
                model,
                "model",
                registered_model_name=config.get('model_name', 'pytorch-classifier')
            )

            return model

    def _train_epoch(self, model, loader, criterion, optimizer):
        """Run one optimization pass over ``loader``.

        Returns:
            ``(mean loss per batch, accuracy in percent)``; both are 0.0 when
            ``loader`` is empty.
        """
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for images, labels in loader:
            images, labels = images.to(self.device), labels.to(self.device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        return self._ratio(running_loss, len(loader)), self._ratio(100 * correct, total)

    def _validate_epoch(self, model, loader, criterion):
        """Evaluate ``model`` on ``loader`` without computing gradients.

        Returns:
            ``(mean loss per batch, accuracy in percent)``; both are 0.0 when
            ``loader`` is empty.
        """
        model.eval()
        running_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for images, labels in loader:
                images, labels = images.to(self.device), labels.to(self.device)
                outputs = model(images)
                loss = criterion(outputs, labels)

                running_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        return self._ratio(running_loss, len(loader)), self._ratio(100 * correct, total)

    def _final_evaluation(self, model, test_loader):
        """Log macro precision/recall/F1 and a confusion-matrix image to MLflow."""
        model.eval()
        all_predictions = []
        all_labels = []

        with torch.no_grad():
            for images, labels in test_loader:
                images = images.to(self.device)
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)

                all_predictions.extend(predicted.cpu().numpy())
                # .cpu() is a no-op for CPU labels and makes the conversion
                # work when the loader yields labels that live on the GPU.
                all_labels.extend(labels.cpu().numpy())

        # Classification report as a nested dict
        report = classification_report(all_labels, all_predictions, output_dict=True)

        # Final macro-averaged metrics
        mlflow.log_metrics({
            "final_precision": report['macro avg']['precision'],
            "final_recall": report['macro avg']['recall'],
            "final_f1": report['macro avg']['f1-score']
        })

        # Confusion matrix heatmap, saved locally and then logged as artifact
        cm = confusion_matrix(all_labels, all_predictions)
        plt.figure(figsize=(10, 8))
        try:
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
            plt.title('Confusion Matrix')
            plt.ylabel('True Label')
            plt.xlabel('Predicted Label')
            plt.savefig('confusion_matrix.png')
            mlflow.log_artifact('confusion_matrix.png')
        finally:
            # Always release the figure, even if saving or logging fails
            plt.close()

# Example usage: hyperparameters read by MLflowPyTorchTrainer.train_with_mlflow
config = dict(
    learning_rate=0.001,
    weight_decay=1e-4,
    num_epochs=50,
    scheduler_step=15,
    scheduler_gamma=0.1,
    model_name='image-classifier-v1',
)

# train_loader and val_loader must already exist at this point
trainer = MLflowPyTorchTrainer("computer-vision-experiments")
model = ImageClassifier(num_classes=10)
trained_model = trainer.train_with_mlflow(model, train_loader, val_loader, config)

Déploiement avec KubeFlow

# kubeflow-pytorch-component.py
import kfp
from kfp.components import create_component_from_func
from typing import NamedTuple

@create_component_from_func
def pytorch_training_component(
    data_path: str,
    model_output_path: str,
    learning_rate: float = 0.001,
    batch_size: int = 32,
    num_epochs: int = 10,
    gpu_limit: str = "1"
) -> NamedTuple('Outputs', [('model_path', str), ('accuracy', float)]):
    """Kubeflow component that trains a PyTorch CNN and saves its weights.

    Args:
        data_path: Location of the training data (consumed by the data
            loading step, which is left as a placeholder below).
        model_output_path: File path where the model ``state_dict`` is saved.
        learning_rate: Learning rate for the Adam optimizer.
        batch_size: Batch size for the data loaders (placeholder step).
        num_epochs: Number of passes over the training loader.
        gpu_limit: Not used inside the function body.

    Returns:
        ``(model_output_path, accuracy)`` where accuracy is a percentage
        measured on the test loader (0.0 if the test loader is empty).
    """

    # Imports are local to the function body
    import torch
    import torch.nn as nn
    import torch.optim as optim
    from torch.utils.data import DataLoader
    import os

    # Device selection
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Training on device: {device}")

    # Data loading
    # (data loading code)
    # NOTE(review): `train_loader` and `test_loader` are used below but are
    # not defined in this snippet; they must be built here (for example from
    # `data_path` and `batch_size`) before the component can run.

    # Model definition
    class CNN(nn.Module):
        def __init__(self, num_classes=10):
            super(CNN, self).__init__()
            self.features = nn.Sequential(
                nn.Conv2d(3, 64, 3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(64, 128, 3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, 2),
                nn.Conv2d(128, 256, 3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2, 2),
            )
            # 256 * 28 * 28 flattened features corresponds to 224x224 inputs
            # after the three 2x2 poolings above.
            self.classifier = nn.Sequential(
                nn.Dropout(),
                nn.Linear(256 * 28 * 28, 512),
                nn.ReLU(inplace=True),
                nn.Dropout(),
                nn.Linear(512, num_classes),
            )

        def forward(self, x):
            x = self.features(x)
            x = x.view(x.size(0), -1)
            x = self.classifier(x)
            return x

    # Training setup
    model = CNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    # Training loop (simplified)
    model.train()
    for epoch in range(num_epochs):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

    # Evaluation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            outputs = model(data)
            _, predicted = torch.max(outputs, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    # An empty test loader would otherwise divide by zero
    accuracy = 100 * correct / total if total else 0.0

    # Save the weights. os.path.dirname() returns '' for a bare file name and
    # os.makedirs('') raises, so only create the directory when there is one.
    output_dir = os.path.dirname(model_output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    torch.save(model.state_dict(), model_output_path)

    return (model_output_path, accuracy)

# Pipeline Kubeflow
@kfp.dsl.pipeline(name='PyTorch Training Pipeline')
def pytorch_pipeline(
    data_path: str = '/data/training_data',
    learning_rate: float = 0.001,
    batch_size: int = 32,
    num_epochs: int = 50
):
    # Pipeline with a single training step. The pipeline parameters are
    # forwarded to the training component; the model output path is fixed.
    training_args = {
        'data_path': data_path,
        'model_output_path': '/models/pytorch_model.pth',
        'learning_rate': learning_rate,
        'batch_size': batch_size,
        'num_epochs': num_epochs,
    }
    training_task = pytorch_training_component(**training_args)

    # Resource limits for the training step: 1 GPU, 8Gi memory, 4 CPUs
    training_task.set_gpu_limit('1')
    training_task.set_memory_limit('8Gi')
    training_task.set_cpu_limit('4')

    # Run the step in a custom container image
    training_task.container.set_image('pytorch/pytorch:1.12.0-cuda11.3-cudnn8-runtime')

Optimisation et performance

# Techniques d'optimisation PyTorch

# 1. Mixed Precision Training
from torch.cuda.amp import GradScaler, autocast

def train_with_mixed_precision(model, train_loader, criterion, optimizer):
    """Run one pass over ``train_loader`` with automatic mixed precision.

    The forward pass and loss run under ``autocast``; the loss is scaled by
    a ``GradScaler`` before the backward pass, and the scaler performs the
    optimizer step and updates its scale factor.

    Args:
        model: ``nn.Module`` to train.
        train_loader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer over the model's parameters.
    """
    scaler = GradScaler()

    # Batches must be on the same device as the model's weights. Use the
    # device of the first parameter; a model without parameters leaves the
    # batches where they are.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    for images, labels in train_loader:
        if device is not None:
            images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        # Forward pass under autocast
        with autocast():
            outputs = model(images)
            loss = criterion(outputs, labels)

        # Backward pass on the scaled loss, then step and update the scaler
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

# 2. Multi-GPU: wrap the model in DataParallel when at least two GPUs are visible
if torch.cuda.device_count() >= 2:
    print(f"Using {torch.cuda.device_count()} GPUs")
    model = nn.DataParallel(model)

# 3. cuDNN flags
torch.backends.cudnn.deterministic = False  # allow non-deterministic (faster) algorithms
torch.backends.cudnn.benchmark = True  # intended for fixed input sizes

# 4. Gradient clipping: cap the total gradient norm at 1.0
nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

# 5. Learning rate scheduling avancé
class WarmupScheduler:
    """Linear warmup followed by cosine annealing of the learning rate.

    During the first ``warmup_steps`` calls to :meth:`step` the rate grows
    linearly up to ``base_lr``. Afterwards it follows half a cosine period
    down to 0 at ``total_steps`` and stays at 0 for any later step.

    Args:
        optimizer: Object exposing ``param_groups``, a list of dicts whose
            ``'lr'`` entry is overwritten on every step.
        warmup_steps: Number of warmup steps.
        total_steps: Step at which the cosine decay reaches 0.
        base_lr: Peak learning rate reached at the end of warmup.
    """

    def __init__(self, optimizer, warmup_steps, total_steps, base_lr=0.001):
        self.optimizer = optimizer
        self.warmup_steps = warmup_steps
        self.total_steps = total_steps
        self.base_lr = base_lr
        self.step_count = 0

    def step(self):
        """Advance one step and write the new rate into every param group."""
        self.step_count += 1
        if self.step_count <= self.warmup_steps:
            # Warmup phase: linear ramp from base_lr / warmup_steps to base_lr
            lr = self.step_count / self.warmup_steps * self.base_lr
        else:
            # Cosine annealing. Progress is clamped to 1.0 so the rate does
            # not climb back up once total_steps has been passed, and a
            # schedule with no decay steps goes straight to the end value
            # instead of dividing by zero.
            decay_steps = self.total_steps - self.warmup_steps
            if decay_steps > 0:
                progress = min((self.step_count - self.warmup_steps) / decay_steps, 1.0)
            else:
                progress = 1.0
            lr = float(self.base_lr * 0.5 * (1 + np.cos(np.pi * progress)))

        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr

Ressources

Documentation : https://pytorch.org/docs/
Tutorials : https://pytorch.org/tutorials/
GitHub : https://github.com/pytorch/pytorch