citypulse/backend/test/Machine_Learning/train_ml.py
Commit 46dea3304f by Cursor Agent: "Refactor: Integrate backend API and normalize data"
This commit integrates the backend API for fetching and updating report data. It also includes a normalization function to handle data consistency between the API and local storage.

Co-authored-by: anthonymuncher <anthonymuncher@gmail.com>
2025-09-26 10:27:39 +00:00


import os
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from torch.cuda.amp import GradScaler, autocast
from torch.utils.tensorboard import SummaryWriter
import time
import psutil
# ---------- CONFIG ----------
DATA_DIR = "dataset" # dataset folder
BATCH_SIZE = 16
NUM_EPOCHS = 5
LR = 1e-4
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
NUM_CLASSES = 6 # number of report categories (e.g. pothole, streetlight, garbage); must match the class folders in dataset/
NUM_WORKERS = 10 # Windows-safe
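# Note: on Windows, DataLoader workers are spawned as subprocesses, so the
# training entry point at the bottom is guarded by `if __name__ == "__main__"`.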
# ---------- DATA ----------
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
])
val_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])
train_dataset = datasets.ImageFolder(os.path.join(DATA_DIR, "train"), transform=train_transforms)
val_dataset = datasets.ImageFolder(os.path.join(DATA_DIR, "val"), transform=val_transforms)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
# ---------- MODEL ----------
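# Transfer learning: start from ImageNet-pretrained ResNet-18 and replace the
# final fully connected layer so it outputs NUM_CLASSES logits.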
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
model = model.to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)
scaler = GradScaler() # Mixed precision
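# GradScaler scales the loss before backward so small float16 gradients do not
# underflow, then unscales them before the optimizer step.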
# ---------- TENSORBOARD ----------
writer = SummaryWriter(log_dir="runs/streetlight_classification")
# ---------- DEBUG FUNCTIONS ----------
def print_gpu_memory():
    if DEVICE.type == "cuda":
        print(f"GPU Memory Allocated: {torch.cuda.memory_allocated()/1024**2:.2f} MB")
        print(f"GPU Memory Cached: {torch.cuda.memory_reserved()/1024**2:.2f} MB")

def print_cpu_memory():
    mem = psutil.virtual_memory()
    print(f"CPU Memory Usage: {mem.percent}% ({mem.used/1024**2:.2f}MB / {mem.total/1024**2:.2f}MB)")
# ---------- TRAINING FUNCTION ----------
def train_model(num_epochs):
    best_acc = 0.0
    for epoch in range(num_epochs):
        start_time = time.time()
        model.train()
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)
            scaler.scale(loss).backward()
            # Debug gradients for the first batch (values are still scaled by GradScaler here)
            if i == 0 and epoch == 0:
                for name, param in model.named_parameters():
                    if param.grad is not None:
                        print(f"Grad {name}: mean={param.grad.mean():.6f}, std={param.grad.std():.6f}")
            scaler.step(optimizer)
            scaler.update()
            running_loss += loss.item()
            if i % 10 == 0:
                print(f"[Epoch {epoch+1}][Batch {i}/{len(train_loader)}] Loss: {loss.item():.4f}")
                print_gpu_memory()
                print_cpu_memory()
        avg_loss = running_loss / len(train_loader)

        # ---------- VALIDATION ----------
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)
        val_acc = correct / total

        print(f"Epoch [{epoch+1}/{num_epochs}] completed in {time.time()-start_time:.2f}s")
        print(f"Train Loss: {avg_loss:.4f}, Val Accuracy: {val_acc:.4f}\n")

        # TensorBoard logging
        writer.add_scalar("Loss/train", avg_loss, epoch)
        writer.add_scalar("Accuracy/val", val_acc, epoch)

        # Save best model
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), "best_model.pth")
            print("✅ Saved best model.")

    print(f"Training finished. Best Val Accuracy: {best_acc:.4f}")
if __name__ == "__main__":
train_model(NUM_EPOCHS)
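
For reference, a minimal sketch of how the checkpoint written by this script could be loaded for inference. It assumes the same ResNet-18 head and the same 224x224 Resize + ToTensor preprocessing used in val_transforms above; the image path and the printed class index (which would map back to train_dataset.classes) are hypothetical, not part of the committed file.

# inference_sketch.py -- illustrative only, not part of train_ml.py
import torch
from torch import nn
from torchvision import models, transforms
from PIL import Image

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
NUM_CLASSES = 6  # must match the value used during training

# Rebuild the same architecture and load the weights saved as best_model.pth
model = models.resnet18(weights=None)
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
model.load_state_dict(torch.load("best_model.pth", map_location=DEVICE))
model.to(DEVICE).eval()

# Same preprocessing as val_transforms in the training script
preprocess = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

img = Image.open("example.jpg").convert("RGB")  # hypothetical input image
with torch.no_grad():
    logits = model(preprocess(img).unsqueeze(0).to(DEVICE))
    pred = logits.argmax(dim=1).item()  # index into train_dataset.classes
print(f"Predicted class index: {pred}")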