This commit integrates the backend API for fetching and updating report data. It also includes a normalization function to handle data consistency between the API and local storage. Co-authored-by: anthonymuncher <anthonymuncher@gmail.com>
126 lines
4.3 KiB
Python
import os
|
|
import torch
|
|
from torch import nn, optim
|
|
from torch.utils.data import DataLoader
|
|
from torchvision import datasets, transforms, models
|
|
from torch.cuda.amp import GradScaler, autocast
|
|
from torch.utils.tensorboard import SummaryWriter
|
|
import time
|
|
import psutil
|
|
|
|
# ---------- CONFIG ----------
DATA_DIR = "dataset"  # root folder expected to contain train/ and val/ subdirectories
BATCH_SIZE = 16
NUM_EPOCHS = 5
LR = 1e-4  # Adam learning rate
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): the old comment listed only 3 categories (pothole,
# streetlight, garbage) but NUM_CLASSES is 6 — confirm the dataset really
# has 6 class folders; ImageFolder below infers classes from directory names.
NUM_CLASSES = 6
# NOTE(review): was annotated "Windows-safe", but 10 spawn workers combined
# with module-level dataset/loader construction re-executes this module in
# every worker on Windows — confirm, or move data setup under __main__.
NUM_WORKERS = 10
# ---------- DATA ----------
# Training augmentation: light geometric + photometric jitter, then tensor
# conversion.
# NOTE(review): there is no transforms.Normalize() here, yet the backbone
# below is ImageNet-pretrained (which expects normalized inputs) — confirm
# the omission is intentional.
train_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
])

# Validation preprocessing is deterministic: resize + tensor conversion only.
val_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# ImageFolder expects DATA_DIR/train/<class_name>/*.jpg (and likewise for val);
# class indices are assigned from sorted directory names.
train_dataset = datasets.ImageFolder(os.path.join(DATA_DIR, "train"), transform=train_transforms)
val_dataset = datasets.ImageFolder(os.path.join(DATA_DIR, "val"), transform=val_transforms)

# Shuffle only the training split; validation order is irrelevant to accuracy.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
# ---------- MODEL ----------
# ImageNet-pretrained ResNet-18 with its classification head replaced for
# NUM_CLASSES categories; only the new fc layer starts from random weights.
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, NUM_CLASSES)
model = model.to(DEVICE)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)
# Mixed-precision loss scaler (no-ops with a warning when CUDA is absent).
# NOTE(review): torch.cuda.amp.GradScaler/autocast are deprecated in recent
# torch in favor of torch.amp.GradScaler("cuda") / torch.amp.autocast("cuda")
# — migrate when the pinned torch version allows.
scaler = GradScaler()

# ---------- TENSORBOARD ----------
writer = SummaryWriter(log_dir="runs/streetlight_classification")
# ---------- DEBUG FUNCTIONS ----------
def print_gpu_memory():
    """Print currently allocated and reserved CUDA memory; no-op on CPU."""
    if DEVICE.type != "cuda":
        return
    allocated_mb = torch.cuda.memory_allocated() / 1024 ** 2
    reserved_mb = torch.cuda.memory_reserved() / 1024 ** 2
    print(f"GPU Memory Allocated: {allocated_mb:.2f} MB")
    print(f"GPU Memory Cached: {reserved_mb:.2f} MB")
def print_cpu_memory():
    """Print system RAM utilization (percent, used MB, total MB) via psutil."""
    stats = psutil.virtual_memory()
    used_mb = stats.used / 1024 ** 2
    total_mb = stats.total / 1024 ** 2
    print(f"CPU Memory Usage: {stats.percent}% ({used_mb:.2f}MB / {total_mb:.2f}MB)")
# ---------- TRAINING FUNCTION ----------
def train_model(num_epochs):
    """Train the module-level `model` for `num_epochs` epochs with mixed precision.

    Uses the module-level model/criterion/optimizer/scaler/loaders, logs
    train loss and validation accuracy to TensorBoard, and checkpoints the
    best-accuracy weights to best_model.pth.

    Args:
        num_epochs: number of full passes over train_loader.
    """
    best_acc = 0.0
    for epoch in range(num_epochs):
        start_time = time.time()
        model.train()
        running_loss = 0.0

        for i, (inputs, labels) in enumerate(train_loader):
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()

            # Forward pass under autocast for mixed precision.
            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()

            # Debug: inspect gradient statistics once, on the very first batch.
            if i == 0 and epoch == 0:
                # FIX: unscale first so the printed stats are true gradient
                # values, not values multiplied by the loss scale.
                # scaler.step() below detects the prior unscale and will not
                # unscale a second time.
                scaler.unscale_(optimizer)
                for name, param in model.named_parameters():
                    if param.grad is not None:
                        print(f"Grad {name}: mean={param.grad.mean():.6f}, std={param.grad.std():.6f}")

            scaler.step(optimizer)
            scaler.update()
            running_loss += loss.item()

            # Periodic progress + memory diagnostics.
            if i % 10 == 0:
                print(f"[Epoch {epoch+1}][Batch {i}/{len(train_loader)}] Loss: {loss.item():.4f}")
                print_gpu_memory()
                print_cpu_memory()

        # Guard against an empty training loader (would divide by zero).
        avg_loss = running_loss / max(len(train_loader), 1)

        # ---------- VALIDATION ----------
        model.eval()
        correct, total = 0, 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)
        # Guard against an empty validation set (would divide by zero).
        val_acc = correct / total if total else 0.0

        print(f"Epoch [{epoch+1}/{num_epochs}] completed in {time.time()-start_time:.2f}s")
        print(f"Train Loss: {avg_loss:.4f}, Val Accuracy: {val_acc:.4f}\n")

        # TensorBoard logging
        writer.add_scalar("Loss/train", avg_loss, epoch)
        writer.add_scalar("Accuracy/val", val_acc, epoch)

        # Save best model
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), "best_model.pth")
            print("✅ Saved best model.")

    print(f"Training finished. Best Val Accuracy: {best_acc:.4f}")
if __name__ == "__main__":
    train_model(NUM_EPOCHS)
    # FIX: flush and release the TensorBoard event file; without close()
    # the final events may never be written to disk.
    writer.close()