# DriverTrac/track_drive copy.py
import streamlit as st
import cv2
import numpy as np
import threading
import time
import logging
from datetime import datetime
import yaml
from ultralytics import YOLO
import mediapipe as mp
from roboflow import Roboflow
from sklearn.ensemble import IsolationForest
from transformers import VideoMAEImageProcessor, VideoMAEForVideoClassification
import os
import torch
import onnxruntime as ort  # For optimized ONNX inference
from streamlit.runtime.scriptrunner import add_script_run_ctx  # Streamlit elements updated from a worker thread need the script context
# Setup logging for traceability
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[logging.FileHandler('predictions.log'), logging.StreamHandler()])
logger = logging.getLogger(__name__)
# Config (save as config.yaml or inline)
CONFIG = {
'yolo_base': 'yolov8n.pt', # COCO pretrained
'conf_threshold': 0.7,
'perclos_threshold': 0.35,
'distraction_duration': 3, # seconds
'ttc_threshold': 2.5, # for FCW
'speed_limit': 60, # km/h sim
'min_tailgate_dist': 5, # meters est
'roboflow_api_key': 'YOUR_FREE_ROBOFLOW_KEY', # Replace
    'videomae_model': 'MCG-NJU/videomae-base-finetuned-kinetics',  # Kinetics-400 head, used zero-shot for action proxies
'inference_skip': 3, # Frames between inferences
}
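
# Optionally override the inline defaults from config.yaml (a sketch, assuming the file mirrors the keys above)
try:
    with open('config.yaml') as f:
        CONFIG.update(yaml.safe_load(f) or {})
except FileNotFoundError:
    pass  # Inline defaults apply when no config.yaml is present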
@st.cache_resource
def load_models():
"""Load all pre-trained models efficiently."""
# YOLO Base (vehicles, peds, phones)
yolo_base = YOLO(CONFIG['yolo_base'])
    if not os.path.exists('yolov8n.onnx'):
        yolo_base.export(format='onnx')  # Export once; apply int8 quantization as a separate post-training step if needed
    yolo_session = ort.InferenceSession('yolov8n.onnx')
# Seatbelt (Roboflow pretrained)
rf = Roboflow(api_key=CONFIG['roboflow_api_key'])
seatbelt_project = rf.workspace('karan-panja').project('seat-belt-detection-uhqwa')
seatbelt_model = seatbelt_project.version(1).model
# VideoMAE for actions (zero-shot)
processor = VideoMAEImageProcessor.from_pretrained(CONFIG['videomae_model'])
videomae = VideoMAEForVideoClassification.from_pretrained(CONFIG['videomae_model'])
    videomae.eval()  # TorchScript-ing HF VideoMAE via torch.jit.script is unreliable; plain eval-mode inference is used here
# MediaPipe for face/PERCLOS
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, refine_landmarks=True)
    # Isolation Forest for anomalies; must be fitted before predict(). Placeholder baseline of
    # "normal" [perclos, phone_prob, mean_det_conf] vectors - replace with real calibration data.
    iso_forest = IsolationForest(contamination=0.1, random_state=42)
    iso_forest.fit(np.random.default_rng(42).normal([0.1, 0.1, 0.5], 0.05, (200, 3)))
return yolo_session, seatbelt_model, (processor, videomae), face_mesh, iso_forest
class RealTimePredictor:
def __init__(self):
self.yolo_session, self.seatbelt_model, self.videomae, self.face_mesh, self.iso_forest = load_models()
self.frame_buffer = [] # For temporal (last 10 frames)
self.alert_states = {alert: False for alert in [
'Drowsiness', 'Distraction', 'Smoking', 'No Seatbelt', 'Driver Absent',
'FCW', 'LDW', 'Pedestrian', 'Hard Braking', 'Hard Acceleration', 'Tailgating', 'Overspeed'
]}
self.last_inference = 0
self.logs = []
def preprocess_frame(self, frame):
"""Resize and normalize for speed."""
frame = cv2.resize(frame, (640, 480))
return frame
    def detect_objects(self, frame):
        """YOLOv8 ONNX inference for vehicles, pedestrians, phones (NMS omitted for brevity)."""
        h, w = frame.shape[:2]
        # The exported model expects a 1x3x640x640 RGB tensor scaled to [0, 1]
        img = cv2.cvtColor(cv2.resize(frame, (640, 640)), cv2.COLOR_BGR2RGB).transpose(2, 0, 1)
        input_name = self.yolo_session.get_inputs()[0].name
        outputs = self.yolo_session.run(None, {input_name: img[None].astype(np.float32) / 255.0})
        # YOLOv8 output is (1, 84, 8400): 4 box coords (cx, cy, w, h) plus 80 COCO class scores per candidate
        preds = outputs[0][0].T
        classes = np.argmax(preds[:, 4:], axis=1)
        confs = preds[np.arange(len(classes)), 4 + classes]
        cx, cy, bw, bh = preds[:, 0], preds[:, 1], preds[:, 2], preds[:, 3]
        bboxes = np.stack([cx - bw / 2, cy - bh / 2, cx + bw / 2, cy + bh / 2], axis=1)
        # Rescale boxes from 640x640 model space back to the input frame
        bboxes[:, [0, 2]] *= w / 640
        bboxes[:, [1, 3]] *= h / 640
        high_conf = confs > CONFIG['conf_threshold']
        return {'bboxes': bboxes[high_conf], 'confs': confs[high_conf], 'classes': classes[high_conf]}
    def detect_seatbelt(self, frame):
        """Roboflow hosted seatbelt model."""
        # The Roboflow SDK takes confidence as a percentage (0-100)
        predictions = self.seatbelt_model.predict(frame, confidence=int(CONFIG['conf_threshold'] * 100)).json()
        dets = predictions.get('predictions', [])
        has_belt = any(p['class'].lower() in ('seatbelt', 'with_seatbelt', 'seat-belt') for p in dets)  # Adapt to the project's class names
        top_conf = max((p['confidence'] for p in dets), default=0)
        return has_belt, top_conf
def analyze_face(self, frame):
"""MediaPipe PERCLOS, head pose, absence."""
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
results = self.face_mesh.process(rgb)
if not results.multi_face_landmarks:
return {'perclos': 0, 'head_pose': [0,0,0], 'absent': True, 'conf': 0}
landmarks = results.multi_face_landmarks[0].landmark
        # Eye Aspect Ratio (EAR) per eye from standard FaceMesh indices; a low EAR means the eye is closed
        def _ear(ids):
            pts = np.array([[landmarks[i].x, landmarks[i].y] for i in ids])
            return (np.linalg.norm(pts[1] - pts[5]) + np.linalg.norm(pts[2] - pts[4])) / (2 * np.linalg.norm(pts[0] - pts[3]) + 1e-6)
        avg_ear = (_ear([33, 160, 158, 133, 153, 144]) + _ear([362, 385, 387, 263, 373, 380])) / 2
        # Per-frame eye-closure score in [0, 1]; ~0.3 is a typical open-eye EAR (calibrate per camera)
        perclos = float(np.clip(1.0 - avg_ear / 0.3, 0.0, 1.0))
        # Rough yaw: horizontal offset of the nose tip (1) from the midpoint of the face edges (234, 454)
        yaw = (landmarks[1].x - (landmarks[234].x + landmarks[454].x) / 2) * 100
        return {'perclos': perclos, 'head_pose': [0, yaw, 0], 'absent': False, 'conf': 0.9}
    def recognize_actions(self, buffer):
        """VideoMAE action probabilities used as rough proxies for yawn/phone/look-away."""
        if len(buffer) < 8:
            return {'yawn': 0, 'phone': 0, 'look_away': 0}
        processor, model = self.videomae
        clip = [cv2.cvtColor(f, cv2.COLOR_BGR2RGB) for f in buffer[-16:]]
        while len(clip) < 16:  # VideoMAE expects 16 frames; pad short clips by repeating the last frame
            clip.append(clip[-1])
        inputs = processor(clip, return_tensors='pt')
        with torch.no_grad():
            probs = torch.softmax(model(**inputs).logits, dim=-1)[0].numpy()
        # Kinetics-400 labels looked up by name as proxies; classes missing from the label map score 0
        labels = {v.lower(): k for k, v in model.config.id2label.items()}
        def score(name):
            return float(probs[labels[name]]) if name in labels else 0.0
        return {'yawn': score('yawning'), 'phone': score('texting'), 'look_away': 0.0}
    def optical_flow(self, prev_frame, curr_frame):
        """Dense (Farneback) flow as an ego-motion proxy; returns (mean magnitude, mean vertical flow)."""
        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
        curr_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)
        if prev_gray.shape != curr_gray.shape:  # Guard against mixed raw/preprocessed frame sizes
            prev_gray = cv2.resize(prev_gray, (curr_gray.shape[1], curr_gray.shape[0]))
        flow = cv2.calcOpticalFlowFarneback(prev_gray, curr_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
        magnitude = float(np.mean(np.sqrt(flow[..., 0] ** 2 + flow[..., 1] ** 2)))
        return magnitude, float(np.mean(flow[..., 1]))  # Vertical sign crudely separates braking from acceleration
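    # Flow-to-speed calibration sketch (hypothetical numbers): footage recorded at a known 60 km/h
    # that yields a mean flow magnitude of ~600 px/frame implies the 0.1 scale used for speed_est
    # in validate_alerts (60 / 600 = 0.1).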
def estimate_distance(self, bboxes):
"""Simple bbox size for tailgating/FCW dist est (calib needed)."""
if len(bboxes) == 0: return float('inf')
areas = (bboxes[:, 2] - bboxes[:, 0]) * (bboxes[:, 3] - bboxes[:, 1])
return 10 / np.sqrt(np.max(areas)) # Inverse sqrt for dist (rough)
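    # Distance calibration sketch (hypothetical numbers): if a car known to be 10 m ahead spans
    # ~120x90 px (area 10800, sqrt ~ 104), the constant above becomes 10 * 104 ~ 1040 instead of 10.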
def detect_anomaly(self, features):
"""Flag unusual (low conf)."""
pred = self.iso_forest.predict(features.reshape(1, -1))[0]
return 1 if pred == -1 else 0
def validate_alerts(self, frame, prev_frame, detections, face_data, actions, seatbelt, flow_mag, buffer):
"""Rule-based validation for all alerts."""
features = np.array([face_data['perclos'], actions['phone'], detections['confs'].mean() if len(detections['confs']) else 0])
anomaly = self.detect_anomaly(features)
results = {}
timestamp = datetime.now().isoformat()
# DSMS
drowsy = (face_data['perclos'] > CONFIG['perclos_threshold']) and (actions['yawn'] > CONFIG['conf_threshold'])
results['Drowsiness'] = drowsy and not anomaly
distraction = (actions['phone'] > CONFIG['conf_threshold']) or (abs(face_data['head_pose'][1]) > 20)
results['Distraction'] = distraction and not anomaly
        # COCO has no cigarette class (67 is 'cell phone'), so YOLO alone cannot confirm smoking;
        # kept as a placeholder that never fires until a custom-trained detector is wired in
        results['Smoking'] = False
results['No Seatbelt'] = not seatbelt[0] and seatbelt[1] > CONFIG['conf_threshold']
results['Driver Absent'] = face_data['absent']
# ADAS (heuristics)
vehicles = sum(1 for c in detections['classes'] if c == 2) # Car class
peds = sum(1 for c in detections['classes'] if c == 0)
dist_est = self.estimate_distance(detections['bboxes'][detections['classes'] == 2])
        ttc = dist_est / (flow_mag[0] + 1e-5) if flow_mag[0] > 0 else float('inf')  # Rough TTC from the flow-based speed proxy
results['FCW'] = (ttc < CONFIG['ttc_threshold']) and vehicles > 0
results['Tailgating'] = (dist_est < CONFIG['min_tailgate_dist']) and vehicles > 0
results['Pedestrian'] = peds > 0 and detections['confs'][detections['classes'] == 0].max() > CONFIG['conf_threshold']
# LDW: Simple edge detect for lane (OpenCV)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 50, 150)
lines = cv2.HoughLinesP(edges, 1, np.pi/180, 100, minLineLength=100)
in_lane = len(lines) > 2 if lines is not None else False # Basic: many lines = on lane
results['LDW'] = not in_lane
        # Braking/Accel/Overspeed from flow: flow_mag is (mean magnitude, mean vertical flow)
        braking = flow_mag[0] > 10 and flow_mag[1] < 0  # Crude: vertical flow sign as a brake/accel cue
        accel = flow_mag[0] > 10 and flow_mag[1] > 0
        speed_est = flow_mag[0] * 0.1  # km/h proxy; the 0.1 scale needs per-camera calibration
results['Hard Braking'] = braking
results['Hard Acceleration'] = accel
results['Overspeed'] = speed_est > CONFIG['speed_limit']
# Log all
log_entry = f"{timestamp} | Features: {features} | Anomaly: {anomaly} | Alerts: {results}"
logger.info(log_entry)
self.logs.append(log_entry[-100:]) # Last 100 chars for display
# Update states (sustain if true)
for alert, triggered in results.items():
if triggered:
self.alert_states[alert] = True
elif time.time() - self.last_inference > CONFIG['distraction_duration']:
self.alert_states[alert] = False
return results
def run_inference(self, frame, prev_frame, buffer, frame_idx):
"""Full pipeline every N frames."""
if frame_idx % CONFIG['inference_skip'] != 0: return {}, frame
start = time.time()
frame = self.preprocess_frame(frame)
detections = self.detect_objects(frame)
seatbelt = self.detect_seatbelt(frame)
face_data = self.analyze_face(frame)
        buffer.append(frame)
        del buffer[:-10]  # Trim in place so the caller's list keeps only the last 10 frames
actions = self.recognize_actions(buffer)
        flow_mag = self.optical_flow(prev_frame, frame) if prev_frame is not None else (0.0, 0.0)
alerts = self.validate_alerts(frame, prev_frame, detections, face_data, actions, seatbelt, flow_mag, buffer)
self.last_inference = time.time()
# Overlay
for i, bbox in enumerate(detections['bboxes']):
x1, y1, x2, y2 = map(int, bbox)
label = f"{detections['classes'][i]}:{detections['confs'][i]:.2f}"
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.putText(frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
# Alert texts
for alert, active in self.alert_states.items():
if active:
cv2.putText(frame, f"ALERT: {alert}", (10, 30 + list(self.alert_states.keys()).index(alert)*20),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
logger.info(f"Inference time: {time.time() - start:.2f}s")
return alerts, frame
def video_loop(predictor, placeholder):
"""Threaded capture."""
    cap = cv2.VideoCapture(0)  # Webcam; on Jetson boards a GStreamer pipeline like 'nvarguscamerasrc ! video/x-raw(memory:NVMM), width=640, height=480, framerate=30/1 ! nvvidconv ! video/x-raw, format=BGRx ! videoconvert ! video/x-raw, format=BGR ! appsink' is used instead
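    # Sketch of the embedded-camera path, assuming an OpenCV build with GStreamer support:
    #   cap = cv2.VideoCapture(jetson_pipeline_string, cv2.CAP_GSTREAMER)
    # A recorded clip (hypothetical path) also works for offline validation:
    #   cap = cv2.VideoCapture('drive_sample.mp4')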
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
cap.set(cv2.CAP_PROP_FPS, 30)
prev_frame = None
buffer = []
frame_idx = 0
while True:
ret, frame = cap.read()
if not ret: continue
alerts, frame = predictor.run_inference(frame, prev_frame, buffer, frame_idx)
prev_frame = frame.copy()
frame_idx += 1
# BGR to RGB for Streamlit
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
placeholder.image(frame_rgb, channels='RGB', use_column_width=True)
time.sleep(0.033) # ~30 FPS cap
# Streamlit UI
st.title("🚗 Real-Time DSMS/ADAS Validator")
st.sidebar.title("Active Alerts")
predictor = RealTimePredictor()
# Start the capture thread once per session; Streamlit calls from a worker thread need the script context attached
video_placeholder = st.empty()
if 'video_thread' not in st.session_state:
    thread = threading.Thread(target=video_loop, args=(predictor, video_placeholder), daemon=True)
    add_script_run_ctx(thread)  # Assumed Streamlit >= 1.12; without it, image updates from the thread are dropped
    thread.start()
    st.session_state['video_thread'] = thread
# Sidebar: Alerts & Logs
with st.sidebar:
st.subheader("Alerts")
for alert, active in predictor.alert_states.items():
st.write(f"{'🔴' if active else '🟢'} {alert}")
st.subheader("Recent Logs (Traceable)")
for log in predictor.logs[-10:]:
st.text(log)
st.info("👆 Alerts trigger only on high conf + rules. Check `predictions.log` for full traces. Calibrate distances/speeds for your setup.")
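
# Usage (a minimal sketch): set CONFIG['roboflow_api_key'] first, then launch with
#   streamlit run "DriverTrac/track_drive copy.py"
# and open the local URL Streamlit prints. Alerts render on the video overlay and in the
# sidebar; full traces land in predictions.log in the working directory.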