# DriverTrac/track_drive copy.py
import streamlit as st
import cv2
import numpy as np
import threading
import time
import logging
from datetime import datetime
import yaml
from ultralytics import YOLO
import mediapipe as mp
from roboflow import Roboflow
from sklearn.ensemble import IsolationForest
from transformers import VideoMAEImageProcessor, VideoMAEForVideoClassification
import os
import torch
import onnxruntime as ort  # For optimized ONNX inference
from streamlit.runtime.scriptrunner import add_script_run_ctx  # Streamlit elements updated from a worker thread need the script context
# Setup logging for traceability
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', handlers=[logging.FileHandler('predictions.log'), logging.StreamHandler()])
logger = logging.getLogger(__name__)
# Config (save as config.yaml or inline)
CONFIG = {
'yolo_base': 'yolov8n.pt', # COCO pretrained
'conf_threshold': 0.7,
'perclos_threshold': 0.35,
'distraction_duration': 3, # seconds
'ttc_threshold': 2.5, # for FCW
'speed_limit': 60, # km/h sim
'min_tailgate_dist': 5, # meters est
'roboflow_api_key': 'YOUR_FREE_ROBOFLOW_KEY', # Replace
    'videomae_model': 'MCG-NJU/videomae-base-finetuned-kinetics',  # Kinetics-400 head, used zero-shot for action proxies
'inference_skip': 3, # Frames between inferences
}
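
# Optionally override the inline defaults from config.yaml (a sketch, assuming the file mirrors the keys above)
try:
    with open('config.yaml') as f:
        CONFIG.update(yaml.safe_load(f) or {})
except FileNotFoundError:
    pass  # Inline defaults apply when no config.yaml is present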
@st.cache_resource
def load_models():
"""Load all pre-trained models efficiently."""
# YOLO Base (vehicles, peds, phones)
yolo_base = YOLO(CONFIG['yolo_base'])
    if not os.path.exists('yolov8n.onnx'):
        yolo_base.export(format='onnx')  # Export once; apply int8 quantization as a separate post-training step if needed
    yolo_session = ort.InferenceSession('yolov8n.onnx')
# Seatbelt (Roboflow pretrained)
rf = Roboflow(api_key=CONFIG['roboflow_api_key'])
seatbelt_project = rf.workspace('karan-panja').project('seat-belt-detection-uhqwa')
seatbelt_model = seatbelt_project.version(1).model
# VideoMAE for actions (zero-shot)
processor = VideoMAEImageProcessor.from_pretrained(CONFIG['videomae_model'])
videomae = VideoMAEForVideoClassification.from_pretrained(CONFIG['videomae_model'])
    videomae.eval()  # TorchScript-ing HF VideoMAE via torch.jit.script is unreliable; plain eval-mode inference is used here
# MediaPipe for face/PERCLOS
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, refine_landmarks=True)
    # Isolation Forest for anomalies; must be fitted before predict(). Placeholder baseline of
    # "normal" [perclos, phone_prob, mean_det_conf] vectors - replace with real calibration data.
    iso_forest = IsolationForest(contamination=0.1, random_state=42)
    iso_forest.fit(np.random.default_rng(42).normal([0.1, 0.1, 0.5], 0.05, (200, 3)))
return yolo_session, seatbelt_model, (processor, videomae), face_mesh, iso_forest
class RealTimePredictor:
def __init__(self):
self.yolo_session, self.seatbelt_model, self.videomae, self.face_mesh, self.iso_forest = load_models()
self.frame_buffer = [] # For temporal (last 10 frames)
self.alert_states = {alert: False for alert in [
'Drowsiness', 'Distraction', 'Smoking', 'No Seatbelt', 'Driver Absent',
'FCW', 'LDW', 'Pedestrian', 'Hard Braking', 'Hard Acceleration', 'Tailgating', 'Overspeed'
]}
self.last_inference = 0
self.logs = []
def preprocess_frame(self, frame):
"""Resize and normalize for speed."""
frame = cv2.resize(frame, (640, 480))
return frame
    def detect_objects(self, frame):
        """YOLOv8 ONNX inference for vehicles, pedestrians, phones (NMS omitted for brevity)."""
        h, w = frame.shape[:2]
        # The exported model expects a 1x3x640x640 RGB tensor scaled to [0, 1]
        img = cv2.cvtColor(cv2.resize(frame, (640, 640)), cv2.COLOR_BGR2RGB).transpose(2, 0, 1)
        input_name = self.yolo_session.get_inputs()[0].name
        outputs = self.yolo_session.run(None, {input_name: img[None].astype(np.float32) / 255.0})
        # YOLOv8 output is (1, 84, 8400): 4 box coords (cx, cy, w, h) plus 80 COCO class scores per candidate
        preds = outputs[0][0].T
        classes = np.argmax(preds[:, 4:], axis=1)
        confs = preds[np.arange(len(classes)), 4 + classes]
        cx, cy, bw, bh = preds[:, 0], preds[:, 1], preds[:, 2], preds[:, 3]
        bboxes = np.stack([cx - bw / 2, cy - bh / 2, cx + bw / 2, cy + bh / 2], axis=1)
        # Rescale boxes from 640x640 model space back to the input frame
        bboxes[:, [0, 2]] *= w / 640
        bboxes[:, [1, 3]] *= h / 640
        high_conf = confs > CONFIG['conf_threshold']
        return {'bboxes': bboxes[high_conf], 'confs': confs[high_conf], 'classes': classes[high_conf]}
    def detect_seatbelt(self, frame):
        """Roboflow hosted seatbelt model."""
        # The Roboflow SDK takes confidence as a percentage (0-100)
        predictions = self.seatbelt_model.predict(frame, confidence=int(CONFIG['conf_threshold'] * 100)).json()
        dets = predictions.get('predictions', [])
        has_belt = any(p['class'].lower() in ('seatbelt', 'with_seatbelt', 'seat-belt') for p in dets)  # Adapt to the project's class names
        top_conf = max((p['confidence'] for p in dets), default=0)
        return has_belt, top_conf
def analyze_face(self, frame):
"""MediaPipe PERCLOS, head pose, absence."""
rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
results = self.face_mesh.process(rgb)
if not results.multi_face_landmarks:
return {'perclos': 0, 'head_pose': [0,0,0], 'absent': True, 'conf': 0}
landmarks = results.multi_face_landmarks[0].landmark
        # Eye Aspect Ratio (EAR) per eye from standard FaceMesh indices; a low EAR means the eye is closed
        def _ear(ids):
            pts = np.array([[landmarks[i].x, landmarks[i].y] for i in ids])
            return (np.linalg.norm(pts[1] - pts[5]) + np.linalg.norm(pts[2] - pts[4])) / (2 * np.linalg.norm(pts[0] - pts[3]) + 1e-6)
        avg_ear = (_ear([33, 160, 158, 133, 153, 144]) + _ear([362, 385, 387, 263, 373, 380])) / 2
        # Per-frame eye-closure score in [0, 1]; ~0.3 is a typical open-eye EAR (calibrate per camera)
        perclos = float(np.clip(1.0 - avg_ear / 0.3, 0.0, 1.0))
        # Rough yaw: horizontal offset of the nose tip (1) from the midpoint of the face edges (234, 454)
        yaw = (landmarks[1].x - (landmarks[234].x + landmarks[454].x) / 2) * 100
        return {'perclos': perclos, 'head_pose': [0, yaw, 0], 'absent': False, 'conf': 0.9}
    def recognize_actions(self, buffer):
        """VideoMAE action probabilities used as rough proxies for yawn/phone/look-away."""
        if len(buffer) < 8:
            return {'yawn': 0, 'phone': 0, 'look_away': 0}
        processor, model = self.videomae
        clip = [cv2.cvtColor(f, cv2.COLOR_BGR2RGB) for f in buffer[-16:]]
        while len(clip) < 16:  # VideoMAE expects 16 frames; pad short clips by repeating the last frame
            clip.append(clip[-1])
        inputs = processor(clip, return_tensors='pt')
        with torch.no_grad():
            probs = torch.softmax(model(**inputs).logits, dim=-1)[0].numpy()
        # Kinetics-400 labels looked up by name as proxies; classes missing from the label map score 0
        labels = {v.lower(): k for k, v in model.config.id2label.items()}
        def score(name):
            return float(probs[labels[name]]) if name in labels else 0.0
        return {'yawn': score('yawning'), 'phone': score('texting'), 'look_away': 0.0}
    def optical_flow(self, prev_frame, curr_frame):
        """Dense (Farneback) flow as an ego-motion proxy; returns (mean magnitude, mean vertical flow)."""
        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
        curr_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)
        if prev_gray.shape != curr_gray.shape:  # Guard against mixed raw/preprocessed frame sizes
            prev_gray = cv2.resize(prev_gray, (curr_gray.shape[1], curr_gray.shape[0]))
        flow = cv2.calcOpticalFlowFarneback(prev_gray, curr_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
        magnitude = float(np.mean(np.sqrt(flow[..., 0] ** 2 + flow[..., 1] ** 2)))
        return magnitude, float(np.mean(flow[..., 1]))  # Vertical sign crudely separates braking from acceleration
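    # Flow-to-speed calibration sketch (hypothetical numbers): footage recorded at a known 60 km/h
    # that yields a mean flow magnitude of ~600 px/frame implies the 0.1 scale used for speed_est
    # in validate_alerts (60 / 600 = 0.1).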
def estimate_distance(self, bboxes):
"""Simple bbox size for tailgating/FCW dist est (calib needed)."""
if len(bboxes) == 0: return float('inf')
areas = (bboxes[:, 2] - bboxes[:, 0]) * (bboxes[:, 3] - bboxes[:, 1])
return 10 / np.sqrt(np.max(areas)) # Inverse sqrt for dist (rough)
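    # Distance calibration sketch (hypothetical numbers): if a car known to be 10 m ahead spans
    # ~120x90 px (area 10800, sqrt ~ 104), the constant above becomes 10 * 104 ~ 1040 instead of 10.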
def detect_anomaly(self, features):
"""Flag unusual (low conf)."""
pred = self.iso_forest.predict(features.reshape(1, -1))[0]
return 1 if pred == -1 else 0
def validate_alerts(self, frame, prev_frame, detections, face_data, actions, seatbelt, flow_mag, buffer):
"""Rule-based validation for all alerts."""
features = np.array([face_data['perclos'], actions['phone'], detections['confs'].mean() if len(detections['confs']) else 0])
anomaly = self.detect_anomaly(features)
results = {}
timestamp = datetime.now().isoformat()
# DSMS
drowsy = (face_data['perclos'] > CONFIG['perclos_threshold']) and (actions['yawn'] > CONFIG['conf_threshold'])
results['Drowsiness'] = drowsy and not anomaly
distraction = (actions['phone'] > CONFIG['conf_threshold']) or (abs(face_data['head_pose'][1]) > 20)
results['Distraction'] = distraction and not anomaly
        # COCO has no cigarette class (67 is 'cell phone'), so YOLO alone cannot confirm smoking;
        # kept as a placeholder that never fires until a custom-trained detector is wired in
        results['Smoking'] = False
results['No Seatbelt'] = not seatbelt[0] and seatbelt[1] > CONFIG['conf_threshold']
results['Driver Absent'] = face_data['absent']
# ADAS (heuristics)
vehicles = sum(1 for c in detections['classes'] if c == 2) # Car class
peds = sum(1 for c in detections['classes'] if c == 0)
dist_est = self.estimate_distance(detections['bboxes'][detections['classes'] == 2])
        ttc = dist_est / (flow_mag[0] + 1e-5) if flow_mag[0] > 0 else float('inf')  # Rough TTC from the flow-based speed proxy
results['FCW'] = (ttc < CONFIG['ttc_threshold']) and vehicles > 0
results['Tailgating'] = (dist_est < CONFIG['min_tailgate_dist']) and vehicles > 0
results['Pedestrian'] = peds > 0 and detections['confs'][detections['classes'] == 0].max() > CONFIG['conf_threshold']
# LDW: Simple edge detect for lane (OpenCV)
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
edges = cv2.Canny(gray, 50, 150)
lines = cv2.HoughLinesP(edges, 1, np.pi/180, 100, minLineLength=100)
in_lane = len(lines) > 2 if lines is not None else False # Basic: many lines = on lane
results['LDW'] = not in_lane
        # Braking/Accel/Overspeed from flow: flow_mag is (mean magnitude, mean vertical flow)
        braking = flow_mag[0] > 10 and flow_mag[1] < 0  # Crude: vertical flow sign as a brake/accel cue
        accel = flow_mag[0] > 10 and flow_mag[1] > 0
        speed_est = flow_mag[0] * 0.1  # km/h proxy; the 0.1 scale needs per-camera calibration
results['Hard Braking'] = braking
results['Hard Acceleration'] = accel
results['Overspeed'] = speed_est > CONFIG['speed_limit']
# Log all
log_entry = f"{timestamp} | Features: {features} | Anomaly: {anomaly} | Alerts: {results}"
logger.info(log_entry)
self.logs.append(log_entry[-100:]) # Last 100 chars for display
# Update states (sustain if true)
for alert, triggered in results.items():
if triggered:
self.alert_states[alert] = True
elif time.time() - self.last_inference > CONFIG['distraction_duration']:
self.alert_states[alert] = False
return results
def run_inference(self, frame, prev_frame, buffer, frame_idx):
"""Full pipeline every N frames."""
if frame_idx % CONFIG['inference_skip'] != 0: return {}, frame
start = time.time()
frame = self.preprocess_frame(frame)
detections = self.detect_objects(frame)
seatbelt = self.detect_seatbelt(frame)
face_data = self.analyze_face(frame)
        buffer.append(frame)
        del buffer[:-10]  # Trim in place so the caller's list keeps only the last 10 frames
actions = self.recognize_actions(buffer)
        flow_mag = self.optical_flow(prev_frame, frame) if prev_frame is not None else (0.0, 0.0)
alerts = self.validate_alerts(frame, prev_frame, detections, face_data, actions, seatbelt, flow_mag, buffer)
self.last_inference = time.time()
# Overlay
for i, bbox in enumerate(detections['bboxes']):
x1, y1, x2, y2 = map(int, bbox)
label = f"{detections['classes'][i]}:{detections['confs'][i]:.2f}"
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
cv2.putText(frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
# Alert texts
for alert, active in self.alert_states.items():
if active:
cv2.putText(frame, f"ALERT: {alert}", (10, 30 + list(self.alert_states.keys()).index(alert)*20),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
logger.info(f"Inference time: {time.time() - start:.2f}s")
return alerts, frame
def video_loop(predictor, placeholder):
"""Threaded capture."""
    cap = cv2.VideoCapture(0)  # Webcam; on Jetson boards a GStreamer pipeline like 'nvarguscamerasrc ! video/x-raw(memory:NVMM), width=640, height=480, framerate=30/1 ! nvvidconv ! video/x-raw, format=BGRx ! videoconvert ! video/x-raw, format=BGR ! appsink' is used instead
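    # Sketch of the embedded-camera path, assuming an OpenCV build with GStreamer support:
    #   cap = cv2.VideoCapture(jetson_pipeline_string, cv2.CAP_GSTREAMER)
    # A recorded clip (hypothetical path) also works for offline validation:
    #   cap = cv2.VideoCapture('drive_sample.mp4')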
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
cap.set(cv2.CAP_PROP_FPS, 30)
prev_frame = None
buffer = []
frame_idx = 0
while True:
ret, frame = cap.read()
if not ret: continue
alerts, frame = predictor.run_inference(frame, prev_frame, buffer, frame_idx)
prev_frame = frame.copy()
frame_idx += 1
# BGR to RGB for Streamlit
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
placeholder.image(frame_rgb, channels='RGB', use_column_width=True)
time.sleep(0.033) # ~30 FPS cap
# Streamlit UI
st.title("🚗 Real-Time DSMS/ADAS Validator")
st.sidebar.title("Active Alerts")
predictor = RealTimePredictor()
# Start the capture thread once per session; Streamlit calls from a worker thread need the script context attached
video_placeholder = st.empty()
if 'video_thread' not in st.session_state:
    thread = threading.Thread(target=video_loop, args=(predictor, video_placeholder), daemon=True)
    add_script_run_ctx(thread)  # Assumed Streamlit >= 1.12; without it, image updates from the thread are dropped
    thread.start()
    st.session_state['video_thread'] = thread
# Sidebar: Alerts & Logs
with st.sidebar:
st.subheader("Alerts")
for alert, active in predictor.alert_states.items():
st.write(f"{'🔴' if active else '🟢'} {alert}")
st.subheader("Recent Logs (Traceable)")
for log in predictor.logs[-10:]:
st.text(log)
st.info("👆 Alerts trigger only on high conf + rules. Check `predictions.log` for full traces. Calibrate distances/speeds for your setup.")
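
# Usage (a minimal sketch): set CONFIG['roboflow_api_key'] first, then launch with
#   streamlit run "DriverTrac/track_drive copy.py"
# and open the local URL Streamlit prints. Alerts render on the video overlay and in the
# sidebar; full traces land in predictions.log in the working directory.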