Smooth_FrameProcessing

commit 6b42b731b4 (parent 51d1ed8d2c)

src/poc_demo.py (183 changed lines)
@@ -208,34 +208,57 @@ class POCPredictor:
         self.logs = []

     def detect_objects(self, frame):
-        """YOLO object detection - optimized for POC."""
-        # Resize to square for YOLO
-        yolo_input = cv2.resize(frame, (640, 640))
-
-        # Convert HWC to CHW
-        yolo_input = yolo_input.transpose(2, 0, 1)
-        yolo_input = yolo_input[None].astype(np.float32) / 255.0
-
-        # Run inference
-        input_name = self.yolo_session.get_inputs()[0].name
-        outputs = self.yolo_session.run(None, {input_name: yolo_input})
-
-        # Parse YOLOv8 ONNX output: (1, 84, 8400)
-        output = outputs[0]
-        bboxes = output[0, :4, :].transpose()  # (8400, 4)
-        class_scores = output[0, 4:, :]  # (80, 8400)
-        classes = np.argmax(class_scores, axis=0)
-        confs = np.max(class_scores, axis=0)
-
-        # Filter by confidence and relevant classes (phone and person)
-        relevant_classes = [0, 67]  # person, cell phone
-        mask = (confs > CONFIG['conf_threshold']) & np.isin(classes, relevant_classes)
-
-        return {
-            'bboxes': bboxes[mask],
-            'confs': confs[mask],
-            'classes': classes[mask]
-        }
+        """YOLO object detection - optimized for POC with performance improvements."""
+        try:
+            # Resize to square for YOLO (use INTER_LINEAR for speed)
+            yolo_input = cv2.resize(frame, (640, 640), interpolation=cv2.INTER_LINEAR)
+
+            # Convert HWC to CHW (optimized)
+            yolo_input = yolo_input.transpose(2, 0, 1)
+            yolo_input = np.ascontiguousarray(yolo_input[None].astype(np.float32) / 255.0)
+
+            # Run inference
+            input_name = self.yolo_session.get_inputs()[0].name
+            outputs = self.yolo_session.run(None, {input_name: yolo_input})
+
+            # Parse YOLOv8 ONNX output: (1, 84, 8400)
+            output = outputs[0]
+            bboxes = output[0, :4, :].transpose()  # (8400, 4)
+            class_scores = output[0, 4:, :]  # (80, 8400)
+            classes = np.argmax(class_scores, axis=0).astype(np.int32)  # Ensure int32
+            confs = np.max(class_scores, axis=0)
+
+            # Filter by confidence and relevant classes (phone and person)
+            relevant_classes = np.array([0, 67], dtype=np.int32)  # person, cell phone
+            conf_mask = confs > CONFIG['conf_threshold']
+            class_mask = np.isin(classes, relevant_classes)
+            mask = conf_mask & class_mask
+
+            # Ensure mask is boolean and arrays are properly indexed
+            mask = mask.astype(bool)
+
+            # Get indices where mask is True
+            valid_indices = np.where(mask)[0]
+
+            if len(valid_indices) > 0:
+                return {
+                    'bboxes': bboxes[valid_indices],
+                    'confs': confs[valid_indices],
+                    'classes': classes[valid_indices]
+                }
+            else:
+                return {
+                    'bboxes': np.array([], dtype=np.float32).reshape(0, 4),
+                    'confs': np.array([], dtype=np.float32),
+                    'classes': np.array([], dtype=np.int32)
+                }
+        except Exception as e:
+            logger.error(f"Error in detect_objects: {e}")
+            return {
+                'bboxes': np.array([], dtype=np.float32).reshape(0, 4),
+                'confs': np.array([], dtype=np.float32),
+                'classes': np.array([], dtype=np.int32)
+            }

     def analyze_face(self, frame):
         """OpenCV face analysis - NO MediaPipe!"""
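The parsing step in this hunk is self-contained enough to sketch in isolation. Below is a minimal standalone version of the new logic, assuming a hard-coded threshold in place of CONFIG['conf_threshold'] and a random dummy tensor in place of a real ONNX session output; the names here are illustrative, not from the commit.

```python
import numpy as np

CONF_THRESHOLD = 0.5                          # stand-in for CONFIG['conf_threshold']
RELEVANT = np.array([0, 67], dtype=np.int32)  # COCO ids: person, cell phone

def parse_yolov8_output(output):
    """Parse a YOLOv8 ONNX head of shape (1, 84, 8400):
    rows 0-3 are box coords (cx, cy, w, h), rows 4-83 are per-class scores."""
    bboxes = output[0, :4, :].transpose()      # (8400, 4)
    class_scores = output[0, 4:, :]            # (80, 8400)
    classes = np.argmax(class_scores, axis=0).astype(np.int32)
    confs = np.max(class_scores, axis=0)
    mask = (confs > CONF_THRESHOLD) & np.isin(classes, RELEVANT)
    idx = np.where(mask)[0]
    if len(idx) == 0:
        # Typed empties so downstream code never has to special-case None
        return {'bboxes': np.zeros((0, 4), np.float32),
                'confs': np.zeros(0, np.float32),
                'classes': np.zeros(0, np.int32)}
    return {'bboxes': bboxes[idx], 'confs': confs[idx], 'classes': classes[idx]}

# Smoke test on a random dummy tensor (no model needed)
dets = parse_yolov8_output(np.random.rand(1, 84, 8400).astype(np.float32))
print(dets['bboxes'].shape, dets['classes'].dtype)
```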
@@ -286,17 +309,23 @@ class POCPredictor:
         return has_seatbelt, confidence

     def process_frame(self, frame, frame_idx, last_results=None):
-        """Process single frame - streamlined and optimized."""
+        """Process single frame - streamlined and optimized with smooth video support."""

         should_process = (frame_idx % CONFIG['inference_skip'] == 0)

-        # If not processing this frame, return last results
+        # Always use last results for smooth video (even if not processing this frame)
         if not should_process and last_results is not None:
             last_alerts = last_results[0]
-            last_face_data = last_results[1]
-            annotated = self.draw_detections(frame, {'bboxes': [], 'confs': [], 'classes': []},
-                                             last_face_data, last_alerts)
-            return last_alerts, annotated, False, last_face_data
+            last_face_data = last_results[5] if len(last_results) > 5 else {'present': False, 'perclos': 0, 'head_yaw': 0}
+            last_detections = last_results[6] if len(last_results) > 6 else {
+                'bboxes': np.array([], dtype=np.float32).reshape(0, 4),
+                'confs': np.array([], dtype=np.float32),
+                'classes': np.array([], dtype=np.int32)
+            }
+            # Draw last predictions on current frame for smooth video
+            annotated = self.draw_detections(frame, last_detections, last_face_data, last_alerts)
+            return last_alerts, annotated, False, last_results[2] if len(last_results) > 2 else False, \
+                   last_results[3] if len(last_results) > 3 else 0.0, last_face_data, last_detections

         # Process this frame
         start_time = time.time()
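This hunk is the heart of the "smooth video" change: inference runs only every CONFIG['inference_skip']-th frame, and skipped frames are re-annotated with the cached results. A minimal sketch of that pattern, with hypothetical infer/draw callables standing in for the predictor methods (the real code threads a seven-element results tuple rather than one cached object):

```python
INFERENCE_SKIP = 3  # illustrative; the commit reads this from CONFIG

def annotate_stream(frames, infer, draw):
    """Run heavy inference every INFERENCE_SKIP-th frame, but draw the most
    recent predictions on every frame so playback stays smooth."""
    last = None
    for idx, frame in enumerate(frames):
        if idx % INFERENCE_SKIP == 0 or last is None:
            last = infer(frame)        # fresh predictions
        yield draw(frame, last)        # cheap redraw on skipped frames
```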
@@ -306,7 +335,11 @@ class POCPredictor:

         if not face_data['present']:
             alerts = {'Driver Absent': True}
-            detections = {'bboxes': [], 'confs': [], 'classes': []}
+            detections = {
+                'bboxes': np.array([], dtype=np.float32).reshape(0, 4),
+                'confs': np.array([], dtype=np.float32),
+                'classes': np.array([], dtype=np.int32)
+            }
             seatbelt, belt_conf = False, 0.0
         else:
             # Run object detection
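The typed empty arrays introduced here are not cosmetic: unlike the plain lists they replace, they preserve shape and dtype guarantees for the indexing and drawing code downstream. A quick demonstration:

```python
import numpy as np

empty_bboxes = np.array([], dtype=np.float32).reshape(0, 4)
print(len(empty_bboxes))        # 0 -> length checks behave like populated arrays
print(empty_bboxes[0:0].shape)  # (0, 4) -> slicing and indexing stay well-defined
```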
@@ -327,7 +360,14 @@ class POCPredictor:
         alerts['Drowsiness'] = face_data['perclos'] > CONFIG['perclos_threshold']
         alerts['Distraction'] = abs(face_data['head_yaw']) > (CONFIG['head_pose_threshold'] * 0.8)
         alerts['Driver Absent'] = not face_data['present']
-        alerts['Phone Detected'] = np.any(detections['classes'] == 67) if len(detections['classes']) > 0 else False
+        # Safe check for phone detection
+        phone_detected = False
+        if len(detections['classes']) > 0:
+            try:
+                phone_detected = np.any(detections['classes'] == 67)
+            except:
+                phone_detected = False
+        alerts['Phone Detected'] = phone_detected
         alerts['No Seatbelt'] = not seatbelt and belt_conf > 0.3

         # Update states with temporal smoothing (clear alerts when condition stops)
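Worth noting: np.any over a comparison with an empty NumPy array is already False, so the try/except added here mainly guards against detections['classes'] arriving as something other than an array:

```python
import numpy as np

print(np.any(np.array([], dtype=np.int32) == 67))      # False, no exception
print(np.any(np.array([0, 67], dtype=np.int32) == 67))  # True
```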
@@ -363,34 +403,48 @@ class POCPredictor:
         if len(self.logs) > CONFIG['max_logs']:
             self.logs = self.logs[-CONFIG['max_logs']:]

-        return alerts, annotated_frame, True, seatbelt, belt_conf, face_data
+        return alerts, annotated_frame, True, seatbelt, belt_conf, face_data, detections

     def draw_detections(self, frame, detections, face_data, alerts):
         """Draw detections and alerts on frame."""
         annotated = frame.copy()
         h, w = annotated.shape[:2]

-        # Draw bounding boxes
-        for i, (bbox, conf, cls) in enumerate(zip(detections['bboxes'], detections['confs'], detections['classes'])):
-            # Scale bbox from 640x640 to frame size
-            x1, y1, x2, y2 = bbox
-            x1, x2 = int(x1 * w / 640), int(x2 * w / 640)
-            y1, y2 = int(y1 * h / 640), int(y2 * h / 640)
-
-            # Color by class
-            if cls == 0:  # person
-                color = (0, 255, 0)  # Green
-                label = "Person"
-            elif cls == 67:  # phone
-                color = (255, 0, 255)  # Magenta
-                label = "Phone"
-            else:
-                color = (255, 255, 0)  # Cyan
-                label = "Object"
-
-            cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)
-            cv2.putText(annotated, f"{label}: {conf:.2f}", (x1, y1-10),
-                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
+        # Draw bounding boxes (safe iteration)
+        if len(detections['bboxes']) > 0 and len(detections['confs']) > 0 and len(detections['classes']) > 0:
+            try:
+                # Ensure all arrays have same length
+                min_len = min(len(detections['bboxes']), len(detections['confs']), len(detections['classes']))
+                for i in range(min_len):
+                    bbox = detections['bboxes'][i]
+                    conf = float(detections['confs'][i])
+                    cls = int(detections['classes'][i])
+
+                    # Scale bbox from 640x640 to frame size
+                    x1, y1, x2, y2 = bbox
+                    x1, x2 = int(x1 * w / 640), int(x2 * w / 640)
+                    y1, y2 = int(y1 * h / 640), int(y2 * h / 640)
+
+                    # Ensure coordinates are valid
+                    x1, x2 = max(0, min(x1, w)), max(0, min(x2, w))
+                    y1, y2 = max(0, min(y1, h)), max(0, min(y2, h))
+
+                    # Color by class
+                    if cls == 0:  # person
+                        color = (0, 255, 0)  # Green
+                        label = "Person"
+                    elif cls == 67:  # phone
+                        color = (255, 0, 255)  # Magenta
+                        label = "Phone"
+                    else:
+                        color = (255, 255, 0)  # Cyan
+                        label = "Object"

+                    cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)
+                    cv2.putText(annotated, f"{label}: {conf:.2f}", (x1, max(y1-10, 10)),
+                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
+            except Exception as e:
+                logger.warning(f"Error drawing detections: {e}")

         # Draw face status
         if face_data['present']:
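The scale-then-clamp sequence added above can be read as one small pure function. A sketch follows (scale_and_clamp is a hypothetical name, not in the commit); because the input was resized to 640x640 without letterboxing, the x and y axes scale independently:

```python
def scale_and_clamp(bbox, w, h, model_size=640):
    """Map a box from model space (model_size x model_size) back to a
    w x h frame, then clamp to the frame bounds."""
    x1, y1, x2, y2 = bbox
    x1, x2 = int(x1 * w / model_size), int(x2 * w / model_size)
    y1, y2 = int(y1 * h / model_size), int(y2 * h / model_size)
    x1, x2 = max(0, min(x1, w)), max(0, min(x2, w))
    y1, y2 = max(0, min(y1, h)), max(0, min(y2, h))
    return x1, y1, x2, y2

print(scale_and_clamp((100, 50, 700, 300), w=1280, h=720))  # (200, 56, 1280, 337)
```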
@@ -457,37 +511,54 @@ def video_capture_loop(predictor, frame_queue, video_source=None):
             time.sleep(0.1)
             continue

+        # Always process frame (for smooth video - shows all frames with last predictions)
         try:
             results = predictor.process_frame(frame, frame_idx, last_results)
             alerts = results[0]
             processed_frame = results[1]
             was_processed = results[2]

+            # Update last results if we got new predictions
             if was_processed:
                 last_results = results
+            # If not processed, we still use last_results for drawing (already handled in process_frame)

         except Exception as e:
-            logger.error(f"Error processing frame: {e}")
-            processed_frame = frame
-            alerts = {}
-            was_processed = False
+            logger.error(f"Error processing frame: {e}", exc_info=True)
+            # On error, show raw frame with last predictions if available
+            if last_results:
+                try:
+                    last_alerts = last_results[0]
+                    last_face_data = last_results[5] if len(last_results) > 5 else {'present': False, 'perclos': 0, 'head_yaw': 0}
+                    last_detections = last_results[6] if len(last_results) > 6 else {'bboxes': np.array([]), 'confs': np.array([]), 'classes': np.array([])}
+                    processed_frame = predictor.draw_detections(frame, last_detections, last_face_data, last_alerts)
+                except:
+                    processed_frame = frame
+            else:
+                processed_frame = frame
+
         frame_idx += 1

+        # Convert to RGB for display
         frame_rgb = cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB)

+        # Always put frame in queue (smooth video - all frames shown)
         try:
             frame_queue.put_nowait(frame_rgb)
         except queue.Full:
+            # If queue is full, replace oldest frame
             try:
                 frame_queue.get_nowait()
                 frame_queue.put_nowait(frame_rgb)
             except queue.Empty:
                 pass

+        # Frame rate control
         if video_source is not None:
             fps = cap.get(cv2.CAP_PROP_FPS) or 30
             time.sleep(1.0 / fps)
         else:
+            # For camera, target 30 FPS (smooth video)
             time.sleep(0.033)

     cap.release()
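The queue.Full branch above implements a drop-oldest policy: the newest frame always wins, so the preview lags capture by at most maxsize frames. As a standalone sketch (put_latest is a hypothetical helper, not in the commit):

```python
import queue

def put_latest(q, item):
    """Drop-oldest enqueue: if the queue is full, evict the stalest item
    so the display stays close to real time."""
    try:
        q.put_nowait(item)
    except queue.Full:
        try:
            q.get_nowait()           # evict oldest
            q.put_nowait(item)
        except queue.Empty:          # a consumer drained it first; drop this frame
            pass

q = queue.Queue(maxsize=2)
for n in range(4):
    put_latest(q, n)
print(q.get_nowait(), q.get_nowait())  # 2 3
```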
@@ -683,7 +754,7 @@ with col1:
     else:
         try:
             frame = frame_queue.get_nowait()
-            video_placeholder.image(frame, channels='RGB', use_container_width=True)
+            video_placeholder.image(frame, channels='RGB', width='stretch')
         except queue.Empty:
             video_placeholder.info("Waiting for camera feed...")

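This last hunk appears to track Streamlit's API migration: recent releases deprecate the use_container_width parameter on st.image in favor of the width keyword, with width='stretch' as the stated equivalent of use_container_width=True.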