first commit

2025-11-26 12:08:00 +05:30 · 2025-11-26 12:08:00 +05:30 · e39ed831e3
commit e39ed831e3
3 changed files with 1497 additions and 0 deletions
--- a/ai_asistant.py
+++ b/ai_asistant.py
@ -0,0 +1,446 @@
+#!/usr/bin/env python3
+"""
+Truck HPC AI Assistant - POC Demo (OPTIMIZED + TRULY NATURAL VOICE + HINDI SUPPORT)
+Optimized for Raspberry Pi 5 with Ollama + Whisper STT + gTTS (eSpeak-NG fallback)
+NATURAL VOICE: Uses Google TTS (gTTS) audio, cached locally as MP3 files
+OFFLINE: Works offline for cached phrases; gTTS needs internet for uncached text
+MULTILINGUAL: English and Hindi support
+"""
+
+import requests
+import json
+import time
+import psutil
+import sounddevice as sd
+import numpy as np
+import subprocess
+import os
+import re
+import tempfile
+import wave
+from multiprocessing import Process, Queue
+from faster_whisper import WhisperModel
+from datetime import datetime
+
+# --------------------------------------------------------------
+# TEXT CLEANING FUNCTION
+# --------------------------------------------------------------
+def clean_text_for_speech(text):
+    """Strip Markdown markup from ``text`` so a TTS engine reads only the words.
+
+    Line-level markers (horizontal rules, bullets) are removed before inline
+    emphasis. Doing it the other way round lets the emphasis patterns consume
+    part of a ``***`` rule or a leading ``*`` bullet and leaves a stray marker
+    in the spoken text.
+
+    Args:
+        text: Raw text, typically a model reply that may contain Markdown.
+
+    Returns:
+        The text without Markdown markers, with every run of whitespace
+        collapsed to one space and surrounding whitespace removed.
+    """
+    # Horizontal rules and bullets first (see docstring for why).
+    text = re.sub(r'^[-*_]{3,}$', '', text, flags=re.MULTILINE)
+    text = re.sub(r'^\s*[-*+]\s+', '', text, flags=re.MULTILINE)
+    # Headings.
+    text = re.sub(r'#{1,6}\s*', '', text)
+    # Inline emphasis, longest markers first so '***x***' is not half-stripped.
+    text = re.sub(r'\*\*\*(.+?)\*\*\*', r'\1', text)
+    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
+    text = re.sub(r'__(.+?)__', r'\1', text)
+    text = re.sub(r'\*(.+?)\*', r'\1', text)
+    text = re.sub(r'_(.+?)_', r'\1', text)
+    # Code fences before inline code, so fence backticks are not paired up.
+    text = re.sub(r'```[\w]*\n', '', text)
+    text = re.sub(r'```', '', text)
+    text = re.sub(r'`(.+?)`', r'\1', text)
+    # Links keep their label only.
+    text = re.sub(r'\[(.+?)\]\(.+?\)', r'\1', text)
+    # Numbered-list prefixes and block-quote markers.
+    text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE)
+    text = re.sub(r'^\s*>\s+', '', text, flags=re.MULTILINE)
+    text = re.sub(r'\s+', ' ', text)
+    return text.strip()
+
+
+# --------------------------------------------------------------
+# GTTS CACHED TTS WORKER (Natural Voice with Local Cache + Hindi Support)
+# --------------------------------------------------------------
+def gtts_tts_worker(tts_queue, voice_gender="female", language="en"):
+    """
+    Speak queued text with gTTS, caching each synthesized phrase as an MP3.
+
+    Runs until the string "__EXIT__" is received. Each queue item is either a
+    plain string (spoken in ``language``) or a dict with a 'text' key and an
+    optional 'lang' key. Uncached phrases are generated with gTTS (needs
+    internet); cached ones are played from ~/.cache/truck_assistant_tts with
+    mpg123. If synthesis or playback fails, the phrase is spoken with
+    espeak-ng instead. If gTTS is not installed, the whole worker delegates
+    to espeak_tts_worker.
+
+    Args:
+        tts_queue: Queue the worker reads phrases from.
+        voice_gender: "female" selects the co.uk gTTS endpoint for English,
+            any other value selects com.
+        language: Default language for items without their own 'lang';
+            "en" is English, any other value is synthesized as Hindi.
+    """
+    try:
+        from gtts import gTTS
+    except ImportError:
+        print("\n❌ gTTS not installed. Install with: pip install gtts")
+        print("Falling back to espeak-ng...\n")
+        espeak_tts_worker(tts_queue, "en-gb+f3" if voice_gender == "female" else "en-us+m3", language)
+        return
+
+    import hashlib
+
+    # Create cache directory
+    cache_dir = os.path.expanduser("~/.cache/truck_assistant_tts")
+    os.makedirs(cache_dir, exist_ok=True)
+
+    lang_name = "English" if language == "en" else "Hindi"
+    print(f"✅ Using Google TTS ({voice_gender} voice, {lang_name}) with local cache\n")
+    print("💡 First run needs internet, then works offline from cache\n")
+
+    while True:
+        data = tts_queue.get()
+        if data == "__EXIT__":
+            break
+
+        # Bound before the try block so the espeak fallback below can always
+        # rely on them, even if unpacking the queue item is what failed.
+        clean_text = ""
+        current_lang = language
+
+        try:
+            # Support for language switching
+            if isinstance(data, dict):
+                text = data['text']
+                current_lang = data.get('lang', language)
+            else:
+                text = data
+
+            clean_text = clean_text_for_speech(text)
+
+            if not clean_text:
+                continue
+
+            # Cache key includes the language so the same words in another
+            # language get their own file.
+            text_hash = hashlib.md5(f"{current_lang}_{clean_text}".encode()).hexdigest()
+            cache_file = os.path.join(cache_dir, f"{text_hash}.mp3")
+
+            if not os.path.exists(cache_file):
+                # Generate with gTTS (needs internet first time)
+                if current_lang == "en":
+                    tld = "co.uk" if voice_gender == "female" else "com"
+                    tts = gTTS(text=clean_text, lang='en', tld=tld, slow=False)
+                else:  # Hindi
+                    tts = gTTS(text=clean_text, lang='hi', slow=False)
+
+                # Write to a scratch file and rename on success: a failed
+                # download must not leave a broken file that later runs
+                # would treat as a valid cache entry.
+                partial_file = f"{cache_file}.{os.getpid()}.part"
+                try:
+                    tts.save(partial_file)
+                    os.replace(partial_file, cache_file)
+                finally:
+                    if os.path.exists(partial_file):
+                        os.remove(partial_file)
+
+            # Play using mpg123 (faster than converting to WAV)
+            subprocess.run(['mpg123', '-q', cache_file], check=True)
+
+            # Natural pause; the danda ends a Hindi sentence like a full stop.
+            if clean_text.endswith(("?", "!")):
+                time.sleep(0.15)
+            elif clean_text.endswith((".", "।")):
+                time.sleep(0.10)
+            else:
+                time.sleep(0.05)
+
+        except Exception as e:
+            print(f"[TTS ERROR] {e}")
+            if not clean_text:
+                # Nothing usable to speak (malformed queue item).
+                continue
+            # Fallback to espeak if gTTS or playback fails. '--' keeps text
+            # that starts with '-' from being parsed as a command-line option.
+            fallback_voice = 'hi' if current_lang == 'hi' else 'en'
+            try:
+                subprocess.run(['espeak-ng', '-v', fallback_voice, '--', clean_text],
+                               check=True, capture_output=True)
+            except (OSError, subprocess.SubprocessError) as fallback_error:
+                print(f"[TTS ERROR] espeak-ng fallback failed: {fallback_error}")
+
+
+# --------------------------------------------------------------
+# ESPEAK-NG TTS WORKER (Fallback)
+# --------------------------------------------------------------
+def espeak_tts_worker(tts_queue, voice="en-gb+f3", language="en"):
+    """Speak queued text with eSpeak-NG until "__EXIT__" is received.
+
+    Each queue item is either a plain string (spoken in ``language``) or a
+    dict with a 'text' key and an optional 'lang' key. Items whose language
+    is "hi" use the eSpeak 'hi' voice; everything else uses ``voice``.
+    Returns immediately, after printing install instructions, when the
+    espeak-ng executable cannot be run.
+
+    Args:
+        tts_queue: Queue the worker reads phrases from.
+        voice: eSpeak-NG voice name used for non-Hindi text.
+        language: Default language for items without their own 'lang'.
+    """
+    # Probe for the binary. Only process-launch failures mean "not found";
+    # a bare except would also swallow KeyboardInterrupt/SystemExit.
+    try:
+        subprocess.run(['espeak-ng', '--version'],
+                       capture_output=True, text=True, timeout=2, check=True)
+    except (OSError, subprocess.SubprocessError):
+        print("\n❌ eSpeak-NG not found! Install with: sudo apt install espeak-ng")
+        return
+
+    lang_name = "English" if language == "en" else "Hindi"
+    print(f"✅ Using eSpeak-NG ({voice} voice, {lang_name})\n")
+
+    while True:
+        data = tts_queue.get()
+        if data == "__EXIT__":
+            break
+
+        try:
+            # Support for language switching
+            if isinstance(data, dict):
+                text = data['text']
+                current_lang = data.get('lang', language)
+            else:
+                text = data
+                current_lang = language
+
+            clean_text = clean_text_for_speech(text)
+
+            if not clean_text:
+                continue
+
+            espeak_voice = 'hi' if current_lang == 'hi' else voice
+            # '--' ends option parsing so text such as "-5 degrees" is
+            # spoken instead of being rejected as an unknown option.
+            subprocess.run(['espeak-ng', '-v', espeak_voice, '-s', '175', '--', clean_text],
+                           check=True, capture_output=True)
+
+            # Natural pause; the danda ends a Hindi sentence like a full stop.
+            if clean_text.endswith(("?", "!")):
+                time.sleep(0.15)
+            elif clean_text.endswith((".", "।")):
+                time.sleep(0.10)
+            else:
+                time.sleep(0.05)
+
+        except Exception as e:
+            print(f"[TTS ERROR] {e}")
+
+
+# --------------------------------------------------------------
+# MAIN ASSISTANT CLASS
+# --------------------------------------------------------------
+class TruckAssistant:
+    """Truck-driver assistant: Whisper speech-to-text, Ollama chat, spoken reply.
+
+    Replies are printed as they arrive and pushed phrase by phrase onto a
+    queue consumed by a separate TTS worker process.
+    """
+
+    # Characters that close a speakable phrase in a streamed reply
+    # (includes the Devanagari danda used in Hindi).
+    _PHRASE_ENDINGS = (".", "!", "?", ",", ";", "।")
+
+    def __init__(self, model="llama3.2:3b-instruct-q4_K_M", base_url="http://localhost:11434",
+                 voice_gender="female", use_gtts=True, language="en"):
+        """Load the Whisper model and start the TTS worker process.
+
+        Args:
+            model: Ollama model tag sent with every chat request.
+            base_url: Base URL of the Ollama HTTP server.
+            voice_gender: "female" or any other value (treated as male).
+            use_gtts: Start the gTTS worker when true, else the eSpeak-NG worker.
+            language: "hi" for Hindi; selects the system prompt, the Whisper
+                model and the TTS language.
+        """
+        self.model = model
+        self.base_url = base_url
+        self.conversation_history = []
+        self.language = language
+
+        # Language-specific system prompts
+        self.system_prompts = {
+            "en": "You are a helpful AI assistant for truck drivers. Provide clear, concise, and practical answers.",
+            "hi": "आप ट्रक ड्राइवरों के लिए एक सहायक AI सहायक हैं। स्पष्ट, संक्षिप्त और व्यावहारिक उत्तर प्रदान करें। कृपया हिंदी में उत्तर दें।"
+        }
+
+        # The English-only model is used unless Hindi was requested.
+        whisper_model = "tiny" if language == "hi" else "tiny.en"
+        print(f"Loading Whisper model ({whisper_model} - optimized for speed)...")
+        self.whisper = WhisperModel(
+            whisper_model,
+            device="cpu",
+            compute_type="int8",
+            num_workers=2
+        )
+
+        # TTS queue + process
+        self.tts_queue = Queue()
+
+        if use_gtts:
+            self.tts_process = Process(
+                target=gtts_tts_worker,
+                args=(self.tts_queue, voice_gender, language),
+                daemon=True
+            )
+        else:
+            voice = "en-gb+f3" if voice_gender == "female" else "en-us+m3"
+            self.tts_process = Process(
+                target=espeak_tts_worker,
+                args=(self.tts_queue, voice, language),
+                daemon=True
+            )
+
+        self.tts_process.start()
+
+    # ========== ADAPTIVE MIC RECORDING WITH VAD ==========
+    def record_audio(self, max_duration=5, samplerate=16000):
+        """Record from the default microphone until silence or ``max_duration``.
+
+        Audio is read in 0.1 s chunks. Once a chunk's RMS energy has exceeded
+        the threshold, recording stops after more than 1.5 s of consecutive
+        quiet chunks.
+
+        Args:
+            max_duration: Upper bound on the recording length in seconds.
+            samplerate: Capture sample rate in Hz.
+
+        Returns:
+            A 1-D float32 NumPy array with the recorded samples.
+        """
+        print("\nListening... Speak now.\n")
+
+        silence_threshold = 0.01
+        silence_duration = 1.5
+
+        chunk_size = int(0.1 * samplerate)
+        max_chunks = int(max_duration / 0.1)
+
+        audio_chunks = []
+        silent_chunks = 0
+        speech_detected = False
+
+        # The context manager starts the stream and guarantees it is stopped
+        # and closed even if a read fails, so the device is never left open.
+        with sd.InputStream(samplerate=samplerate, channels=1, dtype='float32') as stream:
+            for i in range(max_chunks):
+                chunk, _ = stream.read(chunk_size)
+                audio_chunks.append(chunk)
+
+                energy = np.sqrt(np.mean(chunk**2))
+
+                if energy > silence_threshold:
+                    speech_detected = True
+                    silent_chunks = 0
+                elif speech_detected:
+                    silent_chunks += 1
+
+                    if silent_chunks > (silence_duration / 0.1):
+                        print(f"[Silence detected - stopping early after {(i+1)*0.1:.1f}s]")
+                        break
+
+        audio = np.concatenate(audio_chunks, axis=0).flatten()
+        return audio
+
+    # ========== OPTIMIZED STT ==========
+    def speech_to_text(self, audio):
+        """Transcribe ``audio`` with Whisper and return the stripped text."""
+        print("Converting speech to text...")
+
+        lang_code = "hi" if self.language == "hi" else "en"
+
+        segments, info = self.whisper.transcribe(
+            audio,
+            beam_size=1,
+            vad_filter=True,
+            language=lang_code,
+            condition_on_previous_text=False
+        )
+
+        text = " ".join(seg.text for seg in segments).strip()
+        print(f"You said: {text}\n")
+        return text
+
+    # ========== VOICE CHAT PIPELINE ==========
+    def voice_chat(self):
+        """Record one utterance, transcribe it and send it to the chat model."""
+        audio = self.record_audio()
+        text = self.speech_to_text(audio)
+
+        if not text:
+            print("No speech detected. Try again.\n")
+            return
+
+        self.chat(text)
+
+    # ========== OPTIMIZED LLaMA CHAT WITH LIVE TTS ==========
+    def chat(self, prompt, stream=True):
+        """Send ``prompt`` to Ollama, print the reply and queue it for speech.
+
+        Args:
+            prompt: User message text.
+            stream: When true, read the reply incrementally and queue each
+                phrase for TTS as soon as it ends with a phrase-ending mark.
+
+        Returns:
+            The full reply text, or None if the request failed (the error is
+            printed and the conversation history is left unchanged).
+        """
+        url = f"{self.base_url}/api/chat"
+
+        # Unknown language codes fall back to the English prompt, matching
+        # the English fallback used for transcription.
+        system_prompt = self.system_prompts.get(self.language, self.system_prompts["en"])
+        messages = [{"role": "system", "content": system_prompt}]
+        messages.extend(self.conversation_history)
+        messages.append({"role": "user", "content": prompt})
+
+        payload = {
+            "model": self.model,
+            "messages": messages,
+            "stream": stream,
+            "options": {
+                "temperature": 0.7,
+                "top_p": 0.9,
+                "num_predict": 150,
+                "num_ctx": 2048
+            }
+        }
+
+        print(f"\nAssistant: ", end="", flush=True)
+
+        start_time = time.time()
+        full_response = ""
+        token_count = 0
+
+        try:
+            # 'with' releases the HTTP connection on every exit path.
+            with requests.post(url, json=payload, stream=True, timeout=30) as response:
+                # Turn an HTTP error status into an exception instead of
+                # silently recording an empty reply in the history.
+                response.raise_for_status()
+
+                if stream:
+                    sentence_buffer = ""
+
+                    for line in response.iter_lines():
+                        if not line:
+                            continue
+
+                        chunk = json.loads(line)
+
+                        # An error reported inside the stream body.
+                        if "error" in chunk:
+                            raise RuntimeError(chunk["error"])
+
+                        if "message" in chunk and "content" in chunk["message"]:
+                            content = chunk["message"]["content"]
+
+                            print(content, end="", flush=True)
+
+                            full_response += content
+                            sentence_buffer += content
+                            token_count += 1
+
+                            # Hand a phrase to TTS as soon as it is complete
+                            # (works for both English and Hindi).
+                            if sentence_buffer.endswith(self._PHRASE_ENDINGS):
+                                stripped = sentence_buffer.strip()
+                                if len(stripped) > 5:
+                                    self.tts_queue.put({"text": stripped, "lang": self.language})
+                                    sentence_buffer = ""
+
+                    # Speak whatever is left after the stream ends.
+                    if sentence_buffer.strip():
+                        self.tts_queue.put({"text": sentence_buffer.strip(), "lang": self.language})
+
+                else:
+                    data = response.json()
+                    if "error" in data:
+                        raise RuntimeError(data["error"])
+                    full_response = data["message"]["content"]
+                    # Use the server-reported token count when present so the
+                    # speed line is not always 0 in non-streaming mode.
+                    token_count = data.get("eval_count", 0)
+                    print(full_response)
+                    self.tts_queue.put({"text": full_response, "lang": self.language})
+
+            inference_time = time.time() - start_time
+            tokens_per_sec = token_count / inference_time if inference_time > 0 else 0
+            print(f"\n\n⚡ Time: {inference_time:.2f}s | Speed: {tokens_per_sec:.1f} tokens/sec")
+
+            self.conversation_history.append({"role": "user", "content": prompt})
+            self.conversation_history.append({"role": "assistant", "content": full_response})
+
+            return full_response
+
+        except Exception as e:
+            print(f"\n❌ Error: {e}")
+            return None
+
+    # ========== CLEANUP ==========
+    def stop(self, timeout=3.0):
+        """Shut down the TTS worker.
+
+        Queues the exit sentinel and gives the worker up to ``timeout``
+        seconds to finish and exit on its own; only then is it terminated.
+        Terminating immediately would make the sentinel pointless.
+        """
+        self.tts_queue.put("__EXIT__")
+        self.tts_process.join(timeout)
+        if self.tts_process.is_alive():
+            self.tts_process.terminate()
+            self.tts_process.join()
+
+
+# --------------------------------------------------------------
+# MAIN
+# --------------------------------------------------------------
+def main():
+    """Interactive entry point: choose language and voice, then run a chat loop.
+
+    Mode "3" runs the voice loop; any other choice (including "1. Demo",
+    which has no separate implementation in this function) runs the text
+    loop. The TTS worker is shut down on every exit path: 'quit', Ctrl-C,
+    end of input, or a failed Ollama check.
+    """
+    print("\n🚀 Truck Assistant - Raspberry Pi 5")
+    print("🎤 Natural Human Voice (Google TTS)")
+    print("🌐 Multilingual Support (English & Hindi)\n")
+
+    assistant = None
+    try:
+        # Language selection
+        print("Select Language:")
+        print("1. English")
+        print("2. Hindi (हिंदी)")
+
+        lang_choice = input("\nLanguage (1 or 2, default=1): ").strip() or "1"
+        language = "en" if lang_choice == "1" else "hi"
+
+        # Simple voice selection
+        print("\nSelect Voice:")
+        print("1. Female (Natural)")
+        print("2. Male (Natural)")
+
+        voice_choice = input("\nVoice (1 or 2, default=1): ").strip() or "1"
+        voice_gender = "female" if voice_choice == "1" else "male"
+
+        lang_display = "English" if language == "en" else "हिंदी"
+        print(f"\n✅ Language: {lang_display}")
+        print(f"✅ Voice: {voice_gender.capitalize()}")
+        print("📥 Installing dependencies if needed...\n")
+
+        assistant = TruckAssistant(voice_gender=voice_gender, use_gtts=True, language=language)
+
+        # Check Ollama. Only network/HTTP-layer failures mean "not running";
+        # a bare except would also swallow Ctrl-C.
+        try:
+            requests.get(f"{assistant.base_url}/api/tags", timeout=5)
+            print("✅ Ollama running\n")
+        except requests.RequestException:
+            print("❌ Ollama not running. Start with: ollama serve\n")
+            return
+
+        print("="*60)
+        print("Mode:")
+        print("1. Demo")
+        print("2. Text chat")
+        print("3. Voice chat")
+        print("="*60)
+
+        mode = input("\nSelect (1-3): ").strip()
+
+        if mode == "3":
+            print("\n🎤 VOICE MODE - Press Enter to speak\n")
+            # Left via Ctrl-C or end of input, handled below.
+            while True:
+                input("Press Enter...")
+                assistant.voice_chat()
+        else:
+            print("\n💬 TEXT MODE - type 'quit' to exit\n")
+            while True:
+                user_input = input("You: ").strip()
+                if user_input.lower() in ["quit", "exit", "q"]:
+                    print("\n👋 Goodbye!")
+                    break
+                if user_input:
+                    assistant.chat(user_input)
+
+    except (KeyboardInterrupt, EOFError):
+        print("\n\n👋 Goodbye!")
+    finally:
+        # Single shutdown point so the worker never outlives the session.
+        if assistant is not None:
+            assistant.stop()
+
+# Start the interactive assistant only when run as a script, not on import.
+if __name__ == "__main__":
+    main()
--- a/ai_asistant1.py
+++ b/ai_asistant1.py
@ -0,0 +1,520 @@
+#!/usr/bin/env python3
+"""
+Truck HPC AI Assistant - POC Demo (OPTIMIZED + TRULY NATURAL VOICE + HINDI SUPPORT)
+Optimized for Raspberry Pi 5 with Ollama + Whisper STT + gTTS (eSpeak-NG fallback)
+NATURAL VOICE: Uses Google TTS (gTTS) audio, cached locally as MP3 files
+OFFLINE: Works offline for cached phrases; gTTS needs internet for uncached text
+MULTILINGUAL: English and Hindi support
+FIXED: Auto-detects correct audio sample rate
+"""
+
+import requests
+import json
+import time
+import psutil
+import sounddevice as sd
+import numpy as np
+import subprocess
+import os
+import re
+import tempfile
+import wave
+from multiprocessing import Process, Queue
+from faster_whisper import WhisperModel
+from datetime import datetime
+
+# --------------------------------------------------------------
+# AUDIO DEVICE DETECTION
+# --------------------------------------------------------------
+def get_default_samplerate():
+    """Return the default input device's sample rate in Hz.
+
+    Prints the detected device name and rate. If the device cannot be
+    queried for any reason, prints a warning and returns 44100.
+    """
+    fallback_rate = 44100
+    try:
+        input_device = sd.query_devices(kind='input')
+        rate_hz = int(input_device['default_samplerate'])
+        print(f"🎤 Detected audio device: {input_device['name']}")
+        print(f"🎵 Using sample rate: {rate_hz} Hz")
+    except Exception as e:
+        print(f"⚠️ Could not detect sample rate, using 44100 Hz: {e}")
+        return fallback_rate
+    return rate_hz
+
+# --------------------------------------------------------------
+# TEXT CLEANING FUNCTION
+# --------------------------------------------------------------
+def clean_text_for_speech(text):
+    """Strip Markdown markup from ``text`` so a TTS engine reads only the words.
+
+    Line-level markers (horizontal rules, bullets) are removed before inline
+    emphasis. Doing it the other way round lets the emphasis patterns consume
+    part of a ``***`` rule or a leading ``*`` bullet and leaves a stray marker
+    in the spoken text.
+
+    Args:
+        text: Raw text, typically a model reply that may contain Markdown.
+
+    Returns:
+        The text without Markdown markers, with every run of whitespace
+        collapsed to one space and surrounding whitespace removed.
+    """
+    # Horizontal rules and bullets first (see docstring for why).
+    text = re.sub(r'^[-*_]{3,}$', '', text, flags=re.MULTILINE)
+    text = re.sub(r'^\s*[-*+]\s+', '', text, flags=re.MULTILINE)
+    # Headings.
+    text = re.sub(r'#{1,6}\s*', '', text)
+    # Inline emphasis, longest markers first so '***x***' is not half-stripped.
+    text = re.sub(r'\*\*\*(.+?)\*\*\*', r'\1', text)
+    text = re.sub(r'\*\*(.+?)\*\*', r'\1', text)
+    text = re.sub(r'__(.+?)__', r'\1', text)
+    text = re.sub(r'\*(.+?)\*', r'\1', text)
+    text = re.sub(r'_(.+?)_', r'\1', text)
+    # Code fences before inline code, so fence backticks are not paired up.
+    text = re.sub(r'```[\w]*\n', '', text)
+    text = re.sub(r'```', '', text)
+    text = re.sub(r'`(.+?)`', r'\1', text)
+    # Links keep their label only.
+    text = re.sub(r'\[(.+?)\]\(.+?\)', r'\1', text)
+    # Numbered-list prefixes and block-quote markers.
+    text = re.sub(r'^\s*\d+\.\s+', '', text, flags=re.MULTILINE)
+    text = re.sub(r'^\s*>\s+', '', text, flags=re.MULTILINE)
+    text = re.sub(r'\s+', ' ', text)
+    return text.strip()
+
+
+# --------------------------------------------------------------
+# GTTS CACHED TTS WORKER (Natural Voice with Local Cache + Hindi Support)
+# --------------------------------------------------------------
+def gtts_tts_worker(tts_queue, voice_gender="female", language="en"):
+    """
+    Speak queued text with gTTS, caching each synthesized phrase as an MP3.
+
+    Runs until the string "__EXIT__" is received. Each queue item is either a
+    plain string (spoken in ``language``) or a dict with a 'text' key and an
+    optional 'lang' key. Uncached phrases are generated with gTTS (needs
+    internet); cached ones are played from ~/.cache/truck_assistant_tts with
+    mpg123. If synthesis or playback fails, the phrase is spoken with
+    espeak-ng instead. If gTTS is not installed, the whole worker delegates
+    to espeak_tts_worker.
+
+    Args:
+        tts_queue: Queue the worker reads phrases from.
+        voice_gender: "female" selects the co.uk gTTS endpoint for English,
+            any other value selects com.
+        language: Default language for items without their own 'lang';
+            "en" is English, any other value is synthesized as Hindi.
+    """
+    try:
+        from gtts import gTTS
+    except ImportError:
+        print("\n❌ gTTS not installed. Install with: pip install gtts")
+        print("Falling back to espeak-ng...\n")
+        espeak_tts_worker(tts_queue, "en-gb+f3" if voice_gender == "female" else "en-us+m3", language)
+        return
+
+    import hashlib
+
+    # Create cache directory
+    cache_dir = os.path.expanduser("~/.cache/truck_assistant_tts")
+    os.makedirs(cache_dir, exist_ok=True)
+
+    lang_name = "English" if language == "en" else "Hindi"
+    print(f"✅ Using Google TTS ({voice_gender} voice, {lang_name}) with local cache\n")
+    print("💡 First run needs internet, then works offline from cache\n")
+
+    while True:
+        data = tts_queue.get()
+        if data == "__EXIT__":
+            break
+
+        # Bound before the try block so the espeak fallback below can always
+        # rely on them, even if unpacking the queue item is what failed.
+        clean_text = ""
+        current_lang = language
+
+        try:
+            # Support for language switching
+            if isinstance(data, dict):
+                text = data['text']
+                current_lang = data.get('lang', language)
+            else:
+                text = data
+
+            clean_text = clean_text_for_speech(text)
+
+            if not clean_text:
+                continue
+
+            # Cache key includes the language so the same words in another
+            # language get their own file.
+            text_hash = hashlib.md5(f"{current_lang}_{clean_text}".encode()).hexdigest()
+            cache_file = os.path.join(cache_dir, f"{text_hash}.mp3")
+
+            if not os.path.exists(cache_file):
+                # Generate with gTTS (needs internet first time)
+                if current_lang == "en":
+                    tld = "co.uk" if voice_gender == "female" else "com"
+                    tts = gTTS(text=clean_text, lang='en', tld=tld, slow=False)
+                else:  # Hindi
+                    tts = gTTS(text=clean_text, lang='hi', slow=False)
+
+                # Write to a scratch file and rename on success: a failed
+                # download must not leave a broken file that later runs
+                # would treat as a valid cache entry.
+                partial_file = f"{cache_file}.{os.getpid()}.part"
+                try:
+                    tts.save(partial_file)
+                    os.replace(partial_file, cache_file)
+                finally:
+                    if os.path.exists(partial_file):
+                        os.remove(partial_file)
+
+            # Play using mpg123 (faster than converting to WAV)
+            subprocess.run(['mpg123', '-q', cache_file], check=True)
+
+            # Natural pause; the danda ends a Hindi sentence like a full stop.
+            if clean_text.endswith(("?", "!")):
+                time.sleep(0.15)
+            elif clean_text.endswith((".", "।")):
+                time.sleep(0.10)
+            else:
+                time.sleep(0.05)
+
+        except Exception as e:
+            print(f"[TTS ERROR] {e}")
+            if not clean_text:
+                # Nothing usable to speak (malformed queue item).
+                continue
+            # Fallback to espeak if gTTS or playback fails. '--' keeps text
+            # that starts with '-' from being parsed as a command-line option.
+            fallback_voice = 'hi' if current_lang == 'hi' else 'en'
+            try:
+                subprocess.run(['espeak-ng', '-v', fallback_voice, '--', clean_text],
+                               check=True, capture_output=True)
+            except (OSError, subprocess.SubprocessError) as fallback_error:
+                print(f"[TTS ERROR] espeak-ng fallback failed: {fallback_error}")
+
+
+# --------------------------------------------------------------
+# ESPEAK-NG TTS WORKER (Fallback)
+# --------------------------------------------------------------
+def espeak_tts_worker(tts_queue, voice="en-gb+f3", language="en"):
+    """Speak queued text with eSpeak-NG until "__EXIT__" is received.
+
+    Each queue item is either a plain string (spoken in ``language``) or a
+    dict with a 'text' key and an optional 'lang' key. Items whose language
+    is "hi" use the eSpeak 'hi' voice; everything else uses ``voice``.
+    Returns immediately, after printing install instructions, when the
+    espeak-ng executable cannot be run.
+
+    Args:
+        tts_queue: Queue the worker reads phrases from.
+        voice: eSpeak-NG voice name used for non-Hindi text.
+        language: Default language for items without their own 'lang'.
+    """
+    # Probe for the binary. Only process-launch failures mean "not found";
+    # a bare except would also swallow KeyboardInterrupt/SystemExit.
+    try:
+        subprocess.run(['espeak-ng', '--version'],
+                       capture_output=True, text=True, timeout=2, check=True)
+    except (OSError, subprocess.SubprocessError):
+        print("\n❌ eSpeak-NG not found! Install with: sudo apt install espeak-ng")
+        return
+
+    lang_name = "English" if language == "en" else "Hindi"
+    print(f"✅ Using eSpeak-NG ({voice} voice, {lang_name})\n")
+
+    while True:
+        data = tts_queue.get()
+        if data == "__EXIT__":
+            break
+
+        try:
+            # Support for language switching
+            if isinstance(data, dict):
+                text = data['text']
+                current_lang = data.get('lang', language)
+            else:
+                text = data
+                current_lang = language
+
+            clean_text = clean_text_for_speech(text)
+
+            if not clean_text:
+                continue
+
+            espeak_voice = 'hi' if current_lang == 'hi' else voice
+            # '--' ends option parsing so text such as "-5 degrees" is
+            # spoken instead of being rejected as an unknown option.
+            subprocess.run(['espeak-ng', '-v', espeak_voice, '-s', '175', '--', clean_text],
+                           check=True, capture_output=True)
+
+            # Natural pause; the danda ends a Hindi sentence like a full stop.
+            if clean_text.endswith(("?", "!")):
+                time.sleep(0.15)
+            elif clean_text.endswith((".", "।")):
+                time.sleep(0.10)
+            else:
+                time.sleep(0.05)
+
+        except Exception as e:
+            print(f"[TTS ERROR] {e}")
+
+
+# --------------------------------------------------------------
+# AUDIO RESAMPLING FUNCTION
+# --------------------------------------------------------------
+def resample_audio(audio, orig_sr, target_sr=16000):
+    """Resample a 1-D audio signal from ``orig_sr`` to ``target_sr``.
+
+    Uses scipy.signal.resample (Fourier-domain resampling).
+
+    Args:
+        audio: Array-like of audio samples.
+        orig_sr: Sample rate of ``audio`` in Hz; must be positive.
+        target_sr: Desired sample rate in Hz; must be positive.
+
+    Returns:
+        A float32 NumPy array. It has the same length as the input when the
+        rates match, otherwise int(len(audio) / orig_sr * target_sr) samples.
+        Empty input, or input too short to yield one output sample, gives an
+        empty array.
+
+    Raises:
+        ValueError: If either sample rate is not positive.
+    """
+    if orig_sr <= 0 or target_sr <= 0:
+        raise ValueError("sample rates must be positive")
+
+    # Always hand back float32, including on the no-op path.
+    audio = np.asarray(audio, dtype=np.float32)
+    if orig_sr == target_sr or audio.size == 0:
+        return audio
+
+    duration = len(audio) / orig_sr
+    target_length = int(duration * target_sr)
+    if target_length == 0:
+        # Too short to produce even one output sample.
+        return np.zeros(0, dtype=np.float32)
+
+    from scipy import signal
+    resampled = signal.resample(audio, target_length)
+    return resampled.astype(np.float32)
+
+
+# --------------------------------------------------------------
+# MAIN ASSISTANT CLASS
+# --------------------------------------------------------------
+class TruckAssistant:
+    """Truck-driver assistant: Whisper speech-to-text, Ollama chat, spoken reply.
+
+    Audio is captured at the input device's default sample rate and resampled
+    to 16 kHz before transcription. Replies are printed as they arrive and
+    pushed phrase by phrase onto a queue consumed by a separate TTS worker
+    process.
+    """
+
+    # Characters that close a speakable phrase in a streamed reply
+    # (includes the Devanagari danda used in Hindi).
+    _PHRASE_ENDINGS = (".", "!", "?", ",", ";", "।")
+
+    def __init__(self, model="llama3.2:3b-instruct-q4_K_M", base_url="http://localhost:11434",
+                 voice_gender="female", use_gtts=True, language="en"):
+        """Detect the capture rate, load Whisper and start the TTS worker.
+
+        Args:
+            model: Ollama model tag sent with every chat request.
+            base_url: Base URL of the Ollama HTTP server.
+            voice_gender: "female" or any other value (treated as male).
+            use_gtts: Start the gTTS worker when true, else the eSpeak-NG worker.
+            language: "hi" for Hindi; selects the system prompt, the Whisper
+                model and the TTS language.
+        """
+        self.model = model
+        self.base_url = base_url
+        self.conversation_history = []
+        self.language = language
+
+        # Detect and store the device's native sample rate
+        self.native_samplerate = get_default_samplerate()
+        self.whisper_samplerate = 16000  # Whisper expects 16kHz
+
+        # Language-specific system prompts
+        self.system_prompts = {
+            "en": "You are a helpful AI assistant for truck drivers. Provide clear, concise, and practical answers.",
+            "hi": "आप ट्रक ड्राइवरों के लिए एक सहायक AI सहायक हैं। स्पष्ट, संक्षिप्त और व्यावहारिक उत्तर प्रदान करें। कृपया हिंदी में उत्तर दें।"
+        }
+
+        # The English-only model is used unless Hindi was requested.
+        whisper_model = "tiny" if language == "hi" else "tiny.en"
+        print(f"Loading Whisper model ({whisper_model} - optimized for speed)...")
+        self.whisper = WhisperModel(
+            whisper_model,
+            device="cpu",
+            compute_type="int8",
+            num_workers=2
+        )
+
+        # TTS queue + process
+        self.tts_queue = Queue()
+
+        if use_gtts:
+            self.tts_process = Process(
+                target=gtts_tts_worker,
+                args=(self.tts_queue, voice_gender, language),
+                daemon=True
+            )
+        else:
+            voice = "en-gb+f3" if voice_gender == "female" else "en-us+m3"
+            self.tts_process = Process(
+                target=espeak_tts_worker,
+                args=(self.tts_queue, voice, language),
+                daemon=True
+            )
+
+        self.tts_process.start()
+
+    # ========== ADAPTIVE MIC RECORDING WITH VAD ==========
+    def record_audio(self, max_duration=5):
+        """Record from the default microphone until silence or ``max_duration``.
+
+        Audio is read in 0.1 s chunks at the device's native rate. Once a
+        chunk's RMS energy has exceeded the threshold, recording stops after
+        more than 1.5 s of consecutive quiet chunks. The result is resampled
+        to the Whisper rate when the two rates differ.
+
+        Args:
+            max_duration: Upper bound on the recording length in seconds.
+
+        Returns:
+            A 1-D float32 NumPy array, or None if recording failed (the
+            error is printed).
+        """
+        print("\nListening... Speak now.\n")
+
+        silence_threshold = 0.01
+        silence_duration = 1.5
+
+        chunk_size = int(0.1 * self.native_samplerate)
+        max_chunks = int(max_duration / 0.1)
+
+        audio_chunks = []
+        silent_chunks = 0
+        speech_detected = False
+
+        try:
+            # The context manager starts the stream and guarantees it is
+            # stopped and closed even if a read fails part-way through.
+            with sd.InputStream(
+                samplerate=self.native_samplerate,
+                channels=1,
+                dtype='float32'
+            ) as stream:
+                for i in range(max_chunks):
+                    chunk, _ = stream.read(chunk_size)
+                    audio_chunks.append(chunk)
+
+                    energy = np.sqrt(np.mean(chunk**2))
+
+                    if energy > silence_threshold:
+                        speech_detected = True
+                        silent_chunks = 0
+                    elif speech_detected:
+                        silent_chunks += 1
+
+                        if silent_chunks > (silence_duration / 0.1):
+                            print(f"[Silence detected - stopping early after {(i+1)*0.1:.1f}s]")
+                            break
+
+            audio = np.concatenate(audio_chunks, axis=0).flatten()
+
+            # Resample to 16kHz for Whisper
+            if self.native_samplerate != self.whisper_samplerate:
+                print(f"Resampling audio from {self.native_samplerate}Hz to {self.whisper_samplerate}Hz...")
+                audio = resample_audio(audio, self.native_samplerate, self.whisper_samplerate)
+
+            return audio
+
+        except Exception as e:
+            print(f"❌ Recording error: {e}")
+            return None
+
+    # ========== OPTIMIZED STT ==========
+    def speech_to_text(self, audio):
+        """Transcribe ``audio`` with Whisper.
+
+        Returns:
+            The stripped transcript, or "" when ``audio`` is None or
+            transcription fails (the error is printed).
+        """
+        if audio is None:
+            return ""
+
+        print("Converting speech to text...")
+
+        lang_code = "hi" if self.language == "hi" else "en"
+
+        try:
+            segments, info = self.whisper.transcribe(
+                audio,
+                beam_size=1,
+                vad_filter=True,
+                language=lang_code,
+                condition_on_previous_text=False
+            )
+
+            text = " ".join(seg.text for seg in segments).strip()
+            print(f"You said: {text}\n")
+            return text
+        except Exception as e:
+            print(f"❌ Transcription error: {e}")
+            return ""
+
+    # ========== VOICE CHAT PIPELINE ==========
+    def voice_chat(self):
+        """Record one utterance, transcribe it and send it to the chat model."""
+        audio = self.record_audio()
+
+        if audio is None:
+            print("Recording failed. Try again.\n")
+            return
+
+        text = self.speech_to_text(audio)
+
+        if not text:
+            print("No speech detected. Try again.\n")
+            return
+
+        self.chat(text)
+
+    # ========== OPTIMIZED LLaMA CHAT WITH LIVE TTS ==========
+    def chat(self, prompt, stream=True):
+        """Send ``prompt`` to Ollama, print the reply and queue it for speech.
+
+        Args:
+            prompt: User message text.
+            stream: When true, read the reply incrementally and queue each
+                phrase for TTS as soon as it ends with a phrase-ending mark.
+
+        Returns:
+            The full reply text, or None if the request failed (the error is
+            printed and the conversation history is left unchanged).
+        """
+        url = f"{self.base_url}/api/chat"
+
+        # Unknown language codes fall back to the English prompt, matching
+        # the English fallback used for transcription.
+        system_prompt = self.system_prompts.get(self.language, self.system_prompts["en"])
+        messages = [{"role": "system", "content": system_prompt}]
+        messages.extend(self.conversation_history)
+        messages.append({"role": "user", "content": prompt})
+
+        payload = {
+            "model": self.model,
+            "messages": messages,
+            "stream": stream,
+            "options": {
+                "temperature": 0.7,
+                "top_p": 0.9,
+                "num_predict": 150,
+                "num_ctx": 2048
+            }
+        }
+
+        print(f"\nAssistant: ", end="", flush=True)
+
+        start_time = time.time()
+        full_response = ""
+        token_count = 0
+
+        try:
+            # 'with' releases the HTTP connection on every exit path.
+            with requests.post(url, json=payload, stream=True, timeout=30) as response:
+                # Turn an HTTP error status into an exception instead of
+                # silently recording an empty reply in the history.
+                response.raise_for_status()
+
+                if stream:
+                    sentence_buffer = ""
+
+                    for line in response.iter_lines():
+                        if not line:
+                            continue
+
+                        chunk = json.loads(line)
+
+                        # An error reported inside the stream body.
+                        if "error" in chunk:
+                            raise RuntimeError(chunk["error"])
+
+                        if "message" in chunk and "content" in chunk["message"]:
+                            content = chunk["message"]["content"]
+
+                            print(content, end="", flush=True)
+
+                            full_response += content
+                            sentence_buffer += content
+                            token_count += 1
+
+                            # Hand a phrase to TTS as soon as it is complete
+                            # (works for both English and Hindi).
+                            if sentence_buffer.endswith(self._PHRASE_ENDINGS):
+                                stripped = sentence_buffer.strip()
+                                if len(stripped) > 5:
+                                    self.tts_queue.put({"text": stripped, "lang": self.language})
+                                    sentence_buffer = ""
+
+                    # Speak whatever is left after the stream ends.
+                    if sentence_buffer.strip():
+                        self.tts_queue.put({"text": sentence_buffer.strip(), "lang": self.language})
+
+                else:
+                    data = response.json()
+                    if "error" in data:
+                        raise RuntimeError(data["error"])
+                    full_response = data["message"]["content"]
+                    # Use the server-reported token count when present so the
+                    # speed line is not always 0 in non-streaming mode.
+                    token_count = data.get("eval_count", 0)
+                    print(full_response)
+                    self.tts_queue.put({"text": full_response, "lang": self.language})
+
+            inference_time = time.time() - start_time
+            tokens_per_sec = token_count / inference_time if inference_time > 0 else 0
+            print(f"\n\n⚡ Time: {inference_time:.2f}s | Speed: {tokens_per_sec:.1f} tokens/sec")
+
+            self.conversation_history.append({"role": "user", "content": prompt})
+            self.conversation_history.append({"role": "assistant", "content": full_response})
+
+            return full_response
+
+        except Exception as e:
+            print(f"\n❌ Error: {e}")
+            return None
+
+    # ========== CLEANUP ==========
+    def stop(self, timeout=3.0):
+        """Shut down the TTS worker.
+
+        Queues the exit sentinel and gives the worker up to ``timeout``
+        seconds to finish and exit on its own; only then is it terminated.
+        Terminating immediately would make the sentinel pointless.
+        """
+        self.tts_queue.put("__EXIT__")
+        self.tts_process.join(timeout)
+        if self.tts_process.is_alive():
+            self.tts_process.terminate()
+            self.tts_process.join()
+
+
+# --------------------------------------------------------------
+# MAIN
+# --------------------------------------------------------------
+def main():
+    """Interactive entry point: choose language and voice, then run a chat loop.
+
+    Mode "3" runs the voice loop; any other choice (including "1. Demo",
+    which has no separate implementation in this function) runs the text
+    loop. The TTS worker is shut down on every exit path: 'quit', Ctrl-C,
+    end of input, or a failed Ollama check.
+    """
+    print("\n🚀 Truck Assistant - Raspberry Pi 5")
+    print("🎤 Natural Human Voice (Google TTS)")
+    print("🌐 Multilingual Support (English & Hindi)\n")
+
+    assistant = None
+    try:
+        # Language selection
+        print("Select Language:")
+        print("1. English")
+        print("2. Hindi (हिंदी)")
+
+        lang_choice = input("\nLanguage (1 or 2, default=1): ").strip() or "1"
+        language = "en" if lang_choice == "1" else "hi"
+
+        # Simple voice selection
+        print("\nSelect Voice:")
+        print("1. Female (Natural)")
+        print("2. Male (Natural)")
+
+        voice_choice = input("\nVoice (1 or 2, default=1): ").strip() or "1"
+        voice_gender = "female" if voice_choice == "1" else "male"
+
+        lang_display = "English" if language == "en" else "हिंदी"
+        print(f"\n✅ Language: {lang_display}")
+        print(f"✅ Voice: {voice_gender.capitalize()}")
+        print("📥 Installing dependencies if needed...\n")
+
+        assistant = TruckAssistant(voice_gender=voice_gender, use_gtts=True, language=language)
+
+        # Check Ollama. Only network/HTTP-layer failures mean "not running";
+        # a bare except would also swallow Ctrl-C.
+        try:
+            requests.get(f"{assistant.base_url}/api/tags", timeout=5)
+            print("✅ Ollama running\n")
+        except requests.RequestException:
+            print("❌ Ollama not running. Start with: ollama serve\n")
+            return
+
+        print("="*60)
+        print("Mode:")
+        print("1. Demo")
+        print("2. Text chat")
+        print("3. Voice chat")
+        print("="*60)
+
+        mode = input("\nSelect (1-3): ").strip()
+
+        if mode == "3":
+            print("\n🎤 VOICE MODE - Press Enter to speak\n")
+            # Left via Ctrl-C or end of input, handled below.
+            while True:
+                input("Press Enter...")
+                assistant.voice_chat()
+        else:
+            print("\n💬 TEXT MODE - type 'quit' to exit\n")
+            while True:
+                user_input = input("You: ").strip()
+                if user_input.lower() in ["quit", "exit", "q"]:
+                    print("\n👋 Goodbye!")
+                    break
+                if user_input:
+                    assistant.chat(user_input)
+
+    except (KeyboardInterrupt, EOFError):
+        print("\n\n👋 Goodbye!")
+    finally:
+        # Single shutdown point so the worker never outlives the session.
+        if assistant is not None:
+            assistant.stop()
+
+
+# Start the interactive assistant only when run as a script, not on import.
+if __name__ == "__main__":
+    main()
--- a/ai_assistant2.py
+++ b/ai_assistant2.py
@ -0,0 +1,531 @@
+#!/usr/bin/env python3
+"""
+Truck HPC AI Assistant - POC Demo (OPTIMIZED + TRULY NATURAL VOICE + HINDI SUPPORT)
+Optimized for Raspberry Pi 5 with Ollama + Whisper STT + gTTS (eSpeak-NG fallback)
+NATURAL VOICE: Uses Google TTS (gTTS) and caches the generated audio locally
+OFFLINE: Cached phrases play offline; uncached phrases need internet or fall back to eSpeak-NG
+MULTILINGUAL: English and Hindi support
+FIXED: Auto-detects correct audio sample rate
+FIXED: Proper loop control - waits for speech to complete before next input
+"""
+
+import requests
+import json
+import time
+import psutil
+import sounddevice as sd
+import numpy as np
+import subprocess
+import os
+import re
+import tempfile
+import wave
+from multiprocessing import Process, Queue
+from faster_whisper import WhisperModel
+from datetime import datetime
+
+# --------------------------------------------------------------
+# AUDIO DEVICE DETECTION
+# --------------------------------------------------------------
+def get_default_samplerate():
+    """Return the default input device's sample rate in Hz.
+
+    Asks sounddevice for the default input device and prints its name and
+    sample rate. If anything goes wrong during the lookup, the error is
+    printed and 44100 is returned instead.
+    """
+    try:
+        input_device = sd.query_devices(kind='input')
+        rate_hz = int(input_device['default_samplerate'])
+        print(f"🎤 Detected audio device: {input_device['name']}")
+        print(f"🎵 Using sample rate: {rate_hz} Hz")
+    except Exception as err:
+        # Best effort: any failure falls back to a fixed rate.
+        print(f"⚠️ Could not detect sample rate, using 44100 Hz: {err}")
+        return 44100
+    return rate_hz
+
+# --------------------------------------------------------------
+# TEXT CLEANING FUNCTION
+# --------------------------------------------------------------
+def clean_text_for_speech(text):
+    """Strip Markdown formatting so the text can be handed to a TTS engine.
+
+    Line-level constructs (headings, horizontal rules, list markers, block
+    quotes) are removed before inline emphasis. The order matters: a rule
+    line such as ``***`` or ``___`` would otherwise be consumed as emphasis
+    around a single character and leave a stray ``*`` / ``_`` to be spoken.
+
+    Args:
+        text: Markdown-flavoured text. ``None`` and ``""`` are accepted.
+
+    Returns:
+        The text with markup removed, every run of whitespace (newlines
+        included) collapsed to one space, and both ends stripped. Empty
+        input yields ``""``.
+    """
+    if not text:
+        return ""
+
+    # (pattern, replacement, flags), applied strictly in this order.
+    rules = (
+        # --- line-level structure first ---
+        (r'#{1,6}\s*', '', 0),                      # heading markers
+        (r'^[-*_]{3,}$', '', re.MULTILINE),        # horizontal rules
+        (r'^\s*[-*+]\s+', '', re.MULTILINE),       # bullet list markers
+        (r'^\s*\d+\.\s+', '', re.MULTILINE),       # numbered list markers
+        (r'^\s*>\s+', '', re.MULTILINE),           # block quote markers
+        # --- inline emphasis, longest delimiter first ---
+        (r'\*\*\*(.+?)\*\*\*', r'\1', 0),
+        (r'\*\*(.+?)\*\*', r'\1', 0),
+        (r'__(.+?)__', r'\1', 0),
+        (r'\*(.+?)\*', r'\1', 0),
+        (r'_(.+?)_', r'\1', 0),
+        # --- code fences and inline code ---
+        (r'```[\w]*\n', '', 0),
+        (r'```', '', 0),
+        (r'`(.+?)`', r'\1', 0),
+        # --- links: keep the label, drop the target ---
+        (r'\[(.+?)\]\(.+?\)', r'\1', 0),
+        # --- finally collapse all whitespace ---
+        (r'\s+', ' ', 0),
+    )
+    for pattern, replacement, flags in rules:
+        text = re.sub(pattern, replacement, text, flags=flags)
+    return text.strip()
+
+
+# --------------------------------------------------------------
+# GTTS CACHED TTS WORKER (Natural Voice with Local Cache + Hindi Support)
+# --------------------------------------------------------------
+def gtts_tts_worker(tts_queue, voice_gender="female", language="en"):
+    """Speak queued text with gTTS, caching every synthesized clip on disk.
+
+    Runs until the sentinel string "__EXIT__" is read from the queue. Each
+    item is either a plain string (spoken in ``language``) or a dict with a
+    'text' key and an optional 'lang' key. Clips are stored as MP3 files in
+    ~/.cache/truck_assistant_tts and played with ``mpg123``; text that is
+    not cached yet needs internet access. When synthesis or playback fails,
+    the phrase is spoken with ``espeak-ng`` instead. If gTTS is not
+    installed, the whole worker hands over to ``espeak_tts_worker``.
+
+    Args:
+        tts_queue: Queue the worker reads items from.
+        voice_gender: "female" selects the co.uk gTTS endpoint for English,
+            any other value selects com. Not used for Hindi.
+        language: Default language code ("en" or "hi") for items that do
+            not carry their own 'lang'.
+    """
+    try:
+        from gtts import gTTS
+        import hashlib
+    except ImportError:
+        print("\n❌ gTTS not installed. Install with: pip install gtts")
+        print("Falling back to espeak-ng...\n")
+        espeak_tts_worker(tts_queue, "en-gb+f3" if voice_gender == "female" else "en-us+m3", language)
+        return
+
+    # Create cache directory
+    cache_dir = os.path.expanduser("~/.cache/truck_assistant_tts")
+    os.makedirs(cache_dir, exist_ok=True)
+
+    lang_name = "English" if language == "en" else "Hindi"
+    print(f"✅ Using Google TTS ({voice_gender} voice, {lang_name}) with local cache\n")
+    print("💡 First run needs internet, then works offline from cache\n")
+
+    while True:
+        data = tts_queue.get()
+        if data == "__EXIT__":
+            break
+
+        # Bound before the try so the fallback below can always read them,
+        # even when the failure happens while unpacking the queue item.
+        clean_text = ""
+        current_lang = language
+
+        try:
+            # Dict items may override the language per phrase
+            if isinstance(data, dict):
+                text = data['text']
+                current_lang = data.get('lang', language)
+            else:
+                text = data
+
+            clean_text = clean_text_for_speech(text)
+            if not clean_text:
+                continue
+
+            # Cache key covers language and text.
+            # NOTE(review): voice_gender (the English tld) is not part of the
+            # key, so a phrase cached with one voice is replayed for the
+            # other. Changing the key would invalidate existing caches.
+            text_hash = hashlib.md5(f"{current_lang}_{clean_text}".encode()).hexdigest()
+            cache_file = os.path.join(cache_dir, f"{text_hash}.mp3")
+
+            # A zero-byte file is what a failed download leaves behind when
+            # writing straight to the cache path; treat it as a miss.
+            cached = os.path.exists(cache_file) and os.path.getsize(cache_file) > 0
+            if not cached:
+                # Generate with gTTS (needs internet first time)
+                if current_lang == "en":
+                    tld = "co.uk" if voice_gender == "female" else "com"
+                    tts = gTTS(text=clean_text, lang='en', tld=tld, slow=False)
+                else:  # Hindi
+                    tts = gTTS(text=clean_text, lang='hi', slow=False)
+
+                # Download to a temp file and rename on success so a network
+                # failure can never leave a broken clip in the cache.
+                fd, tmp_path = tempfile.mkstemp(dir=cache_dir, suffix=".tmp")
+                os.close(fd)
+                try:
+                    tts.save(tmp_path)
+                    os.replace(tmp_path, cache_file)
+                finally:
+                    # Only present if save/replace failed
+                    if os.path.exists(tmp_path):
+                        os.remove(tmp_path)
+
+            # Play using mpg123 (faster than converting to WAV)
+            subprocess.run(['mpg123', '-q', cache_file], check=True)
+
+            # Natural pause
+            if clean_text.endswith(("?", "!")):
+                time.sleep(0.15)
+            elif clean_text.endswith("."):
+                time.sleep(0.10)
+            else:
+                time.sleep(0.05)
+
+        except Exception as e:
+            print(f"[TTS ERROR] {e}")
+            if not clean_text:
+                # Nothing usable to speak (bad queue item or empty text)
+                continue
+            # Fallback to espeak-ng if gTTS or playback fails
+            fallback_voice = 'hi' if current_lang == 'hi' else 'en'
+            try:
+                subprocess.run(['espeak-ng', '-v', fallback_voice, clean_text],
+                               check=True, capture_output=True)
+            except (OSError, subprocess.SubprocessError) as fallback_error:
+                # Still best effort: report and move on to the next phrase
+                print(f"[TTS FALLBACK ERROR] {fallback_error}")
+
+
+# --------------------------------------------------------------
+# ESPEAK-NG TTS WORKER (Fallback)
+# --------------------------------------------------------------
+def espeak_tts_worker(tts_queue, voice="en-gb+f3", language="en"):
+    """Speak queued text with eSpeak-NG (fallback engine, Hindi supported).
+
+    First probes ``espeak-ng --version``; if the probe fails, an install
+    hint is printed and the function returns without reading the queue.
+    Otherwise it runs until the sentinel string "__EXIT__" is read. Each
+    item is either a plain string (spoken in ``language``) or a dict with a
+    'text' key and an optional 'lang' key.
+
+    Args:
+        tts_queue: Queue the worker reads items from.
+        voice: eSpeak-NG voice used for every language except Hindi.
+        language: Default language code ("en" or "hi") for items that do
+            not carry their own 'lang'.
+    """
+    try:
+        subprocess.run(['espeak-ng', '--version'],
+                       capture_output=True, text=True, timeout=2, check=True)
+    except (OSError, subprocess.SubprocessError):
+        # OSError: binary missing or not executable. SubprocessError:
+        # non-zero exit or timeout. Ctrl+C is deliberately not swallowed.
+        print("\n❌ eSpeak-NG not found! Install with: sudo apt install espeak-ng")
+        return
+
+    lang_name = "English" if language == "en" else "Hindi"
+    print(f"✅ Using eSpeak-NG ({voice} voice, {lang_name})\n")
+
+    while True:
+        data = tts_queue.get()
+        if data == "__EXIT__":
+            break
+
+        try:
+            # Dict items may override the language per phrase
+            if isinstance(data, dict):
+                text = data['text']
+                current_lang = data.get('lang', language)
+            else:
+                text = data
+                current_lang = language
+
+            clean_text = clean_text_for_speech(text)
+            if not clean_text:
+                continue
+
+            # Hindi uses eSpeak-NG's 'hi' voice; everything else uses `voice`
+            espeak_voice = 'hi' if current_lang == 'hi' else voice
+            subprocess.run(['espeak-ng', '-v', espeak_voice, '-s', '175', clean_text],
+                           check=True, capture_output=True)
+
+            # Short pause whose length depends on the closing punctuation
+            if clean_text.endswith(("?", "!")):
+                time.sleep(0.15)
+            elif clean_text.endswith("."):
+                time.sleep(0.10)
+            else:
+                time.sleep(0.05)
+
+        except Exception as e:
+            # Keep the worker alive; one bad phrase must not stop speech
+            print(f"[TTS ERROR] {e}")
+
+
+# --------------------------------------------------------------
+# AUDIO RESAMPLING FUNCTION
+# --------------------------------------------------------------
+def resample_audio(audio, orig_sr, target_sr=16000):
+    """Resample a 1-D audio signal to ``target_sr`` (16 kHz by default, for Whisper).
+
+    Uses ``scipy.signal.resample``, which resamples in the Fourier domain.
+
+    Args:
+        audio: Sequence or array of samples recorded at ``orig_sr``.
+        orig_sr: Sample rate of ``audio`` in Hz.
+        target_sr: Desired sample rate in Hz.
+
+    Returns:
+        ``audio`` itself when the two rates are equal; otherwise a float32
+        array of ``int(len(audio) * target_sr / orig_sr)`` samples. Input
+        too short to yield a single output sample gives an empty array.
+    """
+    if orig_sr == target_sr:
+        return audio
+
+    # Compute the length from the sample count directly; going through a
+    # float duration first adds a rounding step that can drop a sample.
+    target_length = int(len(audio) * target_sr / orig_sr)
+    if target_length <= 0:
+        # Nothing to resample (empty or sub-sample-length input)
+        return np.zeros(0, dtype=np.float32)
+
+    from scipy import signal
+    resampled = signal.resample(audio, target_length)
+    return resampled.astype(np.float32)
+
+
+# --------------------------------------------------------------
+# MAIN ASSISTANT CLASS
+# --------------------------------------------------------------
+class TruckAssistant:
+    """Truck-driver assistant: microphone -> Whisper STT -> Ollama chat -> TTS.
+
+    Creating an instance loads a faster-whisper model on the CPU and starts
+    a daemon worker process that speaks whatever is put on ``tts_queue``.
+    ``chat`` keeps the running conversation in ``conversation_history`` and
+    sends it to the Ollama ``/api/chat`` endpoint on every turn.
+    """
+
+    # Characters that end a speakable phrase in the streamed reply. Commas
+    # and semicolons are included so speech can start before a full sentence
+    # has been generated; "।" is the Devanagari danda used in Hindi.
+    _PHRASE_ENDINGS = (".", "!", "?", ",", ";", "।")
+
+    def __init__(self, model="llama3.2:3b-instruct-q4_K_M", base_url="http://localhost:11434",
+                 voice_gender="female", use_gtts=True, language="en"):
+        """Load the STT model and start the TTS worker process.
+
+        Args:
+            model: Ollama model name sent with every chat request.
+            base_url: Base URL of the Ollama server.
+            voice_gender: "female" or "male"; forwarded to the TTS worker.
+            use_gtts: Use the gTTS worker when true, the eSpeak-NG worker
+                otherwise.
+            language: "en" or "hi"; selects the Whisper model, the
+                transcription language, the system prompt and the TTS
+                language.
+        """
+        self.model = model
+        self.base_url = base_url
+        self.conversation_history = []
+        self.language = language
+
+        # Detect and store the device's native sample rate
+        self.native_samplerate = get_default_samplerate()
+        self.whisper_samplerate = 16000  # Whisper expects 16kHz
+
+        # Language-specific system prompts
+        self.system_prompts = {
+            "en": "You are a helpful AI assistant for truck drivers. Provide clear, concise, and practical answers.",
+            "hi": "आप ट्रक ड्राइवरों के लिए एक सहायक AI सहायक हैं। स्पष्ट, संक्षिप्त और व्यावहारिक उत्तर प्रदान करें। कृपया हिंदी में उत्तर दें।"
+        }
+
+        # The English-only model is used unless Hindi is selected
+        whisper_model = "tiny" if language == "hi" else "tiny.en"
+        print(f"Loading Whisper model ({whisper_model} - optimized for speed)...")
+        self.whisper = WhisperModel(
+            whisper_model,
+            device="cpu",
+            compute_type="int8",
+            num_workers=2
+        )
+
+        # TTS queue + process
+        self.tts_queue = Queue()
+
+        if use_gtts:
+            self.tts_process = Process(
+                target=gtts_tts_worker,
+                args=(self.tts_queue, voice_gender, language),
+                daemon=True
+            )
+        else:
+            voice = "en-gb+f3" if voice_gender == "female" else "en-us+m3"
+            self.tts_process = Process(
+                target=espeak_tts_worker,
+                args=(self.tts_queue, voice, language),
+                daemon=True
+            )
+
+        self.tts_process.start()
+
+    # ========== ADAPTIVE MIC RECORDING WITH VAD ==========
+    def record_audio(self, max_duration=5):
+        """Record from the default input device until silence or ``max_duration``.
+
+        Audio is read in 0.1 s chunks at the device's native sample rate.
+        Once a chunk's RMS energy has exceeded the threshold, recording
+        stops early after more than 1.5 s of consecutive quiet chunks.
+
+        Args:
+            max_duration: Upper bound on the recording length in seconds.
+
+        Returns:
+            A 1-D float32 array resampled to ``whisper_samplerate``, or
+            ``None`` if recording failed (the error is printed).
+        """
+        print("\nListening... Speak now.\n")
+
+        silence_threshold = 0.01   # RMS level above which a chunk counts as speech
+        silence_duration = 1.5     # seconds of quiet after speech that end the take
+        chunk_seconds = 0.1
+
+        chunk_size = int(chunk_seconds * self.native_samplerate)
+        max_chunks = int(max_duration / chunk_seconds)
+        max_silent_chunks = silence_duration / chunk_seconds
+
+        audio_chunks = []
+        silent_chunks = 0
+        speech_detected = False
+
+        try:
+            # The context manager starts the stream and always stops and
+            # closes it, including when a read raises.
+            with sd.InputStream(
+                samplerate=self.native_samplerate,
+                channels=1,
+                dtype='float32'
+            ) as stream:
+                for i in range(max_chunks):
+                    chunk, _ = stream.read(chunk_size)
+                    audio_chunks.append(chunk)
+
+                    energy = np.sqrt(np.mean(chunk**2))
+
+                    if energy > silence_threshold:
+                        speech_detected = True
+                        silent_chunks = 0
+                    elif speech_detected:
+                        # Trailing silence only counts after speech started
+                        silent_chunks += 1
+
+                        if silent_chunks > max_silent_chunks:
+                            print(f"[Silence detected - stopping early after {(i+1)*0.1:.1f}s]")
+                            break
+
+            audio = np.concatenate(audio_chunks, axis=0).flatten()
+
+            # Resample to 16kHz for Whisper
+            if self.native_samplerate != self.whisper_samplerate:
+                print(f"Resampling audio from {self.native_samplerate}Hz to {self.whisper_samplerate}Hz...")
+                audio = resample_audio(audio, self.native_samplerate, self.whisper_samplerate)
+
+            return audio
+
+        except Exception as e:
+            print(f"❌ Recording error: {e}")
+            return None
+
+    # ========== OPTIMIZED STT ==========
+    def speech_to_text(self, audio):
+        """Transcribe ``audio`` with Whisper and return the recognized text.
+
+        Args:
+            audio: Samples as returned by ``record_audio``, or ``None``.
+
+        Returns:
+            The transcription with segments joined by spaces; ``""`` when
+            ``audio`` is ``None`` or transcription fails (error printed).
+        """
+        if audio is None:
+            return ""
+
+        print("Converting speech to text...")
+
+        lang_code = "hi" if self.language == "hi" else "en"
+
+        try:
+            segments, _info = self.whisper.transcribe(
+                audio,
+                beam_size=1,
+                vad_filter=True,
+                language=lang_code,
+                condition_on_previous_text=False
+            )
+
+            text = " ".join(seg.text for seg in segments).strip()
+            print(f"You said: {text}\n")
+            return text
+        except Exception as e:
+            print(f"❌ Transcription error: {e}")
+            return ""
+
+    # ========== VOICE CHAT PIPELINE ==========
+    def voice_chat(self):
+        """Run one voice turn: record, transcribe, then answer via ``chat``."""
+        audio = self.record_audio()
+
+        if audio is None:
+            print("Recording failed. Try again.\n")
+            return
+
+        text = self.speech_to_text(audio)
+
+        if not text:
+            print("No speech detected. Try again.\n")
+            return
+
+        self.chat(text)
+
+    # ========== OPTIMIZED LLaMA CHAT WITH LIVE TTS ==========
+    def chat(self, prompt, stream=True):
+        """Send ``prompt`` to Ollama, print the reply and queue it for speech.
+
+        With ``stream=True`` the reply is printed as it arrives and handed
+        to the TTS queue phrase by phrase; otherwise the whole reply is
+        queued at once. On success the exchange is appended to
+        ``conversation_history`` and the call blocks until the TTS queue
+        has drained.
+
+        Args:
+            prompt: The user's message.
+            stream: Whether to consume the reply incrementally.
+
+        Returns:
+            The full reply text, or ``None`` if the request failed (the
+            error is printed and the history is left unchanged).
+        """
+        url = f"{self.base_url}/api/chat"
+
+        # Unknown language codes fall back to the English prompt
+        system_prompt = self.system_prompts.get(self.language, self.system_prompts["en"])
+        messages = [{"role": "system", "content": system_prompt}]
+        messages.extend(self.conversation_history)
+        messages.append({"role": "user", "content": prompt})
+
+        payload = {
+            "model": self.model,
+            "messages": messages,
+            "stream": stream,
+            "options": {
+                "temperature": 0.7,
+                "top_p": 0.9,
+                "num_predict": 150,
+                "num_ctx": 2048
+            }
+        }
+
+        print("\nAssistant: ", end="", flush=True)
+
+        start_time = time.time()
+        full_response = ""
+        token_count = 0
+
+        try:
+            # `with` releases the streamed connection even if parsing fails
+            with requests.post(url, json=payload, stream=True, timeout=30) as response:
+                if not response.ok:
+                    # e.g. unknown model: surface the server's explanation
+                    raise RuntimeError(
+                        f"Ollama returned HTTP {response.status_code}: {response.text.strip()}"
+                    )
+
+                if stream:
+                    sentence_buffer = ""
+
+                    for line in response.iter_lines():
+                        if not line:
+                            continue
+
+                        chunk = json.loads(line)
+
+                        if "error" in chunk:
+                            raise RuntimeError(f"Ollama error: {chunk['error']}")
+
+                        if "message" in chunk and "content" in chunk["message"]:
+                            content = chunk["message"]["content"]
+
+                            print(content, end="", flush=True)
+
+                            full_response += content
+                            sentence_buffer += content
+                            # One streamed chunk is counted as one token
+                            token_count += 1
+
+                            # Phrase end detection (works for both English and Hindi)
+                            if sentence_buffer.endswith(self._PHRASE_ENDINGS):
+                                stripped = sentence_buffer.strip()
+                                # Very short fragments stay buffered and are
+                                # spoken together with the next phrase
+                                if len(stripped) > 5:
+                                    self.tts_queue.put({"text": stripped, "lang": self.language})
+                                    sentence_buffer = ""
+
+                    # Speak whatever is left after the stream ends
+                    if sentence_buffer.strip():
+                        self.tts_queue.put({"text": sentence_buffer.strip(), "lang": self.language})
+
+                else:
+                    data = response.json()
+                    if "error" in data:
+                        raise RuntimeError(f"Ollama error: {data['error']}")
+                    full_response = data["message"]["content"]
+                    # Use the server's token count when it reports one
+                    token_count = data.get("eval_count", 0)
+                    print(full_response)
+                    self.tts_queue.put({"text": full_response, "lang": self.language})
+
+            inference_time = time.time() - start_time
+            tokens_per_sec = token_count / inference_time if inference_time > 0 else 0
+            print(f"\n\n⚡ Time: {inference_time:.2f}s | Speed: {tokens_per_sec:.1f} tokens/sec")
+
+            self.conversation_history.append({"role": "user", "content": prompt})
+            self.conversation_history.append({"role": "assistant", "content": full_response})
+
+            # Wait for TTS queue to be empty (all speech completed)
+            print("\n[Waiting for speech to complete...]")
+            self._wait_for_speech()
+
+            return full_response
+
+        except Exception as e:
+            print(f"\n❌ Error: {e}")
+            return None
+
+    def _wait_for_speech(self, poll_interval=0.1, settle_delay=0.5):
+        """Block until the TTS queue is empty, then pause for the last clip.
+
+        Gives up immediately if the worker process is no longer alive;
+        nothing would ever consume the queue and the wait would not end.
+        """
+        while not self.tts_queue.empty():
+            if not self.tts_process.is_alive():
+                print("⚠️ TTS worker is not running; skipping remaining speech.")
+                return
+            time.sleep(poll_interval)
+
+        # An empty queue only means the last item was picked up; allow a
+        # little extra time for it to finish playing.
+        time.sleep(settle_delay)
+
+    # ========== CLEANUP ==========
+    def stop(self, timeout=2.0):
+        """Shut the TTS worker down.
+
+        Queues the "__EXIT__" sentinel and gives the worker ``timeout``
+        seconds to exit on its own; it is terminated only if it is still
+        alive after that.
+        """
+        self.tts_queue.put("__EXIT__")
+        self.tts_process.join(timeout)
+        if self.tts_process.is_alive():
+            self.tts_process.terminate()
+            self.tts_process.join(timeout)
+
+
+# --------------------------------------------------------------
+# MAIN
+# --------------------------------------------------------------
+def main():
+    """Interactive entry point: choose language, voice and mode, then chat.
+
+    The Ollama server is probed before the assistant is built, so a missing
+    server does not load Whisper or leave a TTS worker process behind.
+    Mode "3" runs the voice loop; every other choice runs the text loop
+    (the menu's "Demo" entry has no implementation of its own). Ctrl+C or
+    end-of-input leaves either loop, and the assistant is always stopped.
+    """
+    print("\n🚀 Truck Assistant - Raspberry Pi 5")
+    print("🎤 Natural Human Voice (Google TTS)")
+    print("🌐 Multilingual Support (English & Hindi)\n")
+
+    # Language selection
+    print("Select Language:")
+    print("1. English")
+    print("2. Hindi (हिंदी)")
+
+    lang_choice = input("\nLanguage (1 or 2, default=1): ").strip() or "1"
+    language = "en" if lang_choice == "1" else "hi"
+
+    # Simple voice selection
+    print("\nSelect Voice:")
+    print("1. Female (Natural)")
+    print("2. Male (Natural)")
+
+    voice_choice = input("\nVoice (1 or 2, default=1): ").strip() or "1"
+    voice_gender = "female" if voice_choice == "1" else "male"
+
+    lang_display = "English" if language == "en" else "हिंदी"
+    print(f"\n✅ Language: {lang_display}")
+    print(f"✅ Voice: {voice_gender.capitalize()}")
+    print("📥 Installing dependencies if needed...\n")
+
+    # Check Ollama first, against the same URL the assistant will use
+    base_url = "http://localhost:11434"
+    try:
+        requests.get(f"{base_url}/api/tags", timeout=5)
+        print("✅ Ollama running\n")
+    except requests.RequestException:
+        print("❌ Ollama not running. Start with: ollama serve\n")
+        return
+
+    assistant = TruckAssistant(base_url=base_url, voice_gender=voice_gender,
+                               use_gtts=True, language=language)
+
+    separator = "\n" + "="*60 + "\n"
+    interrupted = False
+
+    try:
+        print("="*60)
+        print("Mode:")
+        print("1. Demo")
+        print("2. Text chat")
+        print("3. Voice chat")
+        print("="*60)
+
+        mode = input("\nSelect (1-3): ").strip()
+
+        if mode == "3":
+            print("\n🎤 VOICE MODE - Press Enter to speak, Ctrl+C to exit\n")
+            while True:
+                input("Press Enter to speak...")
+                assistant.voice_chat()
+                print(separator)
+        else:
+            if mode == "1":
+                # The menu offers a demo, but only the chat loops exist
+                print("\nℹ️ Demo mode is not available - starting text chat instead.")
+            print("\n💬 TEXT MODE - type 'quit' to exit\n")
+            while True:
+                user_input = input("You: ").strip()
+                if user_input.lower() in ["quit", "exit", "q"]:
+                    break
+                if user_input:
+                    assistant.chat(user_input)
+                    print(separator)
+    except (KeyboardInterrupt, EOFError):
+        # EOFError: stdin was closed (e.g. piped input ran out)
+        interrupted = True
+        print("\n\n👋 Exiting gracefully...")
+    finally:
+        # Runs on quit, interrupt and unexpected errors alike
+        assistant.stop()
+
+    print("Goodbye!" if interrupted else "\n👋 Goodbye!")
+
+
+# Start the interactive assistant only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()