Spaces:

sagar007
/

DeepSeekR1_Search

Runtime error

App Files Files Community

sagar007 commited on 9 days ago

Commit

b8c63a2

verified ·

1 Parent(s): a6e4f9f

Update app.py

Browse files

Files changed (1) hide show

app.py +375 -153

app.py CHANGED Viewed

@@ -9,43 +9,42 @@ import os
 import subprocess
 import numpy as np
 from typing import List, Dict, Tuple, Any
-# Install required dependencies for Kokoro with better error handling
 try:
-    subprocess.run(['git', 'lfs', 'install'], check=True)
-    if not os.path.exists('Kokoro-82M'):
-        subprocess.run(['git', 'clone', 'https://huggingface.co/hexgrad/Kokoro-82M'], check=True)
-    # Try installing espeak with proper package manager commands
-    try:
-        subprocess.run(['apt-get', 'update'], check=True)
-        subprocess.run(['apt-get', 'install', '-y', 'espeak'], check=True)
-    except subprocess.CalledProcessError:
-        print("Warning: Could not install espeak. Attempting espeak-ng...")
-        try:
-            subprocess.run(['apt-get', 'install', '-y', 'espeak-ng'], check=True)
-        except subprocess.CalledProcessError:
-            print("Warning: Could not install espeak or espeak-ng. TTS functionality may be limited.")
 except Exception as e:
-    print(f"Warning: Initial setup error: {str(e)}")
-    print("Continuing with limited functionality...")
-# --- Initialization (Do this ONCE) ---
-model_name = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-tokenizer.pad_token = tokenizer.eos_token
-# Initialize DeepSeek model
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    device_map="auto",
-    offload_folder="offload",
-    low_cpu_mem_usage=True,
-    torch_dtype=torch.float16
-)
-# Initialize Kokoro TTS (with error handling)
 VOICE_CHOICES = {
     '🇺🇸 Female (Default)': 'af',
     '🇺🇸 Bella': 'af_bella',
@@ -54,41 +53,81 @@ VOICE_CHOICES = {
 }
 TTS_ENABLED = False
 TTS_MODEL = None
-VOICEPACK = None
-try:
-    if os.path.exists('Kokoro-82M'):
-        import sys
-        sys.path.append('Kokoro-82M')
-        from models import build_model  # type: ignore
-        from kokoro import generate  # type: ignore
-        device = 'cuda' if torch.cuda.is_available() else 'cpu'
-        TTS_MODEL = build_model('Kokoro-82M/kokoro-v0_19.pth', device)
-        # Load default voice
         try:
-            VOICEPACK = torch.load('Kokoro-82M/voices/af.pt', map_location=device, weights_only=True)
-        except Exception as e:
-            print(f"Warning: Could not load default voice: {e}")
-            raise
-        TTS_ENABLED = True
-    else:
-        print("Warning: Kokoro-82M directory not found. TTS disabled.")
-except Exception as e:
-    print(f"Warning: Could not initialize Kokoro TTS: {str(e)}")
-    TTS_ENABLED = False
-def get_web_results(query: str, max_results: int = 5) -> List[Dict[str, str]]:
-    """Get web search results using DuckDuckGo"""
     try:
         with DDGS() as ddgs:
             results = list(ddgs.text(query, max_results=max_results))
             return [{
                 "title": result.get("title", ""),
-                "snippet": result["body"],
-                "url": result["href"],
                 "date": result.get("published", "")
             } for result in results]
     except Exception as e:
@@ -116,23 +155,24 @@ def format_sources(web_results: List[Dict[str, str]]) -> str:
     sources_html = "<div class='sources-container'>"
     for i, res in enumerate(web_results, 1):
         title = res["title"] or "Source"
-        date = f"<span class='source-date'>{res['date']}</span>" if res['date'] else ""
         sources_html += f"""
         <div class='source-item'>
             <div class='source-number'>[{i}]</div>
             <div class='source-content'>
                 <a href="{res['url']}" target="_blank" class='source-title'>{title}</a>
                 {date}
-                <div class='source-snippet'>{res['snippet'][:150]}...</div>
             </div>
         </div>
         """
     sources_html += "</div>"
     return sources_html
-@spaces.GPU(duration=30)
 def generate_answer(prompt: str) -> str:
-    """Generate answer using the DeepSeek model"""
     inputs = tokenizer(
         prompt,
         return_tensors="pt",
@@ -142,52 +182,56 @@ def generate_answer(prompt: str) -> str:
         return_attention_mask=True
     ).to(model.device)
-    outputs = model.generate(
-        inputs.input_ids,
-        attention_mask=inputs.attention_mask,
-        max_new_tokens=256,
-        temperature=0.7,
-        top_p=0.95,
-        pad_token_id=tokenizer.eos_token_id,
-        do_sample=True,
-        early_stopping=True
-    )
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
-@spaces.GPU(duration=30)
-def generate_speech_with_gpu(text: str, voice_name: str = 'af', tts_model=TTS_MODEL, voicepack=VOICEPACK) -> Tuple[int, np.ndarray] | None:
-    """Generate speech from text using Kokoro TTS model."""
-    if not TTS_ENABLED or tts_model is None:
-        print("TTS is not enabled or model is not loaded.")
         return None
     try:
         device = 'cuda' if torch.cuda.is_available() else 'cpu'
-        # Handle voicepack loading
-        voice_file = f'Kokoro-82M/voices/{voice_name}.pt'
-        if voice_name == 'af' and voicepack is not None:
-            # Use the pre-loaded default voicepack
-            pass
-        elif os.path.exists(voice_file):
-            # Load the selected voicepack if it exists
-            voicepack = torch.load(voice_file, map_location=device, weights_only=True)
-        else:
-            # Fall back to default 'af' if selected voicepack is missing
-            print(f"Voicepack {voice_name}.pt not found. Falling back to default 'af'.")
-            voice_file = 'Kokoro-82M/voices/af.pt'
-            if os.path.exists(voice_file):
-                voicepack = torch.load(voice_file, map_location=device, weights_only=True)
             else:
-                print("Default voicepack 'af.pt' not found. Cannot generate audio.")
-                return None
         # Clean the text
         clean_text = ' '.join([line for line in text.split('\n') if not line.startswith('#')])
         clean_text = clean_text.replace('[', '').replace(']', '').replace('*', '')
         # Split long text into chunks
-        max_chars = 1000
         chunks = []
         if len(clean_text) > max_chars:
             sentences = clean_text.split('.')
@@ -207,7 +251,7 @@ def generate_speech_with_gpu(text: str, voice_name: str = 'af', tts_model=TTS_MO
         audio_chunks = []
         for chunk in chunks:
             if chunk.strip():
-                chunk_audio, _ = generate(tts_model, chunk, voicepack, lang='a')
                 if isinstance(chunk_audio, torch.Tensor):
                     chunk_audio = chunk_audio.cpu().numpy()
                 audio_chunks.append(chunk_audio)
@@ -215,35 +259,61 @@ def generate_speech_with_gpu(text: str, voice_name: str = 'af', tts_model=TTS_MO
         # Concatenate chunks
         if audio_chunks:
             final_audio = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0]
-            return (24000, final_audio)
-        else:
-            return None
     except Exception as e:
         print(f"Error generating speech: {str(e)}")
         return None
 def process_query(query: str, history: List[List[str]], selected_voice: str = 'af'):
-    """Process user query with streaming effect"""
     try:
         if history is None:
             history = []
-        # Get web results first
-        web_results = get_web_results(query)
-        sources_html = format_sources(web_results)
         current_history = history + [[query, "*Searching...*"]]
         # Yield initial searching state
         yield (
             "*Searching & Thinking...*",  # answer_output (Markdown)
-            sources_html,                  # sources_output (HTML)
             "Searching...",               # search_btn (Button)
             current_history,              # chat_history_display (Chatbot)
             None                          # audio_output (Audio)
         )
         # Generate answer
         prompt = format_prompt(query, web_results)
         answer = generate_answer(prompt)
@@ -251,26 +321,27 @@ def process_query(query: str, history: List[List[str]], selected_voice: str = 'a
         # Update history before TTS
         updated_history = history + [[query, final_answer]]
-        # Generate speech from the answer (only if enabled)
-        if TTS_ENABLED:
-            yield (
-                final_answer,              # answer_output
-                sources_html,              # sources_output
-                "Generating audio...",     # search_btn
-                updated_history,           # chat_history_display
-                None                       # audio_output
-            )
             try:
-                audio = generate_speech_with_gpu(final_answer, selected_voice)
                 if audio is None:
                     final_answer += "\n\n*Audio generation failed. The voicepack may be missing or incompatible.*"
             except Exception as e:
                 final_answer += f"\n\n*Error generating audio: {str(e)}*"
-                audio = None
         else:
-            final_answer += "\n\n*TTS is disabled. Audio not available.*"
-            audio = None
         # Yield final result
         yield (
@@ -278,7 +349,7 @@ def process_query(query: str, history: List[List[str]], selected_voice: str = 'a
             sources_html,              # sources_output
             "Search",                  # search_btn
             updated_history,           # chat_history_display
-            audio if audio is not None else None  # audio_output
         )
     except Exception as e:
@@ -287,13 +358,13 @@ def process_query(query: str, history: List[List[str]], selected_voice: str = 'a
             error_message = "⚠️ GPU quota exceeded. Please try again later when the daily quota resets."
         yield (
             f"Error: {error_message}",  # answer_output
-            sources_html,               # sources_output
             "Search",                   # search_btn
             history + [[query, f"*Error: {error_message}*"]],  # chat_history_display
             None                        # audio_output
         )
-# Update the CSS for better contrast and readability
 css = """
 .gradio-container {
     max-width: 1200px !important;
@@ -303,36 +374,44 @@ css = """
     text-align: center;
     margin-bottom: 2rem;
     padding: 2rem 0;
-    background: #1a1b1e;
     border-radius: 12px;
     color: white;
 }
 #header h1 {
     color: white;
     font-size: 2.5rem;
     margin-bottom: 0.5rem;
 }
 #header h3 {
     color: #a8a9ab;
 }
 .search-container {
-    background: #1a1b1e;
     border-radius: 12px;
-    box-shadow: 0 4px 12px rgba(0,0,0,0.1);
-    padding: 1rem;
-    margin-bottom: 1rem;
 }
 .search-box {
     padding: 1rem;
     background: #2c2d30;
-    border-radius: 8px;
     margin-bottom: 1rem;
 }
 .search-box input[type="text"] {
     background: #3a3b3e !important;
     border: 1px solid #4a4b4e !important;
     color: white !important;
     border-radius: 8px !important;
 }
 .search-box input[type="text"]::placeholder {
     color: #a8a9ab !important;
@@ -340,23 +419,43 @@ css = """
 .search-box button {
     background: #2563eb !important;
     border: none !important;
 }
 .results-container {
     background: #2c2d30;
-    border-radius: 8px;
-    padding: 1rem;
-    margin-top: 1rem;
 }
 .answer-box {
     background: #3a3b3e;
-    border-radius: 8px;
     padding: 1.5rem;
     color: white;
-    margin-bottom: 1rem;
 }
 .answer-box p {
     color: #e5e7eb;
-    line-height: 1.6;
 }
 .sources-container {
     margin-top: 1rem;
@@ -367,13 +466,16 @@ css = """
 .source-item {
     display: flex;
     padding: 12px;
-    margin: 8px 0;
     background: #3a3b3e;
     border-radius: 8px;
     transition: all 0.2s;
 }
 .source-item:hover {
     background: #4a4b4e;
 }
 .source-number {
     font-weight: bold;
@@ -388,7 +490,12 @@ css = """
     font-weight: 500;
     text-decoration: none;
     display: block;
-    margin-bottom: 4px;
 }
 .source-date {
     color: #a8a9ab;
@@ -398,7 +505,7 @@ css = """
 .source-snippet {
     color: #e5e7eb;
     font-size: 0.9em;
-    line-height: 1.4;
 }
 .chat-history {
     max-height: 400px;
@@ -407,6 +514,18 @@ css = """
     background: #2c2d30;
     border-radius: 8px;
     margin-top: 1rem;
 }
 .examples-container {
     background: #2c2d30;
@@ -418,20 +537,73 @@ css = """
     background: #3a3b3e !important;
     border: 1px solid #4a4b4e !important;
     color: #e5e7eb !important;
 }
 .markdown-content {
     color: #e5e7eb !important;
 }
 .markdown-content h1, .markdown-content h2, .markdown-content h3 {
     color: white !important;
 }
 .markdown-content a {
     color: #60a5fa !important;
 }
 .accordion {
     background: #2c2d30 !important;
     border-radius: 8px !important;
     margin-top: 1rem !important;
 }
 .voice-selector {
     margin-top: 1rem;
@@ -443,10 +615,54 @@ css = """
     background: #3a3b3e !important;
     color: white !important;
     border: 1px solid #4a4b4e !important;
 }
 """
-# Update the Gradio interface layout
 with gr.Blocks(title="AI Search Assistant", css=css, theme="dark") as demo:
     chat_history = gr.State([])
@@ -462,13 +678,14 @@ with gr.Blocks(title="AI Search Assistant", css=css, theme="dark") as demo:
                 scale=5,
                 container=False
             )
-            search_btn = gr.Button("Search", variant="primary", scale=1)
             voice_select = gr.Dropdown(
-                choices=list(VOICE_CHOICES.items()),
-                value='af',
-                label="Select Voice",
-                elem_classes="voice-selector"
             )
         with gr.Row(elem_classes="results-container"):
             with gr.Column(scale=2):
@@ -486,28 +703,33 @@ with gr.Blocks(title="AI Search Assistant", css=css, theme="dark") as demo:
         with gr.Row(elem_classes="examples-container"):
             gr.Examples(
                 examples=[
-                    "musk explores blockchain for doge",
-                    "nvidia to launch new gaming card",
                     "What are the best practices for sustainable living?",
-                    "tesla mistaken for asteroid"
                 ],
                 inputs=search_input,
                 label="Try these examples"
             )
     # Handle interactions
     search_btn.click(
         fn=process_query,
-        inputs=[search_input, chat_history, voice_select],
         outputs=[answer_output, sources_output, search_btn, chat_history_display, audio_output]
     )
     # Also trigger search on Enter key
     search_input.submit(
         fn=process_query,
-        inputs=[search_input, chat_history, voice_select],
         outputs=[answer_output, sources_output, search_btn, chat_history_display, audio_output]
     )
 if __name__ == "__main__":
-    demo.launch(share=True)

 import subprocess
 import numpy as np
 from typing import List, Dict, Tuple, Any
+from functools import lru_cache
+import asyncio
+import threading
+from concurrent.futures import ThreadPoolExecutor
+# --- Configuration ---
+MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
+MAX_SEARCH_RESULTS = 5
+TTS_SAMPLE_RATE = 24000
+MAX_TTS_CHARS = 1000
+GPU_DURATION = 30  # for spaces.GPU decorator
+MAX_NEW_TOKENS = 256
+TEMPERATURE = 0.7
+TOP_P = 0.95
+# --- Initialization ---
+# Initialize model and tokenizer with better error handling
 try:
+    print("Loading tokenizer...")
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    tokenizer.pad_token = tokenizer.eos_token
+    print("Loading model...")
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_NAME,
+        device_map="auto",
+        offload_folder="offload",
+        low_cpu_mem_usage=True,
+        torch_dtype=torch.float16
+    )
+    print("Model and tokenizer loaded successfully")
 except Exception as e:
+    print(f"Error initializing model: {str(e)}")
+    raise
+# --- TTS Setup ---
 VOICE_CHOICES = {
     '🇺🇸 Female (Default)': 'af',
     '🇺🇸 Bella': 'af_bella',
 }
 TTS_ENABLED = False
 TTS_MODEL = None
+VOICEPACKS = {}  # Cache voice packs
+# Initialize Kokoro TTS in a separate thread to avoid blocking startup
+def setup_tts():
+    global TTS_ENABLED, TTS_MODEL, VOICEPACKS
+    try:
+        # Install dependencies first
+        subprocess.run(['git', 'lfs', 'install'], check=True)
+        if not os.path.exists('Kokoro-82M'):
+            subprocess.run(['git', 'clone', 'https://huggingface.co/hexgrad/Kokoro-82M'], check=True)
+        # Install espeak
         try:
+            subprocess.run(['apt-get', 'update'], check=True)
+            subprocess.run(['apt-get', 'install', '-y', 'espeak'], check=True)
+        except subprocess.CalledProcessError:
+            try:
+                subprocess.run(['apt-get', 'install', '-y', 'espeak-ng'], check=True)
+            except subprocess.CalledProcessError:
+                print("Warning: Could not install espeak or espeak-ng. TTS functionality may be limited.")
+        # Set up Kokoro TTS
+        if os.path.exists('Kokoro-82M'):
+            import sys
+            sys.path.append('Kokoro-82M')
+            from models import build_model
+            from kokoro import generate
+            # Make these functions accessible globally
+            globals()['build_model'] = build_model
+            globals()['generate_tts'] = generate
+            device = 'cuda' if torch.cuda.is_available() else 'cpu'
+            TTS_MODEL = build_model('Kokoro-82M/kokoro-v0_19.pth', device)
+            # Preload default voice
+            default_voice = 'af'
+            VOICEPACKS[default_voice] = torch.load(f'Kokoro-82M/voices/{default_voice}.pt',
+                                                   map_location=device,
+                                                   weights_only=True)
+            # Preload other common voices to reduce latency
+            for voice_name in ['af_bella', 'af_sarah', 'af_nicole']:
+                try:
+                    voice_path = f'Kokoro-82M/voices/{voice_name}.pt'
+                    if os.path.exists(voice_path):
+                        VOICEPACKS[voice_name] = torch.load(voice_path,
+                                                           map_location=device,
+                                                           weights_only=True)
+                except Exception as e:
+                    print(f"Warning: Could not preload voice {voice_name}: {str(e)}")
+            TTS_ENABLED = True
+            print("TTS setup completed successfully")
+        else:
+            print("Warning: Kokoro-82M directory not found. TTS disabled.")
+    except Exception as e:
+        print(f"Warning: Could not initialize Kokoro TTS: {str(e)}")
+        TTS_ENABLED = False
+# Start TTS setup in a separate thread
+threading.Thread(target=setup_tts, daemon=True).start()
+# --- Search and Generation Functions ---
+@lru_cache(maxsize=128)
+def get_web_results(query: str, max_results: int = MAX_SEARCH_RESULTS) -> List[Dict[str, str]]:
+    """Get web search results using DuckDuckGo with caching for improved performance"""
     try:
         with DDGS() as ddgs:
             results = list(ddgs.text(query, max_results=max_results))
             return [{
                 "title": result.get("title", ""),
+                "snippet": result.get("body", ""),
+                "url": result.get("href", ""),
                 "date": result.get("published", "")
             } for result in results]
     except Exception as e:
     sources_html = "<div class='sources-container'>"
     for i, res in enumerate(web_results, 1):
         title = res["title"] or "Source"
+        date = f"<span class='source-date'>{res['date']}</span>" if res.get('date') else ""
+        snippet = res.get("snippet", "")[:150] + "..." if res.get("snippet") else ""
         sources_html += f"""
         <div class='source-item'>
             <div class='source-number'>[{i}]</div>
             <div class='source-content'>
                 <a href="{res['url']}" target="_blank" class='source-title'>{title}</a>
                 {date}
+                <div class='source-snippet'>{snippet}</div>
             </div>
         </div>
         """
     sources_html += "</div>"
     return sources_html
+@spaces.GPU(duration=GPU_DURATION)
 def generate_answer(prompt: str) -> str:
+    """Generate answer using the DeepSeek model with optimized settings"""
     inputs = tokenizer(
         prompt,
         return_tensors="pt",
         return_attention_mask=True
     ).to(model.device)
+    with torch.no_grad():  # Disable gradient calculation for inference
+        outputs = model.generate(
+            inputs.input_ids,
+            attention_mask=inputs.attention_mask,
+            max_new_tokens=MAX_NEW_TOKENS,
+            temperature=TEMPERATURE,
+            top_p=TOP_P,
+            pad_token_id=tokenizer.eos_token_id,
+            do_sample=True,
+            early_stopping=True
+        )
     return tokenizer.decode(outputs[0], skip_special_tokens=True)
+@spaces.GPU(duration=GPU_DURATION)
+def generate_speech(text: str, voice_name: str = 'af') -> Tuple[int, np.ndarray] | None:
+    """Generate speech from text using Kokoro TTS model with improved error handling and caching."""
+    global VOICEPACKS, TTS_MODEL, TTS_ENABLED
+    if not TTS_ENABLED or TTS_MODEL is None:
         return None
     try:
+        from kokoro import generate as generate_tts
         device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        # Load voicepack if needed
+        if voice_name not in VOICEPACKS:
+            voice_file = f'Kokoro-82M/voices/{voice_name}.pt'
+            if not os.path.exists(voice_file):
+                print(f"Voicepack {voice_name}.pt not found. Falling back to default 'af'.")
+                voice_name = 'af'
+                # Check if default is already loaded
+                if voice_name not in VOICEPACKS:
+                    voice_file = f'Kokoro-82M/voices/{voice_name}.pt'
+                    if os.path.exists(voice_file):
+                        VOICEPACKS[voice_name] = torch.load(voice_file, map_location=device, weights_only=True)
+                    else:
+                        print("Default voicepack 'af.pt' not found. Cannot generate audio.")
+                        return None
             else:
+                VOICEPACKS[voice_name] = torch.load(voice_file, map_location=device, weights_only=True)
         # Clean the text
         clean_text = ' '.join([line for line in text.split('\n') if not line.startswith('#')])
         clean_text = clean_text.replace('[', '').replace(']', '').replace('*', '')
         # Split long text into chunks
+        max_chars = MAX_TTS_CHARS
         chunks = []
         if len(clean_text) > max_chars:
             sentences = clean_text.split('.')
         audio_chunks = []
         for chunk in chunks:
             if chunk.strip():
+                chunk_audio, _ = generate_tts(TTS_MODEL, chunk, VOICEPACKS[voice_name], lang='a')
                 if isinstance(chunk_audio, torch.Tensor):
                     chunk_audio = chunk_audio.cpu().numpy()
                 audio_chunks.append(chunk_audio)
         # Concatenate chunks
         if audio_chunks:
             final_audio = np.concatenate(audio_chunks) if len(audio_chunks) > 1 else audio_chunks[0]
+            return (TTS_SAMPLE_RATE, final_audio)
+        return None
     except Exception as e:
         print(f"Error generating speech: {str(e)}")
         return None
+# --- Asynchronous Processing ---
+async def async_web_search(query: str) -> List[Dict[str, str]]:
+    """Run web search in a non-blocking way"""
+    loop = asyncio.get_event_loop()
+    return await loop.run_in_executor(None, get_web_results, query)
+async def async_answer_generation(prompt: str) -> str:
+    """Run answer generation in a non-blocking way"""
+    loop = asyncio.get_event_loop()
+    return await loop.run_in_executor(None, generate_answer, prompt)
+async def async_speech_generation(text: str, voice_name: str) -> Tuple[int, np.ndarray] | None:
+    """Run speech generation in a non-blocking way"""
+    loop = asyncio.get_event_loop()
+    return await loop.run_in_executor(None, generate_speech, text, voice_name)
 def process_query(query: str, history: List[List[str]], selected_voice: str = 'af'):
+    """Process user query with streaming effect and non-blocking operations"""
     try:
         if history is None:
             history = []
+        # Start the search task
         current_history = history + [[query, "*Searching...*"]]
         # Yield initial searching state
         yield (
             "*Searching & Thinking...*",  # answer_output (Markdown)
+            "<div class='searching'>Searching for results...</div>",  # sources_output (HTML)
             "Searching...",               # search_btn (Button)
             current_history,              # chat_history_display (Chatbot)
             None                          # audio_output (Audio)
         )
+        # Get web results
+        web_results = get_web_results(query)
+        sources_html = format_sources(web_results)
+        # Update with the search results obtained
+        yield (
+            "*Analyzing search results...*",  # answer_output
+            sources_html,                     # sources_output
+            "Generating answer...",           # search_btn
+            current_history,                  # chat_history_display
+            None                              # audio_output
+        )
         # Generate answer
         prompt = format_prompt(query, web_results)
         answer = generate_answer(prompt)
         # Update history before TTS
         updated_history = history + [[query, final_answer]]
+        # Update with the answer before generating speech
+        yield (
+            final_answer,              # answer_output
+            sources_html,              # sources_output
+            "Generating audio...",     # search_btn
+            updated_history,           # chat_history_display
+            None                       # audio_output
+        )
+        # Generate speech (but don't block if TTS is still initializing)
+        audio = None
+        if TTS_ENABLED and TTS_MODEL is not None:
             try:
+                audio = generate_speech(final_answer, selected_voice)
                 if audio is None:
                     final_answer += "\n\n*Audio generation failed. The voicepack may be missing or incompatible.*"
             except Exception as e:
                 final_answer += f"\n\n*Error generating audio: {str(e)}*"
         else:
+            final_answer += "\n\n*TTS is still initializing or is disabled. Try again in a moment.*"
         # Yield final result
         yield (
             sources_html,              # sources_output
             "Search",                  # search_btn
             updated_history,           # chat_history_display
+            audio                      # audio_output
         )
     except Exception as e:
             error_message = "⚠️ GPU quota exceeded. Please try again later when the daily quota resets."
         yield (
             f"Error: {error_message}",  # answer_output
+            "<div class='error'>An error occurred during search</div>",  # sources_output
             "Search",                   # search_btn
             history + [[query, f"*Error: {error_message}*"]],  # chat_history_display
             None                        # audio_output
         )
+# --- Improved UI ---
 css = """
 .gradio-container {
     max-width: 1200px !important;
     text-align: center;
     margin-bottom: 2rem;
     padding: 2rem 0;
+    background: linear-gradient(135deg, #1a1b1e, #2d2e32);
     border-radius: 12px;
     color: white;
+    box-shadow: 0 8px 32px rgba(0,0,0,0.2);
 }
 #header h1 {
     color: white;
     font-size: 2.5rem;
     margin-bottom: 0.5rem;
+    text-shadow: 0 2px 4px rgba(0,0,0,0.3);
 }
 #header h3 {
     color: #a8a9ab;
 }
 .search-container {
+    background: linear-gradient(135deg, #1a1b1e, #2d2e32);
     border-radius: 12px;
+    box-shadow: 0 4px 16px rgba(0,0,0,0.15);
+    padding: 1.5rem;
+    margin-bottom: 1.5rem;
 }
 .search-box {
     padding: 1rem;
     background: #2c2d30;
+    border-radius: 10px;
     margin-bottom: 1rem;
+    box-shadow: inset 0 2px 4px rgba(0,0,0,0.1);
 }
 .search-box input[type="text"] {
     background: #3a3b3e !important;
     border: 1px solid #4a4b4e !important;
     color: white !important;
     border-radius: 8px !important;
+    transition: all 0.3s ease;
+}
+.search-box input[type="text"]:focus {
+    border-color: #60a5fa !important;
+    box-shadow: 0 0 0 2px rgba(96, 165, 250, 0.3) !important;
 }
 .search-box input[type="text"]::placeholder {
     color: #a8a9ab !important;
 .search-box button {
     background: #2563eb !important;
     border: none !important;
+    box-shadow: 0 2px 4px rgba(0,0,0,0.2) !important;
+    transition: all 0.3s ease !important;
+}
+.search-box button:hover {
+    background: #1d4ed8 !important;
+    transform: translateY(-1px) !important;
+}
+.search-box button:active {
+    transform: translateY(1px) !important;
 }
 .results-container {
     background: #2c2d30;
+    border-radius: 10px;
+    padding: 1.5rem;
+    margin-top: 1.5rem;
+    box-shadow: 0 4px 12px rgba(0,0,0,0.1);
 }
 .answer-box {
     background: #3a3b3e;
+    border-radius: 10px;
     padding: 1.5rem;
     color: white;
+    margin-bottom: 1.5rem;
+    box-shadow: 0 2px 8px rgba(0,0,0,0.15);
+    transition: all 0.3s ease;
+}
+.answer-box:hover {
+    box-shadow: 0 4px 16px rgba(0,0,0,0.2);
 }
 .answer-box p {
     color: #e5e7eb;
+    line-height: 1.7;
+}
+.answer-box code {
+    background: #2c2d30;
+    border-radius: 4px;
+    padding: 2px 4px;
 }
 .sources-container {
     margin-top: 1rem;
 .source-item {
     display: flex;
     padding: 12px;
+    margin: 12px 0;
     background: #3a3b3e;
     border-radius: 8px;
     transition: all 0.2s;
+    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
 }
 .source-item:hover {
     background: #4a4b4e;
+    transform: translateY(-2px);
+    box-shadow: 0 4px 8px rgba(0,0,0,0.15);
 }
 .source-number {
     font-weight: bold;
     font-weight: 500;
     text-decoration: none;
     display: block;
+    margin-bottom: 6px;
+    transition: all 0.2s;
+}
+.source-title:hover {
+    color: #93c5fd;
+    text-decoration: underline;
 }
 .source-date {
     color: #a8a9ab;
 .source-snippet {
     color: #e5e7eb;
     font-size: 0.9em;
+    line-height: 1.5;
 }
 .chat-history {
     max-height: 400px;
     background: #2c2d30;
     border-radius: 8px;
     margin-top: 1rem;
+    scrollbar-width: thin;
+    scrollbar-color: #4a4b4e #2c2d30;
+}
+.chat-history::-webkit-scrollbar {
+    width: 8px;
+}
+.chat-history::-webkit-scrollbar-track {
+    background: #2c2d30;
+}
+.chat-history::-webkit-scrollbar-thumb {
+    background-color: #4a4b4e;
+    border-radius: 20px;
 }
 .examples-container {
     background: #2c2d30;
     background: #3a3b3e !important;
     border: 1px solid #4a4b4e !important;
     color: #e5e7eb !important;
+    transition: all 0.2s;
+    margin: 4px !important;
+}
+.examples-container button:hover {
+    background: #4a4b4e !important;
+    transform: translateY(-1px);
 }
 .markdown-content {
     color: #e5e7eb !important;
 }
 .markdown-content h1, .markdown-content h2, .markdown-content h3 {
     color: white !important;
+    margin-top: 1.2em !important;
+    margin-bottom: 0.8em !important;
+}
+.markdown-content h1 {
+    font-size: 1.7em !important;
+}
+.markdown-content h2 {
+    font-size: 1.5em !important;
+}
+.markdown-content h3 {
+    font-size: 1.3em !important;
 }
 .markdown-content a {
     color: #60a5fa !important;
+    text-decoration: none !important;
+    transition: all 0.2s;
+}
+.markdown-content a:hover {
+    color: #93c5fd !important;
+    text-decoration: underline !important;
+}
+.markdown-content code {
+    background: #2c2d30 !important;
+    padding: 2px 6px !important;
+    border-radius: 4px !important;
+    font-family: monospace !important;
+}
+.markdown-content pre {
+    background: #2c2d30 !important;
+    padding: 12px !important;
+    border-radius: 8px !important;
+    overflow-x: auto !important;
+}
+.markdown-content blockquote {
+    border-left: 4px solid #60a5fa !important;
+    padding-left: 1em !important;
+    margin-left: 0 !important;
+    color: #a8a9ab !important;
+}
+.markdown-content table {
+    border-collapse: collapse !important;
+    width: 100% !important;
+}
+.markdown-content th, .markdown-content td {
+    padding: 8px 12px !important;
+    border: 1px solid #4a4b4e !important;
+}
+.markdown-content th {
+    background: #2c2d30 !important;
 }
 .accordion {
     background: #2c2d30 !important;
     border-radius: 8px !important;
     margin-top: 1rem !important;
+    box-shadow: 0 2px 8px rgba(0,0,0,0.1) !important;
 }
 .voice-selector {
     margin-top: 1rem;
     background: #3a3b3e !important;
     color: white !important;
     border: 1px solid #4a4b4e !important;
+    border-radius: 4px !important;
+    padding: 8px !important;
+    transition: all 0.2s;
+}
+.voice-selector select:focus {
+    border-color: #60a5fa !important;
+}
+.audio-player {
+    margin-top: 1rem;
+    background: #2c2d30 !important;
+    border-radius: 8px !important;
+    padding: 0.5rem !important;
+}
+.audio-player audio {
+    width: 100% !important;
+}
+.searching, .error {
+    padding: 1rem;
+    border-radius: 8px;
+    text-align: center;
+    margin: 1rem 0;
+}
+.searching {
+    background: rgba(96, 165, 250, 0.1);
+    color: #60a5fa;
+}
+.error {
+    background: rgba(239, 68, 68, 0.1);
+    color: #ef4444;
+}
+.no-sources {
+    padding: 1rem;
+    text-align: center;
+    color: #a8a9ab;
+    background: #2c2d30;
+    border-radius: 8px;
+}
+@keyframes pulse {
+    0% { opacity: 0.6; }
+    50% { opacity: 1; }
+    100% { opacity: 0.6; }
+}
+.searching {
+    animation: pulse 1.5s infinite;
 }
 """
+# --- Gradio Interface ---
 with gr.Blocks(title="AI Search Assistant", css=css, theme="dark") as demo:
     chat_history = gr.State([])
                 scale=5,
                 container=False
             )
             voice_select = gr.Dropdown(
+                choices=list(VOICE_CHOICES.keys()),
+                value=list(VOICE_CHOICES.keys())[0],
+                label="Voice",
+                elem_classes="voice-selector",
+                scale=1
             )
+            search_btn = gr.Button("Search", variant="primary", scale=1)
         with gr.Row(elem_classes="results-container"):
             with gr.Column(scale=2):
         with gr.Row(elem_classes="examples-container"):
             gr.Examples(
                 examples=[
+                    "Latest news about artificial intelligence advances",
+                    "How does blockchain technology work?",
                     "What are the best practices for sustainable living?",
+                    "Compare electric vehicles and traditional cars"
                 ],
                 inputs=search_input,
                 label="Try these examples"
             )
+    # Handle voice selection mapping
+    def get_voice_id(voice_name):
+        return VOICE_CHOICES.get(voice_name, 'af')
     # Handle interactions
     search_btn.click(
         fn=process_query,
+        inputs=[search_input, chat_history, lambda x: get_voice_id(x), voice_select],
         outputs=[answer_output, sources_output, search_btn, chat_history_display, audio_output]
     )
     # Also trigger search on Enter key
     search_input.submit(
         fn=process_query,
+        inputs=[search_input, chat_history, lambda x: get_voice_id(x), voice_select],
         outputs=[answer_output, sources_output, search_btn, chat_history_display, audio_output]
     )
 if __name__ == "__main__":
+    # Start the app with optimized settings
+    demo.queue(concurrency_count=5, max_size=20).launch(share=True)