Update app.py
app.py
CHANGED
@@ -108,11 +108,10 @@ MODEL_CONTEXT_SIZES = {
     "GLHF API": {
         "mistralai/Mistral-7B-Instruct-v0.3": 32768,
         "microsoft/phi-3-mini-4k-instruct": 4096,
+        "microsoft/Phi-3.5-mini-instruct": 4096,
         "microsoft/Phi-3-mini-128k-instruct": 131072,  # Added Phi-3 128k
         "HuggingFaceH4/zephyr-7b-beta": 8192,
-        "mistralai/Mistral-7B-Instruct-v0.3": 32768,
         "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
-        "microsoft/Phi-3.5-mini-instruct": 4096,
         "google/gemma-2-2b-it": 2048,
         "microsoft/phi-2": 2048,
         # Add other model contexts here
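Since MODEL_CONTEXT_SIZES is a plain nested dict keyed first by provider and then by model id, a context lookup reduces to two dictionary reads. A minimal sketch of that access pattern (the `get_context_size` helper below is illustrative, not part of app.py):

```python
# Illustrative only: get_context_size is not defined in app.py.
SIZES = {
    "GLHF API": {
        "mistralai/Mistral-7B-Instruct-v0.3": 32768,
        "microsoft/Phi-3.5-mini-instruct": 4096,
    },
}

def get_context_size(provider: str, model_id: str, default: int = 4096) -> int:
    """Return the known context window for a model, with a safe default."""
    return SIZES.get(provider, {}).get(model_id, default)

assert get_context_size("GLHF API", "mistralai/Mistral-7B-Instruct-v0.3") == 32768
assert get_context_size("GLHF API", "unknown/model") == 4096
```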
@@ -225,6 +224,12 @@ class ModelRegistry:
         """Refresh the list of available Groq models"""
         self.groq_models = self._fetch_groq_models()
         return self.groq_models
+
+def apply_rate_limit(func, calls_per_min, *args, **kwargs):
+    """Apply rate limiting only when needed."""
+    rate_decorator = RateLimit(calls_per_min)
+    wrapped_func = rate_decorator(func)
+    return wrapped_func(*args, **kwargs)
 
 class PDFProcessor:
     """Handles PDF conversion to text and markdown using different methods"""
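The new `apply_rate_limit` helper assumes a `RateLimit` decorator class defined elsewhere in app.py; its body is not part of this diff. One plausible shape for it, under the assumption that it enforces a sliding one-minute window, is sketched below:

```python
import time
import threading
import functools
from collections import deque

class RateLimit:
    """Sliding-window limiter sketch. The real RateLimit in app.py is not
    shown in this diff; this is one plausible implementation, not the file's."""

    def __init__(self, calls_per_min: int):
        self.calls_per_min = calls_per_min
        self.calls = deque()          # timestamps of recent calls
        self.lock = threading.Lock()

    def __call__(self, func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            with self.lock:
                now = time.time()
                # Drop timestamps that have left the 60-second window.
                while self.calls and now - self.calls[0] > 60:
                    self.calls.popleft()
                if len(self.calls) >= self.calls_per_min:
                    # Sleep until the oldest call ages out of the window.
                    time.sleep(max(0.0, 60 - (now - self.calls[0])))
                    self.calls.popleft()
                self.calls.append(time.time())
            return func(*args, **kwargs)
        return wrapper
```

One design point worth noting: `apply_rate_limit` builds a fresh `RateLimit` instance on every invocation, so any window state the decorator keeps is per-call. If the real class tracks a shared window, the limit only binds across calls when one long-lived instance is reused.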
@@ -382,7 +387,7 @@ def send_to_model(prompt, model_selection, hf_model_choice, hf_custom_model, hf_
                   groq_model_choice, groq_api_key, openai_api_key, openai_model_choice,
                   cohere_api_key=None, cohere_model=None, glhf_api_key=None, glhf_model=None,
                   glhf_custom_model=None):
-    """
+    """Primary wrapper for model interactions with error handling."""
 
     logging.info("send to model starting...")
 
@@ -393,29 +398,54 @@ def send_to_model(prompt, model_selection, hf_model_choice, hf_custom_model, hf_
     logging.info("sending to model preparation.")
 
     # Basic input validation
-    valid_selections = ["Clipboard only", "HuggingFace Inference", "Groq API",
+    valid_selections = ["Clipboard only", "HuggingFace Inference", "Groq API",
+                        "OpenAI ChatGPT", "Cohere API", "GLHF API"]
     if model_selection not in valid_selections:
         return "Error: Invalid model selection", None
+
+    # Check environment API keys
+    env_api_keys = {
+        "GROQ_API_KEY": os.getenv('GROQ_API_KEY'),
+        "OPENAI_API_KEY": os.getenv('OPENAI_API_KEY'),
+        "COHERE_API_KEY": os.getenv('COHERE_API_KEY'),
+        "GLHF_API_KEY": os.getenv('GLHF_API_KEY')
+    }
+
+    for key_name, key_value in env_api_keys.items():
+        if not key_value:
+            logging.warning(f"No {key_name} found in environment")
 
-    # Model-specific validation
+    # Model-specific validation - check only required keys
     if model_selection == "Groq API" and not groq_api_key:
+        if env_api_keys["GROQ_API_KEY"]:
+            groq_api_key = env_api_keys["GROQ_API_KEY"]
+        else:
+            return "Error: Groq API key required", None
+
     elif model_selection == "OpenAI ChatGPT" and not openai_api_key:
+        if env_api_keys["OPENAI_API_KEY"]:
+            openai_api_key = env_api_keys["OPENAI_API_KEY"]
+        else:
+            return "Error: OpenAI API key required", None
+
+    elif model_selection == "GLHF API" and not glhf_api_key:
+        if env_api_keys["GLHF_API_KEY"]:
+            glhf_api_key = env_api_keys["GLHF_API_KEY"]
         else:
-            return "Error: Custom model ID required", None
-        model_id = glhf_custom_model.strip()
-        summary = send_to_glhf(prompt, glhf_model == "Use HuggingFace Model", model_id, glhf_custom_model, glhf_api_key)
+            return "Error: GLHF API key required", None
 
+    # Try implementation
     try:
         logging.info("calling send_to_model_impl.")
+
+        # Use rate limits only with environment API keys
+        use_rate_limits = {
+            "Groq API": groq_api_key == env_api_keys["GROQ_API_KEY"],
+            "OpenAI ChatGPT": openai_api_key == env_api_keys["OPENAI_API_KEY"],
+            "Cohere API": cohere_api_key == env_api_keys["COHERE_API_KEY"],
+            "GLHF API": glhf_api_key == env_api_keys["GLHF_API_KEY"]
+        }.get(model_selection, False)
+
         summary, download_file = send_to_model_impl(
             prompt=prompt.strip(),
             model_selection=model_selection,
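The net effect of the new validation: an explicitly supplied key always wins, the environment key is only a fallback, and rate limiting is switched on exactly when the shared environment key ends up being used. A hypothetical invocation (the model id and key values are placeholders, and this assumes a GROQ_API_KEY is set in the Space's environment):

```python
import os

# Placeholder key for illustration only.
os.environ.setdefault("GROQ_API_KEY", "gsk-example")

summary, download_file = send_to_model(
    prompt="Summarize the extracted PDF text ...",
    model_selection="Groq API",
    hf_model_choice=None,
    hf_custom_model=None,
    hf_api_key=None,
    groq_model_choice="llama-3.1-8b-instant",  # placeholder model id
    groq_api_key=None,                         # empty: falls back to the env key
    openai_api_key=None,
    openai_model_choice=None,
)
print(summary)
```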
@@ -426,13 +456,13 @@ def send_to_model(prompt, model_selection, hf_model_choice, hf_custom_model, hf_
             groq_api_key=groq_api_key,
             openai_api_key=openai_api_key,
             openai_model_choice=openai_model_choice,
-            cohere_api_key=cohere_api_key,
+            cohere_api_key=cohere_api_key or env_api_keys["COHERE_API_KEY"],
             cohere_model=cohere_model,
             glhf_api_key=glhf_api_key,
             glhf_model=glhf_model,
-            glhf_custom_model=glhf_custom_model
+            glhf_custom_model=glhf_custom_model,
+            use_rate_limits=use_rate_limits
         )
-        logging.info("summary received:", summary)
 
         if summary is None or not isinstance(summary, str):
             return "Error: No response from model", None
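One detail worth noticing in the call above: `cohere_api_key or env_api_keys["COHERE_API_KEY"]` falls back whenever the user-supplied value is falsy, so an empty string from a cleared textbox behaves the same as a missing key:

```python
user_key = ""                     # e.g. an empty Gradio textbox
env_key = "co-env-key-example"    # hypothetical environment value
effective = user_key or env_key
print(effective)                  # co-env-key-example
```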
@@ -452,10 +482,13 @@ def send_to_model(prompt, model_selection, hf_model_choice, hf_custom_model, hf_
         error_msg = "Unknown error occurred"
         logging.error(f"Error in send_to_model: {error_msg}")
         return f"Error: {error_msg}", None
+    finally:
+        logging.info("send to model completed.")
 
 def send_to_model_impl(prompt, model_selection, hf_model_choice, hf_custom_model, hf_api_key,
                        groq_model_choice, groq_api_key, openai_api_key, openai_model_choice,
-                       cohere_api_key=None, cohere_model=None, glhf_api_key=None
+                       cohere_api_key=None, cohere_model=None, glhf_api_key=None, glhf_model=None,
+                       glhf_custom_model=None, use_rate_limits=False):
     """Implementation of model sending with all providers."""
     logging.info("send to model impl commencing...")
 
@@ -463,33 +496,50 @@ def send_to_model_impl(prompt, model_selection, hf_model_choice, hf_custom_model
     if model_selection == "Clipboard only":
         return "Text copied to clipboard. Use paste for processing.", None
 
+    # Get the summary based on model selection
     if model_selection == "HuggingFace Inference":
         model_id = hf_custom_model if hf_model_choice == "Custom Model" else model_registry.hf_models[hf_model_choice]
+        # Always try without API key first
         summary = send_to_hf_inference(prompt, model_id)
-        if summary.startswith("Error") and hf_api_key:
+        if summary.startswith("Error: This model requires authentication") and hf_api_key:
+            # Only try with API key if the model specifically requires it
+            summary = send_to_hf_inference(prompt, model_id, hf_api_key, use_rate_limits)
 
     elif model_selection == "Groq API":
+        if not groq_api_key:
+            return "Error: Groq API key required", None
+        summary = send_to_groq(prompt, groq_model_choice, groq_api_key, use_rate_limits)
 
     elif model_selection == "OpenAI ChatGPT":
+        if not openai_api_key:
+            return "Error: OpenAI API key required", None
+        summary = send_to_openai(prompt, openai_api_key, model=openai_model_choice,
+                                 use_rate_limit=use_rate_limits)
 
     elif model_selection == "Cohere API":
-        summary = send_to_cohere(prompt, cohere_api_key, cohere_model)
+        summary = send_to_cohere(prompt, cohere_api_key, cohere_model, use_rate_limits)
 
     elif model_selection == "GLHF API":
         if not glhf_api_key:
             return "Error: GLHF API key required", None
+        summary = send_to_glhf(
+            prompt,
+            glhf_model == "Use HuggingFace Model",
+            hf_custom_model if hf_model_choice == "Custom Model" else model_registry.hf_models[hf_model_choice],
+            glhf_custom_model,
+            glhf_api_key,
+            use_rate_limits
+        )
 
     else:
         return "Error: Invalid model selection", None
 
     # Validate response
+    if not summary:
+        return "Error: No response from model", None
+
+    if not isinstance(summary, str):
+        return "Error: Invalid response type from model", None
 
     # Create download file for valid responses
     if not summary.startswith("Error"):
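The dispatch above also calls `send_to_groq`, whose body falls outside this diff. Following the closure-plus-`apply_rate_limit` pattern the other providers use, it plausibly looks like the sketch below (the Groq client usage and the 16 calls/min figure are assumptions, and `apply_rate_limit` is the helper added earlier in this commit):

```python
import logging

def send_to_groq(prompt: str, model: str, api_key: str, use_rate_limit: bool = False) -> str:
    """Illustrative reconstruction; the real send_to_groq in app.py is not shown in this diff."""
    def _send():
        try:
            from groq import Groq  # official Groq SDK
            client = Groq(api_key=api_key)
            response = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.7,
                max_tokens=500,
            )
            return response.choices[0].message.content
        except Exception as e:
            logging.error(f"Groq API error: {e}")
            return f"Error with Groq API: {str(e)}"

    # 16 calls/min mirrors the HF and Cohere figures elsewhere in the file (assumption).
    return apply_rate_limit(_send, 16) if use_rate_limit else _send()
```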
@@ -506,34 +556,16 @@ def send_to_model_impl(prompt, model_selection, hf_model_choice, hf_custom_model
         logging.error(f"Error in send_to_model_impl: {error_msg}")
         return f"Error: {error_msg}", None
 
-def send_to_hf_inference(prompt: str, model_name: str, api_key: str = None) -> str:
-    """Send prompt to HuggingFace Inference API
+def send_to_hf_inference(prompt: str, model_name: str, api_key: str = None, use_rate_limit: bool = False) -> str:
+    """Send prompt to HuggingFace Inference API."""
+    def _send():
         # Check token limits first
         is_within_limits, error_msg = check_token_limits(prompt, model_name)
         if not is_within_limits:
             return error_msg
 
-        client = InferenceClient(token=api_key) if api_key else InferenceClient()
-        response = client.text_generation(
-            prompt,
-            model=model_name,
-            max_new_tokens=500,
-            temperature=0.7,
-            top_p=0.95,
-            repetition_penalty=1.1
-        )
-        return str(response)
-    except Exception as e:
-        logging.error(f"HuggingFace inference error: {e}")
-        return f"Error with HuggingFace inference: {str(e)}"  # Return error message instead of raising
-
-def send_to_hf_inference_old(prompt: str, model_name: str, api_key: str = None) -> str:
-    """Send prompt to HuggingFace Inference API with optional authentication."""
-    try:
-        # First try without authentication
         try:
-            client = InferenceClient()
+            client = InferenceClient(token=api_key) if api_key else InferenceClient()
             response = client.text_generation(
                 prompt,
                 model=model_name,
@@ -543,142 +575,168 @@ def send_to_hf_inference_old(prompt: str, model_name: str, api_key: str = None)
                 repetition_penalty=1.1
             )
             return str(response)
-            return "Error: This model requires authentication. Please enter your HuggingFace API key."
+        except Exception as e:
+            logging.error(f"HuggingFace inference error: {e}")
+            return f"Error with HuggingFace inference: {str(e)}"
+
+    return apply_rate_limit(_send, 16) if use_rate_limit else _send()
+
+def send_to_glhf(prompt: str, use_hf_model: bool, model_name: str, custom_model: str,
+                 api_key: str, use_rate_limit: bool = False) -> str:
+    """Send prompt to GLHF API with model selection and proper stream handling."""
+    def _send():
+        try:
+            import openai
+            client = openai.OpenAI(
+                api_key=api_key,
+                base_url="https://glhf.chat/api/openai/v1",
+            )
+
+            model_id = f"hf:{model_name if use_hf_model else custom_model}"
+
+            try:
+                # First try without streaming
+                completion = client.chat.completions.create(
+                    stream=False,
+                    model=model_id,
+                    messages=[
+                        {"role": "system", "content": "You are a helpful assistant."},
+                        {"role": "user", "content": prompt}
+                    ],
+                )
+                return completion.choices[0].message.content
+            except Exception as non_stream_error:
+                logging.warning(f"Non-streaming GLHF failed, trying streaming: {non_stream_error}")
+
+            # Fallback to streaming if needed
+            completion = client.chat.completions.create(
+                stream=True,
+                model=model_id,
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": prompt}
+                ],
+            )
+
+            response_text = []
+            try:
+                for chunk in completion:
+                    if chunk.choices and chunk.choices[0].delta.content is not None:
+                        response_text.append(chunk.choices[0].delta.content)
+            except Exception as stream_error:
+                if response_text:  # If we got partial response, return it
+                    logging.warning(f"Streaming interrupted but got partial response: {stream_error}")
+                    return "".join(response_text)
+                raise  # Re-raise if we got nothing
+
+            return "".join(response_text)
+
+        except Exception as e:
+            logging.error(f"GLHF API error: {e}")
+            return f"Error with GLHF API: {str(e)}"
+
+    return apply_rate_limit(_send, 384) if use_rate_limit else _send()
 
-def send_to_openai(prompt: str, api_key: str, model: str = "gpt-3.5-turbo") -> str:
+def send_to_openai(prompt: str, api_key: str, model: str = "gpt-3.5-turbo", use_rate_limit: bool = False) -> str:
     """Send prompt to OpenAI API."""
-    try:
-        from openai import OpenAI
-        client = OpenAI(api_key=api_key)
-        response = client.chat.completions.create(
-            model=model,
-            messages=[
-                {"role": "system", "content": "You are a helpful assistant that provides detailed responses."},
-                {"role": "user", "content": prompt}
-            ],
-            temperature=0.7,
-            max_tokens=500,
-            top_p=0.95
-        )
-
-        if response.choices and len(response.choices) > 0:
-            return response.choices[0].message.content
-        else:
-            raise Exception("No response generated")
-
-    except ImportError:
-        raise Exception("Please install the latest version of openai package (pip install --upgrade openai)")
-    except Exception as e:
-        logging.error(f"OpenAI API error: {e}")
-        raise  # Re-raise to be handled by caller
+    def _send():
+        try:
+            from openai import OpenAI
+            client = OpenAI(api_key=api_key)
+            response = client.chat.completions.create(
+                model=model,
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant that provides detailed responses."},
+                    {"role": "user", "content": prompt}
+                ],
+                temperature=0.7,
+                max_tokens=500,
+                top_p=0.95
+            )
+
+            if response.choices and len(response.choices) > 0:
+                return response.choices[0].message.content
+            return "Error: No response generated"
+
+        except ImportError:
+            return "Error: Please install the latest version of openai package"
+        except Exception as e:
+            logging.error(f"OpenAI API error: {e}")
+            return f"Error with OpenAI API: {str(e)}"
+
+    return apply_rate_limit(_send, 3000/60) if use_rate_limit else _send()
 
-@RateLimit(calls_per_min=16)  # 80% of 20 calls/min
-def send_to_cohere(prompt: str, api_key: str = None) -> str:
-    """Send prompt to Cohere API with V2 and V1 fallback."""
+def send_to_cohere(prompt: str, api_key: str = None, model: str = None, use_rate_limit: bool = False) -> str:
+    """Send prompt to Cohere API with V2 and V1 fallback."""
+    def _send():
+        try:
+            import cohere
+            # Try V2 first
+            try:
+                client = cohere.ClientV2(api_key) if api_key else cohere.ClientV2()
+                response = client.chat(
+                    model=model or "command-r-plus-08-2024",
+                    messages=[{
+                        "role": "user",
+                        "content": prompt
+                    }],
+                    temperature=0.7,
+                )
+                return response.message.content[0].text
+            except Exception as v2_error:
+                logging.warning(f"Cohere V2 failed, trying V1: {v2_error}")
+
+            # Fallback to V1
+            client = cohere.Client(api_key) if api_key else cohere.Client()
+            response = client.chat(
+                message=prompt,
+                model=model or "command-r-plus-08-2024",
+                temperature=0.7,
+                max_tokens=500,
+            )
+            return response.text
+
+        except Exception as e:
+            logging.error(f"Cohere API error: {e}")
+            return f"Error with Cohere API: {str(e)}"
+
+    return apply_rate_limit(_send, 16) if use_rate_limit else _send()
+
+def send_to_glhf(prompt: str, use_hf_model: bool, model_name: str, custom_model: str,
+                 api_key: str, use_rate_limit: bool = False) -> str:
     """Send prompt to GLHF API with model selection."""
+    def _send():
+        try:
+            import openai
+            client = openai.OpenAI(
+                api_key=api_key,
+                base_url="https://glhf.chat/api/openai/v1",
+            )
+
+            model_id = f"hf:{model_name if use_hf_model else custom_model}"
+
+            # For GLHF, always use streaming for reliability
+            completion = client.chat.completions.create(
+                stream=True,
+                model=model_id,
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": prompt}
+                ],
+            )
+
+            response_text = []
+            for chunk in completion:
+                if chunk.choices[0].delta.content is not None:
+                    response_text.append(chunk.choices[0].delta.content)
+
+            return "".join(response_text)
+
+        except Exception as e:
+            logging.error(f"GLHF API error: {e}")
+            return f"Error with GLHF API: {str(e)}"
+
+    return apply_rate_limit(_send, 384) if use_rate_limit else _send()
 
 def estimate_tokens(text: str) -> int:
     """Rough token estimation: ~4 characters per token on average"""
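Two reading notes on this hunk. First, the new file defines `send_to_glhf` twice, once with a non-streaming attempt plus streaming fallback and once streaming-only; since Python binds names at execution time, the second (streaming-only) definition silently replaces the first. Second, `estimate_tokens` appears here only as context; under its own docstring's four-characters-per-token heuristic, a plausible body (an assumption, the real one is outside this diff) is:

```python
def estimate_tokens(text: str) -> int:
    """Rough token estimation: ~4 characters per token on average"""
    # Integer division by 4 approximates tokenizer output for English text.
    return max(1, len(text) // 4)

print(estimate_tokens("It was the best of times, it was the worst of times."))  # 13
```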
@@ -1057,13 +1115,14 @@ with gr.Blocks(css="""
             first_model = list(ctx_size.keys())[0]
             ctx_size = ctx_size[first_model]
 
-        # Prepare dropdown choices based on provider
         if choice == "OpenAI ChatGPT":
             model_choices = list(MODEL_CONTEXT_SIZES["OpenAI ChatGPT"].keys())
             return [
                 gr.update(visible=False),  # hf_options
                 gr.update(visible=False),  # groq_options
                 gr.update(visible=True),   # openai_options
+                gr.update(visible=False),  # cohere_options
+                gr.update(visible=False),  # glhf_options
                 gr.update(value=ctx_size),  # context_size
                 gr.Dropdown(choices=model_choices, value=first_model)  # openai_model
             ]
@@ -1073,8 +1132,10 @@ with gr.Blocks(css="""
                 gr.update(visible=True),   # hf_options
                 gr.update(visible=False),  # groq_options
                 gr.update(visible=False),  # openai_options
+                gr.update(visible=False),  # cohere_options
+                gr.update(visible=False),  # glhf_options
                 gr.update(value=ctx_size),  # context_size
-                gr.Dropdown(choices=model_choices, value="
+                gr.Dropdown(choices=model_choices, value="Mixtral 7B")  # Update default value
             ]
         elif choice == "Groq API":
             model_choices = list(model_registry.groq_models.keys())
@@ -1082,8 +1143,30 @@ with gr.Blocks(css="""
                 gr.update(visible=False),  # hf_options
                 gr.update(visible=True),   # groq_options
                 gr.update(visible=False),  # openai_options
+                gr.update(visible=False),  # cohere_options
+                gr.update(visible=False),  # glhf_options
+                gr.update(value=ctx_size),  # context_size
+                gr.Dropdown(choices=model_choices, value=model_choices[0] if model_choices else None)
+            ]
+        elif choice == "Cohere API":
+            return [
+                gr.update(visible=False),  # hf_options
+                gr.update(visible=False),  # groq_options
+                gr.update(visible=False),  # openai_options
+                gr.update(visible=True),   # cohere_options
+                gr.update(visible=False),  # glhf_options
+                gr.update(value=ctx_size),  # context_size
+                gr.Dropdown(choices=[])  # not used
+            ]
+        elif choice == "GLHF API":
+            return [
+                gr.update(visible=False),  # hf_options
+                gr.update(visible=False),  # groq_options
+                gr.update(visible=False),  # openai_options
+                gr.update(visible=False),  # cohere_options
+                gr.update(visible=True),   # glhf_options
                 gr.update(value=ctx_size),  # context_size
-                gr.Dropdown(choices=
+                gr.Dropdown(choices=[])  # not used
             ]
 
         # Default return for "Clipboard only" or other options
@@ -1091,8 +1174,10 @@ with gr.Blocks(css="""
             gr.update(visible=False),  # hf_options
             gr.update(visible=False),  # groq_options
             gr.update(visible=False),  # openai_options
+            gr.update(visible=False),  # cohere_options
+            gr.update(visible=False),  # glhf_options
             gr.update(value=4096),  # context_size
-            gr.Dropdown(choices=[])  #
+            gr.Dropdown(choices=[])  # not used
         ]
 
     # PDF Processing Handlers
@@ -1204,6 +1289,7 @@ with gr.Blocks(css="""
         outputs=[progress_status, generated_prompt, download_snippet]  # Connect download_snippet
     )
 
+    # Model selection
     # Model selection
     model_choice.change(
         handle_model_selection,
@@ -1212,6 +1298,8 @@ with gr.Blocks(css="""
             hf_options,
             groq_options,
             openai_options,
+            cohere_options,
+            glhf_options,
             context_size,
             openai_model
         ]
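With `cohere_options` and `glhf_options` added, `handle_model_selection` now returns seven updates, and Gradio pairs them positionally with the seven components in `outputs`. A self-contained sketch of that contract (component names mirror app.py, but the layout is simplified and the Radio choices are truncated for brevity):

```python
import gradio as gr

with gr.Blocks() as demo:
    model_choice = gr.Radio(["Clipboard only", "HuggingFace Inference"], label="Model selection")
    hf_options = gr.Group(visible=False)
    groq_options = gr.Group(visible=False)
    openai_options = gr.Group(visible=False)
    cohere_options = gr.Group(visible=False)
    glhf_options = gr.Group(visible=False)
    context_size = gr.Number(value=4096, label="Context size")
    openai_model = gr.Dropdown(choices=[], label="OpenAI model")

    def handle_model_selection(choice):
        # One update per output component, in the same order as `outputs` below.
        return [
            gr.update(visible=choice == "HuggingFace Inference"),  # hf_options
            gr.update(visible=False),                              # groq_options
            gr.update(visible=False),                              # openai_options
            gr.update(visible=False),                              # cohere_options
            gr.update(visible=False),                              # glhf_options
            gr.update(value=4096),                                 # context_size
            gr.Dropdown(choices=[]),                               # openai_model (unused here)
        ]

    model_choice.change(
        handle_model_selection,
        inputs=[model_choice],
        outputs=[hf_options, groq_options, openai_options,
                 cohere_options, glhf_options, context_size, openai_model],
    )
```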