cstr committed on
Commit
19a6585
·
verified ·
1 Parent(s): e61f887

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +264 -176
app.py CHANGED
@@ -108,11 +108,10 @@ MODEL_CONTEXT_SIZES = {
108
  "GLHF API": {
109
  "mistralai/Mistral-7B-Instruct-v0.3": 32768,
110
  "microsoft/phi-3-mini-4k-instruct": 4096,
 
111
  "microsoft/Phi-3-mini-128k-instruct": 131072, # Added Phi-3 128k
112
  "HuggingFaceH4/zephyr-7b-beta": 8192,
113
- "mistralai/Mistral-7B-Instruct-v0.3": 32768,
114
  "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
115
- "microsoft/Phi-3.5-mini-instruct": 4096,
116
  "google/gemma-2-2b-it": 2048,
117
  "microsoft/phi-2": 2048,
118
  # Add other model contexts here
@@ -225,6 +224,12 @@ class ModelRegistry:
225
  """Refresh the list of available Groq models"""
226
  self.groq_models = self._fetch_groq_models()
227
  return self.groq_models
 
 
 
 
 
 
228
 
229
  class PDFProcessor:
230
  """Handles PDF conversion to text and markdown using different methods"""
@@ -382,7 +387,7 @@ def send_to_model(prompt, model_selection, hf_model_choice, hf_custom_model, hf_
382
  groq_model_choice, groq_api_key, openai_api_key, openai_model_choice,
383
  cohere_api_key=None, cohere_model=None, glhf_api_key=None, glhf_model=None,
384
  glhf_custom_model=None):
385
- """Wrapper function for send_to_model_impl with comprehensive error handling."""
386
 
387
  logging.info("send to model starting...")
388
 
@@ -393,29 +398,54 @@ def send_to_model(prompt, model_selection, hf_model_choice, hf_custom_model, hf_
393
  logging.info("sending to model preparation.")
394
 
395
  # Basic input validation
396
- valid_selections = ["Clipboard only", "HuggingFace Inference", "Groq API", "OpenAI ChatGPT", "Cohere API", "GLHF API"]
 
397
  if model_selection not in valid_selections:
398
  return "Error: Invalid model selection", None
 
 
 
 
 
 
 
 
 
 
 
 
399
 
400
- # Model-specific validation
401
  if model_selection == "Groq API" and not groq_api_key:
402
- return "Error: Groq API key required", None
 
 
 
 
403
  elif model_selection == "OpenAI ChatGPT" and not openai_api_key:
404
- return "Error: OpenAI API key required", None
405
- elif model_selection == "GLHF API":
406
- if not glhf_api_key:
407
- return "Error: GLHF API key required", None
408
- if glhf_model == "Use HuggingFace Model":
409
- model_id = hf_custom_model if hf_model_choice == "Custom Model" else model_registry.hf_models[hf_model_choice]
 
 
410
  else:
411
- if not glhf_custom_model.strip():
412
- return "Error: Custom model ID required", None
413
- model_id = glhf_custom_model.strip()
414
- summary = send_to_glhf(prompt, glhf_model == "Use HuggingFace Model", model_id, glhf_custom_model, glhf_api_key)
415
 
416
- # Call implementation with error handling
417
  try:
418
  logging.info("calling send_to_model_impl.")
 
 
 
 
 
 
 
 
 
419
  summary, download_file = send_to_model_impl(
420
  prompt=prompt.strip(),
421
  model_selection=model_selection,
@@ -426,13 +456,13 @@ def send_to_model(prompt, model_selection, hf_model_choice, hf_custom_model, hf_
426
  groq_api_key=groq_api_key,
427
  openai_api_key=openai_api_key,
428
  openai_model_choice=openai_model_choice,
429
- cohere_api_key=cohere_api_key,
430
  cohere_model=cohere_model,
431
  glhf_api_key=glhf_api_key,
432
  glhf_model=glhf_model,
433
- glhf_custom_model=glhf_custom_model
 
434
  )
435
- logging.info("summary received:", summary)
436
 
437
  if summary is None or not isinstance(summary, str):
438
  return "Error: No response from model", None
@@ -452,10 +482,13 @@ def send_to_model(prompt, model_selection, hf_model_choice, hf_custom_model, hf_
452
  error_msg = "Unknown error occurred"
453
  logging.error(f"Error in send_to_model: {error_msg}")
454
  return f"Error: {error_msg}", None
 
 
455
 
456
  def send_to_model_impl(prompt, model_selection, hf_model_choice, hf_custom_model, hf_api_key,
457
  groq_model_choice, groq_api_key, openai_api_key, openai_model_choice,
458
- cohere_api_key=None, cohere_model=None, glhf_api_key=None):
 
459
  """Implementation of model sending with all providers."""
460
  logging.info("send to model impl commencing...")
461
 
@@ -463,33 +496,50 @@ def send_to_model_impl(prompt, model_selection, hf_model_choice, hf_custom_model
463
  if model_selection == "Clipboard only":
464
  return "Text copied to clipboard. Use paste for processing.", None
465
 
 
466
  if model_selection == "HuggingFace Inference":
467
  model_id = hf_custom_model if hf_model_choice == "Custom Model" else model_registry.hf_models[hf_model_choice]
 
468
  summary = send_to_hf_inference(prompt, model_id)
469
- if summary.startswith("Error") and hf_api_key:
470
- summary = send_to_hf_inference(prompt, model_id, hf_api_key)
 
471
 
472
  elif model_selection == "Groq API":
473
- summary = send_to_groq(prompt, groq_model_choice, groq_api_key)
 
 
474
 
475
  elif model_selection == "OpenAI ChatGPT":
476
- summary = send_to_openai(prompt, openai_api_key, model=openai_model_choice)
 
 
 
477
 
478
  elif model_selection == "Cohere API":
479
- summary = send_to_cohere(prompt, cohere_api_key, cohere_model)
480
 
481
  elif model_selection == "GLHF API":
482
  if not glhf_api_key:
483
  return "Error: GLHF API key required", None
484
- model_id = hf_custom_model if hf_model_choice == "Custom Model" else model_registry.hf_models[hf_model_choice]
485
- summary = send_to_glhf(prompt, model_id, glhf_api_key)
 
 
 
 
 
 
486
 
487
  else:
488
  return "Error: Invalid model selection", None
489
 
490
  # Validate response
491
- if not summary or not isinstance(summary, str):
492
- return "Error: Invalid response from model", None
 
 
 
493
 
494
  # Create download file for valid responses
495
  if not summary.startswith("Error"):
@@ -506,34 +556,16 @@ def send_to_model_impl(prompt, model_selection, hf_model_choice, hf_custom_model
506
  logging.error(f"Error in send_to_model_impl: {error_msg}")
507
  return f"Error: {error_msg}", None
508
 
509
- def send_to_hf_inference(prompt: str, model_name: str, api_key: str = None) -> str:
510
- """Send prompt to HuggingFace Inference API with optional authentication."""
511
- try:
512
  # Check token limits first
513
  is_within_limits, error_msg = check_token_limits(prompt, model_name)
514
  if not is_within_limits:
515
  return error_msg
516
 
517
- client = InferenceClient(token=api_key) if api_key else InferenceClient()
518
- response = client.text_generation(
519
- prompt,
520
- model=model_name,
521
- max_new_tokens=500,
522
- temperature=0.7,
523
- top_p=0.95,
524
- repetition_penalty=1.1
525
- )
526
- return str(response)
527
- except Exception as e:
528
- logging.error(f"HuggingFace inference error: {e}")
529
- return f"Error with HuggingFace inference: {str(e)}" # Return error message instead of raising
530
-
531
- def send_to_hf_inference_old(prompt: str, model_name: str, api_key: str = None) -> str:
532
- """Send prompt to HuggingFace Inference API with optional authentication."""
533
- try:
534
- # First try without authentication
535
  try:
536
- client = InferenceClient() # No token
537
  response = client.text_generation(
538
  prompt,
539
  model=model_name,
@@ -543,142 +575,168 @@ def send_to_hf_inference_old(prompt: str, model_name: str, api_key: str = None)
543
  repetition_penalty=1.1
544
  )
545
  return str(response)
546
- except Exception as public_error:
547
- logging.info(f"Public inference failed: {public_error}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
548
 
549
- # If that fails and we have an API key, try with authentication
550
- if api_key:
551
- client = InferenceClient(token=api_key)
552
- response = client.text_generation(
553
- prompt,
554
- model=model_name,
555
- max_new_tokens=500,
556
- temperature=0.7,
557
- top_p=0.95,
558
- repetition_penalty=1.1
559
  )
560
- return str(response)
561
- else:
562
- # If we don't have an API key, inform the user they need one
563
- return "Error: This model requires authentication. Please enter your HuggingFace API key."
564
 
565
- except Exception as e:
566
- logging.error(f"HuggingFace inference error: {e}")
567
- return f"Error with HuggingFace inference: {str(e)}"
 
 
 
 
 
 
568
 
569
- def send_to_groq(prompt: str, model_name: str, api_key: str) -> str:
570
- """Send prompt to Groq API with better error handling."""
571
- try:
572
- client = Groq(api_key=api_key)
573
- response = client.chat.completions.create(
574
- model=model_name,
575
- messages=[{
576
- "role": "user",
577
- "content": prompt
578
- }],
579
- temperature=0.7,
580
- max_tokens=500,
581
- top_p=0.95
582
- )
583
- return response.choices[0].message.content
584
- except Exception as e:
585
- logging.error(f"Groq API error: {e}")
586
- raise # Re-raise to be handled by caller
587
 
588
- def send_to_openai(prompt: str, api_key: str, model: str = "gpt-3.5-turbo") -> str:
589
  """Send prompt to OpenAI API."""
590
- try:
591
- from openai import OpenAI
592
- client = OpenAI(api_key=api_key)
593
- response = client.chat.completions.create(
594
- model=model,
595
- messages=[
596
- {"role": "system", "content": "You are a helpful assistant that provides detailed responses."},
597
- {"role": "user", "content": prompt}
598
- ],
599
- temperature=0.7,
600
- max_tokens=500,
601
- top_p=0.95
602
- )
603
-
604
- if response.choices and len(response.choices) > 0:
605
- return response.choices[0].message.content
606
- else:
607
- raise Exception("No response generated")
608
-
609
- except ImportError:
610
- raise Exception("Please install the latest version of openai package (pip install --upgrade openai)")
611
- except Exception as e:
612
- logging.error(f"OpenAI API error: {e}")
613
- raise # Re-raise to be handled by caller
614
-
615
- @RateLimit(calls_per_min=16) # 80% of 20 calls/min
616
- def send_to_cohere(prompt: str, api_key: str = None) -> str:
617
- """Send prompt to Cohere API with V2 and V1 fallback."""
618
- try:
619
- # Try V2 first
620
  try:
621
- import cohere
622
- client = cohere.ClientV2(api_key) if api_key else cohere.ClientV2()
623
- response = client.chat(
624
- model="command-r-plus-08-2024", # Using latest model
625
- messages=[{
626
- "role": "user",
627
- "content": prompt
628
- }],
629
- temperature=0.7,
630
- )
631
- return response.message.content[0].text
632
- except Exception as v2_error:
633
- logging.warning(f"Cohere V2 failed, trying V1: {v2_error}")
634
-
635
- # Fallback to V1
636
- client = cohere.Client(api_key) if api_key else cohere.Client()
637
- response = client.chat(
638
- message=prompt,
639
  temperature=0.7,
640
  max_tokens=500,
 
641
  )
642
- return response.text
643
 
644
- except Exception as e:
645
- logging.error(f"Cohere API error: {e}")
646
- return f"Error with Cohere API: {str(e)}"
 
 
 
 
 
 
 
 
647
 
648
- @RateLimit(calls_per_min=384) # 80% of 480/8hours = 60/hour = 1/min
649
- def send_to_glhf(prompt: str, use_hf_model: bool, model_name: str, custom_model: str, api_key: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
650
  """Send prompt to GLHF API with model selection."""
651
- try:
652
- import openai
653
-
654
- client = openai.OpenAI(
655
- api_key=api_key,
656
- base_url="https://glhf.chat/api/openai/v1",
657
- )
658
 
659
- # Select model based on user choice
660
- model_id = f"hf:{model_name if use_hf_model else custom_model}"
661
-
662
- # Always use streaming for reliability
663
- completion = client.chat.completions.create(
664
- stream=True,
665
- model=model_id,
666
- messages=[
667
- {"role": "system", "content": "You are a helpful assistant."},
668
- {"role": "user", "content": prompt}
669
- ],
670
- )
671
 
672
- response_text = []
673
- for chunk in completion:
674
- if chunk.choices[0].delta.content is not None:
675
- response_text.append(chunk.choices[0].delta.content)
676
-
677
- return "".join(response_text)
678
 
679
- except Exception as e:
680
- logging.error(f"GLHF API error: {e}")
681
- return f"Error with GLHF API: {str(e)}"
 
 
682
 
683
  def estimate_tokens(text: str) -> int:
684
  """Rough token estimation: ~4 characters per token on average"""
@@ -1057,13 +1115,14 @@ with gr.Blocks(css="""
1057
  first_model = list(ctx_size.keys())[0]
1058
  ctx_size = ctx_size[first_model]
1059
 
1060
- # Prepare dropdown choices based on provider
1061
  if choice == "OpenAI ChatGPT":
1062
  model_choices = list(MODEL_CONTEXT_SIZES["OpenAI ChatGPT"].keys())
1063
  return [
1064
  gr.update(visible=False), # hf_options
1065
  gr.update(visible=False), # groq_options
1066
  gr.update(visible=True), # openai_options
 
 
1067
  gr.update(value=ctx_size), # context_size
1068
  gr.Dropdown(choices=model_choices, value=first_model) # openai_model
1069
  ]
@@ -1073,8 +1132,10 @@ with gr.Blocks(css="""
1073
  gr.update(visible=True), # hf_options
1074
  gr.update(visible=False), # groq_options
1075
  gr.update(visible=False), # openai_options
 
 
1076
  gr.update(value=ctx_size), # context_size
1077
- gr.Dropdown(choices=model_choices, value="Phi-3 Mini 4K") # openai_model (not used)
1078
  ]
1079
  elif choice == "Groq API":
1080
  model_choices = list(model_registry.groq_models.keys())
@@ -1082,8 +1143,30 @@ with gr.Blocks(css="""
1082
  gr.update(visible=False), # hf_options
1083
  gr.update(visible=True), # groq_options
1084
  gr.update(visible=False), # openai_options
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1085
  gr.update(value=ctx_size), # context_size
1086
- gr.Dropdown(choices=model_choices, value=model_choices[0] if model_choices else None) # openai_model (not used)
1087
  ]
1088
 
1089
  # Default return for "Clipboard only" or other options
@@ -1091,8 +1174,10 @@ with gr.Blocks(css="""
1091
  gr.update(visible=False), # hf_options
1092
  gr.update(visible=False), # groq_options
1093
  gr.update(visible=False), # openai_options
 
 
1094
  gr.update(value=4096), # context_size
1095
- gr.Dropdown(choices=[]) # openai_model (not used)
1096
  ]
1097
 
1098
  # PDF Processing Handlers
@@ -1204,6 +1289,7 @@ with gr.Blocks(css="""
1204
  outputs=[progress_status, generated_prompt, download_snippet] # Connect download_snippet
1205
  )
1206
 
 
1207
  # Model selection
1208
  model_choice.change(
1209
  handle_model_selection,
@@ -1212,6 +1298,8 @@ with gr.Blocks(css="""
1212
  hf_options,
1213
  groq_options,
1214
  openai_options,
 
 
1215
  context_size,
1216
  openai_model
1217
  ]
 
108
  "GLHF API": {
109
  "mistralai/Mistral-7B-Instruct-v0.3": 32768,
110
  "microsoft/phi-3-mini-4k-instruct": 4096,
111
+ "microsoft/Phi-3.5-mini-instruct": 4096,
112
  "microsoft/Phi-3-mini-128k-instruct": 131072, # Added Phi-3 128k
113
  "HuggingFaceH4/zephyr-7b-beta": 8192,
 
114
  "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
 
115
  "google/gemma-2-2b-it": 2048,
116
  "microsoft/phi-2": 2048,
117
  # Add other model contexts here
 
224
  """Refresh the list of available Groq models"""
225
  self.groq_models = self._fetch_groq_models()
226
  return self.groq_models
227
+
228
def apply_rate_limit(func, calls_per_min, *args, **kwargs):
    """Run ``func`` through a ``RateLimit`` decorator created for this call.

    Args:
        func: Callable to execute.
        calls_per_min: Value handed to ``RateLimit`` when building the decorator.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        Whatever the decorated ``func`` returns.
    """
    # NOTE(review): a fresh RateLimit object is built on every invocation. If
    # RateLimit keeps its call history per instance, nothing carries over from
    # one call to the next -- confirm against the RateLimit implementation.
    limited = RateLimit(calls_per_min)(func)
    return limited(*args, **kwargs)
233
 
234
  class PDFProcessor:
235
  """Handles PDF conversion to text and markdown using different methods"""
 
387
  groq_model_choice, groq_api_key, openai_api_key, openai_model_choice,
388
  cohere_api_key=None, cohere_model=None, glhf_api_key=None, glhf_model=None,
389
  glhf_custom_model=None):
390
+ """Primary wrapper for model interactions with error handling."""
391
 
392
  logging.info("send to model starting...")
393
 
 
398
  logging.info("sending to model preparation.")
399
 
400
  # Basic input validation
401
+ valid_selections = ["Clipboard only", "HuggingFace Inference", "Groq API",
402
+ "OpenAI ChatGPT", "Cohere API", "GLHF API"]
403
  if model_selection not in valid_selections:
404
  return "Error: Invalid model selection", None
405
+
406
+ # Check environment API keys
407
+ env_api_keys = {
408
+ "GROQ_API_KEY": os.getenv('GROQ_API_KEY'),
409
+ "OPENAI_API_KEY": os.getenv('OPENAI_API_KEY'),
410
+ "COHERE_API_KEY": os.getenv('COHERE_API_KEY'),
411
+ "GLHF_API_KEY": os.getenv('GLHF_API_KEY')
412
+ }
413
+
414
+ for key_name, key_value in env_api_keys.items():
415
+ if not key_value:
416
+ logging.warning(f"No {key_name} found in environment")
417
 
418
+ # Model-specific validation - check only required keys
419
  if model_selection == "Groq API" and not groq_api_key:
420
+ if env_api_keys["GROQ_API_KEY"]:
421
+ groq_api_key = env_api_keys["GROQ_API_KEY"]
422
+ else:
423
+ return "Error: Groq API key required", None
424
+
425
  elif model_selection == "OpenAI ChatGPT" and not openai_api_key:
426
+ if env_api_keys["OPENAI_API_KEY"]:
427
+ openai_api_key = env_api_keys["OPENAI_API_KEY"]
428
+ else:
429
+ return "Error: OpenAI API key required", None
430
+
431
+ elif model_selection == "GLHF API" and not glhf_api_key:
432
+ if env_api_keys["GLHF_API_KEY"]:
433
+ glhf_api_key = env_api_keys["GLHF_API_KEY"]
434
  else:
435
+ return "Error: GLHF API key required", None
 
 
 
436
 
437
+ # Try implementation
438
  try:
439
  logging.info("calling send_to_model_impl.")
440
+
441
+ # Use rate limits only with environment API keys
442
+ use_rate_limits = {
443
+ "Groq API": groq_api_key == env_api_keys["GROQ_API_KEY"],
444
+ "OpenAI ChatGPT": openai_api_key == env_api_keys["OPENAI_API_KEY"],
445
+ "Cohere API": cohere_api_key == env_api_keys["COHERE_API_KEY"],
446
+ "GLHF API": glhf_api_key == env_api_keys["GLHF_API_KEY"]
447
+ }.get(model_selection, False)
448
+
449
  summary, download_file = send_to_model_impl(
450
  prompt=prompt.strip(),
451
  model_selection=model_selection,
 
456
  groq_api_key=groq_api_key,
457
  openai_api_key=openai_api_key,
458
  openai_model_choice=openai_model_choice,
459
+ cohere_api_key=cohere_api_key or env_api_keys["COHERE_API_KEY"],
460
  cohere_model=cohere_model,
461
  glhf_api_key=glhf_api_key,
462
  glhf_model=glhf_model,
463
+ glhf_custom_model=glhf_custom_model,
464
+ use_rate_limits=use_rate_limits
465
  )
 
466
 
467
  if summary is None or not isinstance(summary, str):
468
  return "Error: No response from model", None
 
482
  error_msg = "Unknown error occurred"
483
  logging.error(f"Error in send_to_model: {error_msg}")
484
  return f"Error: {error_msg}", None
485
+ finally:
486
+ logging.info("send to model completed.")
487
 
488
  def send_to_model_impl(prompt, model_selection, hf_model_choice, hf_custom_model, hf_api_key,
489
  groq_model_choice, groq_api_key, openai_api_key, openai_model_choice,
490
+ cohere_api_key=None, cohere_model=None, glhf_api_key=None, glhf_model=None,
491
+ glhf_custom_model=None, use_rate_limits=False):
492
  """Implementation of model sending with all providers."""
493
  logging.info("send to model impl commencing...")
494
 
 
496
  if model_selection == "Clipboard only":
497
  return "Text copied to clipboard. Use paste for processing.", None
498
 
499
+ # Get the summary based on model selection
500
  if model_selection == "HuggingFace Inference":
501
  model_id = hf_custom_model if hf_model_choice == "Custom Model" else model_registry.hf_models[hf_model_choice]
502
+ # Always try without API key first
503
  summary = send_to_hf_inference(prompt, model_id)
504
+ if summary.startswith("Error: This model requires authentication") and hf_api_key:
505
+ # Only try with API key if the model specifically requires it
506
+ summary = send_to_hf_inference(prompt, model_id, hf_api_key, use_rate_limits)
507
 
508
  elif model_selection == "Groq API":
509
+ if not groq_api_key:
510
+ return "Error: Groq API key required", None
511
+ summary = send_to_groq(prompt, groq_model_choice, groq_api_key, use_rate_limits)
512
 
513
  elif model_selection == "OpenAI ChatGPT":
514
+ if not openai_api_key:
515
+ return "Error: OpenAI API key required", None
516
+ summary = send_to_openai(prompt, openai_api_key, model=openai_model_choice,
517
+ use_rate_limit=use_rate_limits)
518
 
519
  elif model_selection == "Cohere API":
520
+ summary = send_to_cohere(prompt, cohere_api_key, cohere_model, use_rate_limits)
521
 
522
  elif model_selection == "GLHF API":
523
  if not glhf_api_key:
524
  return "Error: GLHF API key required", None
525
+ summary = send_to_glhf(
526
+ prompt,
527
+ glhf_model == "Use HuggingFace Model",
528
+ hf_custom_model if hf_model_choice == "Custom Model" else model_registry.hf_models[hf_model_choice],
529
+ glhf_custom_model,
530
+ glhf_api_key,
531
+ use_rate_limits
532
+ )
533
 
534
  else:
535
  return "Error: Invalid model selection", None
536
 
537
  # Validate response
538
+ if not summary:
539
+ return "Error: No response from model", None
540
+
541
+ if not isinstance(summary, str):
542
+ return "Error: Invalid response type from model", None
543
 
544
  # Create download file for valid responses
545
  if not summary.startswith("Error"):
 
556
  logging.error(f"Error in send_to_model_impl: {error_msg}")
557
  return f"Error: {error_msg}", None
558
 
559
+ def send_to_hf_inference(prompt: str, model_name: str, api_key: str = None, use_rate_limit: bool = False) -> str:
560
+ """Send prompt to HuggingFace Inference API."""
561
+ def _send():
562
  # Check token limits first
563
  is_within_limits, error_msg = check_token_limits(prompt, model_name)
564
  if not is_within_limits:
565
  return error_msg
566
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
567
  try:
568
+ client = InferenceClient(token=api_key) if api_key else InferenceClient()
569
  response = client.text_generation(
570
  prompt,
571
  model=model_name,
 
575
  repetition_penalty=1.1
576
  )
577
  return str(response)
578
+ except Exception as e:
579
+ logging.error(f"HuggingFace inference error: {e}")
580
+ return f"Error with HuggingFace inference: {str(e)}"
581
+
582
+ return apply_rate_limit(_send, 16) if use_rate_limit else _send()
583
+
584
def send_to_glhf(prompt: str, use_hf_model: bool, model_name: str, custom_model: str,
                 api_key: str, use_rate_limit: bool = False) -> str:
    """Send prompt to GLHF API with model selection and proper stream handling.

    A non-streaming request is attempted first; if it raises or yields no
    text, the same request is repeated with streaming enabled.

    NOTE(review): another ``send_to_glhf`` is defined further down in this
    file; the later definition replaces this one when the module is loaded.

    Args:
        prompt: User message sent to the model.
        use_hf_model: When true ``model_name`` is used, otherwise ``custom_model``.
        model_name: Model ID used when ``use_hf_model`` is true.
        custom_model: Model ID used when ``use_hf_model`` is false.
        api_key: GLHF API key passed to the OpenAI-compatible client.
        use_rate_limit: When true the request goes through ``apply_rate_limit``.

    Returns:
        The model's reply text (possibly partial if the stream broke after
        some text arrived), or a string starting with ``"Error"``.
    """
    def _send():
        # Refuse to build an "hf:None" / "hf:" model ID from a missing selection.
        chosen = ((model_name if use_hf_model else custom_model) or "").strip()
        if not chosen:
            return "Error: GLHF model ID required"

        try:
            import openai
            client = openai.OpenAI(
                api_key=api_key,
                base_url="https://glhf.chat/api/openai/v1",
            )

            # Shared by the non-streaming attempt and the streaming fallback.
            request = {
                "model": f"hf:{chosen}",
                "messages": [
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt},
                ],
            }

            try:
                # First try without streaming
                completion = client.chat.completions.create(stream=False, **request)
                content = completion.choices[0].message.content
                if content:
                    return content
                # Message content is optional; do not return None from a -> str function.
                logging.warning("Non-streaming GLHF returned no content, trying streaming")
            except Exception as non_stream_error:
                logging.warning(f"Non-streaming GLHF failed, trying streaming: {non_stream_error}")

            # Fallback to streaming if needed
            completion = client.chat.completions.create(stream=True, **request)

            response_text = []
            try:
                for chunk in completion:
                    if chunk.choices and chunk.choices[0].delta.content is not None:
                        response_text.append(chunk.choices[0].delta.content)
            except Exception as stream_error:
                if response_text:  # If we got partial response, return it
                    logging.warning(f"Streaming interrupted but got partial response: {stream_error}")
                    return "".join(response_text)
                raise  # Re-raise if we got nothing; reported by the outer handler

            return "".join(response_text)

        except Exception as e:
            logging.error(f"GLHF API error: {e}")
            return f"Error with GLHF API: {str(e)}"

    return apply_rate_limit(_send, 384) if use_rate_limit else _send()
639
 
640
def send_to_openai(prompt: str, api_key: str, model: str = "gpt-3.5-turbo", use_rate_limit: bool = False) -> str:
    """Send prompt to OpenAI API.

    Args:
        prompt: User message sent to the model.
        api_key: OpenAI API key passed to the client.
        model: Chat model name.
        use_rate_limit: When true the request goes through ``apply_rate_limit``.

    Returns:
        The text of the first choice, or a string starting with ``"Error"``
        when the package is missing, the request fails, or no text came back.
    """
    def _send():
        try:
            from openai import OpenAI
            client = OpenAI(api_key=api_key)
            response = client.chat.completions.create(
                model=model,
                messages=[
                    {"role": "system", "content": "You are a helpful assistant that provides detailed responses."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.7,
                max_tokens=500,
                top_p=0.95
            )

            if response.choices:
                # Message content is optional; report its absence as an error
                # string so this function never returns None.
                content = response.choices[0].message.content
                if content:
                    return content
            return "Error: No response generated"

        except ImportError:
            return "Error: Please install the latest version of openai package"
        except Exception as e:
            logging.error(f"OpenAI API error: {e}")
            return f"Error with OpenAI API: {str(e)}"

    # Same value as the former 3000/60, but passed as an int rather than a float.
    return apply_rate_limit(_send, 3000 // 60) if use_rate_limit else _send()
668
 
669
def send_to_cohere(prompt: str, api_key: str = None, model: str = None, use_rate_limit: bool = False) -> str:
    """Send prompt to Cohere API with V2 and V1 fallback.

    Args:
        prompt: User message sent to the model.
        api_key: Cohere API key; when falsy the clients are built without one.
        model: Model name; ``"command-r-plus-08-2024"`` is used when falsy.
        use_rate_limit: When true the request goes through ``apply_rate_limit``.

    Returns:
        The reply text from the V2 client, or from the V1 client if the V2
        attempt raised, or a string starting with ``"Error with Cohere API"``.
    """
    def _send():
        try:
            import cohere

            # The V2 attempt: any failure is logged and the V1 client is tried next.
            try:
                if api_key:
                    v2_client = cohere.ClientV2(api_key)
                else:
                    v2_client = cohere.ClientV2()
                v2_reply = v2_client.chat(
                    model=model or "command-r-plus-08-2024",
                    messages=[{"role": "user", "content": prompt}],
                    temperature=0.7,
                )
                return v2_reply.message.content[0].text
            except Exception as v2_error:
                logging.warning(f"Cohere V2 failed, trying V1: {v2_error}")

            # The V1 fallback takes the prompt as a single ``message`` string.
            if api_key:
                v1_client = cohere.Client(api_key)
            else:
                v1_client = cohere.Client()
            v1_reply = v1_client.chat(
                message=prompt,
                model=model or "command-r-plus-08-2024",
                temperature=0.7,
                max_tokens=500,
            )
            return v1_reply.text

        except Exception as e:
            logging.error(f"Cohere API error: {e}")
            return f"Error with Cohere API: {str(e)}"

    if use_rate_limit:
        return apply_rate_limit(_send, 16)
    return _send()
704
+
705
def send_to_glhf(prompt: str, use_hf_model: bool, model_name: str, custom_model: str,
                 api_key: str, use_rate_limit: bool = False) -> str:
    """Send prompt to GLHF API with model selection.

    The request is always streamed and the received text pieces are joined.

    NOTE(review): an earlier ``send_to_glhf`` exists in this file; being the
    later definition, this is the one in effect once the module is loaded.

    Args:
        prompt: User message sent to the model.
        use_hf_model: When true ``model_name`` is used, otherwise ``custom_model``.
        model_name: Model ID used when ``use_hf_model`` is true.
        custom_model: Model ID used when ``use_hf_model`` is false.
        api_key: GLHF API key passed to the OpenAI-compatible client.
        use_rate_limit: When true the request goes through ``apply_rate_limit``.

    Returns:
        The model's reply text (possibly partial if the stream broke after
        some text arrived), or a string starting with ``"Error"``.
    """
    def _send():
        # Refuse to build an "hf:None" / "hf:" model ID from a missing selection.
        chosen = ((model_name if use_hf_model else custom_model) or "").strip()
        if not chosen:
            return "Error: GLHF model ID required"

        response_text = []
        try:
            import openai
            client = openai.OpenAI(
                api_key=api_key,
                base_url="https://glhf.chat/api/openai/v1",
            )

            # For GLHF, always use streaming for reliability
            completion = client.chat.completions.create(
                stream=True,
                model=f"hf:{chosen}",
                messages=[
                    {"role": "system", "content": "You are a helpful assistant."},
                    {"role": "user", "content": prompt}
                ],
            )

            for chunk in completion:
                # A chunk may carry no choices at all; indexing it would raise
                # IndexError and throw away everything received so far.
                if not chunk.choices:
                    continue
                piece = chunk.choices[0].delta.content
                if piece is not None:
                    response_text.append(piece)

            return "".join(response_text)

        except Exception as e:
            if response_text:
                # Keep what already arrived, matching the earlier definition's handling.
                logging.warning(f"Streaming interrupted but got partial response: {e}")
                return "".join(response_text)
            logging.error(f"GLHF API error: {e}")
            return f"Error with GLHF API: {str(e)}"

    return apply_rate_limit(_send, 384) if use_rate_limit else _send()
740
 
741
  def estimate_tokens(text: str) -> int:
742
  """Rough token estimation: ~4 characters per token on average"""
 
1115
  first_model = list(ctx_size.keys())[0]
1116
  ctx_size = ctx_size[first_model]
1117
 
 
1118
  if choice == "OpenAI ChatGPT":
1119
  model_choices = list(MODEL_CONTEXT_SIZES["OpenAI ChatGPT"].keys())
1120
  return [
1121
  gr.update(visible=False), # hf_options
1122
  gr.update(visible=False), # groq_options
1123
  gr.update(visible=True), # openai_options
1124
+ gr.update(visible=False), # cohere_options
1125
+ gr.update(visible=False), # glhf_options
1126
  gr.update(value=ctx_size), # context_size
1127
  gr.Dropdown(choices=model_choices, value=first_model) # openai_model
1128
  ]
 
1132
  gr.update(visible=True), # hf_options
1133
  gr.update(visible=False), # groq_options
1134
  gr.update(visible=False), # openai_options
1135
+ gr.update(visible=False), # cohere_options
1136
+ gr.update(visible=False), # glhf_options
1137
  gr.update(value=ctx_size), # context_size
1138
+ gr.Dropdown(choices=model_choices, value="Mixtral 7B") # Update default value
1139
  ]
1140
  elif choice == "Groq API":
1141
  model_choices = list(model_registry.groq_models.keys())
 
1143
  gr.update(visible=False), # hf_options
1144
  gr.update(visible=True), # groq_options
1145
  gr.update(visible=False), # openai_options
1146
+ gr.update(visible=False), # cohere_options
1147
+ gr.update(visible=False), # glhf_options
1148
+ gr.update(value=ctx_size), # context_size
1149
+ gr.Dropdown(choices=model_choices, value=model_choices[0] if model_choices else None)
1150
+ ]
1151
+ elif choice == "Cohere API":
1152
+ return [
1153
+ gr.update(visible=False), # hf_options
1154
+ gr.update(visible=False), # groq_options
1155
+ gr.update(visible=False), # openai_options
1156
+ gr.update(visible=True), # cohere_options
1157
+ gr.update(visible=False), # glhf_options
1158
+ gr.update(value=ctx_size), # context_size
1159
+ gr.Dropdown(choices=[]) # not used
1160
+ ]
1161
+ elif choice == "GLHF API":
1162
+ return [
1163
+ gr.update(visible=False), # hf_options
1164
+ gr.update(visible=False), # groq_options
1165
+ gr.update(visible=False), # openai_options
1166
+ gr.update(visible=False), # cohere_options
1167
+ gr.update(visible=True), # glhf_options
1168
  gr.update(value=ctx_size), # context_size
1169
+ gr.Dropdown(choices=[]) # not used
1170
  ]
1171
 
1172
  # Default return for "Clipboard only" or other options
 
1174
  gr.update(visible=False), # hf_options
1175
  gr.update(visible=False), # groq_options
1176
  gr.update(visible=False), # openai_options
1177
+ gr.update(visible=False), # cohere_options
1178
+ gr.update(visible=False), # glhf_options
1179
  gr.update(value=4096), # context_size
1180
+ gr.Dropdown(choices=[]) # not used
1181
  ]
1182
 
1183
  # PDF Processing Handlers
 
1289
  outputs=[progress_status, generated_prompt, download_snippet] # Connect download_snippet
1290
  )
1291
 
1292
+ # Model selection
1293
  # Model selection
1294
  model_choice.change(
1295
  handle_model_selection,
 
1298
  hf_options,
1299
  groq_options,
1300
  openai_options,
1301
+ cohere_options,
1302
+ glhf_options,
1303
  context_size,
1304
  openai_model
1305
  ]