Update app.py
app.py
CHANGED
@@ -108,11 +108,10 @@ MODEL_CONTEXT_SIZES = {
     "GLHF API": {
         "mistralai/Mistral-7B-Instruct-v0.3": 32768,
         "microsoft/phi-3-mini-4k-instruct": 4096,
+        "microsoft/Phi-3.5-mini-instruct": 4096,
         "microsoft/Phi-3-mini-128k-instruct": 131072,  # Added Phi-3 128k
         "HuggingFaceH4/zephyr-7b-beta": 8192,
-        "mistralai/Mistral-7B-Instruct-v0.3": 32768,
         "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
-        "microsoft/Phi-3.5-mini-instruct": 4096,
         "google/gemma-2-2b-it": 2048,
         "microsoft/phi-2": 2048,
         # Add other model contexts here
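Since MODEL_CONTEXT_SIZES is a plain nested dict keyed first by provider and then by model id, a context lookup reduces to two dictionary reads. A minimal sketch of that access pattern (the `get_context_size` helper below is illustrative, not part of app.py):

```python
# Illustrative only: get_context_size is not defined in app.py.
SIZES = {
    "GLHF API": {
        "mistralai/Mistral-7B-Instruct-v0.3": 32768,
        "microsoft/Phi-3.5-mini-instruct": 4096,
    },
}

def get_context_size(provider: str, model_id: str, default: int = 4096) -> int:
    """Return the known context window for a model, with a safe default."""
    return SIZES.get(provider, {}).get(model_id, default)

assert get_context_size("GLHF API", "mistralai/Mistral-7B-Instruct-v0.3") == 32768
assert get_context_size("GLHF API", "unknown/model") == 4096
```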
@@ -225,6 +224,12 @@ class ModelRegistry:
         """Refresh the list of available Groq models"""
         self.groq_models = self._fetch_groq_models()
         return self.groq_models
+
+def apply_rate_limit(func, calls_per_min, *args, **kwargs):
+    """Apply rate limiting only when needed."""
+    rate_decorator = RateLimit(calls_per_min)
+    wrapped_func = rate_decorator(func)
+    return wrapped_func(*args, **kwargs)
 
 class PDFProcessor:
     """Handles PDF conversion to text and markdown using different methods"""
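The new `apply_rate_limit` helper assumes a `RateLimit` decorator class defined elsewhere in app.py; its body is not part of this diff. One plausible shape for it, under the assumption that it enforces a sliding one-minute window, is sketched below:

```python
import time
import threading
import functools
from collections import deque

class RateLimit:
    """Sliding-window limiter sketch. The real RateLimit in app.py is not
    shown in this diff; this is one plausible implementation, not the file's."""

    def __init__(self, calls_per_min: int):
        self.calls_per_min = calls_per_min
        self.calls = deque()          # timestamps of recent calls
        self.lock = threading.Lock()

    def __call__(self, func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            with self.lock:
                now = time.time()
                # Drop timestamps that have left the 60-second window.
                while self.calls and now - self.calls[0] > 60:
                    self.calls.popleft()
                if len(self.calls) >= self.calls_per_min:
                    # Sleep until the oldest call ages out of the window.
                    time.sleep(max(0.0, 60 - (now - self.calls[0])))
                    self.calls.popleft()
                self.calls.append(time.time())
            return func(*args, **kwargs)
        return wrapper
```

One design point worth noting: `apply_rate_limit` builds a fresh `RateLimit` instance on every invocation, so any window state the decorator keeps is per-call. If the real class tracks a shared window, the limit only binds across calls when one long-lived instance is reused.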
@@ -382,7 +387,7 @@ def send_to_model(prompt, model_selection, hf_model_choice, hf_custom_model, hf_
                   groq_model_choice, groq_api_key, openai_api_key, openai_model_choice,
                   cohere_api_key=None, cohere_model=None, glhf_api_key=None, glhf_model=None,
                   glhf_custom_model=None):
-    """
+    """Primary wrapper for model interactions with error handling."""
 
     logging.info("send to model starting...")
 
@@ -393,29 +398,54 @@ def send_to_model(prompt, model_selection, hf_model_choice, hf_custom_model, hf_
     logging.info("sending to model preparation.")
 
     # Basic input validation
-    valid_selections = ["Clipboard only", "HuggingFace Inference", "Groq API",
+    valid_selections = ["Clipboard only", "HuggingFace Inference", "Groq API",
+                        "OpenAI ChatGPT", "Cohere API", "GLHF API"]
     if model_selection not in valid_selections:
         return "Error: Invalid model selection", None
+
+    # Check environment API keys
+    env_api_keys = {
+        "GROQ_API_KEY": os.getenv('GROQ_API_KEY'),
+        "OPENAI_API_KEY": os.getenv('OPENAI_API_KEY'),
+        "COHERE_API_KEY": os.getenv('COHERE_API_KEY'),
+        "GLHF_API_KEY": os.getenv('GLHF_API_KEY')
+    }
+
+    for key_name, key_value in env_api_keys.items():
+        if not key_value:
+            logging.warning(f"No {key_name} found in environment")
 
-    # Model-specific validation
+    # Model-specific validation - check only required keys
     if model_selection == "Groq API" and not groq_api_key:
+        if env_api_keys["GROQ_API_KEY"]:
+            groq_api_key = env_api_keys["GROQ_API_KEY"]
+        else:
+            return "Error: Groq API key required", None
+
     elif model_selection == "OpenAI ChatGPT" and not openai_api_key:
+        if env_api_keys["OPENAI_API_KEY"]:
+            openai_api_key = env_api_keys["OPENAI_API_KEY"]
+        else:
+            return "Error: OpenAI API key required", None
+
+    elif model_selection == "GLHF API" and not glhf_api_key:
+        if env_api_keys["GLHF_API_KEY"]:
+            glhf_api_key = env_api_keys["GLHF_API_KEY"]
         else:
-            return "Error: Custom model ID required", None
-        model_id = glhf_custom_model.strip()
-        summary = send_to_glhf(prompt, glhf_model == "Use HuggingFace Model", model_id, glhf_custom_model, glhf_api_key)
+            return "Error: GLHF API key required", None
 
+    # Try implementation
     try:
         logging.info("calling send_to_model_impl.")
+
+        # Use rate limits only with environment API keys
+        use_rate_limits = {
+            "Groq API": groq_api_key == env_api_keys["GROQ_API_KEY"],
+            "OpenAI ChatGPT": openai_api_key == env_api_keys["OPENAI_API_KEY"],
+            "Cohere API": cohere_api_key == env_api_keys["COHERE_API_KEY"],
+            "GLHF API": glhf_api_key == env_api_keys["GLHF_API_KEY"]
+        }.get(model_selection, False)
+
         summary, download_file = send_to_model_impl(
             prompt=prompt.strip(),
             model_selection=model_selection,
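The net effect of the new validation: an explicitly supplied key always wins, the environment key is only a fallback, and rate limiting is switched on exactly when the shared environment key ends up being used. A hypothetical invocation (the model id and key values are placeholders, and this assumes a GROQ_API_KEY is set in the Space's environment):

```python
import os

# Placeholder key for illustration only.
os.environ.setdefault("GROQ_API_KEY", "gsk-example")

summary, download_file = send_to_model(
    prompt="Summarize the extracted PDF text ...",
    model_selection="Groq API",
    hf_model_choice=None,
    hf_custom_model=None,
    hf_api_key=None,
    groq_model_choice="llama-3.1-8b-instant",  # placeholder model id
    groq_api_key=None,                         # empty: falls back to the env key
    openai_api_key=None,
    openai_model_choice=None,
)
print(summary)
```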
@@ -426,13 +456,13 @@ def send_to_model(prompt, model_selection, hf_model_choice, hf_custom_model, hf_
             groq_api_key=groq_api_key,
             openai_api_key=openai_api_key,
             openai_model_choice=openai_model_choice,
-            cohere_api_key=cohere_api_key,
+            cohere_api_key=cohere_api_key or env_api_keys["COHERE_API_KEY"],
             cohere_model=cohere_model,
             glhf_api_key=glhf_api_key,
             glhf_model=glhf_model,
-            glhf_custom_model=glhf_custom_model
+            glhf_custom_model=glhf_custom_model,
+            use_rate_limits=use_rate_limits
         )
-        logging.info("summary received:", summary)
 
         if summary is None or not isinstance(summary, str):
             return "Error: No response from model", None
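One detail worth noticing in the call above: `cohere_api_key or env_api_keys["COHERE_API_KEY"]` falls back whenever the user-supplied value is falsy, so an empty string from a cleared textbox behaves the same as a missing key:

```python
user_key = ""                     # e.g. an empty Gradio textbox
env_key = "co-env-key-example"    # hypothetical environment value
effective = user_key or env_key
print(effective)                  # co-env-key-example
```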
@@ -452,10 +482,13 @@ def send_to_model(prompt, model_selection, hf_model_choice, hf_custom_model, hf_
         error_msg = "Unknown error occurred"
         logging.error(f"Error in send_to_model: {error_msg}")
         return f"Error: {error_msg}", None
+    finally:
+        logging.info("send to model completed.")
 
 def send_to_model_impl(prompt, model_selection, hf_model_choice, hf_custom_model, hf_api_key,
                        groq_model_choice, groq_api_key, openai_api_key, openai_model_choice,
-                       cohere_api_key=None, cohere_model=None, glhf_api_key=None
+                       cohere_api_key=None, cohere_model=None, glhf_api_key=None, glhf_model=None,
+                       glhf_custom_model=None, use_rate_limits=False):
     """Implementation of model sending with all providers."""
     logging.info("send to model impl commencing...")
 
@@ -463,33 +496,50 @@ def send_to_model_impl(prompt, model_selection, hf_model_choice, hf_custom_model
     if model_selection == "Clipboard only":
         return "Text copied to clipboard. Use paste for processing.", None
 
+    # Get the summary based on model selection
     if model_selection == "HuggingFace Inference":
         model_id = hf_custom_model if hf_model_choice == "Custom Model" else model_registry.hf_models[hf_model_choice]
+        # Always try without API key first
         summary = send_to_hf_inference(prompt, model_id)
-        if summary.startswith("Error") and hf_api_key:
+        if summary.startswith("Error: This model requires authentication") and hf_api_key:
+            # Only try with API key if the model specifically requires it
+            summary = send_to_hf_inference(prompt, model_id, hf_api_key, use_rate_limits)
 
     elif model_selection == "Groq API":
+        if not groq_api_key:
+            return "Error: Groq API key required", None
+        summary = send_to_groq(prompt, groq_model_choice, groq_api_key, use_rate_limits)
 
     elif model_selection == "OpenAI ChatGPT":
+        if not openai_api_key:
+            return "Error: OpenAI API key required", None
+        summary = send_to_openai(prompt, openai_api_key, model=openai_model_choice,
+                                 use_rate_limit=use_rate_limits)
 
     elif model_selection == "Cohere API":
-        summary = send_to_cohere(prompt, cohere_api_key, cohere_model)
+        summary = send_to_cohere(prompt, cohere_api_key, cohere_model, use_rate_limits)
 
     elif model_selection == "GLHF API":
         if not glhf_api_key:
             return "Error: GLHF API key required", None
+        summary = send_to_glhf(
+            prompt,
+            glhf_model == "Use HuggingFace Model",
+            hf_custom_model if hf_model_choice == "Custom Model" else model_registry.hf_models[hf_model_choice],
+            glhf_custom_model,
+            glhf_api_key,
+            use_rate_limits
+        )
 
     else:
         return "Error: Invalid model selection", None
 
     # Validate response
+    if not summary:
+        return "Error: No response from model", None
+
+    if not isinstance(summary, str):
+        return "Error: Invalid response type from model", None
 
     # Create download file for valid responses
     if not summary.startswith("Error"):
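The dispatch above also calls `send_to_groq`, whose body falls outside this diff. Following the closure-plus-`apply_rate_limit` pattern the other providers use, it plausibly looks like the sketch below (the Groq client usage and the 16 calls/min figure are assumptions, and `apply_rate_limit` is the helper added earlier in this commit):

```python
import logging

def send_to_groq(prompt: str, model: str, api_key: str, use_rate_limit: bool = False) -> str:
    """Illustrative reconstruction; the real send_to_groq in app.py is not shown in this diff."""
    def _send():
        try:
            from groq import Groq  # official Groq SDK
            client = Groq(api_key=api_key)
            response = client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": prompt}],
                temperature=0.7,
                max_tokens=500,
            )
            return response.choices[0].message.content
        except Exception as e:
            logging.error(f"Groq API error: {e}")
            return f"Error with Groq API: {str(e)}"

    # 16 calls/min mirrors the HF and Cohere figures elsewhere in the file (assumption).
    return apply_rate_limit(_send, 16) if use_rate_limit else _send()
```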
@@ -506,34 +556,16 @@ def send_to_model_impl(prompt, model_selection, hf_model_choice, hf_custom_model
         logging.error(f"Error in send_to_model_impl: {error_msg}")
         return f"Error: {error_msg}", None
 
-def send_to_hf_inference(prompt: str, model_name: str, api_key: str = None) -> str:
-    """Send prompt to HuggingFace Inference API
+def send_to_hf_inference(prompt: str, model_name: str, api_key: str = None, use_rate_limit: bool = False) -> str:
+    """Send prompt to HuggingFace Inference API."""
+    def _send():
         # Check token limits first
         is_within_limits, error_msg = check_token_limits(prompt, model_name)
         if not is_within_limits:
             return error_msg
 
-        client = InferenceClient(token=api_key) if api_key else InferenceClient()
-        response = client.text_generation(
-            prompt,
-            model=model_name,
-            max_new_tokens=500,
-            temperature=0.7,
-            top_p=0.95,
-            repetition_penalty=1.1
-        )
-        return str(response)
-    except Exception as e:
-        logging.error(f"HuggingFace inference error: {e}")
-        return f"Error with HuggingFace inference: {str(e)}"  # Return error message instead of raising
-
-def send_to_hf_inference_old(prompt: str, model_name: str, api_key: str = None) -> str:
-    """Send prompt to HuggingFace Inference API with optional authentication."""
-    try:
-        # First try without authentication
         try:
-            client = InferenceClient()
+            client = InferenceClient(token=api_key) if api_key else InferenceClient()
             response = client.text_generation(
                 prompt,
                 model=model_name,
@@ -543,142 +575,168 @@ def send_to_hf_inference_old(prompt: str, model_name: str, api_key: str = None)
                 repetition_penalty=1.1
             )
             return str(response)
-            return "Error: This model requires authentication. Please enter your HuggingFace API key."
+        except Exception as e:
+            logging.error(f"HuggingFace inference error: {e}")
+            return f"Error with HuggingFace inference: {str(e)}"
+
+    return apply_rate_limit(_send, 16) if use_rate_limit else _send()
+
+def send_to_glhf(prompt: str, use_hf_model: bool, model_name: str, custom_model: str,
+                 api_key: str, use_rate_limit: bool = False) -> str:
+    """Send prompt to GLHF API with model selection and proper stream handling."""
+    def _send():
+        try:
+            import openai
+            client = openai.OpenAI(
+                api_key=api_key,
+                base_url="https://glhf.chat/api/openai/v1",
+            )
+
+            model_id = f"hf:{model_name if use_hf_model else custom_model}"
+
+            try:
+                # First try without streaming
+                completion = client.chat.completions.create(
+                    stream=False,
+                    model=model_id,
+                    messages=[
+                        {"role": "system", "content": "You are a helpful assistant."},
+                        {"role": "user", "content": prompt}
+                    ],
+                )
+                return completion.choices[0].message.content
+            except Exception as non_stream_error:
+                logging.warning(f"Non-streaming GLHF failed, trying streaming: {non_stream_error}")
+
+            # Fallback to streaming if needed
+            completion = client.chat.completions.create(
+                stream=True,
+                model=model_id,
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": prompt}
+                ],
+            )
+
+            response_text = []
+            try:
+                for chunk in completion:
+                    if chunk.choices and chunk.choices[0].delta.content is not None:
+                        response_text.append(chunk.choices[0].delta.content)
+            except Exception as stream_error:
+                if response_text:  # If we got partial response, return it
+                    logging.warning(f"Streaming interrupted but got partial response: {stream_error}")
+                    return "".join(response_text)
+                raise  # Re-raise if we got nothing
+
+            return "".join(response_text)
+
+        except Exception as e:
+            logging.error(f"GLHF API error: {e}")
+            return f"Error with GLHF API: {str(e)}"
+
+    return apply_rate_limit(_send, 384) if use_rate_limit else _send()
 
-def send_to_openai(prompt: str, api_key: str, model: str = "gpt-3.5-turbo") -> str:
+def send_to_openai(prompt: str, api_key: str, model: str = "gpt-3.5-turbo", use_rate_limit: bool = False) -> str:
     """Send prompt to OpenAI API."""
-    try:
-        from openai import OpenAI
-        client = OpenAI(api_key=api_key)
-        response = client.chat.completions.create(
-            model=model,
-            messages=[
-                {"role": "system", "content": "You are a helpful assistant that provides detailed responses."},
-                {"role": "user", "content": prompt}
-            ],
-            temperature=0.7,
-            max_tokens=500,
-            top_p=0.95
-        )
-
-        if response.choices and len(response.choices) > 0:
-            return response.choices[0].message.content
-        else:
-            raise Exception("No response generated")
-
-    except ImportError:
-        raise Exception("Please install the latest version of openai package (pip install --upgrade openai)")
-    except Exception as e:
-        logging.error(f"OpenAI API error: {e}")
-        raise  # Re-raise to be handled by caller
+    def _send():
+        try:
+            from openai import OpenAI
+            client = OpenAI(api_key=api_key)
+            response = client.chat.completions.create(
+                model=model,
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant that provides detailed responses."},
+                    {"role": "user", "content": prompt}
+                ],
+                temperature=0.7,
+                max_tokens=500,
+                top_p=0.95
+            )
+
+            if response.choices and len(response.choices) > 0:
+                return response.choices[0].message.content
+            return "Error: No response generated"
+
+        except ImportError:
+            return "Error: Please install the latest version of openai package"
+        except Exception as e:
+            logging.error(f"OpenAI API error: {e}")
+            return f"Error with OpenAI API: {str(e)}"
+
+    return apply_rate_limit(_send, 3000/60) if use_rate_limit else _send()
 
-@RateLimit(calls_per_min=16)  # 80% of 20 calls/min
-def send_to_cohere(prompt: str, api_key: str = None) -> str:
-    """Send prompt to Cohere API with V2 and V1 fallback."""
+def send_to_cohere(prompt: str, api_key: str = None, model: str = None, use_rate_limit: bool = False) -> str:
+    """Send prompt to Cohere API with V2 and V1 fallback."""
+    def _send():
+        try:
+            import cohere
+            # Try V2 first
+            try:
+                client = cohere.ClientV2(api_key) if api_key else cohere.ClientV2()
+                response = client.chat(
+                    model=model or "command-r-plus-08-2024",
+                    messages=[{
+                        "role": "user",
+                        "content": prompt
+                    }],
+                    temperature=0.7,
+                )
+                return response.message.content[0].text
+            except Exception as v2_error:
+                logging.warning(f"Cohere V2 failed, trying V1: {v2_error}")
+
+            # Fallback to V1
+            client = cohere.Client(api_key) if api_key else cohere.Client()
+            response = client.chat(
+                message=prompt,
+                model=model or "command-r-plus-08-2024",
+                temperature=0.7,
+                max_tokens=500,
+            )
+            return response.text
+
+        except Exception as e:
+            logging.error(f"Cohere API error: {e}")
+            return f"Error with Cohere API: {str(e)}"
+
+    return apply_rate_limit(_send, 16) if use_rate_limit else _send()
+
+def send_to_glhf(prompt: str, use_hf_model: bool, model_name: str, custom_model: str,
+                 api_key: str, use_rate_limit: bool = False) -> str:
     """Send prompt to GLHF API with model selection."""
+    def _send():
+        try:
+            import openai
+            client = openai.OpenAI(
+                api_key=api_key,
+                base_url="https://glhf.chat/api/openai/v1",
+            )
+
+            model_id = f"hf:{model_name if use_hf_model else custom_model}"
+
+            # For GLHF, always use streaming for reliability
+            completion = client.chat.completions.create(
+                stream=True,
+                model=model_id,
+                messages=[
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": prompt}
+                ],
+            )
+
+            response_text = []
+            for chunk in completion:
+                if chunk.choices[0].delta.content is not None:
+                    response_text.append(chunk.choices[0].delta.content)
+
+            return "".join(response_text)
+
+        except Exception as e:
+            logging.error(f"GLHF API error: {e}")
+            return f"Error with GLHF API: {str(e)}"
+
+    return apply_rate_limit(_send, 384) if use_rate_limit else _send()
 
 def estimate_tokens(text: str) -> int:
     """Rough token estimation: ~4 characters per token on average"""
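Two reading notes on this hunk. First, the new file defines `send_to_glhf` twice, once with a non-streaming attempt plus streaming fallback and once streaming-only; since Python binds names at execution time, the second (streaming-only) definition silently replaces the first. Second, `estimate_tokens` appears here only as context; under its own docstring's four-characters-per-token heuristic, a plausible body (an assumption, the real one is outside this diff) is:

```python
def estimate_tokens(text: str) -> int:
    """Rough token estimation: ~4 characters per token on average"""
    # Integer division by 4 approximates tokenizer output for English text.
    return max(1, len(text) // 4)

print(estimate_tokens("It was the best of times, it was the worst of times."))  # 13
```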
@@ -1057,13 +1115,14 @@ with gr.Blocks(css="""
             first_model = list(ctx_size.keys())[0]
             ctx_size = ctx_size[first_model]
 
-        # Prepare dropdown choices based on provider
         if choice == "OpenAI ChatGPT":
             model_choices = list(MODEL_CONTEXT_SIZES["OpenAI ChatGPT"].keys())
             return [
                 gr.update(visible=False),  # hf_options
                 gr.update(visible=False),  # groq_options
                 gr.update(visible=True),   # openai_options
+                gr.update(visible=False),  # cohere_options
+                gr.update(visible=False),  # glhf_options
                 gr.update(value=ctx_size),  # context_size
                 gr.Dropdown(choices=model_choices, value=first_model)  # openai_model
             ]
@@ -1073,8 +1132,10 @@ with gr.Blocks(css="""
                 gr.update(visible=True),   # hf_options
                 gr.update(visible=False),  # groq_options
                 gr.update(visible=False),  # openai_options
+                gr.update(visible=False),  # cohere_options
+                gr.update(visible=False),  # glhf_options
                 gr.update(value=ctx_size),  # context_size
-                gr.Dropdown(choices=model_choices, value="
+                gr.Dropdown(choices=model_choices, value="Mixtral 7B")  # Update default value
             ]
         elif choice == "Groq API":
             model_choices = list(model_registry.groq_models.keys())
@@ -1082,8 +1143,30 @@ with gr.Blocks(css="""
                 gr.update(visible=False),  # hf_options
                 gr.update(visible=True),   # groq_options
                 gr.update(visible=False),  # openai_options
+                gr.update(visible=False),  # cohere_options
+                gr.update(visible=False),  # glhf_options
+                gr.update(value=ctx_size),  # context_size
+                gr.Dropdown(choices=model_choices, value=model_choices[0] if model_choices else None)
+            ]
+        elif choice == "Cohere API":
+            return [
+                gr.update(visible=False),  # hf_options
+                gr.update(visible=False),  # groq_options
+                gr.update(visible=False),  # openai_options
+                gr.update(visible=True),   # cohere_options
+                gr.update(visible=False),  # glhf_options
+                gr.update(value=ctx_size),  # context_size
+                gr.Dropdown(choices=[])  # not used
+            ]
+        elif choice == "GLHF API":
+            return [
+                gr.update(visible=False),  # hf_options
+                gr.update(visible=False),  # groq_options
+                gr.update(visible=False),  # openai_options
+                gr.update(visible=False),  # cohere_options
+                gr.update(visible=True),   # glhf_options
                 gr.update(value=ctx_size),  # context_size
-                gr.Dropdown(choices=
+                gr.Dropdown(choices=[])  # not used
             ]
 
         # Default return for "Clipboard only" or other options
@@ -1091,8 +1174,10 @@ with gr.Blocks(css="""
             gr.update(visible=False),  # hf_options
             gr.update(visible=False),  # groq_options
             gr.update(visible=False),  # openai_options
+            gr.update(visible=False),  # cohere_options
+            gr.update(visible=False),  # glhf_options
             gr.update(value=4096),  # context_size
-            gr.Dropdown(choices=[])  #
+            gr.Dropdown(choices=[])  # not used
         ]
 
     # PDF Processing Handlers
@@ -1204,6 +1289,7 @@ with gr.Blocks(css="""
         outputs=[progress_status, generated_prompt, download_snippet]  # Connect download_snippet
     )
 
+    # Model selection
     # Model selection
     model_choice.change(
         handle_model_selection,
@@ -1212,6 +1298,8 @@ with gr.Blocks(css="""
             hf_options,
             groq_options,
             openai_options,
+            cohere_options,
+            glhf_options,
             context_size,
             openai_model
         ]
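With `cohere_options` and `glhf_options` added, `handle_model_selection` now returns seven updates, and Gradio pairs them positionally with the seven components in `outputs`. A self-contained sketch of that contract (component names mirror app.py, but the layout is simplified and the Radio choices are truncated for brevity):

```python
import gradio as gr

with gr.Blocks() as demo:
    model_choice = gr.Radio(["Clipboard only", "HuggingFace Inference"], label="Model selection")
    hf_options = gr.Group(visible=False)
    groq_options = gr.Group(visible=False)
    openai_options = gr.Group(visible=False)
    cohere_options = gr.Group(visible=False)
    glhf_options = gr.Group(visible=False)
    context_size = gr.Number(value=4096, label="Context size")
    openai_model = gr.Dropdown(choices=[], label="OpenAI model")

    def handle_model_selection(choice):
        # One update per output component, in the same order as `outputs` below.
        return [
            gr.update(visible=choice == "HuggingFace Inference"),  # hf_options
            gr.update(visible=False),                              # groq_options
            gr.update(visible=False),                              # openai_options
            gr.update(visible=False),                              # cohere_options
            gr.update(visible=False),                              # glhf_options
            gr.update(value=4096),                                 # context_size
            gr.Dropdown(choices=[]),                               # openai_model (unused here)
        ]

    model_choice.change(
        handle_model_selection,
        inputs=[model_choice],
        outputs=[hf_options, groq_options, openai_options,
                 cohere_options, glhf_options, context_size, openai_model],
    )
```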