Spaces:
Sleeping
Sleeping
modified example, added rate limit warning
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ from datetime import datetime
|
|
8 |
import json
|
9 |
|
10 |
retrieve_results = 10
|
|
|
11 |
|
12 |
generate_kwargs = dict(
|
13 |
temperature = None,
|
@@ -38,10 +39,14 @@ try:
|
|
38 |
except:
|
39 |
pass
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
45 |
|
46 |
|
47 |
def rag_cleaner(inp):
|
@@ -106,7 +111,11 @@ with gr.Blocks(theme = gr.themes.Soft()) as demo:
|
|
106 |
|
107 |
client = InferenceClient(llm_model_picked)
|
108 |
#output = client.text_generation(prompt, **generate_kwargs, stream=False, details=False, return_full_text=False)
|
109 |
-
|
|
|
|
|
|
|
|
|
110 |
#output = output.lstrip(' \n') if output.lstrip().startswith('\n') else output
|
111 |
|
112 |
|
|
|
8 |
import json
|
9 |
|
10 |
retrieve_results = 10
|
11 |
+
show_examples = False
|
12 |
|
13 |
generate_kwargs = dict(
|
14 |
temperature = None,
|
|
|
39 |
except:
|
40 |
pass
|
41 |
|
42 |
+
if show_examples:
|
43 |
+
with open("sample_outputs.json", "r") as f:
|
44 |
+
sample_outputs = json.load(f)
|
45 |
+
output_placeholder = sample_outputs['output_placeholder']
|
46 |
+
md_text_initial = sample_outputs['search_placeholder']
|
47 |
+
else:
|
48 |
+
output_placeholder = None
|
49 |
+
md_text_initial = ''
|
50 |
|
51 |
|
52 |
def rag_cleaner(inp):
|
|
|
111 |
|
112 |
client = InferenceClient(llm_model_picked)
|
113 |
#output = client.text_generation(prompt, **generate_kwargs, stream=False, details=False, return_full_text=False)
|
114 |
+
try:
|
115 |
+
stream = client.text_generation(prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
|
116 |
+
except:
|
117 |
+
gr.Warning("LLM Inference rate limit reached, try again later!")
|
118 |
+
return ""
|
119 |
#output = output.lstrip(' \n') if output.lstrip().startswith('\n') else output
|
120 |
|
121 |
|