Update app.py
app.py
CHANGED
@@ -1,161 +1,124 @@
 from huggingface_hub import InferenceClient
 import os
+import gradio as gr
+import random
+import time
 
 # HF Inference Endpoints parameter
-endpoint_url = "https://
+endpoint_url = "https://qrh4fv8e7x3fw9w3.us-east-1.aws.endpoints.huggingface.cloud" #\
+'''
+-X POST \
+-d '{"inputs":"My name is Teven and I am"}' \
+-H "Authorization: Bearer <hf_token>" \
+-H "Content-Type: application/json""
+'''
 
 hf_token = os.getenv("TOKEN_HF")
 
 # Streaming Client
 client = InferenceClient(endpoint_url, token=hf_token)
 
-# generation parameter
-gen_kwargs = dict(
-    max_new_tokens=512,
-    top_k=30,
-    top_p=0.9,
-    temperature=0.2,
-    repetition_penalty=1.02,
-    stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
-)
-# prompt
-prompt = "What can you do in Nuremberg, Germany? Give me 3 Tips"
-
-stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
-
-# yield each generated token
-for r in stream:
-    # skip special tokens
-    if r.token.special:
-        continue
-    # stop if we encounter a stop sequence
-    if r.token.text in gen_kwargs["stop_sequences"]:
-        break
-    # yield the generated token
-    print(r.token.text, end = "")
-    # yield r.token.text
 
 
-
-#
-
-with open("custom.css", "r", encoding="utf-8") as f:
-    customCSS = f.read()
-
-with gr.Blocks(theme=small_and_beautiful_theme) as demo:
-    history = gr.State([])
-    user_question = gr.State("")
-    gr.Markdown("KIs am LI - wähle aus, was du bzgl. KI-Bots ausprobieren möchtest!")
-    with gr.Tabs():
-        with gr.TabItem("LI-Chat"):
-            with gr.Row():
-                gr.HTML(title)
-                status_display = gr.Markdown("Erfolg", elem_id="status_display")
-            gr.Markdown(description_top)
-            with gr.Row(scale=1).style(equal_height=True):
-                with gr.Column(scale=5):
-                    with gr.Row(scale=1):
-                        chatbotGr = gr.Chatbot(elem_id="LI_chatbot").style(height="100%")
-                    with gr.Row(scale=1):
-                        with gr.Column(scale=12):
-                            user_input = gr.Textbox(
-                                show_label=False, placeholder="Gib deinen Text / Frage ein."
-                            ).style(container=False)
-                        with gr.Column(min_width=100, scale=1):
-                            submitBtn = gr.Button("Absenden")
-                        with gr.Column(min_width=100, scale=1):
-                            cancelBtn = gr.Button("Stoppen")
-                    with gr.Row(scale=1):
-                        emptyBtn = gr.Button(
-                            "🧹 Neuer Chat",
-                        )
-                with gr.Column():
-                    with gr.Column(min_width=50, scale=1):
-                        with gr.Tab(label="Nur zum Testen:"):
-                            gr.Markdown("# Parameter")
-                            top_p = gr.Slider(
-                                minimum=-0,
-                                maximum=1.0,
-                                value=0.95,
-                                step=0.05,
-                                interactive=True,
-                                label="Top-p",
-                            )
-                            temperature = gr.Slider(
-                                minimum=0.1,
-                                maximum=2.0,
-                                value=1,
-                                step=0.1,
-                                interactive=True,
-                                label="Temperature",
-                            )
-                            max_length_tokens = gr.Slider(
-                                minimum=0,
-                                maximum=512,
-                                value=512,
-                                step=8,
-                                interactive=True,
-                                label="Max Generation Tokens",
-                            )
-                            max_context_length_tokens = gr.Slider(
-                                minimum=0,
-                                maximum=4096,
-                                value=2048,
-                                step=128,
-                                interactive=True,
-                                label="Max History Tokens",
-                            )
-            gr.Markdown(description)
-
-        with gr.TabItem("Übersetzungen"):
-            with gr.Row():
-                gr.Textbox(
-                    show_label=False, placeholder="Ist noch in Arbeit..."
-                ).style(container=False)
-        with gr.TabItem("Code-Generierungen"):
-            with gr.Row():
-                gr.Textbox(
-                    show_label=False, placeholder="Ist noch in Arbeit..."
-                ).style(container=False)
-
-    predict_args = dict(
-        fn=predict,
-        inputs=[
-            user_question,
+########################################################################
+# use the chat AI to generate text...
+def predict(text,
             chatbotGr,
             history,
             top_p,
             temperature,
             max_length_tokens,
-            max_context_length_tokens,
-
-
-
+            max_context_length_tokens,):
+    if text=="":
+        yield chatbotGr,history,"Empty context."
+        return
+    try:
+        client
+    except:
+        yield [[text,"No Model Found"]],[],"No Endpoint Found"
+        return
+
+    # generation parameter
+    gen_kwargs = dict(
+        max_new_tokens=max_length_tokens,
+        top_k=30,
+        top_p=top_p,
+        temperature=temperature,
+        repetition_penalty=1.02,
+        stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
     )
+    # prompt
+    prompt = generate_prompt_with_history(text,history,tokenizer,max_length=max_context_length_tokens)
+
+
+
+
+
+
+
+
+
+
 
-# new chat
-reset_args = dict(
-    #fn=reset_chat, inputs=[], outputs=[user_input, status_display]
-    fn=reset_textbox, inputs=[], outputs=[user_input, status_display]
-)
-
-# Chatbot
-transfer_input_args = dict(
-    fn=transfer_input, inputs=[user_input], outputs=[user_question, user_input, submitBtn], show_progress=True
-)
 
-# listeners for start click on the button or Return
-predict_event1 = user_input.submit(**transfer_input_args).then(**predict_args)
-predict_event2 = submitBtn.click(**transfer_input_args).then(**predict_args)
 
-# listener for reset...
-emptyBtn.click(
-    reset_state,
-    outputs=[chatbotGr, history, status_display],
-    show_progress=True,
-)
-emptyBtn.click(**reset_args)
 
-
-
-
+
+
+
+
+
+#######################################################################
+# display with Gradio
+
+with open("custom.css", "r", encoding="utf-8") as f:
+    customCSS = f.read()
+
+with gr.Blocks() as demo:
+    chatbot = gr.Chatbot()
+    msg = gr.Textbox()
+    clear = gr.Button("Clear")
+
+    def user(user_message, history):
+        return "", history + [[user_message, None]]
+
+    def bot(history):
+        # generation parameter
+        gen_kwargs = dict(
+            max_new_tokens=512,
+            top_k=30,
+            top_p=0.9,
+            temperature=0.2,
+            repetition_penalty=1.02,
+            stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
+        )
+        prompt = generate_prompt_with_history(text,history,max_length=max_context_length_tokens)
+        stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
+
+        history[-1][1] = ""
+        for character in stream:
+            history[-1][1] += character
+            time.sleep(0.05)
+            yield history
+
+        '''
+        # yield each generated token
+        for r in stream:
+            # skip special tokens
+            if r.token.special:
+                continue
+            # stop if we encounter a stop sequence
+            if r.token.text in gen_kwargs["stop_sequences"]:
+                break
+            # yield the generated token
+            #print(r.token.text, end = "")
+            yield r.token.text
+        '''
+
+
+    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+        bot, chatbot, chatbot
+    )
+    clear.click(lambda: None, None, chatbot, queue=False)
+
+demo.queue()
+demo.launch()
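Note: the triple-quoted block after endpoint_url preserves the tail of a curl command (with a stray trailing quote after application/json). For reference, a sketch of the same request as a direct HTTP call, assuming the requests package is available; the app itself talks to the endpoint through InferenceClient instead:

import os
import requests

endpoint_url = "https://qrh4fv8e7x3fw9w3.us-east-1.aws.endpoints.huggingface.cloud"

# direct POST against the Inference Endpoint, equivalent to the curl remnant above
response = requests.post(
    endpoint_url,
    headers={
        "Authorization": f"Bearer {os.getenv('TOKEN_HF')}",
        "Content-Type": "application/json",
    },
    json={"inputs": "My name is Teven and I am"},
)
print(response.json())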
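Note on the new bot(): with stream=True and details=True, client.text_generation yields stream-response objects (each carrying r.token.text and r.token.special), not single characters, so history[-1][1] += character concatenates the string form of whole response objects. bot() also reads text and max_context_length_tokens, which are not defined in its scope, and generate_prompt_with_history (plus tokenizer, in predict) never appears in this file; predict itself is defined but never wired to any Gradio event. A minimal self-contained sketch of bot() with those issues addressed; the inline prompt builder is an assumption standing in for the missing generate_prompt_with_history helper:

def bot(history):
    # assumption: build the prompt inline, standing in for the undefined
    # generate_prompt_with_history(...) helper
    prompt = ""
    for user_msg, bot_msg in history:
        prompt += f"User: {user_msg}\nAssistant: {bot_msg or ''}\n"

    gen_kwargs = dict(
        max_new_tokens=512,
        top_k=30,
        top_p=0.9,
        temperature=0.2,
        repetition_penalty=1.02,
        stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
    )
    stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)

    history[-1][1] = ""
    for r in stream:                    # r is a stream response, not a character
        if r.token.special:             # skip control tokens such as </s>
            continue
        if r.token.text in gen_kwargs["stop_sequences"]:
            break                       # stop sequence reached
        history[-1][1] += r.token.text  # append only the token text
        yield history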
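The user/bot pair chained with .then() is the standard Gradio pattern for streamed chat: the first callback appends the user turn to the history and clears the textbox immediately, then the generator keeps rewriting the last history entry and yielding it, which is why demo.queue() is required. A stripped-down, runnable illustration of just that mechanism, with a fake character stream standing in for the endpoint call:

import time
import gradio as gr

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        # echo the user turn right away; the bot reply is filled in later
        return "", history + [[user_message, None]]

    def bot(history):
        # stand-in for the streamed endpoint response
        for ch in "This reply streams in one character at a time.":
            history[-1][1] = (history[-1][1] or "") + ch
            time.sleep(0.02)
            yield history

    # queue=False keeps the echo instant; .then() starts the streaming generator
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()   # generator callbacks require the queue
demo.launch()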