alexkueck committed
Commit 4d596ca · 1 Parent(s): 3e72064

Update app.py

Files changed (1)
  1. app.py +102 -139
app.py CHANGED
@@ -1,161 +1,124 @@
 from huggingface_hub import InferenceClient
 import os

 # HF Inference Endpoints parameter
-endpoint_url = "https://YOUR_ENDPOINT.endpoints.huggingface.cloud"

 hf_token = os.getenv("TOKEN_HF")

 # Streaming Client
 client = InferenceClient(endpoint_url, token=hf_token)

-# generation parameters
-gen_kwargs = dict(
-    max_new_tokens=512,
-    top_k=30,
-    top_p=0.9,
-    temperature=0.2,
-    repetition_penalty=1.02,
-    stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
-)
-# prompt
-prompt = "What can you do in Nuremberg, Germany? Give me 3 Tips"
-
-stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
-
-# yield each generated token
-for r in stream:
-    # skip special tokens
-    if r.token.special:
-        continue
-    # stop if we encounter a stop sequence
-    if r.token.text in gen_kwargs["stop_sequences"]:
-        break
-    # yield the generated token
-    print(r.token.text, end="")
-    # yield r.token.text

-#######################################################################
-# Display with Gradio
-
-with open("custom.css", "r", encoding="utf-8") as f:
-    customCSS = f.read()
-
-with gr.Blocks(theme=small_and_beautiful_theme) as demo:
-    history = gr.State([])
-    user_question = gr.State("")
-    gr.Markdown("KIs am LI - wähle aus, was du bzgl. KI-Bots ausprobieren möchtest!")
-    with gr.Tabs():
-        with gr.TabItem("LI-Chat"):
-            with gr.Row():
-                gr.HTML(title)
-                status_display = gr.Markdown("Erfolg", elem_id="status_display")
-            gr.Markdown(description_top)
-            with gr.Row(scale=1).style(equal_height=True):
-                with gr.Column(scale=5):
-                    with gr.Row(scale=1):
-                        chatbotGr = gr.Chatbot(elem_id="LI_chatbot").style(height="100%")
-                    with gr.Row(scale=1):
-                        with gr.Column(scale=12):
-                            user_input = gr.Textbox(
-                                show_label=False, placeholder="Gib deinen Text / Frage ein."
-                            ).style(container=False)
-                        with gr.Column(min_width=100, scale=1):
-                            submitBtn = gr.Button("Absenden")
-                        with gr.Column(min_width=100, scale=1):
-                            cancelBtn = gr.Button("Stoppen")
-                    with gr.Row(scale=1):
-                        emptyBtn = gr.Button(
-                            "🧹 Neuer Chat",
-                        )
-                with gr.Column():
-                    with gr.Column(min_width=50, scale=1):
-                        with gr.Tab(label="Nur zum Testen:"):
-                            gr.Markdown("# Parameter")
-                            top_p = gr.Slider(
-                                minimum=-0,
-                                maximum=1.0,
-                                value=0.95,
-                                step=0.05,
-                                interactive=True,
-                                label="Top-p",
-                            )
-                            temperature = gr.Slider(
-                                minimum=0.1,
-                                maximum=2.0,
-                                value=1,
-                                step=0.1,
-                                interactive=True,
-                                label="Temperature",
-                            )
-                            max_length_tokens = gr.Slider(
-                                minimum=0,
-                                maximum=512,
-                                value=512,
-                                step=8,
-                                interactive=True,
-                                label="Max Generation Tokens",
-                            )
-                            max_context_length_tokens = gr.Slider(
-                                minimum=0,
-                                maximum=4096,
-                                value=2048,
-                                step=128,
-                                interactive=True,
-                                label="Max History Tokens",
-                            )
-                            gr.Markdown(description)
-
-        with gr.TabItem("Übersetzungen"):
-            with gr.Row():
-                gr.Textbox(
-                    show_label=False, placeholder="Ist noch in Arbeit..."
-                ).style(container=False)
-        with gr.TabItem("Code-Generierungen"):
-            with gr.Row():
-                gr.Textbox(
-                    show_label=False, placeholder="Ist noch in Arbeit..."
-                ).style(container=False)
-
-    predict_args = dict(
-        fn=predict,
-        inputs=[
-            user_question,
             chatbotGr,
             history,
             top_p,
             temperature,
             max_length_tokens,
-            max_context_length_tokens,
-        ],
-        outputs=[chatbotGr, history, status_display],
-        show_progress=True,
     )

-    # new chat
-    reset_args = dict(
-        # fn=reset_chat, inputs=[], outputs=[user_input, status_display]
-        fn=reset_textbox, inputs=[], outputs=[user_input, status_display]
-    )
-
-    # Chatbot
-    transfer_input_args = dict(
-        fn=transfer_input, inputs=[user_input], outputs=[user_question, user_input, submitBtn], show_progress=True
-    )

-    # listeners for submit via button click or Return
-    predict_event1 = user_input.submit(**transfer_input_args).then(**predict_args)
-    predict_event2 = submitBtn.click(**transfer_input_args).then(**predict_args)

-    # listener for reset...
-    emptyBtn.click(
-        reset_state,
-        outputs=[chatbotGr, history, status_display],
-        show_progress=True,
-    )
-    emptyBtn.click(**reset_args)

-demo.title = "LI Chat"
-# demo.queue(concurrency_count=1).launch(share=True)
-demo.queue(concurrency_count=1).launch(debug=True)
 from huggingface_hub import InferenceClient
 import os
+import gradio as gr
+import random
+import time

 # HF Inference Endpoints parameter
+endpoint_url = "https://qrh4fv8e7x3fw9w3.us-east-1.aws.endpoints.huggingface.cloud" #\
+'''
+-X POST \
+-d '{"inputs":"My name is Teven and I am"}' \
+-H "Authorization: Bearer <hf_token>" \
+-H "Content-Type: application/json"
+'''

 hf_token = os.getenv("TOKEN_HF")

 # Streaming Client
 client = InferenceClient(endpoint_url, token=hf_token)

+########################################################################
+# Use the chat AI to generate text...
+def predict(text,
            chatbotGr,
            history,
            top_p,
            temperature,
            max_length_tokens,
+           max_context_length_tokens,):
+    if text == "":
+        yield chatbotGr, history, "Empty context."
+        return
+    try:
+        client
+    except:
+        yield [[text, "No Model Found"]], [], "No Endpoint Found"
+        return
+
+    # generation parameters
+    gen_kwargs = dict(
+        max_new_tokens=max_length_tokens,
+        top_k=30,
+        top_p=top_p,
+        temperature=temperature,
+        repetition_penalty=1.02,
+        stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
     )
+    # prompt
+    prompt = generate_prompt_with_history(text, history, tokenizer, max_length=max_context_length_tokens)
+
+
+#######################################################################
+# Display with Gradio
+
+with open("custom.css", "r", encoding="utf-8") as f:
+    customCSS = f.read()
+
+with gr.Blocks() as demo:
+    chatbot = gr.Chatbot()
+    msg = gr.Textbox()
+    clear = gr.Button("Clear")
+
+    def user(user_message, history):
+        return "", history + [[user_message, None]]
+
+    def bot(history):
+        # generation parameters
+        gen_kwargs = dict(
+            max_new_tokens=512,
+            top_k=30,
+            top_p=0.9,
+            temperature=0.2,
+            repetition_penalty=1.02,
+            stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
+        )
+        prompt = generate_prompt_with_history(text, history, max_length=max_context_length_tokens)
+        stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
+
+        history[-1][1] = ""
+        for character in stream:
+            history[-1][1] += character
+            time.sleep(0.05)
+            yield history
+
+    '''
+    # yield each generated token
+    for r in stream:
+        # skip special tokens
+        if r.token.special:
+            continue
+        # stop if we encounter a stop sequence
+        if r.token.text in gen_kwargs["stop_sequences"]:
+            break
+        # yield the generated token
+        #print(r.token.text, end="")
+        yield r.token.text
+    '''
+
+    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
+        bot, chatbot, chatbot
+    )
+    clear.click(lambda: None, None, chatbot, queue=False)
+
+demo.queue()
+demo.launch()
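
A few notes on the new version. Both `predict` and `bot` call `generate_prompt_with_history`, which is never defined in this file (and `predict` passes a `tokenizer` that is never created); `bot` additionally references `text` and `max_context_length_tokens`, which are not in its scope. A minimal sketch of what such a helper might look like, assuming a plain "User:/Assistant:" chat template and a crude character-based cap instead of real token counting:

def generate_prompt_with_history(text, history, tokenizer=None, max_length=2048):
    # Hypothetical helper: the commit calls this but never defines it.
    # Build a "User:/Assistant:" transcript from the Gradio history pairs.
    lines = []
    for user_msg, bot_msg in history:
        lines.append(f"User: {user_msg}")
        if bot_msg:
            lines.append(f"Assistant: {bot_msg}")
    lines.append(f"User: {text}")
    lines.append("Assistant:")
    prompt = "\n".join(lines)
    # Crude cap: keep the tail; a real version would count tokens with the
    # tokenizer instead of characters.
    return prompt[-max_length:]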
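
Separately, because `bot` streams with `details=True`, each item yielded by `client.text_generation` is a stream-response object with a `.token` field rather than a plain string, so `history[-1][1] += character` would raise a TypeError. The commented-out loop shows the intended token handling; a sketch of `bot` that applies it, still relying on the hypothetical helper above:

def bot(history):
    # generation parameters (same values as in the commit)
    gen_kwargs = dict(
        max_new_tokens=512,
        top_k=30,
        top_p=0.9,
        temperature=0.2,
        repetition_penalty=1.02,
        stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
    )
    # history[-1][0] is the latest user message; earlier pairs are context.
    prompt = generate_prompt_with_history(history[-1][0], history[:-1])
    stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)

    history[-1][1] = ""
    for r in stream:
        if r.token.special:
            continue  # skip special tokens
        if r.token.text in gen_kwargs["stop_sequences"]:
            break     # stop at a stop sequence
        history[-1][1] += r.token.text  # append the token text, not the stream object
        yield history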
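
Finally, the triple-quoted block after `endpoint_url` reads like the tail of a curl example whose first line was commented out by the stray `#\`. For reference, a Python equivalent of that leftover snippet, assuming the same endpoint and the `TOKEN_HF` environment variable (a raw POST, which is what `InferenceClient` wraps):

import os
import requests

# Raw POST equivalent of the leftover curl fragment in the docstring.
response = requests.post(
    "https://qrh4fv8e7x3fw9w3.us-east-1.aws.endpoints.huggingface.cloud",
    json={"inputs": "My name is Teven and I am"},
    headers={
        "Authorization": f"Bearer {os.getenv('TOKEN_HF')}",
        "Content-Type": "application/json",
    },
)
print(response.json())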