from huggingface_hub import InferenceClient
import os
import gradio as gr
import random
import time
# HF Inference Endpoints parameter
endpoint_url = "https://qrh4fv8e7x3fw9w3.us-east-1.aws.endpoints.huggingface.cloud"
'''
Equivalent raw call against the endpoint:
curl https://qrh4fv8e7x3fw9w3.us-east-1.aws.endpoints.huggingface.cloud \
    -X POST \
    -d '{"inputs":"My name is Teven and I am"}' \
    -H "Authorization: Bearer <hf_token>" \
    -H "Content-Type: application/json"
'''
hf_token = os.getenv("TOKEN_HF")

# Streaming Client
client = InferenceClient(endpoint_url, token=hf_token)
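
# NOTE: generate_prompt_with_history() is called below but never defined in
# this file. The helper here is a minimal sketch under that assumption: it
# folds the chat history into a flat "User:/Assistant:" prompt and drops the
# oldest turns once a rough, character-based length budget is exceeded; the
# original Space presumably used a tokenizer-aware template instead.
def generate_prompt_with_history(text, history, tokenizer=None, max_length=2048):
    # collect turns newest-first so that truncation removes the oldest context
    turns = [f"User: {text}\nAssistant:"]
    for user_msg, bot_msg in reversed(history):
        if bot_msg is None:
            continue  # skip the in-flight turn; `text` already carries it
        turn = f"User: {user_msg}\nAssistant: {bot_msg}\n"
        if sum(len(t) for t in turns) + len(turn) > max_length:
            break
        turns.append(turn)
    return "".join(reversed(turns))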
########################################################################
# Use the chat AI to generate text...
def predict(text,
            chatbotGr,
            history,
            top_p,
            temperature,
            max_length_tokens,
            max_context_length_tokens):
    if text == "":
        yield chatbotGr, history, "Empty context."
        return

    # make sure the inference client was created successfully
    try:
        client
    except NameError:
        yield [[text, "No Model Found"]], [], "No Endpoint Found"
        return
    # generation parameters
    gen_kwargs = dict(
        max_new_tokens=max_length_tokens,
        top_k=30,
        top_p=top_p,
        temperature=temperature,
        repetition_penalty=1.02,
        stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
    )
    # prompt (the original also passed an undefined `tokenizer` positionally; dropped here)
    prompt = generate_prompt_with_history(text, history, max_length=max_context_length_tokens)
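    # The original file ended predict() here without ever calling the endpoint.
    # The streaming loop below is a sketch that mirrors bot() so the function
    # actually yields output; the exact status strings are assumptions.
    chatbotGr = chatbotGr + [[text, ""]]
    history = history + [[text, None]]
    for r in client.text_generation(prompt, stream=True, details=True, **gen_kwargs):
        if r.token.special:
            continue
        chatbotGr[-1][1] += r.token.text
        yield chatbotGr, history, "Generating..."
    history[-1][1] = chatbotGr[-1][1]
    yield chatbotGr, history, "Generation finished."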
#######################################################################
# UI layout with Gradio
with open("custom.css", "r", encoding="utf-8") as f:
    customCSS = f.read()

# pass the CSS to the Blocks layout (the original read it but never used it)
with gr.Blocks(css=customCSS) as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        # append the new user turn and clear the input textbox
        return "", history + [[user_message, None]]
    def bot(history):
        # generation parameters
        gen_kwargs = dict(
            max_new_tokens=512,
            top_k=30,
            top_p=0.9,
            temperature=0.2,
            repetition_penalty=1.02,
            stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
        )
        # The latest user message is the last history entry; the original line
        # referenced the undefined names `text` and `max_context_length_tokens`,
        # so a literal length budget is assumed here.
        prompt = generate_prompt_with_history(history[-1][0], history, max_length=2048)
        stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
        history[-1][1] = ""
        # with details=True the stream yields token objects, not raw characters
        for r in stream:
            if r.token.special:
                continue
            history[-1][1] += r.token.text
            time.sleep(0.05)
            yield history
        '''
        # alternative: yield each generated token as plain text
        for r in stream:
            # skip special tokens
            if r.token.special:
                continue
            # stop if we encounter a stop sequence
            if r.token.text in gen_kwargs["stop_sequences"]:
                break
            # yield the generated token
            yield r.token.text
        '''
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()
demo.launch()