from huggingface_hub import InferenceClient
import os
import gradio as gr
import time
# HF Inference Endpoints parameters
endpoint_url = "https://qrh4fv8e7x3fw9w3.us-east-1.aws.endpoints.huggingface.cloud"
# Equivalent curl request against the endpoint:
#   curl <endpoint_url> \
#     -X POST \
#     -d '{"inputs":"My name is Teven and I am"}' \
#     -H "Authorization: Bearer <hf_token>" \
#     -H "Content-Type: application/json"
hf_token = os.getenv("TOKEN_HF")
# Streaming Client
client = InferenceClient(endpoint_url, token=hf_token)
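
# For reference, a minimal sketch of the raw HTTP request from the curl example
# above, using `requests` directly (illustration only; the app itself talks to
# the endpoint through InferenceClient, and this helper name is hypothetical):
def raw_endpoint_request(inputs: str):
    import requests  # local import, only needed for this illustrative helper
    response = requests.post(
        endpoint_url,
        headers={
            "Authorization": f"Bearer {hf_token}",
            "Content-Type": "application/json",
        },
        json={"inputs": inputs},
    )
    return response.json()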
########################################################################
# Use the chat model to generate text...
def predict(text,
            chatbotGr,
            history,
            top_p,
            temperature,
            max_length_tokens,
            max_context_length_tokens):
    if text == "":
        yield chatbotGr, history, "Empty context."
        return
    try:
        client
    except NameError:
        yield [[text, "No Model Found"]], [], "No Endpoint Found"
        return

    # generation parameters
    gen_kwargs = dict(
        max_new_tokens=max_length_tokens,
        top_k=30,
        top_p=top_p,
        temperature=temperature,
        repetition_penalty=1.02,
        stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
    )

    # Build the prompt from the chat history. generate_prompt_with_history and
    # tokenizer are assumed to be defined elsewhere (e.g. an accompanying utils
    # module); they are not declared in this file.
    prompt = generate_prompt_with_history(text, history, tokenizer, max_length=max_context_length_tokens)

    # Stream the answer token by token and yield the growing chat state
    # (completion sketch, mirroring the token loop in bot() below)
    history = history + [[text, ""]]
    for r in client.text_generation(prompt, stream=True, details=True, **gen_kwargs):
        if r.token.special:
            continue
        if r.token.text in gen_kwargs["stop_sequences"]:
            break
        history[-1][1] += r.token.text
        yield chatbotGr + [history[-1]], history, "Generating..."
    yield chatbotGr + [history[-1]], history, "Generate: Success"
#######################################################################
# UI rendering with Gradio
with open("custom.css", "r", encoding="utf-8") as f:
    customCSS = f.read()

# apply the custom stylesheet to the Gradio app
with gr.Blocks(css=customCSS) as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        # append the user turn with an empty bot slot and clear the textbox
        return "", history + [[user_message, None]]
    def bot(history):
        # generation parameters (fixed defaults for the demo UI)
        gen_kwargs = dict(
            max_new_tokens=512,
            top_k=30,
            top_p=0.9,
            temperature=0.2,
            repetition_penalty=1.02,
            stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
        )
        # Build the prompt from the latest user turn and the prior history
        # (the context budget of 2048 tokens is an assumed default)
        prompt = generate_prompt_with_history(history[-1][0], history[:-1], max_length=2048)
        stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
        history[-1][1] = ""
        # yield each generated token as it arrives
        for r in stream:
            # skip special tokens
            if r.token.special:
                continue
            # stop if we encounter a stop sequence
            if r.token.text in gen_kwargs["stop_sequences"]:
                break
            history[-1][1] += r.token.text
            time.sleep(0.05)
            yield history
    # on submit: first record the user turn (user), then stream the reply (bot)
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)
demo.queue()
demo.launch()
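
# Note: on Hugging Face Spaces this file is started automatically; to run it
# locally, execute `python app.py` and open the URL printed by demo.launch().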