"""Gradio chat UI that streams text-generation from a Hugging Face Inference Endpoint."""

import os
import time

import gradio as gr
from huggingface_hub import InferenceClient

# HF Inference Endpoint to query.
endpoint_url = "https://qrh4fv8e7x3fw9w3.us-east-1.aws.endpoints.huggingface.cloud"

# Equivalent raw call, for reference:
#   curl <endpoint_url> -X POST \
#     -d '{"inputs":"My name is Teven and I am"}' \
#     -H "Authorization: Bearer <token>" \
#     -H "Content-Type: application/json"

hf_token = os.getenv("TOKEN_HF")

# Streaming client bound to the endpoint.
client = InferenceClient(endpoint_url, token=hf_token)


########################################################################
# Prompt construction
########################################################################
def generate_prompt_with_history(text, history, tokenizer=None, max_length=2048):
    """Flatten the chat history plus an optional new user turn into one prompt.

    Args:
        text: The new user message ("" if it is already the last history entry).
        history: List of [user_message, bot_message] pairs; a bot_message of
            None/"" marks the turn currently being generated.
        tokenizer: Accepted for call-site compatibility; unused here.
            NOTE(review): a real implementation would token-truncate — confirm.
        max_length: Maximum number of characters kept (truncated from the left
            so the most recent turns survive).

    Returns:
        A "User:/Assistant:" formatted prompt string ending with "Assistant:".
    """
    lines = []
    for user_turn, bot_turn in history:
        if user_turn:
            lines.append(f"User: {user_turn}")
        if bot_turn:
            lines.append(f"Assistant: {bot_turn}")
    if text:
        lines.append(f"User: {text}")
    lines.append("Assistant:")
    prompt = "\n".join(lines)
    # Naive character-level truncation from the left; keeps the newest context.
    return prompt[-max_length:]


########################################################################
# Use the chat model to generate text
########################################################################
def predict(text, chatbotGr, history, top_p, temperature, max_length_tokens, max_context_length_tokens,):
    """Stream a completion for `text`, yielding (chatbot, history, status) tuples.

    Args:
        text: New user message.
        chatbotGr: Current gr.Chatbot value (list of [user, bot] pairs).
        history: Conversation history as [user, bot] pairs.
        top_p, temperature, max_length_tokens, max_context_length_tokens:
            Sampling / length parameters forwarded to the endpoint.

    Yields:
        (chatbot_pairs, history_pairs, status_message) after each token.
    """
    if text == "":
        yield chatbotGr, history, "Empty context."
        return
    try:
        client
    except NameError:
        yield [[text, "No Model Found"]], [], "No Endpoint Found"
        return

    # Generation parameters for the endpoint.
    gen_kwargs = dict(
        max_new_tokens=max_length_tokens,
        top_k=30,
        top_p=top_p,
        temperature=temperature,
        repetition_penalty=1.02,
        stop_sequences=["\nUser:", "<|endoftext|>", ""],
    )

    # Build the prompt from the accumulated history plus the new turn.
    prompt = generate_prompt_with_history(text, history, max_length=max_context_length_tokens)

    # Stream tokens and surface partial output after each one.
    history = history + [[text, ""]]
    for r in client.text_generation(prompt, stream=True, details=True, **gen_kwargs):
        if r.token.special:
            continue  # skip BOS/EOS-style special tokens
        if r.token.text in gen_kwargs["stop_sequences"]:
            break  # client-side stop in case the server streams the stop text
        history[-1][1] += r.token.text
        yield history, history, "Generating..."
    yield history, history, "Done."


########################################################################
# Gradio UI
########################################################################
with open("custom.css", "r", encoding="utf-8") as f:
    customCSS = f.read()

with gr.Blocks(css=customCSS) as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        """Append the user's turn (bot reply pending) and clear the textbox."""
        return "", history + [[user_message, None]]

    def bot(history):
        """Stream the model's reply into the last history entry, token by token."""
        # Generation parameters for the endpoint.
        gen_kwargs = dict(
            max_new_tokens=512,
            top_k=30,
            top_p=0.9,
            temperature=0.2,
            repetition_penalty=1.02,
            stop_sequences=["\nUser:", "<|endoftext|>", ""],
        )

        # The pending user turn is already the last history entry, so no new text.
        prompt = generate_prompt_with_history("", history)

        stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)

        history[-1][1] = ""
        # With details=True the stream yields response objects, not plain
        # strings — read the text off each token.
        for r in stream:
            if r.token.special:
                continue  # skip special tokens
            if r.token.text in gen_kwargs["stop_sequences"]:
                break  # stop when a stop sequence is emitted
            history[-1][1] += r.token.text
            time.sleep(0.05)  # slow the stream slightly for a typing effect
            yield history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()
demo.launch()