from huggingface_hub import InferenceClient
import os
import gradio as gr
import random
import time
# HF Inference Endpoints parameters
endpoint_url = "https://qrh4fv8e7x3fw9w3.us-east-1.aws.endpoints.huggingface.cloud"
# Equivalent curl request against the endpoint:
#   curl https://qrh4fv8e7x3fw9w3.us-east-1.aws.endpoints.huggingface.cloud \
#     -X POST \
#     -d '{"inputs":"My name is Teven and I am"}' \
#     -H "Authorization: Bearer <hf_token>" \
#     -H "Content-Type: application/json"
hf_token = os.getenv("TOKEN_HF")
# Streaming Client
client = InferenceClient(endpoint_url, token=hf_token)
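# Optional sanity check: the Python equivalent of the curl call above.
# Uncomment to verify the endpoint responds at startup.
# print(client.text_generation("My name is Teven and I am", max_new_tokens=20))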
########################################################################
# Use the chat model to generate text...
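# generate_prompt_with_history is called below but never defined in this file.
# A minimal sketch, assuming a plain "User:/Assistant:" chat template and a
# character-based context budget (the original likely trimmed with a tokenizer):
def generate_prompt_with_history(text, history, max_length=2048):
    prompt = ""
    for user_turn, bot_turn in history:
        prompt += f"\nUser: {user_turn}\nAssistant: {bot_turn or ''}"
    prompt += f"\nUser: {text}\nAssistant:"
    # keep only the most recent max_length characters of context
    return prompt[-max_length:]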
def predict(text,
            chatbotGr,
            history,
            top_p,
            temperature,
            max_length_tokens,
            max_context_length_tokens):
    if text == "":
        yield chatbotGr, history, "Empty context."
        return
    try:
        client
    except NameError:
        yield [[text, "No Model Found"]], [], "No Endpoint Found"
        return

    # generation parameters
    gen_kwargs = dict(
        max_new_tokens=max_length_tokens,
        top_k=30,
        top_p=top_p,
        temperature=temperature,
        repetition_penalty=1.02,
        stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
    )

    # prompt
    prompt = generate_prompt_with_history(text, history, max_length=max_context_length_tokens)

    # stream the endpoint response token by token (same loop as in bot below)
    stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
    history = history + [[text, ""]]
    for r in stream:
        if r.token.special:
            continue
        if r.token.text in gen_kwargs["stop_sequences"]:
            break
        history[-1][1] += r.token.text
        yield history, history, "Generating..."
#######################################################################
# Render the UI with Gradio
with open("custom.css", "r", encoding="utf-8") as f:
customCSS = f.read()
with gr.Blocks() as demo:
chatbot = gr.Chatbot()
msg = gr.Textbox()
clear = gr.Button("Clear")
def user(user_message, history):
return "", history + [[user_message, None]]
    def bot(history):
        # generation parameters
        gen_kwargs = dict(
            max_new_tokens=512,
            top_k=30,
            top_p=0.9,
            temperature=0.2,
            repetition_penalty=1.02,
            stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
        )
        # history[-1][0] is the latest user message; max_length=2048 is an
        # assumed context budget (max_context_length_tokens is not defined here)
        prompt = generate_prompt_with_history(history[-1][0], history[:-1], max_length=2048)
        stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
        history[-1][1] = ""
        # yield each generated token
        for r in stream:
            # skip special tokens
            if r.token.special:
                continue
            # stop if we encounter a stop sequence
            if r.token.text in gen_kwargs["stop_sequences"]:
                break
            # append the token and yield the updated history
            history[-1][1] += r.token.text
            time.sleep(0.05)
            yield history
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

demo.queue()
demo.launch()