from huggingface_hub import InferenceClient
import os
import gradio as gr
import time
# HF Inference Endpoints parameters
endpoint_url = "https://qrh4fv8e7x3fw9w3.us-east-1.aws.endpoints.huggingface.cloud"
# Equivalent curl request against the endpoint:
#   curl <endpoint_url> \
#     -X POST \
#     -d '{"inputs":"My name is Teven and I am"}' \
#     -H "Authorization: Bearer <hf_token>" \
#     -H "Content-Type: application/json"
hf_token = os.getenv("TOKEN_HF")
# Streaming Client
client = InferenceClient(endpoint_url, token=hf_token)
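
# For reference, a minimal sketch of the raw HTTP request from the curl example
# above, using `requests` directly (illustration only; the app itself talks to
# the endpoint through InferenceClient, and this helper name is hypothetical):
def raw_endpoint_request(inputs: str):
    import requests  # local import, only needed for this illustrative helper
    response = requests.post(
        endpoint_url,
        headers={
            "Authorization": f"Bearer {hf_token}",
            "Content-Type": "application/json",
        },
        json={"inputs": inputs},
    )
    return response.json()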
########################################################################
# Use the chat model to generate text...
def predict(text,
            chatbotGr,
            history,
            top_p,
            temperature,
            max_length_tokens,
            max_context_length_tokens):
    if text == "":
        yield chatbotGr, history, "Empty context."
        return
    try:
        client
    except NameError:
        yield [[text, "No Model Found"]], [], "No Endpoint Found"
        return

    # generation parameters
    gen_kwargs = dict(
        max_new_tokens=max_length_tokens,
        top_k=30,
        top_p=top_p,
        temperature=temperature,
        repetition_penalty=1.02,
        stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
    )

    # Build the prompt from the chat history. generate_prompt_with_history and
    # tokenizer are assumed to be defined elsewhere (e.g. an accompanying utils
    # module); they are not declared in this file.
    prompt = generate_prompt_with_history(text, history, tokenizer, max_length=max_context_length_tokens)

    # Stream the answer token by token and yield the growing chat state
    # (completion sketch, mirroring the token loop in bot() below)
    history = history + [[text, ""]]
    for r in client.text_generation(prompt, stream=True, details=True, **gen_kwargs):
        if r.token.special:
            continue
        if r.token.text in gen_kwargs["stop_sequences"]:
            break
        history[-1][1] += r.token.text
        yield chatbotGr + [history[-1]], history, "Generating..."
    yield chatbotGr + [history[-1]], history, "Generate: Success"
#######################################################################
# UI rendering with Gradio
with open("custom.css", "r", encoding="utf-8") as f:
    customCSS = f.read()

# apply the custom stylesheet to the Gradio app
with gr.Blocks(css=customCSS) as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(user_message, history):
        # append the user turn with an empty bot slot and clear the textbox
        return "", history + [[user_message, None]]
    def bot(history):
        # generation parameters (fixed defaults for the demo UI)
        gen_kwargs = dict(
            max_new_tokens=512,
            top_k=30,
            top_p=0.9,
            temperature=0.2,
            repetition_penalty=1.02,
            stop_sequences=["\nUser:", "<|endoftext|>", "</s>"],
        )
        # Build the prompt from the latest user turn and the prior history
        # (the context budget of 2048 tokens is an assumed default)
        prompt = generate_prompt_with_history(history[-1][0], history[:-1], max_length=2048)
        stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
        history[-1][1] = ""
        # yield each generated token as it arrives
        for r in stream:
            # skip special tokens
            if r.token.special:
                continue
            # stop if we encounter a stop sequence
            if r.token.text in gen_kwargs["stop_sequences"]:
                break
            history[-1][1] += r.token.text
            time.sleep(0.05)
            yield history
    # on submit: first record the user turn (user), then stream the reply (bot)
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)
demo.queue()
demo.launch()
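
# Note: on Hugging Face Spaces this file is started automatically; to run it
# locally, execute `python app.py` and open the URL printed by demo.launch().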