"""Gradio chat demo that streams replies from a Hugging Face hosted model.

The UI wires a textbox to a ``gr.Chatbot`` (``type="messages"``) and hooks up
the chatbot's undo, retry and like events.
"""
from huggingface_hub import InferenceClient
import gradio as gr

MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
SYSTEM_PROMPT = "You are a friendly chatbot"

client = InferenceClient()


def respond(
    prompt: str,
    history,
):
    """Stream the assistant's reply to *prompt*, yielding the growing history.

    Args:
        prompt: The user's new message.
        history: Prior conversation as a list of ``{"role", "content"}``
            dicts. When empty or ``None`` a conversation is started with the
            system prompt.

    Yields:
        First the history with the user message added, then, for every
        streamed chunk, that history plus the partially completed assistant
        message.
    """
    if not history:
        history = [{"role": "system", "content": SYSTEM_PROMPT}]
    # Build a new list instead of appending in place so the caller's list is
    # never mutated (the original only mutated it when it was non-empty).
    history = [*history, {"role": "user", "content": prompt}]
    yield history

    response = {"role": "assistant", "content": ""}
    for message in client.chat_completion(  # type: ignore
        history,
        temperature=0.95,
        top_p=0.9,
        max_tokens=512,
        stream=True,
        model=MODEL_ID,
    ):
        # A streamed chunk is not guaranteed to carry a choice; skip empty
        # ones rather than failing with IndexError in the middle of a reply.
        if not message.choices:
            continue
        # ``delta.content`` may be None, hence the ``or ""``.
        response["content"] += message.choices[0].delta.content or ""
        yield history + [response]


def handle_undo(history, undo_data: gr.UndoData):
    """Drop the message at the undo index (and everything after it).

    Returns:
        A ``(truncated_history, content)`` pair, where ``content`` is the
        content of the message at ``undo_data.index``; the UI wiring below
        sends these to the chatbot and the textbox respectively.
    """
    index = undo_data.index
    return history[:index], history[index]["content"]


def handle_retry(history, retry_data: gr.RetryData):
    """Regenerate the reply for the message at the retry index.

    The history is cut just before ``retry_data.index`` and the content of
    the message at that index is fed to :func:`respond` again.
    """
    index = retry_data.index
    previous_prompt = history[index]["content"]
    yield from respond(previous_prompt, history[:index])


def handle_like(data: gr.LikeData):
    """Print whether the user upvoted or downvoted a response."""
    if data.liked:
        print("You upvoted this response: ", data.value)
    else:
        print("You downvoted this response: ", data.value)


with gr.Blocks() as demo:
    gr.Markdown("# Chat with Hugging Face Zephyr 7b 🤗")
    chatbot = gr.Chatbot(
        label="Agent",
        type="messages",
        avatar_images=(
            None,
            "https://em-content.zobj.net/source/twitter/376/hugging-face_1f917.png",
        ),
    )
    prompt = gr.Textbox(max_lines=1, label="Chat Message")

    # Two listeners on the same submit event: one streams the reply into the
    # chatbot, the other clears the textbox.
    prompt.submit(respond, [prompt, chatbot], [chatbot])
    prompt.submit(lambda: "", None, [prompt])

    chatbot.undo(handle_undo, chatbot, [chatbot, prompt])
    chatbot.retry(handle_retry, chatbot, [chatbot])
    chatbot.like(handle_like, None, None)


if __name__ == "__main__":
    demo.launch()