"""Gradio chat UI that forwards user messages to a Hugging Face Inference Endpoint.

Each user message is wrapped in an [INST] ... [/INST] instruction template
around a fixed Italian system prompt, sent to the endpoint for text
generation, and the generated answer is shown in the chat.
"""

import os

import gradio as gr
import requests
from transformers import pipeline  # NOTE(review): unused in this file — confirm before removing

# Prompt-template constants for the instruction-tuned model.
start_token = ""  # currently unused; kept for compatibility
start_completion = "\nRisposta:"
start_instruction_token, end_instruction_token = "[INST]", "[/INST]"
system_prompt = "Sei un assistente utile ed affidabile. Rispondi in maniera adeguata alla domanda seguente: "

API_URL = "https://cyk11dj2ce5ybyjq.us-east-1.aws.endpoints.huggingface.cloud"

# Fail fast with a clear message when the endpoint token is missing;
# otherwise `"Bearer " + None` raises an opaque TypeError at import time.
_itaca_token = os.getenv("ITACA_TOKEN")
if _itaca_token is None:
    raise RuntimeError("ITACA_TOKEN environment variable is not set")
token = "Bearer " + _itaca_token

headers = {
    "Accept": "application/json",
    "Authorization": token,
    "Content-Type": "application/json",
}


def query(payload):
    """POST *payload* to the inference endpoint and return the decoded JSON.

    Raises requests.HTTPError on non-2xx responses (e.g. 401, or 503 while
    the endpoint is scaling up) instead of silently decoding an error body.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    response.raise_for_status()
    return response.json()


def predict(message, history):
    """Chat callback for gr.ChatInterface.

    Wraps *message* in the instruction template and queries the endpoint.
    Returns the generated text as a plain string — ChatInterface renders the
    return value, so handing it the raw JSON list from the endpoint would
    display `[{'generated_text': ...}]` instead of the answer. *history* is
    supplied by Gradio but unused: each turn is answered independently.
    """
    new_message = (
        start_instruction_token + system_prompt + message
        + end_instruction_token + start_completion
    )
    output = query({
        "inputs": new_message,
        "parameters": {
            "max_new_tokens": 256,
            "return_full_text": False
        }
    })
    # Text-generation endpoints return [{"generated_text": "..."}]; extract
    # the string so the chat UI shows the answer, not the JSON structure.
    if isinstance(output, list) and output and "generated_text" in output[0]:
        return output[0]["generated_text"]
    # Fall back to showing whatever came back (e.g. {"error": "..."}).
    return str(output)


iface = gr.ChatInterface(predict)
iface.launch()