"""Gradio chat UI backed by a Hugging Face Inference Endpoint.

Wraps each user message in a Mistral-style instruction template (with an
Italian system prompt) and forwards it to a dedicated text-generation
endpoint, returning the generated reply to the chat interface.
"""
import os

import gradio as gr
import requests
from transformers import pipeline  # noqa: F401 -- imported in original; unused here

# Mistral-style instruction-template pieces.
start_token = ""
start_instruction_token = "[INST] "
end_instruction_token = " [/INST]"
system_prompt = "Sei un assistente utile ed affidabile. Rispondi in maniera adeguata alla domanda seguente:\n"
start_completion = "\nRisposta:"

API_URL = "https://jadvy64czlx56190.us-east-1.aws.endpoints.huggingface.cloud"

# Fail fast with a clear message instead of the opaque TypeError that
# "Bearer " + None would raise when the env var is missing.
_hf_token = os.getenv("ITACA_TOKEN")
if _hf_token is None:
    raise RuntimeError("Environment variable ITACA_TOKEN is not set; it must hold the endpoint API token.")
token = "Bearer " + _hf_token

headers = {
    "Accept": "application/json",
    "Authorization": token,
    "Content-Type": "application/json",
}


def query(payload):
    """POST *payload* to the inference endpoint and return the decoded JSON.

    Raises requests.HTTPError on non-2xx responses and requests.Timeout if
    the endpoint does not answer within 60 seconds (previously the call
    could hang indefinitely and HTTP errors surfaced as indexing crashes).
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    response.raise_for_status()
    return response.json()


def predict(message, history):
    """ChatInterface callback: generate a reply for *message*.

    *history* is supplied by Gradio but intentionally unused — the template
    carries only the current message, as in the original implementation.
    """
    new_message = (
        start_token
        + start_instruction_token
        + system_prompt
        + message
        + end_instruction_token
        + start_completion
    )
    print(new_message)  # log the fully templated prompt for debugging
    output = query({
        "inputs": new_message,
        "parameters": {
            "temperature": 0.7,
            "max_new_tokens": 512,
            "return_full_text": False,
        },
    })
    # Endpoint errors arrive as a dict ({"error": ...}); surface them instead
    # of crashing with KeyError when indexing a non-list response.
    if isinstance(output, dict):
        raise RuntimeError(f"Inference endpoint error: {output.get('error', output)}")
    return output[0]["generated_text"]


iface = gr.ChatInterface(predict)
iface.launch(share=True)