# NOTE(review): the lines "Spaces / Sleeping / Sleeping" were Hugging Face
# Spaces UI status text captured when this file was scraped — not program code.
# Kept here as a comment so the module parses.
import os

import gradio as gr
import requests
from transformers import pipeline
# --- Prompt template pieces (Mistral/Llama-style instruction format) ---
start_token = "<s>"  # BOS token; not referenced below — presumably kept for compatibility
start_completion = "\nRisposta:"  # "Answer:" cue appended after the instruction
start_instruction_token, end_instruction_token = "[INST]", "[/INST]"
system_prompt = "Sei un assistente utile ed affidabile. Rispondi in maniera adeguata alla domanda seguente: "

# --- Inference endpoint configuration ---
API_URL = "https://cyk11dj2ce5ybyjq.us-east-1.aws.endpoints.huggingface.cloud"
# os.getenv returns None when ITACA_TOKEN is unset, and "Bearer " + None raises
# TypeError at import time. Default to "" so the module still loads; a missing
# token then surfaces as an HTTP 401 from the endpoint instead of a crash here.
token = "Bearer " + os.getenv("ITACA_TOKEN", "")
headers = {
    "Accept": "application/json",
    "Authorization": token,
    "Content-Type": "application/json",
}
def query(payload):
    """POST *payload* to the inference endpoint and return the decoded JSON.

    Args:
        payload: JSON-serializable dict in the HF text-generation request
            format (``{"inputs": ..., "parameters": {...}}``).

    Returns:
        The endpoint's JSON body — a list of generations on success, or an
        error dict (e.g. while the endpoint is scaling up).
    """
    # Without a timeout, a cold or unreachable endpoint would hang the Gradio
    # worker indefinitely; 60 s is generous for a 256-token completion.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    return response.json()
def predict(message, history):
    """Chat callback for ``gr.ChatInterface``: answer *message* via the endpoint.

    Args:
        message: The user's latest chat message.
        history: Prior conversation turns supplied by Gradio. Currently
            ignored — each request is answered statelessly.

    Returns:
        The generated answer as a string (ChatInterface expects text, not
        the raw JSON structure the endpoint returns).
    """
    new_message = (
        start_instruction_token
        + system_prompt
        + message
        + end_instruction_token
        + start_completion
    )
    output = query({
        "inputs": new_message,
        "parameters": {
            "max_new_tokens": 256,
            # Only the completion is wanted, not the echoed prompt.
            "return_full_text": False,
        },
    })
    # Success responses look like [{"generated_text": "..."}]. Error responses
    # (cold start, auth failure) are dicts — fall back to their repr so the UI
    # still shows something useful instead of crashing.
    if isinstance(output, list) and output and isinstance(output[0], dict):
        return output[0].get("generated_text", str(output))
    return str(output)
# Build the chat UI around the stateless predict() callback and serve it.
iface = gr.ChatInterface(fn=predict)
iface.launch()