# NOTE(review): the original lines here ("Spaces: / Sleeping / Sleeping") were
# Hugging Face web-page UI text captured by scraping, not Python code; replaced
# with this comment so the file parses.
import gradio as gr
from transformers import pipeline  # NOTE(review): unused in this file — kept so any external reliance is not broken
import requests
import os

# Mistral-style instruction-format delimiters used to assemble the prompt.
start_token = "<s>"
start_instruction_token = "[INST] "
end_instruction_token = " [/INST]"
# System prompt (Italian): "You are a helpful and reliable assistant.
# Answer the following question appropriately:"
system_prompt = "Sei un assistente utile ed affidabile. Rispondi in maniera adeguata alla domanda seguente:\n"
start_completion = "\nRisposta:"

# Dedicated Hugging Face Inference Endpoint serving the model.
API_URL = "https://jadvy64czlx56190.us-east-1.aws.endpoints.huggingface.cloud"
# Read the endpoint credential from the ITACA_TOKEN secret.  os.getenv returns
# None when the variable is unset, which previously surfaced as an opaque
# TypeError on string concatenation — fail fast with a clear message instead.
_itaca_token = os.getenv("ITACA_TOKEN")
if _itaca_token is None:
    raise RuntimeError("ITACA_TOKEN environment variable is not set")
token = "Bearer " + _itaca_token
headers = {
    "Accept": "application/json",
    "Authorization": token,
    "Content-Type": "application/json",
}
def query(payload):
    """POST *payload* as JSON to the inference endpoint and return the decoded JSON.

    Args:
        payload: JSON-serializable request body for the text-generation
            endpoint (``inputs`` plus ``parameters``).

    Returns:
        The endpoint's JSON response — a list of generations on success,
        or an error object (e.g. while the endpoint is scaling up).

    Raises:
        requests.RequestException: on network failure or timeout.
    """
    # timeout added so a stalled or cold-starting endpoint cannot hang the
    # chat UI indefinitely (requests has no default timeout).
    response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
    return response.json()
def predict(message, history):
    """Gradio ChatInterface callback: answer *message* via the remote endpoint.

    Args:
        message: The user's latest chat message.
        history: Prior turns supplied by gr.ChatInterface; currently ignored,
            so every question is answered without conversational context.

    Returns:
        The generated answer text, or an error description if the endpoint
        reported a failure.
    """
    # Wrap the question in the Mistral instruction format with the Italian
    # system prompt.  NOTE(review): "<s>" is sent as literal text; whether the
    # server maps it to the real BOS token depends on endpoint configuration —
    # confirm against the deployed tokenizer settings.
    new_message = (
        start_token
        + start_instruction_token
        + system_prompt
        + message
        + end_instruction_token
        + start_completion
    )
    print(new_message)  # debug: log the fully formatted prompt
    output = query({
        "inputs": new_message,
        "parameters": {
            "temperature": 0.7,
            "max_new_tokens": 512,
            "return_full_text": False
        }
    })
    # Robustness: on failure the endpoint returns a dict like {"error": ...}
    # instead of a list, which previously crashed with KeyError/TypeError —
    # surface the message to the user instead.
    if isinstance(output, dict) and "error" in output:
        return f"Endpoint error: {output['error']}"
    return output[0]["generated_text"]
# Build the chat UI around predict and start the server.  share=True requests
# a public gradio.live tunnel link; it is ignored when the app already runs
# on a publicly hosted Space.
iface = gr.ChatInterface(predict)
iface.launch(share=True)