# NOTE(review): the following metadata was residue from the HF Spaces file-viewer
# page (space status, file size, blob hashes, gutter line numbers), not program
# text; it is preserved here as a comment so the file parses as Python.
# Spaces: Sleeping | File size: 1,132 Bytes
import gradio as gr
from transformers import pipeline
import requests
import os
# Prompt-format tokens for a Mistral-style "<s>[INST] ... [/INST]" template.
start_token = "<s>"
start_instruction_token = "[INST] "
end_instruction_token = " [/INST]"
# System prompt (Italian): "You are a helpful and reliable assistant.
# Answer the following question appropriately:"
system_prompt = "Sei un assistente utile ed affidabile. Rispondi in maniera adeguata alla domanda seguente:\n"
start_completion = "\nRisposta:"

# Dedicated HF Inference Endpoint serving the model.
API_URL = "https://jadvy64czlx56190.us-east-1.aws.endpoints.huggingface.cloud"

# Read the access token from the environment. Fall back to "" so that a missing
# variable produces a clear warning (and later a 401 from the API) instead of
# an opaque `TypeError: can only concatenate str ... NoneType` at import time.
_itaca_token = os.getenv("ITACA_TOKEN") or ""
if not _itaca_token:
    print("WARNING: ITACA_TOKEN is not set; API requests will fail to authenticate.")
token = "Bearer " + _itaca_token

headers = {
    "Accept": "application/json",
    "Authorization": token,
    "Content-Type": "application/json"
}
def query(payload):
    """POST *payload* as JSON to the inference endpoint and return the decoded JSON reply."""
    resp = requests.post(API_URL, json=payload, headers=headers)
    return resp.json()
def predict(message, history):
    """Chat callback for gr.ChatInterface.

    Wraps *message* in the Mistral-style instruction template, sends it to
    the inference endpoint, and returns the generated completion.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list
        Prior chat turns supplied by gradio; currently unused — each request
        is answered without conversational context.

    Returns
    -------
    str
        The model's generated text, or an error description when the endpoint
        did not return a completion (cold start, auth failure, overload).
    """
    prompt = (
        start_token
        + start_instruction_token
        + system_prompt
        + message
        + end_instruction_token
        + start_completion
    )
    print(prompt)  # debug: log the full prompt sent to the endpoint
    output = query({
        "inputs": prompt,
        "parameters": {
            "temperature": 0.7,
            "max_new_tokens": 512,
            "return_full_text": False
        }
    })
    # On success the endpoint returns a list of {"generated_text": ...}; on
    # failure it returns a dict such as {"error": ...}. Surface the error
    # instead of crashing with IndexError/KeyError.
    if isinstance(output, list) and output and "generated_text" in output[0]:
        return output[0]["generated_text"]
    if isinstance(output, dict) and "error" in output:
        return "Errore dal server: " + str(output["error"])
    return str(output)
# Build a chat UI around predict() and start the server.
# NOTE(review): share=True requests a public gradio.live tunnel; on HF Spaces
# the Space already exposes a public URL, so this flag is presumably
# unnecessary there — confirm before removing.
iface = gr.ChatInterface(predict)
iface.launch(share=True)
# (end of file — trailing viewer artifact removed)