# Minimal Gradio chat front-end: each user message is wrapped in an instruction
# template and sent to a Hugging Face Inference Endpoint for completion.
import gradio as gr
import requests
import os

# Mistral/Llama-style instruction tokens used to build the prompt.
start_token = "<s>"
start_instruction_token = "[INST] "
end_instruction_token = " [/INST]"
# Italian system prompt: "You are a helpful and reliable assistant.
# Answer the following question appropriately:"
system_prompt = "Sei un assistente utile ed affidabile. Rispondi in maniera adeguata alla domanda seguente:\n"
# Italian for "Answer:"
start_completion = "\nRisposta:"

# Dedicated Inference Endpoint URL; the access token is read from the
# ITACA_TOKEN environment variable (e.g. a Space secret) and must be set.
API_URL = "https://jadvy64czlx56190.us-east-1.aws.endpoints.huggingface.cloud"
token = "Bearer " + os.getenv("ITACA_TOKEN")

headers = {
    "Accept": "application/json",
    "Authorization": token,
    "Content-Type": "application/json"
}


def query(payload):
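    """POST the payload to the Inference Endpoint and return the parsed JSON."""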
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()


def predict(message, history):
    # Wrap the user message in the instruction template. The chat history is
    # ignored here, so each turn is answered independently of previous ones.
    new_message = start_token + start_instruction_token + system_prompt + message + end_instruction_token + start_completion
    print(new_message)
    output = query({
        "inputs": new_message,
        "parameters": {
            "temperature": 0.9,
            "max_new_tokens": 512,
            # return only the newly generated completion, not the echoed prompt
            "return_full_text": False
        }
    })
    return output[0]["generated_text"]

# Launch the chat UI; share=True also exposes a temporary public link.
iface = gr.ChatInterface(predict)
iface.launch(share=True)