# Minimal Gradio chat front-end: each user message is wrapped in an instruction
# template and sent to a Hugging Face Inference Endpoint for completion.
import gradio as gr
import requests
import os

# Mistral/Llama-style instruction tokens used to build the prompt.
start_token = "<s>"
start_instruction_token = "[INST] "
end_instruction_token = " [/INST]"
# Italian system prompt: "You are a helpful and reliable assistant.
# Answer the following question appropriately:"
system_prompt = "Sei un assistente utile ed affidabile. Rispondi in maniera adeguata alla domanda seguente:\n"
# Italian for "Answer:"
start_completion = "\nRisposta:"

# Dedicated Inference Endpoint URL; the access token is read from the
# ITACA_TOKEN environment variable (e.g. a Space secret) and must be set.
API_URL = "https://jadvy64czlx56190.us-east-1.aws.endpoints.huggingface.cloud"
token = "Bearer " + os.getenv("ITACA_TOKEN")

headers = {
    "Accept": "application/json",
    "Authorization": token,
    "Content-Type": "application/json"
}


def query(payload):
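    """POST the payload to the Inference Endpoint and return the parsed JSON."""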
    response = requests.post(API_URL, headers=headers, json=payload)
    return response.json()


def predict(message, history):
    # Wrap the user message in the instruction template. The chat history is
    # ignored here, so each turn is answered independently of previous ones.
    new_message = start_token + start_instruction_token + system_prompt + message + end_instruction_token + start_completion
    print(new_message)
    output = query({
        "inputs": new_message,
        "parameters": {
            "temperature": 0.9,
            "max_new_tokens": 512,
            # return only the newly generated completion, not the echoed prompt
            "return_full_text": False
        }
    })
    return output[0]["generated_text"]

# Launch the chat UI; share=True also exposes a temporary public link.
iface = gr.ChatInterface(predict)
iface.launch(share=True)