michelebasilico commited on
Commit
be00868
1 Parent(s): f4a906d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -3
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import gradio as gr
2
  from transformers import pipeline
 
3
 
4
  # model="michelebasilico/itaca-mistral-7b-v2-4bit")
5
  model = pipeline("text-generation",
@@ -10,12 +11,29 @@ start_instruction_token, end_instruction_token = "[INST]", "[/INST]"
10
  system_prompt = "Sei un assistente utile ed affidabile. Rispondi in maniera adeguata alla domanda seguente: "
11
 
12
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def predict(message, history):
14
  new_message = start_instruction_token + system_prompt + \
15
  message + end_instruction_token + start_completion
16
- outputs = model(new_message, max_new_tokens=254,
17
- return_full_text=False)[0]["generated_text"]
18
- return outputs
 
 
 
 
 
19
 
20
 
21
  iface = gr.ChatInterface(predict)
 
1
  import gradio as gr
2
  from transformers import pipeline
3
+ import requests
4
 
5
  # model="michelebasilico/itaca-mistral-7b-v2-4bit")
6
  model = pipeline("text-generation",
 
11
  system_prompt = "Sei un assistente utile ed affidabile. Rispondi in maniera adeguata alla domanda seguente: "
12
 
13
 
14
+ API_URL = "https://cyk11dj2ce5ybyjq.us-east-1.aws.endpoints.huggingface.cloud"
15
+ headers = {
16
+ "Accept": "application/json",
17
+ "Content-Type": "application/json"
18
+ }
19
+
20
+
21
def query(payload):
    """POST *payload* as JSON to the HF inference endpoint and return the decoded JSON reply.

    Raises requests.HTTPError on a non-2xx response and requests.Timeout if the
    endpoint does not answer within 60 seconds, instead of hanging the Gradio
    worker indefinitely or mis-parsing an HTML error page as JSON.
    """
    # timeout: requests has NO default timeout — without it a stalled endpoint
    # blocks this call forever.
    response = requests.post(API_URL, headers=headers, json=payload, timeout=60)
    # Fail loudly on HTTP errors rather than letting response.json() raise a
    # confusing JSONDecodeError on an error page.
    response.raise_for_status()
    return response.json()
24
+
25
+
26
def predict(message, history):
    """Gradio ChatInterface callback.

    Wraps *message* in the [INST] instruction template together with the Italian
    system prompt, sends it to the remote inference endpoint, and returns the
    model's reply as a plain string (*history* is supplied by ChatInterface but
    unused — the prompt is stateless).
    """
    new_message = start_instruction_token + system_prompt + \
        message + end_instruction_token + start_completion
    output = query({
        "inputs": new_message,
        "parameters": {
            "max_new_tokens": 256,
            "return_full_text": False
        }
    })
    # The endpoint returns [{"generated_text": ...}]; ChatInterface needs the
    # string itself, not the raw JSON structure (the previous local-pipeline
    # version extracted [0]["generated_text"] the same way).
    return output[0]["generated_text"]
37
 
38
 
39
  iface = gr.ChatInterface(predict)