Spaces:

Ibrahimarain
/

llama-2-13B

Runtime error

App Files Files Community

Ibrahimarain committed on Apr 30, 2024

Commit

ac1af52

1 Parent(s): 319bf3d

added local.py

Browse files

Files changed (1) hide show

app.py +189 -4

app.py CHANGED Viewed

@@ -1,7 +1,192 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

+import llama_cpp
+import llama_cpp.llama_tokenizer
+from llama_cpp import Llama
 import gradio as gr
+from loguru import logger
+import psutil
+from ctransformers import AutoModelForCausalLM,AutoTokenizer
+# Prompt template using the [INST] / <<SYS>> markers. The {question}
+# placeholder is filled in per request with str.format() by predict() and
+# generate().
+prompt_template = """[INST] <<SYS>>
+You are a helpful assistant for a crowdfunding platform called GiveSendGo. Your goal is to gather essential information for campaign and generate a title and sample pitch of atleast 1000 words for the campaign.
+<</SYS>>
+{question} [/INST]
+"""
+# Relative path of the GGUF model file that is passed to Llama() as model_path.
+model_loc = "models/llama-2-13b-chat.Q5_K_M.gguf"
+# llama = AutoModelForCausalLM.from_pretrained(
+#     model_loc,
+#     model_type="llama",
+#     context_length=4096,
+#     max_new_tokens=2048,
+#     hf=True
+#     # threads=cpu_count,
+# )
+# llama = llama_cpp.Llama.from_pretrained(
+#     #repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
+#     #filename="*q8_0.gguf",
+#     mode_path=model_loc,
+#     model_type="llama",
+#     context_length=4096,
+#     max_new_tokens=2048,
+#     filename="llama-2-13b-chat.Q5_K_M.gguf",
+#     tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
+#     verbose=False
+# )
+llama = Llama(
+model_path=model_loc,
+max_tokens=4096,
+n_ctx=4096,
+verbose=False,
+)
+_ = [elm for elm in prompt_template.splitlines() if elm.strip()]
+stop_string = [elm.split(":")[0] + ":" for elm in _][-2]
+logger.debug(f"{stop_string=}")
+_ = psutil.cpu_count(logical=False) - 1
+cpu_count: int = int(_) if _ else 1
+logger.debug(f"{cpu_count=}")
+model = "gpt-3.5-turbo"
+def predict(message, history):
+    messages = []
+    prompt = prompt_template.format(question=message)
+    for user_message, assistant_message in history:
+        messages.append({"role": "system", "content": prompt},)
+        messages.append({"role": "user", "content": user_message})
+        messages.append({"role": "assistant", "content": assistant_message})
+        messages.append({"role": "user", "content": message})
+    response = llama.create_chat_completion_openai_v1(
+        model=model,
+        messages=messages,
+        response_format={
+        "type": "json_object",
+            "schema": {
+                "type": "object",
+                "properties": {"title": {"type": "string"},
+                               #"description": {"type": "string"},
+                               "sample_pitch": {"type": "string"},
+                               "amount": {"type": "string"},
+                               "location": {"type": "string"}},
+                "required": ["title","sample_pitch","amount","location"], #description
+            },
+        },
+        temperature=0.7,
+        stream=True
+    )
+    text = ""
+    for chunk in response:
+        content = chunk.choices[0].delta.content
+        if content:
+            text += content
+            yield text
+def generate(message):
+    try:
+        messages = []
+        prompt = prompt_template.format(question=message)
+        #for user_message, assistant_message in history:
+        messages.append({"role": "system", "content": prompt},)
+        #messages.append({"role": "user", "content": user_message})
+        #messages.append({"role": "assistant", "content": assistant_message})
+        messages.append({"role": "user", "content": message})
+        response = llama.create_chat_completion_openai_v1(
+            model=model,
+            messages=messages,
+            response_format={
+            "type": "json_object",
+                "schema": {
+                    "type": "object",
+                    "properties": {"title": {"type": "string"},
+                                #"description": {"type": "string"},
+                                "sample_pitch": {"type": "string"},
+                                "amount": {"type": "string"},
+                                "location": {"type": "string"}},
+                    "required": ["title","sample_pitch","amount","location"], #description
+                },
+            },
+            temperature=0.7,
+            stream=False)
+        # text = ""
+        # for chunk in response:
+        #     content = chunk.choices[0].delta.content
+        #     if content:
+        #         text += content
+        #         logger.debug(f"api: {content=}")
+        #         yield text
+        logger.debug(f"{response}")
+        return response.choices[0].delta.content
+    except Exception as exc:
+        logger.error(exc)
+        response = f"{exc=}"
+def predict_api(message):
+    logger.debug(f"{message=}")
+    text = generate(message)
+    logger.debug(f"text::{text=}")
+    return f"json: {text=}"
+# Client-side snippet passed to gr.Blocks(js=...): unless the page URL already
+# ends with "?__theme=dark", it redirects to the same URL with that suffix.
+js = """function () {
+  gradioURL = window.location.href
+  if (!gradioURL.endsWith('?__theme=dark')) {
+    window.location.replace(gradioURL + '?__theme=dark');
+  }
+}"""
+# Extra CSS passed to gr.Blocks(css=...): hides the footer element.
+# NOTE(review): `full-height` has no leading "." or "#", so it is an element
+# selector - confirm whether a class selector was intended.
+css = """
+footer {
+    visibility: hidden;
+}
+full-height {
+    height: 100%;
+}
+"""
+with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css) as demo:
+    gr.ChatInterface(predict, examples=["What is the capital of France?", "Who was the first person on the moon?"])
+    with gr.Accordion("For Chat/Translation API", open=False, visible=False):
+        input_text = gr.Text()
+        api_btn = gr.Button("Go", variant="primary")
+        out_text = gr.Text()
+    api_btn.click(
+        predict_api,
+        input_text,
+        out_text,
+        api_name="api",
+    )
+if __name__ == "__main__":
+    demo.queue().launch(debug=True, share=True)