Spaces:
Running on L4
Update app.py
Browse files
Add langfuse log.
app.py
CHANGED
@@ -3,6 +3,8 @@ from threading import Thread
|
|
3 |
from typing import Iterator
|
4 |
|
5 |
import gradio as gr
|
|
|
|
|
6 |
import spaces
|
7 |
import torch
|
8 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
@@ -44,6 +46,16 @@ pre, code {
|
|
44 |
|
45 |
system_prompt = str(os.getenv("SYSTEM_PROMPT"))
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
|
48 |
def execution_time_calculator(start_time, log=True):
|
49 |
delta = time.time() - start_time
|
@@ -70,6 +82,10 @@ def get_generation_speed():
|
|
70 |
|
71 |
return generation_speed
|
72 |
|
|
|
|
|
|
|
|
|
73 |
|
74 |
@spaces.GPU
|
75 |
def generate(
|
@@ -128,9 +144,13 @@ def generate(
|
|
128 |
|
129 |
generation_speed = token_per_second_calculator(sum_tokens, time_delta)
|
130 |
|
|
|
|
|
131 |
print(f"generation_speed: {generation_speed}")
|
132 |
|
133 |
|
|
|
|
|
134 |
chatbot = gr.Chatbot(placeholder=PLACEHOLDER, scale=1, show_copy_button=True, height="68%", rtl=True) #, elem_classes=["chatbot"])
|
135 |
chat_input = gr.Textbox(show_label=False, lines=2, rtl=True, placeholder="ورودی", show_copy_button=True, scale=4)
|
136 |
submit_btn = gr.Button(variant="primary", value="ارسال", size="sm", scale=1, elem_classes=["_button"])
|
|
|
3 |
from typing import Iterator
|
4 |
|
5 |
import gradio as gr
|
6 |
+
from langfuse import Langfuse
|
7 |
+
from langfuse.decorators import observe
|
8 |
import spaces
|
9 |
import torch
|
10 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
|
|
46 |
|
47 |
system_prompt = str(os.getenv("SYSTEM_PROMPT"))
|
48 |
|
49 |
+
secret_key = str(os.getenv("LANGFUSE_SECRET_KEY"))
|
50 |
+
public_key = str(os.getenv("LANGFUSE_PUBLIC_KEY"))
|
51 |
+
host = str(os.getenv("LANGFUSE_HOST"))
|
52 |
+
|
53 |
+
langfuse = Langfuse(
|
54 |
+
secret_key=secret_key,
|
55 |
+
public_key=public_key,
|
56 |
+
host=host
|
57 |
+
)
|
58 |
+
|
59 |
|
60 |
def execution_time_calculator(start_time, log=True):
|
61 |
delta = time.time() - start_time
|
|
|
82 |
|
83 |
return generation_speed
|
84 |
|
85 |
+
@observe()
|
86 |
+
def log_to_langfuse(message, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty, do_sample, model_outputs, generation_speed):
|
87 |
+
return "".join(model_outputs)
|
88 |
+
|
89 |
|
90 |
@spaces.GPU
|
91 |
def generate(
|
|
|
144 |
|
145 |
generation_speed = token_per_second_calculator(sum_tokens, time_delta)
|
146 |
|
147 |
+
log_function = log_to_langfuse(message, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty, do_sample, outputs, generation_speed)
|
148 |
+
|
149 |
print(f"generation_speed: {generation_speed}")
|
150 |
|
151 |
|
152 |
+
|
153 |
+
|
154 |
chatbot = gr.Chatbot(placeholder=PLACEHOLDER, scale=1, show_copy_button=True, height="68%", rtl=True) #, elem_classes=["chatbot"])
|
155 |
chat_input = gr.Textbox(show_label=False, lines=2, rtl=True, placeholder="ورودی", show_copy_button=True, scale=4)
|
156 |
submit_btn = gr.Button(variant="primary", value="ارسال", size="sm", scale=1, elem_classes=["_button"])
|