tabedini committed on
Commit
308a95d
1 Parent(s): c4e4f64

Update app.py

Browse files

Add Langfuse logging.

Files changed (1) hide show
  1. app.py +20 -0
app.py CHANGED
@@ -3,6 +3,8 @@ from threading import Thread
3
  from typing import Iterator
4
 
5
  import gradio as gr
 
 
6
  import spaces
7
  import torch
8
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
@@ -44,6 +46,16 @@ pre, code {
44
 
45
  system_prompt = str(os.getenv("SYSTEM_PROMPT"))
46
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  def execution_time_calculator(start_time, log=True):
49
  delta = time.time() - start_time
@@ -70,6 +82,10 @@ def get_generation_speed():
70
 
71
  return generation_speed
72
 
 
 
 
 
73
 
74
  @spaces.GPU
75
  def generate(
@@ -128,9 +144,13 @@ def generate(
128
 
129
  generation_speed = token_per_second_calculator(sum_tokens, time_delta)
130
 
 
 
131
  print(f"generation_speed: {generation_speed}")
132
 
133
 
 
 
134
  chatbot = gr.Chatbot(placeholder=PLACEHOLDER, scale=1, show_copy_button=True, height="68%", rtl=True) #, elem_classes=["chatbot"])
135
  chat_input = gr.Textbox(show_label=False, lines=2, rtl=True, placeholder="ورودی", show_copy_button=True, scale=4)
136
  submit_btn = gr.Button(variant="primary", value="ارسال", size="sm", scale=1, elem_classes=["_button"])
 
3
  from typing import Iterator
4
 
5
  import gradio as gr
6
+ from langfuse import Langfuse
7
+ from langfuse.decorators import observe
8
  import spaces
9
  import torch
10
  from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
46
 
47
  system_prompt = str(os.getenv("SYSTEM_PROMPT"))
48
 
49
+ secret_key = str(os.getenv("LANGFUSE_SECRET_KEY"))
50
+ public_key = str(os.getenv("LANGFUSE_PUBLIC_KEY"))
51
+ host = str(os.getenv("LANGFUSE_HOST"))
52
+
53
+ langfuse = Langfuse(
54
+ secret_key=secret_key,
55
+ public_key=public_key,
56
+ host=host
57
+ )
58
+
59
 
60
  def execution_time_calculator(start_time, log=True):
61
  delta = time.time() - start_time
 
82
 
83
  return generation_speed
84
 
85
+ @observe()
86
+ def log_to_langfuse(message, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty, do_sample, model_outputs, generation_speed):
87
+ return "".join(model_outputs)
88
+
89
 
90
  @spaces.GPU
91
  def generate(
 
144
 
145
  generation_speed = token_per_second_calculator(sum_tokens, time_delta)
146
 
147
+ log_function = log_to_langfuse(message, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty, do_sample, outputs, generation_speed)
148
+
149
  print(f"generation_speed: {generation_speed}")
150
 
151
 
152
+
153
+
154
  chatbot = gr.Chatbot(placeholder=PLACEHOLDER, scale=1, show_copy_button=True, height="68%", rtl=True) #, elem_classes=["chatbot"])
155
  chat_input = gr.Textbox(show_label=False, lines=2, rtl=True, placeholder="ورودی", show_copy_button=True, scale=4)
156
  submit_btn = gr.Button(variant="primary", value="ارسال", size="sm", scale=1, elem_classes=["_button"])