lobrien001 committed on
Commit
08127ac
·
verified ·
1 Parent(s): be6e8c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -4
app.py CHANGED
@@ -2,7 +2,7 @@ import logging
2
  import gradio as gr
3
  from queue import Queue
4
  import time
5
- from prometheus_client import start_http_server, Counter, Histogram
6
  import threading
7
  import psutil
8
  import random
@@ -14,6 +14,11 @@ ner_pipeline = pipeline("ner", model="Sevixdd/roberta-base-finetuned-ner")
14
  # --- Prometheus Metrics Setup ---
15
  REQUEST_COUNT = Counter('gradio_request_count', 'Total number of requests')
16
  REQUEST_LATENCY = Histogram('gradio_request_latency_seconds', 'Request latency in seconds')
 
 
 
 
 
17
 
18
  # --- Logging Setup ---
19
  logging.basicConfig(filename="chat_log.txt", level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -30,15 +35,29 @@ def chat_function(message, history):
30
  chat_queue.put(message)
31
  logging.info(f"User: {message}")
32
 
33
- ner_result = ner_pipeline(message)
34
- response = f"Response from NER model: {ner_result}"
 
 
 
 
 
 
 
 
 
 
35
  logging.info(f"Bot: {response}")
36
 
 
 
 
37
  time.sleep(random.uniform(0.5, 2.5)) # Simulate processing time
38
 
39
  chat_queue.get()
40
  return response
41
  except Exception as e:
 
42
  logging.error(f"Error in chat processing: {e}")
43
  return "An error occurred. Please try again."
44
 
@@ -84,6 +103,8 @@ body {
84
  while True:
85
  cpu_usage_display.value = psutil.cpu_percent()
86
  mem_usage_display.value = psutil.virtual_memory().percent
 
 
87
  time.sleep(5)
88
 
89
  def update_logs(logs_display):
@@ -100,12 +121,18 @@ body {
100
  model_params_display.value = model_params_str
101
  time.sleep(10) # Update every 10 seconds
102
 
 
 
 
 
 
103
  # --- Start Threads ---
104
  threading.Thread(target=start_http_server, args=(8000,), daemon=True).start()
105
  threading.Thread(target=update_metrics, args=(request_count_display, avg_latency_display), daemon=True).start()
106
  threading.Thread(target=update_usage, args=(cpu_usage_display, mem_usage_display), daemon=True).start()
107
  threading.Thread(target=update_logs, args=(logs_display,), daemon=True).start()
108
  threading.Thread(target=display_model_params, args=(model_params_display,), daemon=True).start()
 
109
 
110
  # --- Simulate Chat Interactions ---
111
  def simulate_interactions():
@@ -118,4 +145,3 @@ body {
118
 
119
  # Launch the app
120
  demo.launch(share=True)
121
-
 
2
  import gradio as gr
3
  from queue import Queue
4
  import time
5
+ from prometheus_client import start_http_server, Counter, Histogram, Gauge
6
  import threading
7
  import psutil
8
  import random
 
14
  # --- Prometheus Metrics Setup ---
15
  REQUEST_COUNT = Counter('gradio_request_count', 'Total number of requests')
16
  REQUEST_LATENCY = Histogram('gradio_request_latency_seconds', 'Request latency in seconds')
17
+ ERROR_COUNT = Counter('gradio_error_count', 'Total number of errors')
18
+ RESPONSE_SIZE = Histogram('gradio_response_size_bytes', 'Size of responses in bytes')
19
+ CPU_USAGE = Gauge('system_cpu_usage_percent', 'System CPU usage in percent')
20
+ MEM_USAGE = Gauge('system_memory_usage_percent', 'System memory usage in percent')
21
+ QUEUE_LENGTH = Gauge('chat_queue_length', 'Length of the chat queue')
22
 
23
  # --- Logging Setup ---
24
  logging.basicConfig(filename="chat_log.txt", level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
35
  chat_queue.put(message)
36
  logging.info(f"User: {message}")
37
 
38
+ ner_results = ner_pipeline(message)
39
+
40
+ detailed_response = []
41
+ for result in ner_results:
42
+ token = result['word']
43
+ score = result['score']
44
+ entity = result['entity']
45
+ start = result['start']
46
+ end = result['end']
47
+ detailed_response.append(f"Token: {token}, Entity: {entity}, Score: {score:.4f}, Start: {start}, End: {end}")
48
+
49
+ response = "\n".join(detailed_response)
50
  logging.info(f"Bot: {response}")
51
 
52
+ response_size = len(response.encode('utf-8'))
53
+ RESPONSE_SIZE.observe(response_size)
54
+
55
  time.sleep(random.uniform(0.5, 2.5)) # Simulate processing time
56
 
57
  chat_queue.get()
58
  return response
59
  except Exception as e:
60
+ ERROR_COUNT.inc()
61
  logging.error(f"Error in chat processing: {e}")
62
  return "An error occurred. Please try again."
63
 
 
103
  while True:
104
  cpu_usage_display.value = psutil.cpu_percent()
105
  mem_usage_display.value = psutil.virtual_memory().percent
106
+ CPU_USAGE.set(psutil.cpu_percent())
107
+ MEM_USAGE.set(psutil.virtual_memory().percent)
108
  time.sleep(5)
109
 
110
  def update_logs(logs_display):
 
121
  model_params_display.value = model_params_str
122
  time.sleep(10) # Update every 10 seconds
123
 
124
+ def update_queue_length():
125
+ while True:
126
+ QUEUE_LENGTH.set(chat_queue.qsize())
127
+ time.sleep(1) # Update every second
128
+
129
  # --- Start Threads ---
130
  threading.Thread(target=start_http_server, args=(8000,), daemon=True).start()
131
  threading.Thread(target=update_metrics, args=(request_count_display, avg_latency_display), daemon=True).start()
132
  threading.Thread(target=update_usage, args=(cpu_usage_display, mem_usage_display), daemon=True).start()
133
  threading.Thread(target=update_logs, args=(logs_display,), daemon=True).start()
134
  threading.Thread(target=display_model_params, args=(model_params_display,), daemon=True).start()
135
+ threading.Thread(target=update_queue_length, daemon=True).start()
136
 
137
  # --- Simulate Chat Interactions ---
138
  def simulate_interactions():
 
145
 
146
  # Launch the app
147
  demo.launch(share=True)