Update app.py
Browse files
app.py
CHANGED
@@ -2,7 +2,7 @@ import logging
|
|
2 |
import gradio as gr
|
3 |
from queue import Queue
|
4 |
import time
|
5 |
-
from prometheus_client import start_http_server, Counter, Histogram
|
6 |
import threading
|
7 |
import psutil
|
8 |
import random
|
@@ -14,6 +14,11 @@ ner_pipeline = pipeline("ner", model="Sevixdd/roberta-base-finetuned-ner")
|
|
14 |
# --- Prometheus Metrics Setup ---
|
15 |
REQUEST_COUNT = Counter('gradio_request_count', 'Total number of requests')
|
16 |
REQUEST_LATENCY = Histogram('gradio_request_latency_seconds', 'Request latency in seconds')
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
# --- Logging Setup ---
|
19 |
logging.basicConfig(filename="chat_log.txt", level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
@@ -30,15 +35,29 @@ def chat_function(message, history):
|
|
30 |
chat_queue.put(message)
|
31 |
logging.info(f"User: {message}")
|
32 |
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
logging.info(f"Bot: {response}")
|
36 |
|
|
|
|
|
|
|
37 |
time.sleep(random.uniform(0.5, 2.5)) # Simulate processing time
|
38 |
|
39 |
chat_queue.get()
|
40 |
return response
|
41 |
except Exception as e:
|
|
|
42 |
logging.error(f"Error in chat processing: {e}")
|
43 |
return "An error occurred. Please try again."
|
44 |
|
@@ -84,6 +103,8 @@ body {
|
|
84 |
while True:
|
85 |
cpu_usage_display.value = psutil.cpu_percent()
|
86 |
mem_usage_display.value = psutil.virtual_memory().percent
|
|
|
|
|
87 |
time.sleep(5)
|
88 |
|
89 |
def update_logs(logs_display):
|
@@ -100,12 +121,18 @@ body {
|
|
100 |
model_params_display.value = model_params_str
|
101 |
time.sleep(10) # Update every 10 seconds
|
102 |
|
|
|
|
|
|
|
|
|
|
|
103 |
# --- Start Threads ---
|
104 |
threading.Thread(target=start_http_server, args=(8000,), daemon=True).start()
|
105 |
threading.Thread(target=update_metrics, args=(request_count_display, avg_latency_display), daemon=True).start()
|
106 |
threading.Thread(target=update_usage, args=(cpu_usage_display, mem_usage_display), daemon=True).start()
|
107 |
threading.Thread(target=update_logs, args=(logs_display,), daemon=True).start()
|
108 |
threading.Thread(target=display_model_params, args=(model_params_display,), daemon=True).start()
|
|
|
109 |
|
110 |
# --- Simulate Chat Interactions ---
|
111 |
def simulate_interactions():
|
@@ -118,4 +145,3 @@ body {
|
|
118 |
|
119 |
# Launch the app
|
120 |
demo.launch(share=True)
|
121 |
-
|
|
|
2 |
import gradio as gr
|
3 |
from queue import Queue
|
4 |
import time
|
5 |
+
from prometheus_client import start_http_server, Counter, Histogram, Gauge
|
6 |
import threading
|
7 |
import psutil
|
8 |
import random
|
|
|
14 |
# --- Prometheus Metrics Setup ---
# Request-level metrics, updated from the chat handler.
REQUEST_COUNT = Counter('gradio_request_count', 'Total number of requests')
REQUEST_LATENCY = Histogram('gradio_request_latency_seconds', 'Request latency in seconds')
ERROR_COUNT = Counter('gradio_error_count', 'Total number of errors')
RESPONSE_SIZE = Histogram('gradio_response_size_bytes', 'Size of responses in bytes')

# Host-level gauges, sampled periodically by a background thread.
CPU_USAGE = Gauge('system_cpu_usage_percent', 'System CPU usage in percent')
MEM_USAGE = Gauge('system_memory_usage_percent', 'System memory usage in percent')

# Depth of the in-process chat queue, refreshed by its own poller thread.
QUEUE_LENGTH = Gauge('chat_queue_length', 'Length of the chat queue')
|
22 |
|
23 |
# --- Logging Setup ---
|
24 |
logging.basicConfig(filename="chat_log.txt", level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
35 |
chat_queue.put(message)
|
36 |
logging.info(f"User: {message}")
|
37 |
|
38 |
+
ner_results = ner_pipeline(message)
|
39 |
+
|
40 |
+
detailed_response = []
|
41 |
+
for result in ner_results:
|
42 |
+
token = result['word']
|
43 |
+
score = result['score']
|
44 |
+
entity = result['entity']
|
45 |
+
start = result['start']
|
46 |
+
end = result['end']
|
47 |
+
detailed_response.append(f"Token: {token}, Entity: {entity}, Score: {score:.4f}, Start: {start}, End: {end}")
|
48 |
+
|
49 |
+
response = "\n".join(detailed_response)
|
50 |
logging.info(f"Bot: {response}")
|
51 |
|
52 |
+
response_size = len(response.encode('utf-8'))
|
53 |
+
RESPONSE_SIZE.observe(response_size)
|
54 |
+
|
55 |
time.sleep(random.uniform(0.5, 2.5)) # Simulate processing time
|
56 |
|
57 |
chat_queue.get()
|
58 |
return response
|
59 |
except Exception as e:
|
60 |
+
ERROR_COUNT.inc()
|
61 |
logging.error(f"Error in chat processing: {e}")
|
62 |
return "An error occurred. Please try again."
|
63 |
|
|
|
103 |
while True:
|
104 |
cpu_usage_display.value = psutil.cpu_percent()
|
105 |
mem_usage_display.value = psutil.virtual_memory().percent
|
106 |
+
CPU_USAGE.set(psutil.cpu_percent())
|
107 |
+
MEM_USAGE.set(psutil.virtual_memory().percent)
|
108 |
time.sleep(5)
|
109 |
|
110 |
def update_logs(logs_display):
|
|
|
121 |
model_params_display.value = model_params_str
|
122 |
time.sleep(10) # Update every 10 seconds
|
123 |
|
124 |
+
def update_queue_length():
    """Background task: publish the chat queue depth to the Prometheus gauge.

    Runs forever; intended to be started on a daemon thread so it exits
    with the application.
    """
    poll_seconds = 1  # refresh cadence for the gauge
    while True:
        QUEUE_LENGTH.set(chat_queue.qsize())
        time.sleep(poll_seconds)
|
128 |
+
|
129 |
# --- Start Threads ---
# Every background task runs on a daemon thread so the process can exit
# cleanly; start order matches the task table below.
_background_tasks = [
    (start_http_server, (8000,)),  # Prometheus scrape endpoint on :8000
    (update_metrics, (request_count_display, avg_latency_display)),
    (update_usage, (cpu_usage_display, mem_usage_display)),
    (update_logs, (logs_display,)),
    (display_model_params, (model_params_display,)),
    (update_queue_length, ()),
]
for _target, _args in _background_tasks:
    threading.Thread(target=_target, args=_args, daemon=True).start()
|
136 |
|
137 |
# --- Simulate Chat Interactions ---
|
138 |
def simulate_interactions():
|
|
|
145 |
|
146 |
# Launch the app
|
147 |
demo.launch(share=True)
|
|