Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
update utils
Browse files- auditqa/utils.py +112 -6
auditqa/utils.py
CHANGED
@@ -10,11 +10,47 @@ def save_logs(scheduler, JSON_DATASET_PATH, logs) -> None:
|
|
10 |
""" Every interaction with app saves the log of question and answer,
|
11 |
this is to get the usage statistics of app and evaluate model performances
|
12 |
"""
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
def get_message_template(type, SYSTEM_PROMPT, USER_PROMPT):
|
20 |
if type == 'NVIDIA':
|
@@ -69,4 +105,74 @@ def parse_output_llm_with_sources(output):
|
|
69 |
else:
|
70 |
parts.append(part)
|
71 |
content_parts = "".join(parts)
|
72 |
-
return content_parts
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
""" Every interaction with app saves the log of question and answer,
|
11 |
this is to get the usage statistics of app and evaluate model performances
|
12 |
"""
|
13 |
+
try:
|
14 |
+
# We get the timestamp here now because we are simply recording time of logging
|
15 |
+
current_time = datetime.now().timestamp()
|
16 |
+
logs["time"] = str(current_time)
|
17 |
+
|
18 |
+
# Save feedback (if any)
|
19 |
+
if feedback:
|
20 |
+
logs["feedback"] = feedback
|
21 |
+
logs["record_id"] = str(uuid4())
|
22 |
+
|
23 |
+
# Do some reordering to keep things clean (time up front)
|
24 |
+
field_order = [
|
25 |
+
"record_id",
|
26 |
+
"session_id",
|
27 |
+
"time", # current log time
|
28 |
+
"session_duration_seconds",
|
29 |
+
"client_location",
|
30 |
+
"platform",
|
31 |
+
"system_prompt",
|
32 |
+
"sources",
|
33 |
+
"reports",
|
34 |
+
"subtype",
|
35 |
+
"year",
|
36 |
+
"question",
|
37 |
+
"retriever",
|
38 |
+
"endpoint_type",
|
39 |
+
"reader",
|
40 |
+
"docs",
|
41 |
+
"answer",
|
42 |
+
"feedback"
|
43 |
+
]
|
44 |
+
ordered_logs = {k: logs.get(k) for k in field_order if k in logs}
|
45 |
+
|
46 |
+
with scheduler.lock:
|
47 |
+
with open(JSON_DATASET_PATH, 'a') as f:
|
48 |
+
json.dump(ordered_logs, f)
|
49 |
+
f.write("\n")
|
50 |
+
logging.info("logging done")
|
51 |
+
except Exception as e:
|
52 |
+
raise
|
53 |
+
|
54 |
|
55 |
def get_message_template(type, SYSTEM_PROMPT, USER_PROMPT):
|
56 |
if type == 'NVIDIA':
|
|
|
105 |
else:
|
106 |
parts.append(part)
|
107 |
content_parts = "".join(parts)
|
108 |
+
return content_parts
|
109 |
+
|
110 |
+
|
111 |
+
def get_client_ip(request=None):
    """Return the client IP address for ``request``.

    A non-empty ``X-Forwarded-For`` header takes precedence, because behind a
    proxy the socket peer is the proxy rather than the user; the first
    comma-separated entry of that header is used. Otherwise the address is
    taken from ``request.client.host``.

    Args:
        request: Request-like object providing ``headers`` (with a ``get``
            method) and ``client`` (an object with a ``host`` attribute, or
            ``None``). May itself be ``None`` or otherwise falsy.

    Returns:
        The detected IP address as a string, or ``"127.0.0.1"`` when no
        request is given, no address can be determined, or an unexpected
        error occurs while inspecting the request.
    """
    fallback_ip = "127.0.0.1"
    if not request:
        return fallback_ip

    try:
        ip = None

        # Check the proxy header first so that a missing ``request.client``
        # cannot prevent a valid forwarded address from being used.
        forwarded_for = request.headers.get('X-Forwarded-For')
        if forwarded_for:
            # X-Forwarded-For can contain multiple IPs - first one is the client
            ip = forwarded_for.split(',')[0].strip()

        if not ip:
            # No usable forwarded address: fall back to the direct peer.
            client = getattr(request, 'client', None)
            ip = getattr(client, 'host', None)

        if ip:
            logging.debug(f"Client IP detected: {ip}")
            return ip
    except Exception as e:
        # Deliberately best-effort: IP detection must never break the caller.
        logging.error(f"Error getting client IP: {e}")
    return fallback_ip
|
128 |
+
|
129 |
+
|
130 |
+
def get_client_location(ip_address) -> dict | None:
    """Look up approximate geolocation for an IP address via ipapi.co.

    Args:
        ip_address: Address interpolated into the ipapi.co lookup URL.

    Returns:
        On an HTTP 200 response, a dict with the keys ``city``, ``region``,
        ``country``, ``latitude`` and ``longitude`` taken from the JSON
        payload (missing fields are ``None``). ``None`` for any other status
        code or when the request raises
        ``requests.exceptions.RequestException``.
    """
    # Browser-like User-Agent so the service does not block the request.
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    lookup_url = f'https://ipapi.co/{ip_address}/json/'

    try:
        response = requests.get(lookup_url, headers=request_headers, timeout=5)
        status = response.status_code

        # Handle the failure statuses up front; only 200 carries usable data.
        if status == 429:
            logging.warning(f"Rate limit exceeded. Response: {response.text}")
            return None
        if status != 200:
            logging.error(f"Error: Status code {response.status_code}. Response: {response.text}")
            return None

        payload = response.json()
        latitude = payload.get('latitude')
        longitude = payload.get('longitude')

        if not (latitude is None or longitude is None):
            # Blur the position with random noise of up to 0.01 degrees
            # (roughly 1 km) in each direction so the exact coordinates
            # returned by the service are not stored.
            latitude += random.uniform(-0.01, 0.01)
            longitude += random.uniform(-0.01, 0.01)

        return {
            'city': payload.get('city'),
            'region': payload.get('region'),
            'country': payload.get('country_name'),
            'latitude': latitude,
            'longitude': longitude
        }
    except requests.exceptions.RequestException as e:
        logging.error(f"Request failed: {str(e)}")
        return None
|
168 |
+
|
169 |
+
|
170 |
+
def get_platform_info(user_agent: str) -> str:
|
171 |
+
"""Get platform info"""
|
172 |
+
# Make a best guess at the device type
|
173 |
+
if any(mobile_keyword in user_agent.lower() for mobile_keyword in ['mobile', 'android', 'iphone', 'ipad', 'ipod']):
|
174 |
+
platform_info = 'mobile'
|
175 |
+
else:
|
176 |
+
platform_info = 'desktop'
|
177 |
+
|
178 |
+
return platform_info
|