Update api_usage.py
api_usage.py (+57 −21) CHANGED
@@ -16,13 +16,13 @@ GPT_TYPES = ["gpt-3.5-turbo", "gpt-4", "gpt-4-32k", "gpt-4-32k-0314", "gpt-4o",
 
 TOKEN_LIMIT_PER_TIER_TURBO = {
     "free": 40000,
-    "tier-1":
+    "tier-1": 200000,
     "tier-1(old?)": 90000,
-    "tier-2":
-    "tier-3":
-    "tier-4":
-    "tier-5-old":
-    "tier-5":
+    "tier-2/tier-5-old": 2000000,
+    "tier-3": 4000000,
+    "tier-4": 10000000,
+    "tier-5-old": 15000000,
+    "tier-5": 50000000
 }
 TOKEN_LIMIT_PER_TIER_GPT4 = {
     "tier-1": 10000,
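The limits above are what the checker's tier detection compares observed token-per-minute caps against. A minimal sketch of that lookup, assuming a tpm_num already parsed from the x-ratelimit-limit-tokens response header (the real matching lives in check_key_tier, which this commit does not touch):

# Hypothetical sketch: map an observed TPM cap back to a tier name.
TOKEN_LIMIT_PER_TIER_TURBO = {
    "free": 40000,
    "tier-1": 200000,
    "tier-1(old?)": 90000,
    "tier-2/tier-5-old": 2000000,
    "tier-3": 4000000,
    "tier-4": 10000000,
    "tier-5-old": 15000000,
    "tier-5": 50000000,
}

def guess_turbo_tier(tpm_num: int) -> str:
    # Exact match against the table; anything else is a custom limit.
    for tier, limit in TOKEN_LIMIT_PER_TIER_TURBO.items():
        if tpm_num == limit:
            return tier
    return "custom-tier"

print(guess_turbo_tier(200000))  # -> tier-1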
@@ -61,12 +61,18 @@ def get_subscription(key, session, org_list):
     list_models_avai = set()
 
     for org_in in org_list:
-        …
+        if len(org_list) < 2: # mismatch_organization
+            headers = get_headers(key)
+            available_models = get_models(session, key)
+        else:
+            headers = get_headers(key, org_in['id'])
+            available_models = get_models(session, key, org_in['id'])
         if org_in['id']:
             if org_in['is_default']:
                 default_org = org_in['name']
             org_description.append(f"{org_in['description']} (Created: {datetime.utcfromtimestamp(org_in['created'])} UTC" + (", personal)" if org_in['personal'] else ")"))
-        …
+        if 'No perm' in available_models:
+            available_models.extend(GPT_TYPES)
         has_gpt4_32k = True if GPT_TYPES[2] in available_models else False
         has_gpt4_32k_0314 = True if GPT_TYPES[3] in available_models else False
         has_gpt4 = True if GPT_TYPES[1] in available_models else False
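get_headers is called both with and without an organization id here; the single-org branch (len(org_list) < 2, tagged # mismatch_organization) deliberately omits the org header. A plausible sketch of that helper, assuming the standard OpenAI-Organization header (its body is outside this diff):

def get_headers(key: str, org_id: str = None) -> dict:
    # Authorization always; organization scoping only when an id is passed.
    headers = {"Authorization": f"Bearer {key}"}
    if org_id:
        headers["OpenAI-Organization"] = org_id
    return headers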
@@ -76,18 +82,47 @@ def get_subscription(key, session, org_list):
                 org.append(f"{org_in['id']} ({org_in['name']}, {org_in['title']}, {org_in['role']})")
             if has_gpt4_32k:
                 list_models_avai.update(GPT_TYPES)
-                …
+                if 'No perm' in available_models:
+                    status_formated = format_status(GPT_TYPES, session, headers)
+                else:
+                    status_formated = format_status([GPT_TYPES[2], GPT_TYPES[4], GPT_TYPES[5], GPT_TYPES[1], GPT_TYPES[0]], session, headers)
                 rpm.append(status_formated[0])
                 tpm.append(status_formated[1])
                 quota.append(status_formated[2])
-                …
+                if 'No perm' in available_models:
+                    lst_string = ""
+                    length = len(status_formated[3])
+                    count = 1
+                    for k, v in status_formated[3].items():
+                        if v:
+                            if count < length:
+                                lst_string += f'{k}, '
+                                continue
+                            else:
+                                lst_string += f' {k} '
+                        if v == False:
+                            list_models_avai.remove(k)
+                            if k == GPT_TYPES[2]:
+                                has_gpt4_32k = False
+                            elif k == GPT_TYPES[1]:
+                                has_gpt4 = False
+                            elif k == GPT_TYPES[0]:
+                                has_35 = False
+                            elif k == GPT_TYPES[4]:
+                                has_4o = False
+                        count += 1
+                    lst_string += '(No get model permission)'
+                    #list_models.append(f"gpt-4-32k, gpt-4o, gpt-4-turbo, gpt-4, gpt-3.5-turbo (No get model permission)")
+                    list_models.append(lst_string)
+                else:
+                    list_models.append(f"gpt-4-32k, gpt-4o, gpt-4-turbo, gpt-4, gpt-3.5-turbo ({len(available_models)} total)")
             else:
                 list_models_avai.update([GPT_TYPES[3], GPT_TYPES[1], GPT_TYPES[0]])
                 status_formated = format_status([GPT_TYPES[3], GPT_TYPES[4], GPT_TYPES[5], GPT_TYPES[1], GPT_TYPES[0]], session, headers)
                 rpm.append(status_formated[0])
                 tpm.append(status_formated[1])
                 quota.append(status_formated[2])
-                list_models.append(f"gpt-4-32k-0314, gpt-4o, gpt-4-turbo, gpt-4, gpt-3.5-turbo ({len(available_models)} total)")
+                list_models.append(f"gpt-4-32k-0314, gpt-4o, gpt-4-turbo, gpt-4, gpt-3.5-turbo ({len(available_models)} total)")
 
         elif has_gpt4:
             if org_in['id']:
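When /models is forbidden, available_models is padded with all of GPT_TYPES and the per-model probe results in status_formated[3] decide which entries survive. A simplified, self-contained version of that pruning step, using hypothetical probe results:

# Hypothetical probe output: model -> whether its probe saw a positive RPM limit.
model_status = {"gpt-4-32k": False, "gpt-4o": True, "gpt-4": True, "gpt-3.5-turbo": True}
list_models_avai = set(model_status)

working = []
for model, ok in model_status.items():
    if ok:
        working.append(model)
    else:
        list_models_avai.discard(model)  # drop models this key cannot actually use

print(", ".join(working) + " (No get model permission)")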
@@ -123,6 +158,7 @@ def send_oai_completions(oai_stuff):
     session = oai_stuff[0]
     headers = oai_stuff[1]
     model = oai_stuff[2]
+    model_status = False
     try:
         req_body = {"model": model, "max_tokens": 1}
         rpm_string = ""
@@ -134,6 +170,8 @@ def send_oai_completions(oai_stuff):
         e = result.get("error", {}).get("code", "")
         if e == None or e == 'missing_required_parameter':
             rpm_num = int(r.headers.get("x-ratelimit-limit-requests", 0))
+            if rpm_num > 0:
+                model_status = True
             tpm_num = int(r.headers.get('x-ratelimit-limit-tokens', 0))
             tpm_left = int(r.headers.get('x-ratelimit-remaining-tokens', 0))
             _rpm = '{:,}'.format(rpm_num).replace(',', ' ')
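send_oai_completions probes each model with a 1-token completion and reads OpenAI's rate-limit headers; after this commit, a positive requests-per-minute limit also marks the model as working. A self-contained sketch of the same probe, assuming the standard chat-completions endpoint behind the file's BASE_URL:

import requests

def probe_rate_limits(key: str, model: str = "gpt-3.5-turbo"):
    # Deliberately tiny request: only the response headers matter.
    r = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers={"Authorization": f"Bearer {key}"},
        json={"model": model, "max_tokens": 1},
        timeout=10,
    )
    rpm_num = int(r.headers.get("x-ratelimit-limit-requests", 0))
    tpm_num = int(r.headers.get("x-ratelimit-limit-tokens", 0))
    # No limit headers at all -> treat the model as unavailable.
    return rpm_num, tpm_num, rpm_num > 0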
@@ -160,20 +198,22 @@ def send_oai_completions(oai_stuff):
         rpm_string = f"0 ({model})"
         tpm_string = f"0 ({model})"
         quota_string = e
-        return rpm_string, tpm_string, quota_string
+        return rpm_string, tpm_string, quota_string, model, model_status
     except Exception as e:
         #print(e)
-        return "", "", ""
+        return "", "", "", model, model_status
 
 def format_status(list_models_avai, session, headers):
     rpm = []
     tpm = []
+    model_status = {}
     quota = ""
     args = [(session, headers, model) for model in list_models_avai]
     with concurrent.futures.ThreadPoolExecutor() as executer:
         for result in executer.map(send_oai_completions, args):
             rpm.append(result[0])
             tpm.append(result[1])
+            model_status[result[3]] = result[4]
             if result[2]:
                 if quota == 'yes | custom-tier':
                     continue
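executer.map returns results in input order, and each result is now a 5-tuple ending in (model, model_status), which is how format_status can key its model_status dict without any locking. The pattern in isolation, with a stand-in probe:

import concurrent.futures

def probe(model: str):
    # Stand-in for send_oai_completions: one tuple per model.
    return f"rpm ({model})", f"tpm ({model})", "", model, True

models = ["gpt-3.5-turbo", "gpt-4"]
model_status = {}
with concurrent.futures.ThreadPoolExecutor() as executor:
    # map() yields results in the same order as `models`.
    for rpm, tpm, quota, model, ok in executor.map(probe, models):
        model_status[model] = ok

print(model_status)  # {'gpt-3.5-turbo': True, 'gpt-4': True}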
@@ -184,7 +224,7 @@ def format_status(list_models_avai, session, headers):
     for i in range(len(rpm)):
         rpm_str += rpm[i] + (" | " if i < len(rpm)-1 else "")
         tpm_str += tpm[i] + (" | " if i < len(rpm)-1 else "")
-    return rpm_str, tpm_str, quota
+    return rpm_str, tpm_str, quota, model_status
 
 def check_key_tier(rpm, tpm, dict, headers):
     dictItemsCount = len(dict)
@@ -216,6 +256,8 @@ def get_models(session, key, org: str = None):
     try:
         rq = session.get(f"{BASE_URL}/models", headers=headers, timeout=10)
         avai_models = rq.json()
+        if rq.status_code == 403:
+            return ['No perm']
         list_models = [model["id"] for model in avai_models["data"]] #[model["id"] for model in avai_models["data"] if model["id"] in GPT_TYPES]
     except:
         list_models = []
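Callers test the sentinel with 'No perm' in available_models. In the committed code rq.json() runs before the status check, so a 403 whose body is not JSON falls through to the except and yields [] instead of the sentinel. A hypothetical variant with the check first, for comparison:

import requests

def get_models_sketch(session: requests.Session, key: str) -> list:
    rq = session.get(
        "https://api.openai.com/v1/models",
        headers={"Authorization": f"Bearer {key}"},
        timeout=10,
    )
    if rq.status_code == 403:
        return ["No perm"]  # sentinel: key may work but cannot list models
    try:
        return [m["id"] for m in rq.json()["data"]]
    except Exception:
        return []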
@@ -265,7 +307,6 @@ async def check_ant_rate_limit(key):
         tasks = [fetch_ant(async_session, json_data) for _ in range(max_requests)]
         results = await asyncio.gather(*tasks)
         count = 0
-        #print(results)
         for result in results:
             if result:
                 count+=1
@@ -273,7 +314,6 @@ async def check_ant_rate_limit(key):
             return f'{max_requests} or above'
         return count
     except Exception as e:
-        #print(e)
         return 0
 
 def check_ant_tier(rpm):
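check_ant_rate_limit estimates a key's requests-per-minute by firing max_requests concurrent calls and counting successes, saturating at "max_requests or above". The counting pattern in isolation, with a stand-in for fetch_ant:

import asyncio

async def fake_probe(i: int) -> bool:
    # Stand-in for fetch_ant: True means the request got through.
    await asyncio.sleep(0)
    return i % 2 == 0

async def estimate_rpm(max_requests: int = 10):
    results = await asyncio.gather(*(fake_probe(i) for i in range(max_requests)))
    count = sum(1 for ok in results if ok)
    if count == max_requests:
        return f"{max_requests} or above"
    return count

print(asyncio.run(estimate_rpm()))  # -> 5 with this stand-in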
@@ -337,7 +377,6 @@ def check_key_gemini_availability(key):
         else:
             return False, None
     except Exception as e:
-        #print(e)
         return 'Error while making request.', None
 
 def check_key_azure_availability(endpoint, api_key):
@@ -356,7 +395,6 @@ def check_key_azure_availability(endpoint, api_key):
         models = [m["id"] for m in rq["data"] if len(m["capabilities"]["scale_types"])>0]
         return True, models
     except Exception as e:
-        #print(e)
         return False, None
 
 def get_azure_deploy(endpoint, api_key):
@@ -516,7 +554,6 @@ async def check_key_aws_availability(key):
     iam = session.client('iam')
 
     username = check_username(session)
-    #print(username)
     if not username[0]:
         return False, username[1]
 
@@ -578,7 +615,6 @@ def is_model_working(form_info, model_info):
             return model_info['agreementAvailability']['errorMessage']
         return "No"
     except:
-        #print(form_status)
         return "No"
 
 async def get_model_status(session, key, secret, region, model_name, form_info):
@@ -621,7 +657,7 @@ async def check_bedrock_claude_status(session, key, secret):
         if region and model_name:
             if msg == "Maybe":
                 invoke_info = await send_signed_request_bedrock(session, payload, f"anthropic.{model_name}", key, secret, region)
-                if 'messages.0' in invoke_info.get('message'):
+                if 'messages.0' in invoke_info.get('message') or 'many requests' in invoke_info.get('message'):
                     models[model_name].append(f'{region}: may be Unavailable if disabled')
                 else:
                     models[model_name].append(region)