shlomihod committed on
Commit c8de088 · 1 Parent(s): 8cf313d

change api call to together async

Files changed (2)
  1. app.py +51 -31
  2. requirements.txt +1 -1
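In short, the diff below swaps the blocking requests.post call to the Together API for an aiohttp session inside the async api_call_function, adds reraise=True to the tenacity @retry decorators so the original exception surfaces after the final attempt, and reports the share of unknown-label predictions as a new metric. A minimal sketch of the async call pattern the commit adopts (the endpoint URL, model name, and key handling here are illustrative placeholders, not the app's actual configuration):

import asyncio
import os

import aiohttp
from tenacity import retry, stop_after_attempt, wait_random_exponential

TOGETHER_API_ENDPOINT = "https://api.together.xyz/inference"  # placeholder endpoint
TOGETHER_API_KEY = os.environ["TOGETHER_API_KEY"]  # placeholder key handling


@retry(
    wait=wait_random_exponential(min=1, max=30),
    stop=stop_after_attempt(5),
    reraise=True,  # re-raise the last exception instead of wrapping it in RetryError
)
async def call_together(prompt: str) -> str:
    # Build the completion request; the model name is a placeholder.
    payload = {
        "model": "togethercomputer/llama-2-7b",
        "prompt": prompt,
        "max_tokens": 32,
    }
    headers = {"Authorization": f"Bearer {TOGETHER_API_KEY}"}
    # aiohttp keeps the event loop free while the HTTP request is in flight,
    # unlike the synchronous requests.post call this commit removes.
    async with aiohttp.ClientSession() as session:
        async with session.post(
            TOGETHER_API_ENDPOINT, json=payload, headers=headers
        ) as response:
            data = await response.json()
            return data["output"]["choices"][0]["text"]


# Example: asyncio.run(call_together("Classify: the movie was great."))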
app.py CHANGED
@@ -5,6 +5,7 @@ import logging
 import os
 import string
 
+import aiohttp
 import cohere
 import numpy as np
 import openai
@@ -19,7 +20,6 @@ from huggingface_hub.utils import (
     RepositoryNotFoundError,
 )
 from imblearn.under_sampling import RandomUnderSampler
-import requests
 from sklearn.metrics import (
     ConfusionMatrixDisplay,
     accuracy_score,
@@ -128,6 +128,7 @@ def build_api_call_function(model, hf_token=None, openai_api_key=None):
         @retry(
             wait=wait_random_exponential(min=RETRY_MIN_WAIT, max=RETRY_MAX_WAIT),
             stop=stop_after_attempt(RETRY_MAX_ATTEMPTS),
+            reraise=True,
         )
         async def api_call_function(prompt, generation_config):
             if model.startswith("gpt-3.5-turbo") or model.startswith("gpt-4"):
@@ -168,35 +169,40 @@ def build_api_call_function(model, hf_token=None, openai_api_key=None):
         @retry(
             wait=wait_random_exponential(min=RETRY_MIN_WAIT, max=RETRY_MAX_WAIT),
             stop=stop_after_attempt(RETRY_MAX_ATTEMPTS),
+            reraise=True,
         )
         async def api_call_function(prompt, generation_config):
-            res = requests.post(
-                TOGETHER_API_ENDPOINT,
-                json={
-                    "model": model,
-                    "prompt": prompt,
-                    "temperature": generation_config["temperature"]
-                    if generation_config["do_sample"]
-                    else 0,
-                    "top_p": generation_config["top_p"]
-                    if generation_config["do_sample"]
-                    else 1,
-                    "top_k": generation_config["top_k"]
-                    if generation_config["do_sample"]
-                    else 0,
-                    "max_tokens": generation_config["max_new_tokens"],
-                    "stop": generation_config["stop_sequences"],
-                },
-                headers={
-                    "Authorization": f"Bearer {TOGETHER_API_KEY}",
-                    "User-Agent": "FM",
-                },
-            )
+            payload = {
+                "model": model,
+                "prompt": prompt,
+                "temperature": generation_config["temperature"]
+                if generation_config["do_sample"]
+                else 0,
+                "top_p": generation_config["top_p"]
+                if generation_config["do_sample"]
+                else 1,
+                "top_k": generation_config["top_k"]
+                if generation_config["do_sample"]
+                else 0,
+                "max_tokens": generation_config["max_new_tokens"],
+                "stop": generation_config["stop_sequences"],
+            }
 
-            output = res.json()["output"]["choices"][0]["text"]
-            length = None
+            headers = {
+                "Authorization": f"Bearer {TOGETHER_API_KEY}",
+                "User-Agent": "FM",
+            }
 
-            return output, length
+            LOGGER.info(f"{payload=}")
+
+            async with aiohttp.ClientSession() as session:
+                async with session.post(
+                    TOGETHER_API_ENDPOINT, json=payload, headers=headers
+                ) as response:
+                    output = (await response.json())["output"]["choices"][0]["text"]
+                    length = None
+
+            return output, length
 
     elif model.startswith("cohere"):
         co = cohere.Client(COHERE_API_KEY)
@@ -205,6 +211,7 @@ def build_api_call_function(model, hf_token=None, openai_api_key=None):
         @retry(
             wait=wait_random_exponential(min=RETRY_MIN_WAIT, max=RETRY_MAX_WAIT),
             stop=stop_after_attempt(RETRY_MAX_ATTEMPTS),
+            reraise=True,
         )
         def api_call_function(prompt, generation_config):
             response = co.generate(
@@ -229,6 +236,7 @@ def build_api_call_function(model, hf_token=None, openai_api_key=None):
         @retry(
             wait=wait_random_exponential(min=RETRY_MIN_WAIT, max=RETRY_MAX_WAIT),
             stop=stop_after_attempt(RETRY_MAX_ATTEMPTS),
+            reraise=True,
         )
         async def api_call_function(prompt, generation_config):
             hf_client = AsyncInferenceClient(token=hf_token, model=model)
@@ -460,6 +468,8 @@ def measure(dataset, outputs, labels, label_column, input_columns, search_row):
         | dataset[input_columns].to_dict("list")
     )
 
+    unknown_proportion = (evaluation_df["inference"] == UNKNOWN_LABEL).mean()
+
     acc = accuracy_score(evaluation_df["annotation"], evaluation_df["inference"])
     bacc = balanced_accuracy_score(
         evaluation_df["annotation"], evaluation_df["inference"]
@@ -476,6 +486,7 @@ def measure(dataset, outputs, labels, label_column, input_columns, search_row):
     cm_display.figure_.autofmt_xdate(rotation=45)
 
     metrics = {
+        "unknown_proportion": unknown_proportion,
         "accuracy": acc,
         "balanced_accuracy": bacc,
         "mcc": mcc,
@@ -535,7 +546,9 @@ def main():
 
     if "api_call_function" not in st.session_state:
         st.session_state["api_call_function"] = build_api_call_function(
-            model=HF_MODEL, hf_token=HF_TOKEN, openai_api_key=OPENAI_API_KEY
+            model=HF_MODEL,
+            hf_token=HF_TOKEN,
+            openai_api_key=OPENAI_API_KEY,
         )
 
     if "train_dataset" not in st.session_state:
@@ -682,7 +695,9 @@ def main():
         st.session_state["test_size"] = test_size
 
         st.session_state["api_call_function"] = build_api_call_function(
-            model=model, hf_token=HF_TOKEN, openai_api_key=OPENAI_API_KEY
+            model=model,
+            hf_token=HF_TOKEN,
+            openai_api_key=OPENAI_API_KEY,
         )
 
         st.session_state["generation_config"] = generation_config
@@ -771,17 +786,22 @@ def main():
         st.error(e)
         st.stop()
 
-    num_metric_cols = 1 if balancing else 3
+    num_metric_cols = 2 if balancing else 4
     cols = st.columns(num_metric_cols)
     with cols[0]:
         st.metric("Accuracy", f"{100 * evaluation['accuracy']:.0f}%")
+    with cols[1]:
+        st.metric(
+            "Unknown Proportion",
+            f"{100 * evaluation['unknown_proportion']:.0f}%",
+        )
     if not balancing:
-        with cols[1]:
+        with cols[2]:
             st.metric(
                 "Balanced Accuracy",
                 f"{100 * evaluation['balanced_accuracy']:.0f}%",
             )
-        with cols[2]:
+        with cols[3]:
             st.metric("MCC", f"{evaluation['mcc']:.2f}")
 
     st.markdown("## Confusion Matrix")
requirements.txt CHANGED
@@ -1,3 +1,4 @@
+aiohttp
 cohere
 datasets
 huggingface_hub[inference]
@@ -6,7 +7,6 @@ numpy
 pandas
 matplotlib
 openai
-requests
 scikit-learn
 spacy
 streamlit