cyberosa committed
Commit 8db40cd · Parent: 4a24b3a

removing data folder and adding dataset import

app.py CHANGED
@@ -1,7 +1,11 @@
 import gradio as gr
 import pandas as pd
 import duckdb
+import gzip
+import shutil
+import os
 import logging
+from huggingface_hub import hf_hub_download


 from scripts.metrics import (
@@ -58,6 +62,82 @@ def get_logger():
 logger = get_logger()


+def load_all_data():
+
+    # all trades profitability
+    # Download the compressed file
+    gz_file_path = hf_hub_download(
+        repo_id="valory/Olas-predict-dataset",
+        filename="all_trades_profitability.parquet.gz",
+        repo_type="dataset",
+    )
+
+    parquet_file_path = gz_file_path.replace(".gz", "")
+
+    with gzip.open(gz_file_path, "rb") as f_in:
+        with open(parquet_file_path, "wb") as f_out:
+            shutil.copyfileobj(f_in, f_out)
+
+    # Now read the decompressed parquet file
+    df1 = pd.read_parquet(parquet_file_path)
+    os.remove(parquet_file_path)
+
+    # closed_markets_div
+    closed_markets_df = hf_hub_download(
+        repo_id="valory/Olas-predict-dataset",
+        filename="closed_markets_div.parquet",
+        repo_type="dataset",
+    )
+    df2 = pd.read_parquet(closed_markets_df)
+
+    # daily_info
+    daily_info_df = hf_hub_download(
+        repo_id="valory/Olas-predict-dataset",
+        filename="daily_info.parquet",
+        repo_type="dataset",
+    )
+    df3 = pd.read_parquet(daily_info_df)
+
+    # unknown traders
+    unknown_df = hf_hub_download(
+        repo_id="valory/Olas-predict-dataset",
+        filename="unknown_traders.parquet",
+        repo_type="dataset",
+    )
+    df4 = pd.read_parquet(unknown_df)
+
+    # retention activity
+    gz_file_path = hf_hub_download(
+        repo_id="valory/Olas-predict-dataset",
+        filename="retention_activity.parquet.gz",
+        repo_type="dataset",
+    )
+    parquet_file_path = gz_file_path.replace(".gz", "")
+
+    with gzip.open(gz_file_path, "rb") as f_in:
+        with open(parquet_file_path, "wb") as f_out:
+            shutil.copyfileobj(f_in, f_out)
+    df5 = pd.read_parquet(parquet_file_path)
+    os.remove(parquet_file_path)
+
+    # active_traders.parquet
+    active_traders_df = hf_hub_download(
+        repo_id="valory/Olas-predict-dataset",
+        filename="active_traders.parquet",
+        repo_type="dataset",
+    )
+    df6 = pd.read_parquet(active_traders_df)
+
+    # weekly_mech_calls.parquet
+    all_mech_calls_df = hf_hub_download(
+        repo_id="valory/Olas-predict-dataset",
+        filename="weekly_mech_calls.parquet",
+        repo_type="dataset",
+    )
+    df7 = pd.read_parquet(all_mech_calls_df)
+    return df1, df2, df3, df4, df5, df6, df7
+
+
 def get_all_data():
     """
     Get parquet files from weekly stats and new generated
@@ -120,7 +200,8 @@ def prepare_data():
         unknown_traders,
         retention_df,
         active_traders,
-    ) = get_all_data()
+        all_mech_calls,
+    ) = load_all_data()

     all_trades["creation_date"] = all_trades["creation_timestamp"].dt.date

@@ -168,6 +249,7 @@ def prepare_data():
         unknown_traders,
         retention_df,
         active_traders,
+        all_mech_calls,
     )


@@ -178,6 +260,7 @@ def prepare_data():
     unknown_traders,
     raw_retention_df,
     active_traders,
+    all_mech_calls,
 ) = prepare_data()
 retention_df = prepare_retention_dataset(
     retention_df=raw_retention_df, unknown_df=unknown_traders
@@ -188,21 +271,23 @@ print(max(retention_df.creation_timestamp))
 demo = gr.Blocks()
 # get weekly metrics by market creator: qs, pearl or all.
 weekly_metrics_by_market_creator = compute_weekly_metrics_by_market_creator(
-    traders_data
+    traders_data, all_mech_calls
 )
 weekly_o_metrics_by_market_creator = compute_weekly_metrics_by_market_creator(
-    traders_data, trader_filter="Olas"
+    traders_data, all_mech_calls, trader_filter="Olas"
 )
 weekly_non_olas_metrics_by_market_creator = pd.DataFrame()
 if len(traders_data.loc[traders_data["staking"] == "non_Olas"]) > 0:
     weekly_non_olas_metrics_by_market_creator = (
-        compute_weekly_metrics_by_market_creator(traders_data, trader_filter="non_Olas")
+        compute_weekly_metrics_by_market_creator(
+            traders_data, all_mech_calls, trader_filter="non_Olas"
+        )
     )
 weekly_unknown_trader_metrics_by_market_creator = None
 if len(unknown_traders) > 0:
     weekly_unknown_trader_metrics_by_market_creator = (
         compute_weekly_metrics_by_market_creator(
-            unknown_traders, trader_filter=None, unknown_trader=True
+            unknown_traders, all_mech_calls, trader_filter=None, unknown_trader=True
         )
     )
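Note on the new load_all_data above: the download → gunzip → read → delete sequence is written out twice. A minimal sketch of how it could be factored into a single helper; the read_dataset_parquet name and its defaulted repo_id argument are illustrative, not part of this commit:

import gzip
import os
import shutil

import pandas as pd
from huggingface_hub import hf_hub_download


def read_dataset_parquet(
    filename: str, repo_id: str = "valory/Olas-predict-dataset"
) -> pd.DataFrame:
    """Download a (possibly gzip-compressed) parquet file from a HF dataset repo and load it."""
    local_path = hf_hub_download(repo_id=repo_id, filename=filename, repo_type="dataset")
    if not local_path.endswith(".gz"):
        return pd.read_parquet(local_path)
    # Decompress next to the cached .gz file, read it, then drop the temporary copy
    parquet_path = local_path[: -len(".gz")]
    with gzip.open(local_path, "rb") as f_in, open(parquet_path, "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)
    df = pd.read_parquet(parquet_path)
    os.remove(parquet_path)
    return df

With such a helper, load_all_data would reduce to seven read_dataset_parquet calls, one per dataset file.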
 
data/active_traders.parquet DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4e3553505541a2a8c6173b6bf2eb6d4375e667103d3c9e6157e40b8fee1a24c6
-size 73479

data/all_trades_profitability.parquet DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3f35e07f90679e6ef7241b3ddaffb3222e04eba8768c06d221358e5cec9a2dc7
-size 7968232

data/closed_markets_div.parquet DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:200149ef5bf001c3b3c10777b03887387074273dba0237333fae14421b699b88
-size 66133

data/daily_info.parquet DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:a12927dc3b935cb3208aea26426063a569cab6c27afb588765f751af9037e7c1
-size 896490

data/retention_activity.parquet DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:162ffffff8161f2fabc1ccb32ba2c7d51c402fa373d998698b1a9e868ecce8c6
-size 10795901

data/unknown_daily_traders.parquet DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:fe6aaf7bc0c124ed833a9f59327685c5c4686d3114945afb3face7259780731f
-size 56402

data/unknown_traders.parquet DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:4cf113d76828b906deb1604877cdd9009074cfc7f5730d86a442329966269dd3
-size 274887

data/weekly_mech_calls.parquet DELETED
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:813e94540515d33c6dcef25d56bf80a8d48c97af19a4e434ba1dbc0e2144270f
-size 53556
requirements.txt CHANGED
@@ -10,5 +10,4 @@ pydantic
 pydantic_core
 nbformat
 pytz
-duckdb
 ipfshttpclient
scripts/closed_markets_divergence.py DELETED
@@ -1,269 +0,0 @@
-import os
-import pandas as pd
-import numpy as np
-from typing import Any, Union
-from string import Template
-import requests
-import pickle
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from tqdm import tqdm
-import time
-from datetime import datetime
-from utils import DATA_DIR, TMP_DIR
-
-NUM_WORKERS = 10
-IPFS_POLL_INTERVAL = 0.2
-INVALID_ANSWER_HEX = (
-    "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
-)
-INVALID_ANSWER = -1
-SUBGRAPH_API_KEY = os.environ.get("SUBGRAPH_API_KEY", None)
-OMEN_SUBGRAPH_URL = Template(
-    """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"""
-)
-get_token_amounts_query = Template(
-    """
-    {
-
-      fpmmLiquidities(
-        where: {
-          fpmm_: {
-            creator: "${fpmm_creator}",
-            id: "${fpmm_id}",
-          },
-          id_gt: ""
-        }
-        orderBy: creationTimestamp
-        orderDirection: asc
-      )
-      {
-        id
-        outcomeTokenAmounts
-        creationTimestamp
-        additionalLiquidityParameter
-      }
-    }
-    """
-)
-CREATOR = "0x89c5cc945dd550BcFfb72Fe42BfF002429F46Fec"
-PEARL_CREATOR = "0xFfc8029154ECD55ABED15BD428bA596E7D23f557"
-market_creators_map = {"quickstart": CREATOR, "pearl": PEARL_CREATOR}
-headers = {
-    "Accept": "application/json, multipart/mixed",
-    "Content-Type": "application/json",
-}
-
-
-def _to_content(q: str) -> dict[str, Any]:
-    """Convert the given query string to payload content, i.e., add it under a `queries` key and convert it to bytes."""
-    finalized_query = {
-        "query": q,
-        "variables": None,
-        "extensions": {"headers": None},
-    }
-    return finalized_query
-
-
-def collect_liquidity_info(
-    index: int, fpmm_id: str, market_creator: str
-) -> dict[str, Any]:
-    omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
-    market_creator_id = market_creators_map[market_creator]
-    query = get_token_amounts_query.substitute(
-        fpmm_creator=market_creator_id.lower(),
-        fpmm_id=fpmm_id,
-    )
-    content_json = _to_content(query)
-    # print(f"Executing liquidity query {query}")
-    res = requests.post(omen_subgraph, headers=headers, json=content_json)
-    result_json = res.json()
-    tokens_info = result_json.get("data", {}).get("fpmmLiquidities", [])
-    if not tokens_info:
-        return None
-
-    # the last item is the final information of the market
-    last_info = tokens_info[-1]
-    token_amounts = [int(x) for x in last_info["outcomeTokenAmounts"]]
-    time.sleep(IPFS_POLL_INTERVAL)
-    return {fpmm_id: token_amounts}
-
-
-def convert_hex_to_int(x: Union[str, float]) -> Union[int, float]:
-    """Convert hex to int"""
-    if isinstance(x, float):
-        return np.nan
-    if isinstance(x, str):
-        if x == INVALID_ANSWER_HEX:
-            return "invalid"
-        return "yes" if int(x, 16) == 0 else "no"
-
-
-def get_closed_markets():
-    print("Reading parquet file with closed markets data from trades")
-    try:
-        markets = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
-    except Exception:
-        print("Error reading the parquet file")
-
-    columns_of_interest = [
-        "fpmm.currentAnswer",
-        "fpmm.id",
-        "fpmm.openingTimestamp",
-        "market_creator",
-    ]
-    markets = markets[columns_of_interest]
-    markets.rename(
-        columns={
-            "fpmm.currentAnswer": "currentAnswer",
-            "fpmm.openingTimestamp": "openingTimestamp",
-            "fpmm.id": "id",
-        },
-        inplace=True,
-    )
-    markets = markets.drop_duplicates(subset=["id"], keep="last")
-    # remove invalid answers
-    markets = markets.loc[markets["currentAnswer"] != INVALID_ANSWER_HEX]
-    markets["currentAnswer"] = markets["currentAnswer"].apply(
-        lambda x: convert_hex_to_int(x)
-    )
-    markets.dropna(inplace=True)
-    markets["opening_datetime"] = markets["openingTimestamp"].apply(
-        lambda x: datetime.fromtimestamp(int(x))
-    )
-    markets = markets.sort_values(by="opening_datetime", ascending=True)
-    return markets
-
-
-def kl_divergence(P, Q):
-    """
-    Compute KL divergence for a single sample with two prob distributions.
-
-    :param P: True distribution
-    :param Q: Approximating distribution
-    :return: KL divergence value
-    """
-    # Review edge cases
-    if P[0] == Q[0]:
-        return 0.0
-    # If P is complete opposite of Q, divergence is some max value.
-    # Here set to 20--allows for Q [\mu, 1-\mu] or Q[1-\mu, \mu] where \mu = 10^-8
-    if P[0] == Q[1]:
-        return 20
-
-    nonzero = P > 0.0
-    # Compute KL divergence
-    kl_div = np.sum(P[nonzero] * np.log(P[nonzero] / Q[nonzero]))
-
-    return kl_div
-
-
-def market_KL_divergence(market_row: pd.DataFrame) -> float:
-    """Function to compute the divergence based on the formula
-    Formula in https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence"""
-    current_answer = market_row.currentAnswer  # "yes", "no"
-    approx_prob = market_row.first_outcome_prob
-    true_prob = 1.0  # for yes outcome
-    if current_answer == "no":
-        true_prob = 0.0  # = 0% for yes outcome and 100% for no
-
-    # we have only one sample, the final probability based on tokens
-    # Ensure probabilities sum to 1
-    P = np.array([true_prob, 1 - true_prob])
-    Q = np.array([approx_prob, 1 - approx_prob])
-    return kl_divergence(P, Q)
-
-
-def off_by_values(market_row: pd.DataFrame) -> float:
-    current_answer = market_row.currentAnswer  # "yes", "no"
-    approx_prob = market_row.first_outcome_prob
-    true_prob = 1.0  # for yes outcome
-    if current_answer == "no":
-        true_prob = 0.0  # = 0% for yes outcome and 100% for no
-
-    # we have only one sample, the final probability based on tokens
-    # Ensure probabilities sum to 1
-    P = np.array([true_prob, 1 - true_prob])
-    Q = np.array([approx_prob, 1 - approx_prob])
-    return abs(P[0] - Q[0]) * 100.0
-
-
-def compute_tokens_prob(token_amounts: list) -> list:
-    first_token_amounts = token_amounts[0]
-    second_token_amounts = token_amounts[1]
-    total_tokens = first_token_amounts + second_token_amounts
-    first_token_prob = 1 - round((first_token_amounts / total_tokens), 4)
-    return [first_token_prob, 1 - first_token_prob]
-
-
-def prepare_closed_markets_data():
-    closed_markets = get_closed_markets()
-    closed_markets["first_outcome_prob"] = -1.0
-    closed_markets["second_outcome_prob"] = -1.0
-    total_markets = len(closed_markets)
-    markets_no_info = []
-    no_info = 0
-    with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
-        futures = []
-        for i in range(total_markets):
-            futures.append(
-                executor.submit(
-                    collect_liquidity_info,
-                    i,
-                    closed_markets.iloc[i].id,
-                    closed_markets.iloc[i].market_creator,
-                )
-            )
-        markets_with_info = 0
-        for future in tqdm(
-            as_completed(futures),
-            total=len(futures),
-            desc=f"Fetching Market liquidity info",
-        ):
-            token_amounts_dict = future.result()
-            if token_amounts_dict:
-                fpmm_id, token_amounts = token_amounts_dict.popitem()
-                if token_amounts:
-                    tokens_prob = compute_tokens_prob(token_amounts)
-                    closed_markets.loc[
-                        closed_markets["id"] == fpmm_id, "first_outcome_prob"
-                    ] = tokens_prob[0]
-                    closed_markets.loc[
-                        closed_markets["id"] == fpmm_id, "second_outcome_prob"
-                    ] = tokens_prob[1]
-                    markets_with_info += 1
-                else:
-                    tqdm.write(f"Skipping market with no liquidity info")
-                    markets_no_info.append(i)
-            else:
-                tqdm.write(f"Skipping market with no liquidity info")
-                no_info += 1
-
-    print(f"Markets with info = {markets_with_info}")
-    # Removing markets with no liq info
-    closed_markets = closed_markets.loc[closed_markets["first_outcome_prob"] != -1.0]
-    print(
-        f"Finished computing all markets liquidity info. Final length = {len(closed_markets)}"
-    )
-    if len(markets_no_info) > 0:
-        print(
-            f"There were {len(markets_no_info)} markets with no liquidity info. Printing some index of the dataframe"
-        )
-        with open("no_liq_info.pickle", "wb") as file:
-            pickle.dump(markets_no_info, file)
-        print(markets_no_info[:1])
-    print(closed_markets.head())
-    # Add the Kullback–Leibler divergence values
-    print("Computing Kullback–Leibler (KL) divergence")
-    closed_markets["kl_divergence"] = closed_markets.apply(
-        lambda x: market_KL_divergence(x), axis=1
-    )
-    closed_markets["off_by_perc"] = closed_markets.apply(
-        lambda x: off_by_values(x), axis=1
-    )
-    closed_markets.to_parquet(DATA_DIR / "closed_markets_div.parquet", index=False)
-    print("Finished preparing final dataset for visualization")
-    print(closed_markets.head())
-
-
-if __name__ == "__main__":
-    prepare_closed_markets_data()
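For reference, the deleted kl_divergence helper computed the discrete Kullback–Leibler divergence over the two market outcomes, with the edge cases capped as in the code above:

    D_KL(P || Q) = sum_i P_i * log(P_i / Q_i)

For example, a market that resolved "yes" gives P = [1, 0]; with a final token-implied distribution Q = [0.8, 0.2], D_KL = 1 * log(1 / 0.8) ≈ 0.223.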
scripts/metrics.py CHANGED
@@ -1,16 +1,38 @@
 import pandas as pd
 from tqdm import tqdm
-from scripts.num_mech_calls import (
-    get_weekly_total_mech_calls,
-)
 from scripts.utils import get_next_week

 DEFAULT_MECH_FEE = 0.01  # xDAI


+def get_weekly_total_mech_calls(
+    trader_data: pd.DataFrame, all_mech_calls_df: pd.DataFrame
+) -> int:
+    """Function to compute the total weekly number of mech calls for all markets
+    that the trader bet upon"""
+
+    trading_weeks = trader_data.month_year_week.unique()
+    trader_address = trader_data.trader_address.unique()[0]
+    if len(trading_weeks) > 1:
+        raise ValueError("The trader data should contain only one week information")
+    trading_week = trading_weeks[0]
+    try:
+        return all_mech_calls_df.loc[
+            (all_mech_calls_df["trader_address"] == trader_address)
+            & (all_mech_calls_df["month_year_week"] == trading_week),
+            "total_mech_calls",
+        ].iloc[0]
+    except Exception as e:
+        print(
+            f"Error getting the number of mech calls for the trader {trader_address} and week {trading_week}"
+        )
+        return 280  # average number 40 mech calls in 7 days
+
+
 def compute_metrics(
     trader_address: str,
     trader_data: pd.DataFrame,
+    all_mech_calls: pd.DataFrame,
     live_metrics: bool = False,
     unknown_trader: bool = False,
 ) -> dict:
@@ -29,7 +51,9 @@ def compute_metrics(
         # num of mech calls is always zero
         total_nr_mech_calls_all_markets = 0
     else:
-        total_nr_mech_calls_all_markets = get_weekly_total_mech_calls(trader_data)
+        total_nr_mech_calls_all_markets = get_weekly_total_mech_calls(
+            trader_data=trader_data, all_mech_calls_df=all_mech_calls
+        )

     agg_metrics["bet_amount"] = total_bet_amounts
     agg_metrics["nr_mech_calls"] = total_nr_mech_calls_all_markets
@@ -56,6 +80,7 @@ def compute_metrics(
 def compute_trader_metrics_by_market_creator(
     trader_address: str,
     traders_data: pd.DataFrame,
+    all_mech_calls: pd.DataFrame,
     market_creator: str = "all",
     live_metrics: bool = False,
     unknown_trader: bool = False,
@@ -78,19 +103,28 @@ def compute_trader_metrics_by_market_creator(
         return {}  # No Data

     metrics = compute_metrics(
-        trader_address, filtered_traders_data, live_metrics, unknown_trader
+        trader_address,
+        filtered_traders_data,
+        all_mech_calls,
+        live_metrics,
+        unknown_trader,
     )
     return metrics


 def merge_trader_weekly_metrics(
-    trader: str, weekly_data: pd.DataFrame, week: str, unknown_trader: bool = False
+    trader: str,
+    weekly_data: pd.DataFrame,
+    all_mech_calls: pd.DataFrame,
+    week: str,
+    unknown_trader: bool = False,
 ) -> pd.DataFrame:
     trader_metrics = []
     # computation as specification 1 for all types of markets
     weekly_metrics_all = compute_trader_metrics_by_market_creator(
         trader,
         weekly_data,
+        all_mech_calls=all_mech_calls,
         market_creator="all",
         live_metrics=False,
         unknown_trader=unknown_trader,
@@ -103,6 +137,7 @@ def merge_trader_weekly_metrics(
     weekly_metrics_qs = compute_trader_metrics_by_market_creator(
         trader,
         weekly_data,
+        all_mech_calls=all_mech_calls,
         market_creator="quickstart",
         live_metrics=False,
         unknown_trader=unknown_trader,
@@ -115,6 +150,7 @@ def merge_trader_weekly_metrics(
     weekly_metrics_pearl = compute_trader_metrics_by_market_creator(
         trader,
         weekly_data,
+        all_mech_calls=all_mech_calls,
         market_creator="pearl",
         live_metrics=False,
         unknown_trader=unknown_trader,
@@ -128,12 +164,19 @@ def merge_trader_weekly_metrics(


 def merge_trader_daily_metrics(
-    trader: str, daily_data: pd.DataFrame, day: str, live_metrics: bool = False
+    trader: str,
+    daily_data: pd.DataFrame,
+    day: str,
+    live_metrics: bool = False,
 ) -> pd.DataFrame:
     trader_metrics = []
     # computation as specification 1 for all types of markets
     daily_metrics_all = compute_trader_metrics_by_market_creator(
-        trader, daily_data, market_creator="all", live_metrics=live_metrics
+        trader,
+        daily_data,
+        all_mech_calls=None,
+        market_creator="all",
+        live_metrics=live_metrics,
     )
     daily_metrics_all["creation_date"] = day
     # staking label is at the trader level
@@ -142,7 +185,11 @@ def merge_trader_daily_metrics(

     # computation as specification 1 for quickstart markets
     daily_metrics_qs = compute_trader_metrics_by_market_creator(
-        trader, daily_data, market_creator="quickstart", live_metrics=live_metrics
+        trader,
+        daily_data,
+        all_mech_calls=None,
+        market_creator="quickstart",
+        live_metrics=live_metrics,
     )
     if len(daily_metrics_qs) > 0:
         daily_metrics_qs["creation_date"] = day
@@ -150,7 +197,11 @@ def merge_trader_daily_metrics(
         trader_metrics.append(daily_metrics_qs)
     # computation as specification 1 for pearl markets
     daily_metrics_pearl = compute_trader_metrics_by_market_creator(
-        trader, daily_data, market_creator="pearl", live_metrics=live_metrics
+        trader,
+        daily_data,
+        all_mech_calls=None,
+        market_creator="pearl",
+        live_metrics=live_metrics,
     )
     if len(daily_metrics_pearl) > 0:
         daily_metrics_pearl["creation_date"] = day
@@ -178,7 +229,10 @@ def win_metrics_trader_level(weekly_data):


 def compute_weekly_metrics_by_market_creator(
-    traders_data: pd.DataFrame, trader_filter: str = None, unknown_trader: bool = False
+    traders_data: pd.DataFrame,
+    all_mech_calls: pd.DataFrame,
+    trader_filter: str = None,
+    unknown_trader: bool = False,
 ) -> pd.DataFrame:
     """Function to compute the metrics at the trader level per week
     and with different categories by market creator"""
@@ -198,18 +252,22 @@ def compute_weekly_metrics_by_market_creator(
             if trader_filter is None:
                 contents.append(
                     merge_trader_weekly_metrics(
-                        trader, weekly_data, week, unknown_trader
+                        trader, weekly_data, all_mech_calls, week, unknown_trader
                     )
                 )
             elif trader_filter == "Olas":
                 filtered_data = weekly_data.loc[weekly_data["staking"] != "non_Olas"]
                 contents.append(
-                    merge_trader_weekly_metrics(trader, filtered_data, week)
+                    merge_trader_weekly_metrics(
+                        trader, filtered_data, all_mech_calls, week
+                    )
                 )
             else:  # non_Olas traders
                 filtered_data = weekly_data.loc[weekly_data["staking"] == "non_Olas"]
                 contents.append(
-                    merge_trader_weekly_metrics(trader, filtered_data, week)
+                    merge_trader_weekly_metrics(
+                        trader, filtered_data, all_mech_calls, week
+                    )
                )

     print("End computing all weekly metrics by market creator")
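For reference, a toy invocation of the relocated get_weekly_total_mech_calls with in-memory frames; the column names come from the diff above, while the addresses, weeks, and counts are made up:

import pandas as pd
from scripts.metrics import get_weekly_total_mech_calls

all_mech_calls_df = pd.DataFrame(
    {
        "trader_address": ["0xabc", "0xdef"],
        "month_year_week": ["Jan-01-2024", "Jan-01-2024"],
        "total_mech_calls": [12, 40],
    }
)
trader_data = pd.DataFrame(
    {
        "trader_address": ["0xabc", "0xabc"],
        "month_year_week": ["Jan-01-2024", "Jan-01-2024"],
    }
)
# Matches the (trader, week) pair and returns 12; a missing pair would take
# the except branch and return the hard-coded fallback of 280.
print(get_weekly_total_mech_calls(trader_data, all_mech_calls_df))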
scripts/num_mech_calls.py DELETED
@@ -1,120 +0,0 @@
-import pandas as pd
-
-try:
-    from utils import DATA_DIR, TMP_DIR
-except ImportError:
-    from scripts.utils import DATA_DIR, TMP_DIR
-
-from datetime import datetime, timezone
-from tqdm import tqdm
-
-
-def transform_to_datetime(x):
-    return datetime.fromtimestamp(int(x), tz=timezone.utc)
-
-
-def get_weekly_total_mech_calls(trader_data: pd.DataFrame) -> int:
-    """Function to compute the total weekly number of mech calls for all markets
-    that the trader bet upon"""
-    try:
-        all_mech_calls_df = pd.read_parquet(DATA_DIR / "weekly_mech_calls.parquet")
-    except Exception:
-        print("Error reading the weekly_mech_calls file")
-
-    trading_weeks = trader_data.month_year_week.unique()
-    trader_address = trader_data.trader_address.unique()[0]
-    if len(trading_weeks) > 1:
-        raise ValueError("The trader data should contain only one week information")
-    trading_week = trading_weeks[0]
-    try:
-        return all_mech_calls_df.loc[
-            (all_mech_calls_df["trader_address"] == trader_address)
-            & (all_mech_calls_df["month_year_week"] == trading_week),
-            "total_mech_calls",
-        ].iloc[0]
-    except Exception as e:
-        print(
-            f"Error getting the number of mech calls for the trader {trader_address} and week {trading_week}"
-        )
-        return 280  # average number 40 mech calls in 7 days
-
-
-def compute_weekly_total_mech_calls(
-    trader: str, week: str, weekly_trades: pd.DataFrame, weekly_tools: pd.DataFrame
-) -> dict:
-    weekly_total_mech_calls_dict = {}
-    weekly_total_mech_calls_dict["trader_address"] = trader
-    weekly_total_mech_calls_dict["month_year_week"] = week
-    weekly_total_mech_calls_dict["total_trades"] = len(weekly_trades)
-    weekly_total_mech_calls_dict["total_mech_calls"] = len(weekly_tools)
-    return weekly_total_mech_calls_dict
-
-
-def compute_total_mech_calls():
-    """Function to compute the total number of mech calls for all traders and all markets
-    at a weekly level"""
-    try:
-        print("Reading tools file")
-        tools = pd.read_parquet(TMP_DIR / "tools.parquet")
-        tools["request_time"] = pd.to_datetime(tools["request_time"])
-        tools["request_date"] = tools["request_time"].dt.date
-        tools = tools.sort_values(by="request_time", ascending=True)
-        tools["month_year_week"] = (
-            tools["request_time"]
-            .dt.to_period("W")
-            .dt.start_time.dt.strftime("%b-%d-%Y")
-        )
-
-    except Exception as e:
-        print(f"Error updating the invalid trades parquet {e}")
-
-    print("Reading trades weekly info file")
-    fpmmTrades = pd.read_parquet(TMP_DIR / "fpmmTrades.parquet")
-    try:
-        fpmmTrades["creationTimestamp"] = fpmmTrades["creationTimestamp"].apply(
-            lambda x: transform_to_datetime(x)
-        )
-    except Exception as e:
-        print(f"Transformation not needed")
-
-    fpmmTrades["creation_timestamp"] = pd.to_datetime(fpmmTrades["creationTimestamp"])
-    fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
-    fpmmTrades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)
-    fpmmTrades["month_year_week"] = (
-        fpmmTrades["creation_timestamp"]
-        .dt.to_period("W")
-        .dt.start_time.dt.strftime("%b-%d-%Y")
-    )
-
-    nr_traders = len(fpmmTrades["trader_address"].unique())
-    all_mech_calls = []
-    for trader in tqdm(
-        fpmmTrades["trader_address"].unique(),
-        total=nr_traders,
-        desc="creating weekly mech calls dataframe",
-    ):
-        # compute the mech calls estimations for each trader
-        all_trades = fpmmTrades[fpmmTrades["trader_address"] == trader]
-        all_tools = tools[tools["trader_address"] == trader]
-        weeks = fpmmTrades.month_year_week.unique()
-
-        for week in weeks:
-            weekly_trades = all_trades.loc[all_trades["month_year_week"] == week]
-            weekly_tools = all_tools.loc[all_tools["month_year_week"] == week]
-
-            weekly_mech_calls_dict = compute_weekly_total_mech_calls(
-                trader, week, weekly_trades, weekly_tools
-            )
-            all_mech_calls.append(weekly_mech_calls_dict)
-
-    all_mech_calls_df: pd.DataFrame = pd.DataFrame.from_dict(
-        all_mech_calls, orient="columns"
-    )
-    print("Saving weekly_mech_calls.parquet file")
-    print(all_mech_calls_df.total_mech_calls.describe())
-
-    all_mech_calls_df.to_parquet(DATA_DIR / "weekly_mech_calls.parquet", index=False)
-
-
-if __name__ == "__main__":
-    compute_total_mech_calls()