cyberosa committed
Commit 10cf834 · Parent: 8d5221e

new scripts and new live data

live_data/markets_live_data.parquet CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:234dbf259fdc32ce709e52ccf7a571af34045398c6853a579251bbe3ee682ac7
- size 52537
+ oid sha256:6284205c09e9328083c363b6e7217bff4d318f9d1e366f191693e200f8b5810a
+ size 55976
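
The parquet file is tracked with Git LFS, so the diff only updates the pointer: the `oid` (SHA-256 of the object) and `size` change as the snapshot is refreshed. A minimal sketch of loading the refreshed snapshot, assuming the actual LFS object has been fetched (e.g. via `git lfs pull`) and pandas with a parquet engine is installed:

```python
import pandas as pd

# Load the live markets snapshot; if only the LFS pointer file is present
# on disk, this will fail until the real object is fetched.
df = pd.read_parquet("live_data/markets_live_data.parquet")
print(df.shape)
```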
scripts/closed_markets_data.py ADDED
@@ -0,0 +1,678 @@
+ # -*- coding: utf-8 -*-
+ # ------------------------------------------------------------------------------
+ #
+ # Copyright 2024 Valory AG
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+ # ------------------------------------------------------------------------------
+
+ import functools
+ import warnings
+ import os
+ import logging
+ import pandas as pd
+ from collections import defaultdict
+ from typing import Any, Callable, Dict, Generator, List, Optional, Tuple
+ from tqdm import tqdm
+ from datetime import datetime, timedelta, UTC
+ import requests
+ from math import prod
+
+ from utils import (
+     SUBGRAPH_API_KEY,
+     measure_execution_time,
+     OMEN_SUBGRAPH_URL,
+     CREATOR,
+     BATCH_SIZE,
+     DATA_DIR,
+     _to_content,
+ )
+ from queries import (
+     FPMMS_CLOSED_MARKETS_QUERY,
+     ID_FIELD,
+     DATA_FIELD,
+     QUERY_FIELD,
+     TITLE_FIELD,
+     OPENING_TIMESTAMP_FIELD,
+     CREATION_TIMESTAMP_FIELD,
+     LIQUIDITY_FIELD,
+     LIQUIDIY_MEASURE_FIELD,
+     TOKEN_AMOUNTS_FIELD,
+     ERROR_FIELD,
+     FPMMS_FIELD,
+     omen_market_sorted_trades_query,
+     get_initial_amount_tokens_query,
+ )
+
+ headers = {
+     "Accept": "application/json, multipart/mixed",
+     "Content-Type": "application/json",
+ }
+ logger = logging.getLogger(__name__)
+
+ ResponseItemType = List[Dict[str, str]]
+ SubgraphResponseType = Dict[str, ResponseItemType]
+
+
+ class RetriesExceeded(Exception):
+     """Exception to raise when retries are exceeded during data-fetching."""
+
+     def __init__(
+         self, msg="Maximum retries were exceeded while trying to fetch the data!"
+     ):
+         super().__init__(msg)
+
+
+ def hacky_retry(func: Callable, n_retries: int = 3) -> Callable:
+     """Create a hacky retry strategy.
+
+     Unfortunately, we cannot use `requests.packages.urllib3.util.retry.Retry`,
+     because the subgraph does not return the appropriate status codes in case of failure.
+     Instead, it always returns code 200. Thus, we raise exceptions manually inside `make_request`,
+     catch those exceptions in the hacky retry decorator and try again.
+     Finally, if the allowed number of retries is exceeded, we raise a custom `RetriesExceeded` exception.
+
+     :param func: the input request function.
+     :param n_retries: the maximum allowed number of retries.
+     :return: The request method with the hacky retry strategy applied.
+     """
+
+     @functools.wraps(func)
+     def wrapper_hacky_retry(*args, **kwargs) -> SubgraphResponseType:
+         """The wrapper for the hacky retry.
+
+         :return: a response dictionary.
+         """
+         retried = 0
+
+         while retried <= n_retries:
+             try:
+                 if retried > 0:
+                     warnings.warn(f"Retrying {retried}/{n_retries}...")
+
+                 return func(*args, **kwargs)
+             except (ValueError, ConnectionError) as e:
+                 warnings.warn(e.args[0])
+             finally:
+                 retried += 1
+
+         raise RetriesExceeded()
+
+     return wrapper_hacky_retry
+
+
+ @hacky_retry
+ def query_subgraph(url: str, query: str, key: str) -> SubgraphResponseType:
+     """Query a subgraph.
+
+     Args:
+         url: the subgraph's URL.
+         query: the query to be used.
+         key: the key to use in order to access the required data.
+
+     Returns:
+         a response dictionary.
+     """
+     content = {QUERY_FIELD: query}
+     headers = {
+         "Accept": "application/json",
+         "Content-Type": "application/json",
+     }
+     res = requests.post(url, json=content, headers=headers)
+
+     if res.status_code != 200:
+         raise ConnectionError(
+             "Something went wrong while trying to communicate with the subgraph "
+             f"(Error: {res.status_code})!\n{res.text}"
+         )
+
+     body = res.json()
+     if ERROR_FIELD in body.keys():
+         raise ValueError(f"The given query is not correct: {body[ERROR_FIELD]}")
+
+     data = body.get(DATA_FIELD, {}).get(key, None)
+     if data is None:
+         raise ValueError(f"Unknown error encountered!\nRaw response: \n{body}")
+
+     return data
+
+
+ def fpmms_fetcher(
+     start_timestamp: int, end_timestamp: int
+ ) -> Generator[ResponseItemType, str, None]:
+     """An indefinite fetcher for the FPMMs."""
+
+     omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
+     logger.debug(f"omen_subgraph = {omen_subgraph}")
+     while True:
+         fpmm_id = yield
+         fpmms_query = FPMMS_CLOSED_MARKETS_QUERY.substitute(
+             creator=CREATOR,
+             fpmm_id=fpmm_id,
+             start_timestamp=start_timestamp,
+             end_timestamp=end_timestamp,
+             fpmms_field=FPMMS_FIELD,
+             id_field=ID_FIELD,
+             title_field=TITLE_FIELD,
+             opening_timestamp_field=OPENING_TIMESTAMP_FIELD,
+             creation_timestamp_field=CREATION_TIMESTAMP_FIELD,
+             liquidity_field=LIQUIDITY_FIELD,
+             liquidity_measure_field=LIQUIDIY_MEASURE_FIELD,
+             token_amounts_field=TOKEN_AMOUNTS_FIELD,
+         )
+         logger.debug(f"Executing query {fpmms_query}")
+         yield query_subgraph(omen_subgraph, fpmms_query, FPMMS_FIELD)
+
+
+ def fetch_fpmms(start_timestamp: int, end_timestamp: int) -> pd.DataFrame:
+     """Fetch all the fpmms of the creator."""
+     logger.info("Fetching all markets")
+     latest_id = ""
+     fpmms = []
+     fetcher = fpmms_fetcher(start_timestamp, end_timestamp)
+     for _ in tqdm(fetcher, unit="fpmms", unit_scale=BATCH_SIZE):
+         # the generator pauses at the bare `yield`; send the id cursor to get a batch
+         batch = fetcher.send(latest_id)
+         if len(batch) == 0:
+             logger.debug("no data")
+             break
+
+         logger.debug(f"length of the data received = {len(batch)}")
+         latest_id = batch[-1].get(ID_FIELD, "")
+         if latest_id == "":
+             raise ValueError(f"Unexpected data format retrieved: {batch}")
+
+         fpmms.extend(batch)
+
+     logger.info("Finished collecting data")
+     return pd.DataFrame(fpmms)
+
+
+ def get_first_token_perc(row):
+     """Percentage of the total outcome tokens attributed to the first outcome
+     (the complement of its raw token share, following the AMM pricing convention)."""
+     if row["total_tokens"] == 0.0:
+         return 0
+     return 100.0 - round((row["token_first_amount"] / row["total_tokens"]) * 100, 2)
+
+
+ def get_second_token_perc(row):
+     """Percentage of the total outcome tokens attributed to the second outcome
+     (the complement of its raw token share, following the AMM pricing convention)."""
+     if row["total_tokens"] == 0.0:
+         return 0
+     return 100.0 - round((row["token_second_amount"] / row["total_tokens"]) * 100, 2)
+
+
+ def remove_testing_market(markets_data: pd.DataFrame) -> pd.DataFrame:
+     """Drop the known testing market from the dataset."""
+     return markets_data.loc[
+         markets_data["id"] != "0xbfa584b29891941c8950ce975c1f7fa595ce1b99"
+     ]
+
+
+ def transform_fpmms(fpmms: pd.DataFrame) -> pd.DataFrame:
+     """Transform an FPMMS dataframe."""
+
+     fpmms["liquidityMeasure"] = fpmms["liquidityMeasure"].apply(int)
+     fpmms["creationTimestamp"] = fpmms["creationTimestamp"].apply(
+         lambda x: datetime.fromtimestamp(int(x))
+     )
+     fpmms["openingTimestamp"] = fpmms["openingTimestamp"].apply(
+         lambda x: datetime.fromtimestamp(int(x))
+     )
+     fpmms["token_first_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[0]))
+     fpmms["token_second_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[1]))
+     fpmms["total_tokens"] = fpmms["token_first_amount"] + fpmms["token_second_amount"]
+     fpmms["first_token_perc"] = fpmms.apply(get_first_token_perc, axis=1)
+     fpmms["second_token_perc"] = fpmms.apply(get_second_token_perc, axis=1)
+     fpmms.drop(
+         columns=[
+             "token_first_amount",
+             "token_second_amount",
+             "total_tokens",
+         ],
+         inplace=True,
+     )
+     return fpmms
+
+
+ @measure_execution_time
+ def collect_closed_markets(filename: Optional[str]) -> pd.DataFrame:
+     """Fetch, process, store and return the markets as a Dataframe."""
+
+     logger.info("fetching new markets information")
+     today = datetime.now(UTC)
+     delta = timedelta(days=150)  # roughly five months
+     ref_day = today - delta
+
+     start_timestamp = int(ref_day.timestamp())  # seconds
+     end_timestamp = int((today - timedelta(days=20)).timestamp())
+     fpmms = fetch_fpmms(start_timestamp, end_timestamp)
+     logger.debug("Finished markets data collection")
+     logger.debug(fpmms.info())
+
+     logger.info("transforming data")
+     fpmms = transform_fpmms(fpmms)
+
+     logger.info("saving the data")
+     # logger.debug(fpmms.info())
+     if filename:
+         fpmms.to_parquet(DATA_DIR / filename, index=False)
+
+     return fpmms
+
+
+ def collect_liquidity_info(fpmm_id: str, logger) -> Optional[List[int]]:
+     """Fetch the initial outcome-token amounts of the given market from the subgraph."""
+     omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
+     query = get_initial_amount_tokens_query.substitute(
+         fpmm_creator=CREATOR.lower(),
+         fpmm_id=fpmm_id,
+     )
+     content_json = _to_content(query)
+     logger.info(f"Executing liquidity query {query}")
+     res = requests.post(omen_subgraph, headers=headers, json=content_json)
+     result_json = res.json()
+     tokens_info = result_json.get("data", {}).get("fpmmLiquidities", [])
+     if not tokens_info:
+         return None
+
+     # the first item is the initial information of the market
+     first_info = tokens_info[0]
+     token_amounts = [int(x) for x in first_info["outcomeTokenAmounts"]]
+     return token_amounts
+
+
+ @measure_execution_time
+ def collect_trades_subgraph(fpmm_id: str, logger) -> Dict[str, Any]:
+     """Query the subgraph."""
+     omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
+     logger.info(f"omen_subgraph = {omen_subgraph}")
+     grouped_results = defaultdict(list)
+     id_gt = ""
+
+     while True:
+         query = omen_market_sorted_trades_query.substitute(
+             fpmm_creator=CREATOR.lower(),
+             first=BATCH_SIZE,
+             id_gt=id_gt,
+             fpmm_id=fpmm_id,
+         )
+         # logger.debug(f"query for the omen to collect trades {query}")
+         content_json = _to_content(query)
+
+         res = requests.post(omen_subgraph, headers=headers, json=content_json)
+         result_json = res.json()
+         user_trades = result_json.get("data", {}).get("fpmmTrades", [])
+
+         if not user_trades:
+             break
+
+         for trade in user_trades:
+             # do not shadow the fpmm_id parameter used by the paginated query
+             trade_fpmm_id = trade.get("fpmm", {}).get("id")
+             grouped_results[trade_fpmm_id].append(trade)
+
+         id_gt = user_trades[-1]["id"]
+
+     all_results = {
+         "data": {
+             "fpmmTrades": [
+                 trade
+                 for trades_list in grouped_results.values()
+                 for trade in trades_list
+             ]
+         }
+     }
+
+     return all_results
+
+
+ def compute_price_amounts(
+     trades_outcome_0: pd.DataFrame,
+     trades_outcome_1: pd.DataFrame,
+     prev_usd_outcome_0: Optional[float] = None,
+     prev_usd_outcome_1: Optional[float] = None,
+ ) -> Tuple:
+     total_usd_outcome_0 = trades_outcome_0.collateralAmountUSD.sum()
+     total_usd_outcome_1 = trades_outcome_1.collateralAmountUSD.sum()
+     if prev_usd_outcome_0 is not None:
+         total_usd_outcome_0 += prev_usd_outcome_0
+         total_usd_outcome_1 += prev_usd_outcome_1
+     return total_usd_outcome_0, total_usd_outcome_1
+
+
+ def compute_price_weighted_distributions(
+     total_usd_outcome_0, total_usd_outcome_1, logger
+ ) -> Tuple:
+     total_usd = total_usd_outcome_0 + total_usd_outcome_1
+     percentage_pwc_outcome_0 = round((total_usd_outcome_0 / total_usd) * 100, 2)
+     logger.debug(f"total amount for outcome 0 = {total_usd_outcome_0}")
+     logger.debug(f"total amount for outcome 1 = {total_usd_outcome_1}")
+     return percentage_pwc_outcome_0, 100 - percentage_pwc_outcome_0
+
+
+ def get_updated_amounts(
+     k: int,
+     price_amount_constant: float,
+     tokens_outcome_0: int,
+     tokens_outcome_1: int,
+     bet_per_token: int,
+     outcome_index: int,
+     old_price_outcome: float,
+     new_price_outcome: float,
+     first_trade: bool,
+ ):
+     if first_trade:
+         # first trade ever, same price for both tokens
+         other_shares = bet_per_token / old_price_outcome
+         if outcome_index == 0:
+             # the other outcome is outcome 1
+             new_other_tokens_amount = tokens_outcome_1 + other_shares
+             return int(k / new_other_tokens_amount), new_other_tokens_amount
+         else:
+             # the other outcome is outcome 0
+             new_other_tokens_amount = tokens_outcome_0 + other_shares
+             return new_other_tokens_amount, int(k / new_other_tokens_amount)
+
+     # we can use the price_amount constant to get the new final amount of tokens in the market
+     # for the selected outcome
+     new_voted_tokens_amount = price_amount_constant / new_price_outcome
+     new_other_tokens_amount = k / new_voted_tokens_amount
+     if outcome_index == 0:
+         return new_voted_tokens_amount, new_other_tokens_amount
+     # outcome_index == 1
+     return new_other_tokens_amount, new_voted_tokens_amount
+
+
+ def compute_token_amounts(
+     logger,
+     initial_amount_tokens: list,
+     initial_token_prices: list,
+     selected_trades: pd.DataFrame,
+     prev_tokens_first_outcome: int,
+     prev_tokens_second_outcome: int,
+ ) -> Tuple:
+     """Compute the final distribution of tokens depending on the trades done that day."""
+     # traverse the trades and adjust the initial amount of tokens
+     tokens_outcome_0, tokens_outcome_1 = initial_amount_tokens
+     # market constant
+     k = prod(initial_amount_tokens)
+     # price * token_amount constant, used to adjust the price of tokens
+     price_amount_constant = initial_amount_tokens[0] * initial_token_prices[0]
+     if prev_tokens_first_outcome != 0:
+         # reset the variables
+         tokens_outcome_0 = prev_tokens_first_outcome
+         tokens_outcome_1 = prev_tokens_second_outcome
+
+     for i in range(len(selected_trades)):
+         trade = selected_trades.iloc[i]
+         outcome_index = int(trade.outcomeIndex)
+         collateral_amount = int(trade.collateralAmount)
+         fee_amount = int(trade.feeAmount)
+         net_amount = collateral_amount - fee_amount
+         bet_per_token = int(net_amount / 2)
+         old_price_outcome = float(trade.oldOutcomeTokenMarginalPrice)
+         new_price_outcome = float(trade.outcomeTokenMarginalPrice)
+         tokens_outcome_0, tokens_outcome_1 = get_updated_amounts(
+             k,
+             price_amount_constant,
+             tokens_outcome_0,
+             tokens_outcome_1,
+             bet_per_token,
+             outcome_index,
+             old_price_outcome,
+             new_price_outcome,
+             prev_tokens_first_outcome == 0,
+         )
+
+     logger.info(f"New token amounts [{tokens_outcome_0}, {tokens_outcome_1}]")
+     return tokens_outcome_0, tokens_outcome_1
+
+
+ def compute_tokens_distribution(tokens_outcome_0, tokens_outcome_1) -> Tuple:
+     total_amount_tokens = tokens_outcome_0 + tokens_outcome_1
+     # the inverse of the final tokens distribution is the emergent probability
+     tokens_perc_first_outcome = 100.0 - round(
+         (tokens_outcome_0 / total_amount_tokens) * 100, 2
+     )
+     return tokens_perc_first_outcome, 100 - tokens_perc_first_outcome
+
+
+ def compute_daily_gap_metric(
+     market_trades: pd.DataFrame, market_id: str, logger
+ ) -> Optional[pd.DataFrame]:
+     """Compute the daily gap between the price-weighted and the token-based distributions."""
+     new_market_daily_metrics = pd.DataFrame()
+     # group by days, they are already sorted by creationTimestamp
+     market_trades["creation_date"] = market_trades["creationTimestamp"].apply(
+         lambda x: x.date()
+     )
+
+     # list of days to traverse
+     market_days = list(market_trades.creation_date.unique())
+     new_market_daily_metrics["market_day"] = market_days
+     new_market_daily_metrics["market_id"] = market_id
+     new_market_daily_metrics["daily_trades"] = 0
+     new_market_daily_metrics["mean_trade_size"] = 0.0
+     new_market_daily_metrics["total_bet_amount"] = 0.0
+     new_market_daily_metrics["dist_gap_perc"] = 0.0
+     new_market_daily_metrics["perc_total_trades"] = 0.0
+     new_market_daily_metrics["creator_id"] = (
+         "0x89c5cc945dd550BcFfb72Fe42BfF002429F46Fec"
+     )
+
+     logger.info(f"Total days to compute the metrics = {len(market_days)}")
+     total_nr_trades = len(market_trades)
+
+     # compute the initial amount of tokens for that market
+     initial_amount_tokens = collect_liquidity_info(market_id, logger)
+     if initial_amount_tokens is None:
+         logger.info("No initial tokens information for this market. Skipping it")
+         return None
+
+     logger.info(f"Initial amount of tokens {initial_amount_tokens}")
+     total_cum_trades = 0
+     # traverse the market days
+     for i, row in tqdm(
+         new_market_daily_metrics.iterrows(),
+         total=len(new_market_daily_metrics),
+         desc="Computing daily distributions",
+     ):
+         day = row["market_day"]
+         selected_trades = market_trades.loc[market_trades["creation_date"] == day]
+         logger.info(
+             f"Adding aggregated metrics for day={day}. Number of trades = {len(selected_trades)}"
+         )
+         daily_trades = len(selected_trades)
+         total_cum_trades += daily_trades
+         new_market_daily_metrics.at[i, "daily_trades"] = daily_trades
+         mean_trade_size = selected_trades.collateralAmountUSD.mean()
+         new_market_daily_metrics.at[i, "mean_trade_size"] = round(mean_trade_size, 3)
+         total_bet_amount = sum(selected_trades.collateralAmountUSD)
+         new_market_daily_metrics.at[i, "total_bet_amount"] = total_bet_amount
+         new_market_daily_metrics.at[i, "perc_total_trades"] = round(
+             (total_cum_trades / total_nr_trades) * 100, 2
+         )
+         # trades for outcome 0
+         trades_outcome_0 = selected_trades.loc[selected_trades["outcomeIndex"] == 0]
+         logger.debug(f"Total trades for outcome 0 = {len(trades_outcome_0)}")
+         # trades for outcome 1
+         trades_outcome_1 = selected_trades.loc[selected_trades["outcomeIndex"] == 1]
+         logger.debug(f"Total trades for outcome 1 = {len(trades_outcome_1)}")
+         # initial prices for tokens in the market
+         first_trade = selected_trades.iloc[0]
+         price_token_0 = float(
+             first_trade.oldOutcomeTokenMarginalPrice
+         )  # same price for both tokens
+         initial_token_prices = [price_token_0, price_token_0]
+         try:
+             # token amounts and price amounts
+             if i == 0:
+                 # first day, no previous information
+                 tokens_outcome_0, tokens_outcome_1 = compute_token_amounts(
+                     logger,
+                     initial_amount_tokens,
+                     initial_token_prices,
+                     selected_trades,
+                     0,
+                     0,
+                 )
+                 total_usd_outcome_0, total_usd_outcome_1 = compute_price_amounts(
+                     trades_outcome_0, trades_outcome_1
+                 )
+             else:
+                 tokens_outcome_0, tokens_outcome_1 = compute_token_amounts(
+                     logger,
+                     initial_amount_tokens,
+                     initial_token_prices,
+                     selected_trades,
+                     prev_tokens_outcome_0,
+                     prev_tokens_outcome_1,
+                 )
+                 total_usd_outcome_0, total_usd_outcome_1 = compute_price_amounts(
+                     trades_outcome_0,
+                     trades_outcome_1,
+                     prev_usd_outcome_0,
+                     prev_usd_outcome_1,
+                 )
+
+             prev_tokens_outcome_0 = tokens_outcome_0
+             prev_tokens_outcome_1 = tokens_outcome_1
+             prev_usd_outcome_0 = total_usd_outcome_0
+             prev_usd_outcome_1 = total_usd_outcome_1
+             # price weighted distribution at the end of the day
+             pwc_first_outcome, pwc_second_outcome = (
+                 compute_price_weighted_distributions(
+                     total_usd_outcome_0, total_usd_outcome_1, logger
+                 )
+             )
+             logger.info(
+                 f"pwc distributions eod = {pwc_first_outcome, pwc_second_outcome}"
+             )
+             # distribution based on initial token amounts
+             tokens_perc_first_outcome, tokens_perc_second_outcome = (
+                 compute_tokens_distribution(
+                     tokens_outcome_0,
+                     tokens_outcome_1,
+                 )
+             )
+             logger.info(
+                 f"token distributions eod = {tokens_perc_first_outcome, tokens_perc_second_outcome}"
+             )
+             # bought tokens distribution at the end of the day
+             # bought_tokens_first, bought_tokens_second = compute_bought_tokens_distribution(
+             #     trades_outcome_0, trades_outcome_1
+             # )
+             metric = abs(tokens_perc_first_outcome - pwc_first_outcome)
+             logger.info(f"eod {day}: gap metric {metric}")
+             new_market_daily_metrics.at[i, "dist_gap_perc"] = metric
+         except Exception as e:
+             logger.error(e)
+             logger.warning(
+                 f"It was not possible to compute the distributions for the day {day} and market id {market_id}"
+             )
+             continue
+     return new_market_daily_metrics
+
+
+ def collect_trades_info(filename, logger):
+     """Collect the trades of each market and compute its daily gap metrics."""
+     if os.path.exists(DATA_DIR / filename):
+         fpmms = pd.read_parquet(DATA_DIR / filename)
+     else:
+         raise FileNotFoundError("Parquet file with trades not found")
+
+     fpmms = remove_testing_market(fpmms)
+     # Iterate over the markets
+     markets_with_daily_metrics = None
+
+     for i, fpmm in tqdm(fpmms.iterrows(), total=len(fpmms), desc="Analysing trades"):
+         # get the market id
+         market_id = fpmm["id"]
+         logger.info(f"Adding trades information for the market {market_id}")
+         try:
+             market_trades_json = collect_trades_subgraph(
+                 fpmm_id=market_id, logger=logger
+             )
+             market_trades = pd.DataFrame(market_trades_json["data"]["fpmmTrades"])
+             if len(market_trades) == 0:
+                 logger.warning("No trades for this market")
+                 continue
+
+             market_trades.outcomeIndex = pd.to_numeric(
+                 market_trades.outcomeIndex, errors="coerce"
+             )
+             fpmms.at[i, "total_trades"] = len(market_trades)
+         except Exception:
+             fpmms.at[i, "total_trades"] = 0
+             continue
+
+         if len(market_trades) > 0:
+             # formatting types
+             market_trades["collateralAmountUSD"] = (
+                 market_trades.collateralAmountUSD.apply(lambda x: round(float(x), 3))
+             )
+             market_trades["outcomeTokensTraded"] = (
+                 market_trades.outcomeTokensTraded.apply(lambda x: int(x))
+             )
+             market_trades["oldOutcomeTokenMarginalPrice"] = (
+                 market_trades.oldOutcomeTokenMarginalPrice.apply(
+                     lambda x: round(float(x), 9)
+                 )
+             )
+             market_trades["outcomeTokenMarginalPrice"] = (
+                 market_trades.outcomeTokenMarginalPrice.apply(
+                     lambda x: round(float(x), 9)
+                 )
+             )
+             # datetimes
+             market_trades["creationTimestamp"] = market_trades[
+                 "creationTimestamp"
+             ].apply(lambda x: datetime.fromtimestamp(int(x)))
+
+             new_data = compute_daily_gap_metric(market_trades, market_id, logger)
+             if new_data is None:
+                 continue
+
+             if markets_with_daily_metrics is None:
+                 markets_with_daily_metrics = new_data
+             else:
+                 markets_with_daily_metrics = pd.concat(
+                     [markets_with_daily_metrics, new_data], axis=0, ignore_index=True
+                 )
+             # saving the trades information into the file
+             logger.info(
+                 f"Saving trades info into the parquet file. Length of file = {len(markets_with_daily_metrics)}"
+             )
+             markets_with_daily_metrics.to_parquet(
+                 DATA_DIR / "markets_with_daily_metrics.parquet", index=False
+             )
+
+     fpmms.to_parquet(DATA_DIR / filename, index=False)
+
+
+ if __name__ == "__main__":
+     logger.setLevel(logging.DEBUG)
+     # stream handler and formatter
+     stream_handler = logging.StreamHandler()
+     stream_handler.setLevel(logging.DEBUG)
+     formatter = logging.Formatter(
+         "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+     )
+     stream_handler.setFormatter(formatter)
+     logging.basicConfig(
+         handlers=[logging.FileHandler("live_markets.log"), stream_handler]
+     )
+     # collect_closed_markets("closed_markets_data.parquet")
+     collect_trades_info("closed_markets_data.parquet", logger)
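
The token bookkeeping in `get_updated_amounts` follows the constant-product rule of the FPMM: the pool for the chosen outcome is re-derived from the new marginal price via the `price * amount` constant, and the other pool from the invariant `k`. A toy check with made-up numbers (none of them from the script) showing the invariant is preserved across a price move:

```python
from math import prod

# Hypothetical starting pools and price, for illustration only.
tokens_outcome_0, tokens_outcome_1 = 1000.0, 1000.0
k = prod([tokens_outcome_0, tokens_outcome_1])      # constant product, x * y
price_0 = 0.5                                       # equal prices at creation
price_amount_constant = tokens_outcome_0 * price_0  # price * amount constant

# A buy on outcome 0 moves its marginal price from 0.5 to 0.6.
new_price_0 = 0.6
new_tokens_0 = price_amount_constant / new_price_0  # pool shrinks to ~833.33
new_tokens_1 = k / new_tokens_0                     # other pool grows to ~1200.0

assert abs(new_tokens_0 * new_tokens_1 - k) < 1e-6  # invariant holds
```

Whether the `price * amount` product truly stays constant across trades is an assumption the script itself makes; this sketch only mirrors that bookkeeping.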
scripts/queries.py CHANGED
@@ -63,6 +63,7 @@ FPMMS_WITH_TOKENS_QUERY = Template(
          ${title_field}
          ${opening_timestamp_field}
          ${creation_timestamp_field}
+         resolutionTimestamp
          ${liquidity_field}
          ${liquidity_measure_field}
          ${token_amounts_field}
@@ -71,6 +72,33 @@ FPMMS_WITH_TOKENS_QUERY = Template(
      """
  )
 
+ FPMMS_CLOSED_MARKETS_QUERY = Template(
+     """
+     {
+       ${fpmms_field}(
+         where: {
+           creator: "${creator}",
+           id_gt: "${fpmm_id}",
+           creationTimestamp_gt: ${start_timestamp}
+           creationTimestamp_lt: ${end_timestamp}
+         },
+         orderBy: ${id_field}
+         orderDirection: asc
+         first: 1000
+       ){
+         ${id_field}
+         isPendingArbitration
+
+         ${title_field}
+         ${opening_timestamp_field}
+         ${creation_timestamp_field}
+         ${liquidity_field}
+         ${liquidity_measure_field}
+         ${token_amounts_field}
+       }
+     }
+     """
+ )
 
  omen_market_trades_query = Template(
      """
@@ -116,3 +144,61 @@ omen_market_trades_query = Template(
      }
      """
  )
+
+ omen_market_sorted_trades_query = Template(
+     """
+     {
+       fpmmTrades(
+         where: {
+           type: Buy,
+           fpmm_: {
+             creator: "${fpmm_creator}",
+             id: "${fpmm_id}",
+           },
+           id_gt: "${id_gt}"
+         }
+         first: 1000
+         orderBy: creationTimestamp
+         orderDirection: asc
+       ) {
+         id
+         title
+         collateralToken
+         outcomeTokenMarginalPrice
+         oldOutcomeTokenMarginalPrice
+         creationTimestamp
+         collateralAmount
+         collateralAmountUSD
+         feeAmount
+         outcomeIndex
+         outcomeTokensTraded
+         transactionHash
+       }
+     }
+     """
+ )
+
+ get_initial_amount_tokens_query = Template(
+     """
+     {
+       fpmmLiquidities(
+         where: {
+           fpmm_: {
+             creator: "${fpmm_creator}",
+             id: "${fpmm_id}",
+           },
+           id_gt: ""
+         }
+         orderBy: creationTimestamp
+         orderDirection: asc
+       ) {
+         id
+         outcomeTokenAmounts
+         creationTimestamp
+         additionalLiquidityParameter
+       }
+     }
+     """
+ )
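
Both new queries rely on the `id_gt` cursor convention used throughout these scripts: request a page ordered ascending, then reissue the query with `id_gt` set to the last `id` seen. A generic sketch of that loop, with a hypothetical `run_query` callable standing in for the `requests.post` call made in `closed_markets_data.py`:

```python
from string import Template
from typing import Any, Callable, Dict, List

def drain_collection(
    query_template: Template,
    run_query: Callable[[str], Dict[str, Any]],  # hypothetical transport helper
    key: str,
    **params: Any,
) -> List[Dict[str, Any]]:
    """Fetch every item of a subgraph collection using id_gt cursor pagination."""
    results: List[Dict[str, Any]] = []
    id_gt = ""
    while True:
        query = query_template.substitute(id_gt=id_gt, **params)
        page = run_query(query).get("data", {}).get(key, [])
        if not page:
            return results
        results.extend(page)
        id_gt = page[-1]["id"]  # advance the cursor past the last item seen
```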
scripts/utils.py CHANGED
@@ -17,7 +17,6 @@ from json.decoder import JSONDecodeError
 
  CREATOR = "0x89c5cc945dd550BcFfb72Fe42BfF002429F46Fec"
  BATCH_SIZE = 1000
- # OMEN_SUBGRAPH = "https://api.thegraph.com/subgraphs/name/protofire/omen-xdai"
  OMEN_SUBGRAPH_URL = Template(
      """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"""
  )
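
`OMEN_SUBGRAPH_URL` is a `string.Template`, so the Graph gateway key is injected at call time rather than hard-coded. A sketch of the substitution; reading the key from an environment variable is an assumption here, since the diff does not show where `SUBGRAPH_API_KEY` comes from:

```python
import os
from string import Template

OMEN_SUBGRAPH_URL = Template(
    "https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}"
    "/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"
)

# Hypothetical source for the key; adjust to however utils.py actually defines it.
url = OMEN_SUBGRAPH_URL.substitute(
    subgraph_api_key=os.environ.get("SUBGRAPH_API_KEY", "")
)
```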