cyberosa commited on
Commit
35989d5
·
1 Parent(s): ec75e86

Data update and token-based distribution graphs

Browse files
app.py CHANGED
@@ -5,6 +5,7 @@ import pandas as pd
5
  import seaborn as sns
6
  import duckdb
7
  import logging
 
8
 
9
 
10
  def get_logger():
@@ -40,21 +41,50 @@ def prepare_data():
40
  return df
41
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  demo = gr.Blocks()
44
  markets_data = prepare_data()
45
 
46
  with demo:
47
  gr.HTML("<h1>Olas Predict Live Markets </h1>")
48
  gr.Markdown("This app shows the distributions of predictions on the live markets.")
49
-
50
  with gr.Tabs():
51
- with gr.TabItem("💹Probability distributions"):
 
 
 
52
  with gr.Row():
53
- gr.Markdown("# Daily probability distribution of live markets")
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  with gr.Row():
55
- # TODO
56
- print("WIP")
57
- gr.Markdown("Under construction (WIP)")
58
- # daily_distributions = plot_daily_market_distributions(markets_data)
59
 
60
  demo.queue(default_concurrency_limit=40).launch()
 
5
  import seaborn as sns
6
  import duckdb
7
  import logging
8
+ from tabs.tokens_votes_dist import get_based_tokens_distribution
9
 
10
 
11
  def get_logger():
 
41
  return df
42
 
43
 
44
def get_extreme_cases(live_fpmms: pd.DataFrame):
    """Return the ids of the best and worst markets according to the dist gap metric.

    The "best" market is the one with the LOWEST ``dist_gap_perc`` (token-based
    and vote-based distributions agree) and the "worst" the one with the
    HIGHEST, matching how the caller labels them in the UI. Only markets with
    more than one collected sample are considered.

    :param live_fpmms: dataframe with one row per (market id, sample).
    :return: tuple ``(best_market_id, worst_market_id)``.
    """
    # Count samples per market without depending on the timestamp column's
    # name (it was renamed tokens_timestamp -> sample_timestamp upstream).
    samples_per_market = live_fpmms.groupby("id").size()
    markets_with_multiple_samples = samples_per_market[samples_per_market > 1].index

    # Work on a copy so sorting never mutates (or warns about) a slice of the input.
    selected_markets = live_fpmms.loc[
        live_fpmms["id"].isin(markets_with_multiple_samples)
    ].copy()
    selected_markets.sort_values(by="dist_gap_perc", ascending=True, inplace=True)

    # Ascending order: first row = lowest gap (best), last row = highest gap (worst).
    return selected_markets.iloc[0].id, selected_markets.iloc[-1].id
58
+
59
+
60
  demo = gr.Blocks()
61
  markets_data = prepare_data()
62
 
63
  with demo:
64
  gr.HTML("<h1>Olas Predict Live Markets </h1>")
65
  gr.Markdown("This app shows the distributions of predictions on the live markets.")
66
+ best_market_id, worst_market_id = get_extreme_cases(markets_data)
67
  with gr.Tabs():
68
+ with gr.TabItem("💹 Probability distributions of live markets"):
69
+ with gr.Row():
70
+ gr.Markdown("# Evolution of outcomes probability based on tokens")
71
+
72
  with gr.Row():
73
+ gr.Markdown("Best case: a market with a low distribution gap metric")
74
+ with gr.Row():
75
+ best_market_tokens_dist = get_based_tokens_distribution(
76
+ best_market_id, markets_data
77
+ )
78
+
79
+ with gr.Row():
80
+ gr.Markdown("Worst case: a market with a high distribution gap metric")
81
+
82
+ with gr.Row():
83
+ worst_market_tokens_dist = get_based_tokens_distribution(
84
+ worst_market_id, markets_data
85
+ )
86
+
87
  with gr.Row():
88
+ gr.Markdown("# Evolution of outcomes probability based on votes")
 
 
 
89
 
90
  demo.queue(default_concurrency_limit=40).launch()
live_data/markets_live_data.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e29014d0396e1174838cbe49097cd73c21b8cd2168aeea23ba98240f16c96996
3
- size 22391
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f1aefc2dd441883aca8a95db7715a511b763a5b486307a903dcea30df7ef828
3
+ size 27422
live_data/markets_live_data_sample.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7f78eff92f06483f12d9acf36494488732e39c22098ce2f3e21e6d44efb88af
3
- size 25464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e601332794f53f1c65384434aa7bbcad617853f3aa7f89eeb68640f36edc7b14
3
+ size 22201
notebooks/analysis_of_markets_data.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
scripts/live_markets_data.py CHANGED
@@ -206,13 +206,13 @@ def get_answer(fpmm: pd.Series) -> str:
206
  def get_first_token_perc(row):
207
  if row["total_tokens"] == 0.0:
208
  return 0
209
- return round((row["token_first_amount"] / row["total_tokens"]) * 100, 2)
210
 
211
 
212
  def get_second_token_perc(row):
213
  if row["total_tokens"] == 0.0:
214
  return 0
215
- return round((row["token_second_amount"] / row["total_tokens"]) * 100, 2)
216
 
217
 
218
  def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int) -> None:
@@ -220,13 +220,14 @@ def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int)
220
 
221
  # prepare the new ones
222
  # Add current timestamp
223
- fpmms["tokens_timestamp"] = current_timestamp
224
  fpmms["open"] = True
225
  fpmms["total_trades"] = 0
226
  fpmms["dist_gap_perc"] = 0.0
227
  fpmms["votes_first_outcome_perc"] = 0.0
228
  fpmms["votes_second_outcome_perc"] = 0.0
229
-
 
230
  # computation of token distributions
231
  fpmms["token_first_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[0]))
232
  fpmms["token_second_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[1]))
@@ -236,7 +237,12 @@ def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int)
236
  fpmms["first_token_perc"] = fpmms.apply(lambda x: get_first_token_perc(x), axis=1)
237
  fpmms["second_token_perc"] = fpmms.apply(lambda x: get_second_token_perc(x), axis=1)
238
  fpmms.drop(
239
- columns=["token_first_amount", "token_second_amount", "total_tokens"],
 
 
 
 
 
240
  inplace=True,
241
  )
242
  # previous file to update?
@@ -263,7 +269,7 @@ def compute_distributions(filename: Optional[str]) -> pd.DataFrame:
263
  """Fetch, process, store and return the markets as a Dataframe."""
264
 
265
  logger.info("fetching new markets information")
266
- current_timestamp = int(datetime.now(UTC).timestamp())
267
  fpmms = fetch_fpmms(current_timestamp)
268
  logger.debug("New collected data")
269
  logger.debug(fpmms.head())
@@ -276,7 +282,7 @@ def compute_distributions(filename: Optional[str]) -> pd.DataFrame:
276
  add_trading_info(fpmms, current_timestamp)
277
 
278
  logger.info("saving the data")
279
- print(fpmms.info())
280
  if filename:
281
  fpmms.to_parquet(DATA_DIR / filename, index=False)
282
 
 
206
def get_first_token_perc(row):
    """Percentage attributed to the first outcome: the complement (100 minus)
    of the first token amount's share of the total token pool, rounded to two
    decimals. Presumably the complement is used because in an FPMM the outcome
    price moves inversely to its token amount — TODO confirm.
    """
    total = row["total_tokens"]
    if total == 0.0:
        # No liquidity in the pool: no meaningful percentage to report.
        return 0
    first_share_pct = round((row["token_first_amount"] / total) * 100, 2)
    return 100.0 - first_share_pct
210
 
211
 
212
def get_second_token_perc(row):
    """Percentage attributed to the second outcome: the complement (100 minus)
    of the second token amount's share of the total token pool, rounded to two
    decimals. Mirrors ``get_first_token_perc``.
    """
    total = row["total_tokens"]
    if total == 0.0:
        # No liquidity in the pool: no meaningful percentage to report.
        return 0
    second_share_pct = round((row["token_second_amount"] / total) * 100, 2)
    return 100.0 - second_share_pct
216
 
217
 
218
  def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int) -> None:
 
220
 
221
  # prepare the new ones
222
  # Add current timestamp
223
+ fpmms["sample_timestamp"] = current_timestamp
224
  fpmms["open"] = True
225
  fpmms["total_trades"] = 0
226
  fpmms["dist_gap_perc"] = 0.0
227
  fpmms["votes_first_outcome_perc"] = 0.0
228
  fpmms["votes_second_outcome_perc"] = 0.0
229
+ fpmms["first_outcome"] = fpmms.question.apply(lambda x: x["outcomes"][0])
230
+ fpmms["second_outcome"] = fpmms.question.apply(lambda x: x["outcomes"][1])
231
  # computation of token distributions
232
  fpmms["token_first_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[0]))
233
  fpmms["token_second_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[1]))
 
237
  fpmms["first_token_perc"] = fpmms.apply(lambda x: get_first_token_perc(x), axis=1)
238
  fpmms["second_token_perc"] = fpmms.apply(lambda x: get_second_token_perc(x), axis=1)
239
  fpmms.drop(
240
+ columns=[
241
+ "token_first_amount",
242
+ "token_second_amount",
243
+ "total_tokens",
244
+ "question",
245
+ ],
246
  inplace=True,
247
  )
248
  # previous file to update?
 
269
  """Fetch, process, store and return the markets as a Dataframe."""
270
 
271
  logger.info("fetching new markets information")
272
+ current_timestamp = int(datetime.now(UTC).timestamp()) # seconds
273
  fpmms = fetch_fpmms(current_timestamp)
274
  logger.debug("New collected data")
275
  logger.debug(fpmms.head())
 
282
  add_trading_info(fpmms, current_timestamp)
283
 
284
  logger.info("saving the data")
285
+ logger.debug(fpmms.info())
286
  if filename:
287
  fpmms.to_parquet(DATA_DIR / filename, index=False)
288
 
scripts/live_traders_data.py CHANGED
@@ -74,8 +74,6 @@ def transform_trades(trades_json: dict) -> pd.DataFrame:
74
  logger.warning("No trades for this market")
75
  return df
76
 
77
- # print(df.info())
78
-
79
  # convert creator to address
80
  df["trade_creator"] = df["creator"].apply(lambda x: x["id"])
81
 
@@ -114,21 +112,21 @@ def compute_from_timestamp_value(
114
 
115
  def compute_votes_distribution(market_trades: pd.DataFrame):
116
  """Function to compute the distribution of votes for the trades of a market"""
 
117
  total_trades = len(market_trades)
118
- print(f"The total number of trades is {total_trades}")
119
  # outcomeIndex is always 1 or 0?
120
  sum_outcome_index_1 = sum(market_trades.outcomeIndex)
121
- print(f"The total number of votes for index 1 is {sum_outcome_index_1}")
122
- logger.info(f"The total number of votes for index 1 is {sum_outcome_index_1}")
123
  percentage_index_1 = round((sum_outcome_index_1 / total_trades) * 100, 2)
124
  return (100 - percentage_index_1), percentage_index_1
125
 
126
 
127
  def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
128
  """Function to update only the information related with the current timestamp"""
129
- # Iterate over the markets
130
  logger.info("Adding votes distribution per market")
131
-
132
  for i, fpmm in tqdm(fpmms.iterrows(), total=len(fpmms), desc="Analysing trades"):
133
  # update the trades for this market and at this specific current_timestamp
134
  logger.debug(f"current timestamp = {current_timestamp} and market timestamp={fpmm["tokens_timestamp"]}")
@@ -138,7 +136,7 @@ def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
138
  continue
139
  market_id = fpmm["id"]
140
 
141
- logger.info(f"Adding information for the market {market_id}")
142
  market_trades_json = _query_omen_xdai_subgraph(
143
  fpmm_id=market_id,
144
  )
@@ -146,8 +144,7 @@ def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
146
  if len(market_trades) == 0:
147
  logger.info("No trades for this market")
148
  continue
149
- # to compute the votes distribution
150
- logger.info("Computing the votes distribution")
151
  fpmms.at[i,"total_trades"] = len(market_trades)
152
  first_outcome, second_outcome = compute_votes_distribution(market_trades)
153
  logger.info(
 
74
  logger.warning("No trades for this market")
75
  return df
76
 
 
 
77
  # convert creator to address
78
  df["trade_creator"] = df["creator"].apply(lambda x: x["id"])
79
 
 
112
 
113
def compute_votes_distribution(market_trades: pd.DataFrame):
    """Compute the percentage of votes for each outcome of a market.

    Each trade counts as one vote; ``outcomeIndex`` is assumed to be 0 or 1
    (summing it counts the index-1 votes). Returns the pair
    ``(pct_outcome_0, pct_outcome_1)``. Callers must ensure the dataframe is
    non-empty before calling.
    """
    logger.info("Computing the votes distribution")
    n_trades = len(market_trades)
    logger.info(f"The total number of trades is {n_trades}")
    # outcomeIndex is always 1 or 0?
    votes_index_1 = sum(market_trades.outcomeIndex)
    logger.debug(f"The total number of votes for index 1 is {votes_index_1}")
    pct_index_1 = round((votes_index_1 / n_trades) * 100, 2)
    return (100 - pct_index_1), pct_index_1
123
 
124
 
125
  def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
126
  """Function to update only the information related with the current timestamp"""
127
+
128
  logger.info("Adding votes distribution per market")
129
+ # Iterate over the markets
130
  for i, fpmm in tqdm(fpmms.iterrows(), total=len(fpmms), desc="Analysing trades"):
131
  # update the trades for this market and at this specific current_timestamp
132
  logger.debug(f"current timestamp = {current_timestamp} and market timestamp={fpmm["tokens_timestamp"]}")
 
136
  continue
137
  market_id = fpmm["id"]
138
 
139
+ logger.info(f"Adding trades information for the market {market_id}")
140
  market_trades_json = _query_omen_xdai_subgraph(
141
  fpmm_id=market_id,
142
  )
 
144
  if len(market_trades) == 0:
145
  logger.info("No trades for this market")
146
  continue
147
+
 
148
  fpmms.at[i,"total_trades"] = len(market_trades)
149
  first_outcome, second_outcome = compute_votes_distribution(market_trades)
150
  logger.info(
tabs/dist_gap.py ADDED
File without changes
tabs/tokens_votes_dist.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ from seaborn import FacetGrid
6
+ import plotly.express as px
7
+
8
+
9
def get_based_tokens_distribution(market_id: str, all_markets: pd.DataFrame):
    """Plot the evolution of one market's outcome probabilities as implied by
    the token distributions over time.

    :param market_id: id of the market to plot.
    :param all_markets: dataframe with one row per (market, sample); expected
        to contain the columns ``id``, ``sample_datetime``,
        ``first_token_perc``, ``second_token_perc``, ``first_outcome`` and
        ``second_outcome`` — TODO confirm against the producer script.
    :return: a ``gr.Plot`` wrapping the generated matplotlib figure.
    """
    selected_market = all_markets.loc[all_markets["id"] == market_id]
    ax = selected_market.plot(
        x="sample_datetime",
        y=["first_token_perc", "second_token_perc"],
        kind="bar",
        rot=0,
        stacked=True,
    )
    # Use the Axes API rather than the pyplot state machine (plt.title & co.):
    # the app serves concurrent requests (default_concurrency_limit=40), so the
    # pyplot "current figure" may belong to another request mid-render.
    # add overall title
    ax.set_title(
        "Outcomes probability over time based on tokens distributions", fontsize=16
    )
    # add axis titles
    ax.set_xlabel("Sample date")
    ax.set_ylabel("Percentage")
    # The legend labels come from the first sample row; assumes the y-column
    # order above matches (first_outcome, second_outcome).
    first_outcome = selected_market.iloc[0].first_outcome
    second_outcome = selected_market.iloc[0].second_outcome
    ax.legend(
        bbox_to_anchor=(1, 1.02),
        loc="upper left",
        labels=[first_outcome, second_outcome],
    )
    return gr.Plot(value=ax.figure)