cyberosa committed · Commit 35989d5 · 1 Parent(s): ec75e86

data update and based tokens graphs

Files changed:
- app.py +37 -7
- live_data/markets_live_data.parquet +2 -2
- live_data/markets_live_data_sample.parquet +2 -2
- notebooks/analysis_of_markets_data.ipynb +0 -0
- scripts/live_markets_data.py +13 -7
- scripts/live_traders_data.py +7 -10
- tabs/dist_gap.py +0 -0
- tabs/tokens_votes_dist.py +34 -0
app.py
CHANGED

@@ -5,6 +5,7 @@ import pandas as pd
 import seaborn as sns
 import duckdb
 import logging
+from tabs.tokens_votes_dist import get_based_tokens_distribution


 def get_logger():
@@ -40,21 +41,50 @@ def prepare_data():
     return df


+def get_extreme_cases(live_fpmms: pd.DataFrame):
+    """Function to return the id of the best and worst case according to the dist gap metric"""
+    # select markets with more than 1 sample
+    samples_per_market = (
+        live_fpmms[["id", "tokens_timestamp"]].groupby("id").count().reset_index()
+    )
+    markets_with_multiple_samples = list(
+        samples_per_market.loc[samples_per_market["tokens_timestamp"] > 1, "id"].values
+    )
+    selected_markets = live_fpmms.loc[
+        live_fpmms["id"].isin(markets_with_multiple_samples)
+    ]
+    selected_markets.sort_values(by="dist_gap_perc", ascending=False, inplace=True)
+    return selected_markets.iloc[0].id, selected_markets.iloc[-1].id
+
+
 demo = gr.Blocks()
 markets_data = prepare_data()

 with demo:
     gr.HTML("<h1>Olas Predict Live Markets </h1>")
     gr.Markdown("This app shows the distributions of predictions on the live markets.")
-
+    best_market_id, worst_market_id = get_extreme_cases(markets_data)
     with gr.Tabs():
-        with gr.TabItem("💹Probability distributions"):
+        with gr.TabItem("💹 Probability distributions of live markets"):
+            with gr.Row():
+                gr.Markdown("# Evolution of outcomes probability based on tokens")
+
             with gr.Row():
-                gr.Markdown("
+                gr.Markdown("Best case: a market with a low distribution gap metric")
+            with gr.Row():
+                best_market_tokens_dist = get_based_tokens_distribution(
+                    best_market_id, markets_data
+                )
+
+            with gr.Row():
+                gr.Markdown("Worst case: a market with a high distribution gap metric")
+
+            with gr.Row():
+                worst_market_tokens_dist = get_based_tokens_distribution(
+                    worst_market_id, markets_data
+                )
+
             with gr.Row():
-                #
-                print("WIP")
-                gr.Markdown("Under construction (WIP)")
-                # daily_distributions = plot_daily_market_distributions(markets_data)
+                gr.Markdown("# Evolution of outcomes probability based on votes")

 demo.queue(default_concurrency_limit=40).launch()
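For orientation, here is a minimal, self-contained sketch of what the new `get_extreme_cases` helper selects. All data values below are invented; only the column names mirror the commit. The helper keeps markets with more than one sample, sorts by `dist_gap_perc` in descending order, and returns two ids: first the market with the largest gap, then the one with the smallest.

```python
import pandas as pd

# Invented snapshots of three live markets; 0xccc has a single sample and is dropped.
toy = pd.DataFrame(
    {
        "id": ["0xaaa", "0xaaa", "0xbbb", "0xbbb", "0xccc"],
        "tokens_timestamp": [1000, 2000, 1000, 2000, 1000],
        "dist_gap_perc": [5.0, 7.5, 40.0, 55.0, 90.0],
    }
)

# Same steps as get_extreme_cases above.
counts = toy[["id", "tokens_timestamp"]].groupby("id").count().reset_index()
multi_sample_ids = counts.loc[counts["tokens_timestamp"] > 1, "id"]
selected = toy.loc[toy["id"].isin(multi_sample_ids)].sort_values(
    "dist_gap_perc", ascending=False
)

print(selected.iloc[0].id, selected.iloc[-1].id)  # 0xbbb (largest gap), 0xaaa (smallest gap)
```

As written, the first element of the returned tuple corresponds to the highest `dist_gap_perc` and the second to the lowest; app.py unpacks them as `best_market_id, worst_market_id` respectively.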
live_data/markets_live_data.parquet
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9f1aefc2dd441883aca8a95db7715a511b763a5b486307a903dcea30df7ef828
+size 27422
live_data/markets_live_data_sample.parquet
CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:e601332794f53f1c65384434aa7bbcad617853f3aa7f89eeb68640f36edc7b14
+size 22201
notebooks/analysis_of_markets_data.ipynb
CHANGED
The diff for this file is too large to render.
scripts/live_markets_data.py
CHANGED

@@ -206,13 +206,13 @@ def get_answer(fpmm: pd.Series) -> str:
 def get_first_token_perc(row):
     if row["total_tokens"] == 0.0:
         return 0
-    return round((row["token_first_amount"] / row["total_tokens"]) * 100, 2)
+    return 100.0 - round((row["token_first_amount"] / row["total_tokens"]) * 100, 2)


 def get_second_token_perc(row):
     if row["total_tokens"] == 0.0:
         return 0
-    return round((row["token_second_amount"] / row["total_tokens"]) * 100, 2)
+    return 100.0 - round((row["token_second_amount"] / row["total_tokens"]) * 100, 2)


 def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int) -> None:
@@ -220,13 +220,14 @@ def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int)

     # prepare the new ones
     # Add current timestamp
-    fpmms["
+    fpmms["sample_timestamp"] = current_timestamp
     fpmms["open"] = True
     fpmms["total_trades"] = 0
     fpmms["dist_gap_perc"] = 0.0
     fpmms["votes_first_outcome_perc"] = 0.0
     fpmms["votes_second_outcome_perc"] = 0.0
-
+    fpmms["first_outcome"] = fpmms.question.apply(lambda x: x["outcomes"][0])
+    fpmms["second_outcome"] = fpmms.question.apply(lambda x: x["outcomes"][1])
     # computation of token distributions
     fpmms["token_first_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[0]))
     fpmms["token_second_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[1]))
@@ -236,7 +237,12 @@ def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int)
     fpmms["first_token_perc"] = fpmms.apply(lambda x: get_first_token_perc(x), axis=1)
     fpmms["second_token_perc"] = fpmms.apply(lambda x: get_second_token_perc(x), axis=1)
     fpmms.drop(
-        columns=[
+        columns=[
+            "token_first_amount",
+            "token_second_amount",
+            "total_tokens",
+            "question",
+        ],
         inplace=True,
     )
     # previous file to update?
@@ -263,7 +269,7 @@ def compute_distributions(filename: Optional[str]) -> pd.DataFrame:
     """Fetch, process, store and return the markets as a Dataframe."""

     logger.info("fetching new markets information")
-    current_timestamp = int(datetime.now(UTC).timestamp())
+    current_timestamp = int(datetime.now(UTC).timestamp())  # seconds
     fpmms = fetch_fpmms(current_timestamp)
     logger.debug("New collected data")
     logger.debug(fpmms.head())
@@ -276,7 +282,7 @@ def compute_distributions(filename: Optional[str]) -> pd.DataFrame:
     add_trading_info(fpmms, current_timestamp)

     logger.info("saving the data")
-
+    logger.debug(fpmms.info())
     if filename:
         fpmms.to_parquet(DATA_DIR / filename, index=False)

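A short worked example, with invented numbers, of the updated token-percentage helpers above. Subtracting the raw token share from 100 makes the value behave like the implied probability of that outcome: in a two-outcome fixed-product market maker the likelier outcome is the one the pool holds fewer tokens of, so the probability of outcome 1 is roughly outcome 2's share of the pool. That rationale is not stated in the commit and is inferred here; the diff itself only shows the formula change.

```python
# Worked example with invented numbers: outcomeTokenAmounts = [300, 100]
row = {"token_first_amount": 300, "token_second_amount": 100, "total_tokens": 400}

first_token_perc = 100.0 - round((row["token_first_amount"] / row["total_tokens"]) * 100, 2)
second_token_perc = 100.0 - round((row["token_second_amount"] / row["total_tokens"]) * 100, 2)

print(first_token_perc, second_token_perc)  # 25.0 75.0 (the old helpers returned 75.0 and 25.0)
```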
scripts/live_traders_data.py
CHANGED

@@ -74,8 +74,6 @@ def transform_trades(trades_json: dict) -> pd.DataFrame:
         logger.warning("No trades for this market")
         return df

-    # print(df.info())
-
     # convert creator to address
     df["trade_creator"] = df["creator"].apply(lambda x: x["id"])

@@ -114,21 +112,21 @@ def compute_from_timestamp_value(

 def compute_votes_distribution(market_trades: pd.DataFrame):
     """Function to compute the distribution of votes for the trades of a market"""
+    logger.info("Computing the votes distribution")
     total_trades = len(market_trades)
-
+    logger.info(f"The total number of trades is {total_trades}")
     # outcomeIndex is always 1 or 0?
     sum_outcome_index_1 = sum(market_trades.outcomeIndex)
-
-    logger.info(f"The total number of votes for index 1 is {sum_outcome_index_1}")
+    logger.debug(f"The total number of votes for index 1 is {sum_outcome_index_1}")
     percentage_index_1 = round((sum_outcome_index_1 / total_trades) * 100, 2)
     return (100 - percentage_index_1), percentage_index_1


 def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
     """Function to update only the information related with the current timestamp"""
-
+
     logger.info("Adding votes distribution per market")
-
+    # Iterate over the markets
     for i, fpmm in tqdm(fpmms.iterrows(), total=len(fpmms), desc="Analysing trades"):
         # update the trades for this market and at this specific current_timestamp
         logger.debug(f"current timestamp = {current_timestamp} and market timestamp={fpmm["tokens_timestamp"]}")
@@ -138,7 +136,7 @@ def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
             continue
         market_id = fpmm["id"]

-        logger.info(f"Adding information for the market {market_id}")
+        logger.info(f"Adding trades information for the market {market_id}")
         market_trades_json = _query_omen_xdai_subgraph(
             fpmm_id=market_id,
         )
@@ -146,8 +144,7 @@ def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
         if len(market_trades) == 0:
             logger.info("No trades for this market")
             continue
-
-        logger.info("Computing the votes distribution")
+
         fpmms.at[i,"total_trades"] = len(market_trades)
         first_outcome, second_outcome = compute_votes_distribution(market_trades)
         logger.info(
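The vote distribution computed here is simply the share of trades per outcome index: `outcomeIndex` is 0 or 1, so summing it counts the trades on the second outcome. A minimal sketch with invented trades, mirroring `compute_votes_distribution` (the caller in `add_trading_info` unpacks the result as first-outcome and second-outcome percentages):

```python
import pandas as pd

# Invented trades for one market: 2 trades on outcome index 0, 3 on index 1.
trades = pd.DataFrame({"outcomeIndex": [0, 1, 1, 1, 0]})

total_trades = len(trades)                                                  # 5
sum_outcome_index_1 = sum(trades.outcomeIndex)                              # 3
percentage_index_1 = round((sum_outcome_index_1 / total_trades) * 100, 2)   # 60.0

print(100 - percentage_index_1, percentage_index_1)  # 40.0 60.0
```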
tabs/dist_gap.py
ADDED
File without changes
tabs/tokens_votes_dist.py
ADDED

@@ -0,0 +1,34 @@
+import pandas as pd
+import gradio as gr
+import matplotlib.pyplot as plt
+import seaborn as sns
+from seaborn import FacetGrid
+import plotly.express as px
+
+
+def get_based_tokens_distribution(market_id: str, all_markets: pd.DataFrame):
+    """Function to paint the evolution of the probability of the outcomes based on the tokens distributions over time"""
+    selected_market = all_markets.loc[all_markets["id"] == market_id]
+    ax = selected_market.plot(
+        x="sample_datetime",
+        y=["first_token_perc", "second_token_perc"],
+        kind="bar",
+        rot=0,
+        stacked=True,
+    )
+    # add overall title
+    plt.title(
+        "Outcomes probability over time based on tokens distributions", fontsize=16
+    )
+
+    # add axis titles
+    plt.xlabel("Sample date")
+    plt.ylabel("Percentage")
+    first_outcome = selected_market.iloc[0].first_outcome
+    second_outcome = selected_market.iloc[0].second_outcome
+    ax.legend(
+        bbox_to_anchor=(1, 1.02),
+        loc="upper left",
+        labels=[first_outcome, second_outcome],
+    )
+    return gr.Plot(value=ax.figure)
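A usage sketch for the new `get_based_tokens_distribution` component, showing the columns the function expects on the markets DataFrame. The data values are invented, and `sample_datetime` is assumed to be derived elsewhere in the pipeline (e.g. from the `sample_timestamp` column added in `scripts/live_markets_data.py`); in the app the returned `gr.Plot` is created inside a `gr.Row()` as shown in `app.py` above.

```python
import pandas as pd
from tabs.tokens_votes_dist import get_based_tokens_distribution

# Invented snapshots of a single market with the columns the function reads.
toy_markets = pd.DataFrame(
    {
        "id": ["0xaaa"] * 3,
        "sample_datetime": ["2024-05-01 10:00", "2024-05-01 11:00", "2024-05-01 12:00"],
        "first_token_perc": [40.0, 45.0, 55.0],
        "second_token_perc": [60.0, 55.0, 45.0],
        "first_outcome": ["Yes"] * 3,
        "second_outcome": ["No"] * 3,
    }
)

# One stacked bar per sample, split into the two outcome percentages,
# with the legend labelled by the market's actual outcomes.
plot_component = get_based_tokens_distribution("0xaaa", toy_markets)
```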