cyberosa commited on
Commit
35989d5
·
1 Parent(s): ec75e86

Data update and token-based distribution graphs

Browse files
app.py CHANGED
@@ -5,6 +5,7 @@ import pandas as pd
5
  import seaborn as sns
6
  import duckdb
7
  import logging
 
8
 
9
 
10
  def get_logger():
@@ -40,21 +41,50 @@ def prepare_data():
40
  return df
41
 
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  demo = gr.Blocks()
44
  markets_data = prepare_data()
45
 
46
  with demo:
47
  gr.HTML("<h1>Olas Predict Live Markets </h1>")
48
  gr.Markdown("This app shows the distributions of predictions on the live markets.")
49
-
50
  with gr.Tabs():
51
- with gr.TabItem("💹Probability distributions"):
 
 
 
52
  with gr.Row():
53
- gr.Markdown("# Daily probability distribution of live markets")
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  with gr.Row():
55
- # TODO
56
- print("WIP")
57
- gr.Markdown("Under construction (WIP)")
58
- # daily_distributions = plot_daily_market_distributions(markets_data)
59
 
60
  demo.queue(default_concurrency_limit=40).launch()
 
5
  import seaborn as sns
6
  import duckdb
7
  import logging
8
+ from tabs.tokens_votes_dist import get_based_tokens_distribution
9
 
10
 
11
  def get_logger():
 
41
  return df
42
 
43
 
44
def get_extreme_cases(live_fpmms: pd.DataFrame):
    """Return the ids of the best and worst markets according to the dist gap metric.

    The "best" market is the one with the LOWEST ``dist_gap_perc`` (token-based
    and vote-based distributions agree) and the "worst" the one with the
    HIGHEST, matching how the caller labels them in the UI. Only markets with
    more than one collected sample are considered.

    :param live_fpmms: dataframe with one row per (market id, sample).
    :return: tuple ``(best_market_id, worst_market_id)``.
    """
    # Count samples per market without depending on the timestamp column's
    # name (it was renamed tokens_timestamp -> sample_timestamp upstream).
    samples_per_market = live_fpmms.groupby("id").size()
    markets_with_multiple_samples = samples_per_market[samples_per_market > 1].index

    # Work on a copy so sorting never mutates (or warns about) a slice of the input.
    selected_markets = live_fpmms.loc[
        live_fpmms["id"].isin(markets_with_multiple_samples)
    ].copy()
    selected_markets.sort_values(by="dist_gap_perc", ascending=True, inplace=True)

    # Ascending order: first row = lowest gap (best), last row = highest gap (worst).
    return selected_markets.iloc[0].id, selected_markets.iloc[-1].id
58
+
59
+
60
  demo = gr.Blocks()
61
  markets_data = prepare_data()
62
 
63
  with demo:
64
  gr.HTML("<h1>Olas Predict Live Markets </h1>")
65
  gr.Markdown("This app shows the distributions of predictions on the live markets.")
66
+ best_market_id, worst_market_id = get_extreme_cases(markets_data)
67
  with gr.Tabs():
68
+ with gr.TabItem("💹 Probability distributions of live markets"):
69
+ with gr.Row():
70
+ gr.Markdown("# Evolution of outcomes probability based on tokens")
71
+
72
  with gr.Row():
73
+ gr.Markdown("Best case: a market with a low distribution gap metric")
74
+ with gr.Row():
75
+ best_market_tokens_dist = get_based_tokens_distribution(
76
+ best_market_id, markets_data
77
+ )
78
+
79
+ with gr.Row():
80
+ gr.Markdown("Worst case: a market with a high distribution gap metric")
81
+
82
+ with gr.Row():
83
+ worst_market_tokens_dist = get_based_tokens_distribution(
84
+ worst_market_id, markets_data
85
+ )
86
+
87
  with gr.Row():
88
+ gr.Markdown("# Evolution of outcomes probability based on votes")
 
 
 
89
 
90
  demo.queue(default_concurrency_limit=40).launch()
live_data/markets_live_data.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e29014d0396e1174838cbe49097cd73c21b8cd2168aeea23ba98240f16c96996
3
- size 22391
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f1aefc2dd441883aca8a95db7715a511b763a5b486307a903dcea30df7ef828
3
+ size 27422
live_data/markets_live_data_sample.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7f78eff92f06483f12d9acf36494488732e39c22098ce2f3e21e6d44efb88af
3
- size 25464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e601332794f53f1c65384434aa7bbcad617853f3aa7f89eeb68640f36edc7b14
3
+ size 22201
notebooks/analysis_of_markets_data.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
scripts/live_markets_data.py CHANGED
@@ -206,13 +206,13 @@ def get_answer(fpmm: pd.Series) -> str:
206
  def get_first_token_perc(row):
207
  if row["total_tokens"] == 0.0:
208
  return 0
209
- return round((row["token_first_amount"] / row["total_tokens"]) * 100, 2)
210
 
211
 
212
  def get_second_token_perc(row):
213
  if row["total_tokens"] == 0.0:
214
  return 0
215
- return round((row["token_second_amount"] / row["total_tokens"]) * 100, 2)
216
 
217
 
218
  def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int) -> None:
@@ -220,13 +220,14 @@ def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int)
220
 
221
  # prepare the new ones
222
  # Add current timestamp
223
- fpmms["tokens_timestamp"] = current_timestamp
224
  fpmms["open"] = True
225
  fpmms["total_trades"] = 0
226
  fpmms["dist_gap_perc"] = 0.0
227
  fpmms["votes_first_outcome_perc"] = 0.0
228
  fpmms["votes_second_outcome_perc"] = 0.0
229
-
 
230
  # computation of token distributions
231
  fpmms["token_first_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[0]))
232
  fpmms["token_second_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[1]))
@@ -236,7 +237,12 @@ def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int)
236
  fpmms["first_token_perc"] = fpmms.apply(lambda x: get_first_token_perc(x), axis=1)
237
  fpmms["second_token_perc"] = fpmms.apply(lambda x: get_second_token_perc(x), axis=1)
238
  fpmms.drop(
239
- columns=["token_first_amount", "token_second_amount", "total_tokens"],
 
 
 
 
 
240
  inplace=True,
241
  )
242
  # previous file to update?
@@ -263,7 +269,7 @@ def compute_distributions(filename: Optional[str]) -> pd.DataFrame:
263
  """Fetch, process, store and return the markets as a Dataframe."""
264
 
265
  logger.info("fetching new markets information")
266
- current_timestamp = int(datetime.now(UTC).timestamp())
267
  fpmms = fetch_fpmms(current_timestamp)
268
  logger.debug("New collected data")
269
  logger.debug(fpmms.head())
@@ -276,7 +282,7 @@ def compute_distributions(filename: Optional[str]) -> pd.DataFrame:
276
  add_trading_info(fpmms, current_timestamp)
277
 
278
  logger.info("saving the data")
279
- print(fpmms.info())
280
  if filename:
281
  fpmms.to_parquet(DATA_DIR / filename, index=False)
282
 
 
206
def get_first_token_perc(row):
    """Percentage attributed to the first outcome: the complement (100 minus)
    of the first token amount's share of the total token pool, rounded to two
    decimals. Presumably the complement is used because in an FPMM the outcome
    price moves inversely to its token amount — TODO confirm.
    """
    total = row["total_tokens"]
    if total == 0.0:
        # No liquidity in the pool: no meaningful percentage to report.
        return 0
    first_share_pct = round((row["token_first_amount"] / total) * 100, 2)
    return 100.0 - first_share_pct
210
 
211
 
212
def get_second_token_perc(row):
    """Percentage attributed to the second outcome: the complement (100 minus)
    of the second token amount's share of the total token pool, rounded to two
    decimals. Mirrors ``get_first_token_perc``.
    """
    total = row["total_tokens"]
    if total == 0.0:
        # No liquidity in the pool: no meaningful percentage to report.
        return 0
    second_share_pct = round((row["token_second_amount"] / total) * 100, 2)
    return 100.0 - second_share_pct
216
 
217
 
218
  def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int) -> None:
 
220
 
221
  # prepare the new ones
222
  # Add current timestamp
223
+ fpmms["sample_timestamp"] = current_timestamp
224
  fpmms["open"] = True
225
  fpmms["total_trades"] = 0
226
  fpmms["dist_gap_perc"] = 0.0
227
  fpmms["votes_first_outcome_perc"] = 0.0
228
  fpmms["votes_second_outcome_perc"] = 0.0
229
+ fpmms["first_outcome"] = fpmms.question.apply(lambda x: x["outcomes"][0])
230
+ fpmms["second_outcome"] = fpmms.question.apply(lambda x: x["outcomes"][1])
231
  # computation of token distributions
232
  fpmms["token_first_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[0]))
233
  fpmms["token_second_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[1]))
 
237
  fpmms["first_token_perc"] = fpmms.apply(lambda x: get_first_token_perc(x), axis=1)
238
  fpmms["second_token_perc"] = fpmms.apply(lambda x: get_second_token_perc(x), axis=1)
239
  fpmms.drop(
240
+ columns=[
241
+ "token_first_amount",
242
+ "token_second_amount",
243
+ "total_tokens",
244
+ "question",
245
+ ],
246
  inplace=True,
247
  )
248
  # previous file to update?
 
269
  """Fetch, process, store and return the markets as a Dataframe."""
270
 
271
  logger.info("fetching new markets information")
272
+ current_timestamp = int(datetime.now(UTC).timestamp()) # seconds
273
  fpmms = fetch_fpmms(current_timestamp)
274
  logger.debug("New collected data")
275
  logger.debug(fpmms.head())
 
282
  add_trading_info(fpmms, current_timestamp)
283
 
284
  logger.info("saving the data")
285
+ logger.debug(fpmms.info())
286
  if filename:
287
  fpmms.to_parquet(DATA_DIR / filename, index=False)
288
 
scripts/live_traders_data.py CHANGED
@@ -74,8 +74,6 @@ def transform_trades(trades_json: dict) -> pd.DataFrame:
74
  logger.warning("No trades for this market")
75
  return df
76
 
77
- # print(df.info())
78
-
79
  # convert creator to address
80
  df["trade_creator"] = df["creator"].apply(lambda x: x["id"])
81
 
@@ -114,21 +112,21 @@ def compute_from_timestamp_value(
114
 
115
  def compute_votes_distribution(market_trades: pd.DataFrame):
116
  """Function to compute the distribution of votes for the trades of a market"""
 
117
  total_trades = len(market_trades)
118
- print(f"The total number of trades is {total_trades}")
119
  # outcomeIndex is always 1 or 0?
120
  sum_outcome_index_1 = sum(market_trades.outcomeIndex)
121
- print(f"The total number of votes for index 1 is {sum_outcome_index_1}")
122
- logger.info(f"The total number of votes for index 1 is {sum_outcome_index_1}")
123
  percentage_index_1 = round((sum_outcome_index_1 / total_trades) * 100, 2)
124
  return (100 - percentage_index_1), percentage_index_1
125
 
126
 
127
  def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
128
  """Function to update only the information related with the current timestamp"""
129
- # Iterate over the markets
130
  logger.info("Adding votes distribution per market")
131
-
132
  for i, fpmm in tqdm(fpmms.iterrows(), total=len(fpmms), desc="Analysing trades"):
133
  # update the trades for this market and at this specific current_timestamp
134
  logger.debug(f"current timestamp = {current_timestamp} and market timestamp={fpmm["tokens_timestamp"]}")
@@ -138,7 +136,7 @@ def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
138
  continue
139
  market_id = fpmm["id"]
140
 
141
- logger.info(f"Adding information for the market {market_id}")
142
  market_trades_json = _query_omen_xdai_subgraph(
143
  fpmm_id=market_id,
144
  )
@@ -146,8 +144,7 @@ def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
146
  if len(market_trades) == 0:
147
  logger.info("No trades for this market")
148
  continue
149
- # to compute the votes distribution
150
- logger.info("Computing the votes distribution")
151
  fpmms.at[i,"total_trades"] = len(market_trades)
152
  first_outcome, second_outcome = compute_votes_distribution(market_trades)
153
  logger.info(
 
74
  logger.warning("No trades for this market")
75
  return df
76
 
 
 
77
  # convert creator to address
78
  df["trade_creator"] = df["creator"].apply(lambda x: x["id"])
79
 
 
112
 
113
def compute_votes_distribution(market_trades: pd.DataFrame):
    """Compute the percentage of votes for each outcome of a market.

    Each trade counts as one vote; ``outcomeIndex`` is assumed to be 0 or 1
    (summing it counts the index-1 votes). Returns the pair
    ``(pct_outcome_0, pct_outcome_1)``. Callers must ensure the dataframe is
    non-empty before calling.
    """
    logger.info("Computing the votes distribution")
    n_trades = len(market_trades)
    logger.info(f"The total number of trades is {n_trades}")
    # outcomeIndex is always 1 or 0?
    votes_index_1 = sum(market_trades.outcomeIndex)
    logger.debug(f"The total number of votes for index 1 is {votes_index_1}")
    pct_index_1 = round((votes_index_1 / n_trades) * 100, 2)
    return (100 - pct_index_1), pct_index_1
123
 
124
 
125
  def add_trading_info(fpmms: pd.DataFrame, current_timestamp: int) -> None:
126
  """Function to update only the information related with the current timestamp"""
127
+
128
  logger.info("Adding votes distribution per market")
129
+ # Iterate over the markets
130
  for i, fpmm in tqdm(fpmms.iterrows(), total=len(fpmms), desc="Analysing trades"):
131
  # update the trades for this market and at this specific current_timestamp
132
  logger.debug(f"current timestamp = {current_timestamp} and market timestamp={fpmm["tokens_timestamp"]}")
 
136
  continue
137
  market_id = fpmm["id"]
138
 
139
+ logger.info(f"Adding trades information for the market {market_id}")
140
  market_trades_json = _query_omen_xdai_subgraph(
141
  fpmm_id=market_id,
142
  )
 
144
  if len(market_trades) == 0:
145
  logger.info("No trades for this market")
146
  continue
147
+
 
148
  fpmms.at[i,"total_trades"] = len(market_trades)
149
  first_outcome, second_outcome = compute_votes_distribution(market_trades)
150
  logger.info(
tabs/dist_gap.py ADDED
File without changes
tabs/tokens_votes_dist.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ from seaborn import FacetGrid
6
+ import plotly.express as px
7
+
8
+
9
def get_based_tokens_distribution(market_id: str, all_markets: pd.DataFrame):
    """Plot the evolution of one market's outcome probabilities as implied by
    the token distributions over time.

    :param market_id: id of the market to plot.
    :param all_markets: dataframe with one row per (market, sample); expected
        to contain the columns ``id``, ``sample_datetime``,
        ``first_token_perc``, ``second_token_perc``, ``first_outcome`` and
        ``second_outcome`` — TODO confirm against the producer script.
    :return: a ``gr.Plot`` wrapping the generated matplotlib figure.
    """
    selected_market = all_markets.loc[all_markets["id"] == market_id]
    ax = selected_market.plot(
        x="sample_datetime",
        y=["first_token_perc", "second_token_perc"],
        kind="bar",
        rot=0,
        stacked=True,
    )
    # Use the Axes API rather than the pyplot state machine (plt.title & co.):
    # the app serves concurrent requests (default_concurrency_limit=40), so the
    # pyplot "current figure" may belong to another request mid-render.
    # add overall title
    ax.set_title(
        "Outcomes probability over time based on tokens distributions", fontsize=16
    )
    # add axis titles
    ax.set_xlabel("Sample date")
    ax.set_ylabel("Percentage")
    # The legend labels come from the first sample row; assumes the y-column
    # order above matches (first_outcome, second_outcome).
    first_outcome = selected_market.iloc[0].first_outcome
    second_outcome = selected_market.iloc[0].second_outcome
    ax.legend(
        bbox_to_anchor=(1, 1.02),
        loc="upper left",
        labels=[first_outcome, second_outcome],
    )
    return gr.Plot(value=ax.figure)