Spaces:

valory
/

olas_predict_live_markets

Running

App Files Files Community

cyberosa commited on Aug 5, 2024

Commit

ddd4c40

1 Parent(s): f842047

trades filter for live distribution

Browse files

Files changed (7) hide show

app.py +2 -22
live_data/markets_live_data.parquet +2 -2
live_data/markets_live_data_sample.parquet +2 -2
notebooks/analysis_of_markets_data.ipynb +57 -24
notebooks/research_on_specific_markets.ipynb +455 -0
scripts/live_markets_data.py +3 -0
tabs/tokens_votes_dist.py +15 -0

app.py CHANGED Viewed

@@ -3,10 +3,11 @@ import gradio as gr
 import pandas as pd
 import duckdb
 import logging
-from typing import Tuple
 from tabs.tokens_votes_dist import (
     get_based_tokens_distribution,
     get_based_votes_distribution,
 )
 from tabs.dist_gap import (
     get_distribution_plot,
@@ -56,27 +57,6 @@ def prepare_data():
     return df
-def get_extreme_cases(live_fpmms: pd.DataFrame) -> Tuple:
-    """Function to return the id of the best and worst case according to the dist gap metric"""
-    # select markets with more than 1 sample
-    samples_per_market = (
-        live_fpmms[["id", "sample_timestamp"]].groupby("id").count().reset_index()
-    )
-    markets_with_multiple_samples = list(
-        samples_per_market.loc[samples_per_market["sample_timestamp"] > 1, "id"].values
-    )
-    selected_markets = live_fpmms.loc[
-        live_fpmms["id"].isin(markets_with_multiple_samples)
-    ]
-    selected_markets.sort_values(by="dist_gap_perc", ascending=False, inplace=True)
-    return (
-        selected_markets.iloc[-1].id,
-        selected_markets.iloc[-1].dist_gap_perc,
-        selected_markets.iloc[0].id,
-        selected_markets.iloc[0].dist_gap_perc,
-    )
 demo = gr.Blocks()
 markets_data = prepare_data()
 live_markets_data = markets_data.loc[markets_data["open"] == True]

 import pandas as pd
 import duckdb
 import logging
 from tabs.tokens_votes_dist import (
     get_based_tokens_distribution,
     get_based_votes_distribution,
+    get_extreme_cases,
 )
 from tabs.dist_gap import (
     get_distribution_plot,
     return df
 demo = gr.Blocks()
 markets_data = prepare_data()
 live_markets_data = markets_data.loc[markets_data["open"] == True]

live_data/markets_live_data.parquet CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ae654e0654ae908aff28be1cac0ebdff765f65e6c442dc94df1a2c801319c2a
-size 39132

 version https://git-lfs.github.com/spec/v1
+oid sha256:ac46b671cb0a3663931568dbef2b5221ff3d7d3cd0c4648258032c871335bf3a
+size 38288

live_data/markets_live_data_sample.parquet CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e0451385c1de2891fe1ab7fff8b3e5a5f797f351b2ca799266e65adec57a9b9
-size 34357

 version https://git-lfs.github.com/spec/v1
+oid sha256:ac46b671cb0a3663931568dbef2b5221ff3d7d3cd0c4648258032c871335bf3a
+size 38288

notebooks/analysis_of_markets_data.ipynb CHANGED Viewed

@@ -14,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
@@ -57,7 +57,6 @@
        "      <th>second_token_perc</th>\n",
        "      <th>mean_trade_size</th>\n",
        "      <th>sample_datetime</th>\n",
-       "      <th>market_id</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -82,7 +81,6 @@
        "      <td>14.76</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
-       "      <td>2.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -105,7 +103,6 @@
        "      <td>47.84</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
-       "      <td>3.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -128,7 +125,6 @@
        "      <td>43.07</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
-       "      <td>6.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -151,7 +147,6 @@
        "      <td>32.06</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
-       "      <td>7.0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -174,7 +169,6 @@
        "      <td>50.32</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
-       "      <td>8.0</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -223,15 +217,15 @@
        "3                      37.04           Yes             No             67.94   \n",
        "4                      52.46           Yes             No             49.68   \n",
        "\n",
-       "   second_token_perc  mean_trade_size     sample_datetime  market_id  \n",
-       "0              14.76              NaN 2024-07-31 18:06:59        2.0  \n",
-       "1              47.84              NaN 2024-07-31 18:06:59        3.0  \n",
-       "2              43.07              NaN 2024-07-31 18:06:59        6.0  \n",
-       "3              32.06              NaN 2024-07-31 18:06:59        7.0  \n",
-       "4              50.32              NaN 2024-07-31 18:06:59        8.0  "
       ]
      },
-     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -243,7 +237,46 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -252,7 +285,7 @@
      "text": [
       "<class 'pandas.core.frame.DataFrame'>\n",
       "RangeIndex: 168 entries, 0 to 167\n",
-      "Data columns (total 20 columns):\n",
       " #   Column                     Non-Null Count  Dtype         \n",
       "---  ------                     --------------  -----         \n",
       " 0   creationTimestamp          168 non-null    object        \n",
@@ -273,10 +306,9 @@
       " 15  first_token_perc           168 non-null    float64       \n",
       " 16  second_token_perc          168 non-null    float64       \n",
       " 17  mean_trade_size            84 non-null     float64       \n",
-      " 18  sample_datetime            126 non-null    datetime64[ns]\n",
-      " 19  market_id                  126 non-null    float64       \n",
-      "dtypes: bool(1), datetime64[ns](1), float64(7), int64(3), object(8)\n",
-      "memory usage: 25.2+ KB\n"
      ]
     }
    ],
@@ -286,7 +318,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 157,
    "metadata": {},
    "outputs": [
     {
@@ -296,10 +328,11 @@
        "1722442019    42\n",
        "1722501882    42\n",
        "1722593849    42\n",
        "Name: count, dtype: int64"
       ]
      },
-     "execution_count": 157,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -310,7 +343,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
@@ -322,7 +355,7 @@
        "Name: count, dtype: int64"
       ]
      },
-     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }

   },
   {
    "cell_type": "code",
+   "execution_count": 17,
    "metadata": {},
    "outputs": [
     {
        "      <th>second_token_perc</th>\n",
        "      <th>mean_trade_size</th>\n",
        "      <th>sample_datetime</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
        "      <td>14.76</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>47.84</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
        "      <td>43.07</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>32.06</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td>50.32</td>\n",
        "      <td>NaN</td>\n",
        "      <td>2024-07-31 18:06:59</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "3                      37.04           Yes             No             67.94   \n",
        "4                      52.46           Yes             No             49.68   \n",
        "\n",
+       "   second_token_perc  mean_trade_size     sample_datetime  \n",
+       "0              14.76              NaN 2024-07-31 18:06:59  \n",
+       "1              47.84              NaN 2024-07-31 18:06:59  \n",
+       "2              43.07              NaN 2024-07-31 18:06:59  \n",
+       "3              32.06              NaN 2024-07-31 18:06:59  \n",
+       "4              50.32              NaN 2024-07-31 18:06:59  "
       ]
      },
+     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "creationTimestamp             0\n",
+       "id                            0\n",
+       "liquidityMeasure              0\n",
+       "liquidityParameter            0\n",
+       "openingTimestamp              0\n",
+       "outcomeTokenAmounts           0\n",
+       "title                         0\n",
+       "sample_timestamp              0\n",
+       "open                          0\n",
+       "total_trades                  0\n",
+       "dist_gap_perc                 0\n",
+       "votes_first_outcome_perc      0\n",
+       "votes_second_outcome_perc     0\n",
+       "first_outcome                 0\n",
+       "second_outcome                0\n",
+       "first_token_perc              0\n",
+       "second_token_perc             0\n",
+       "mean_trade_size              84\n",
+       "sample_datetime               0\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "live_fpmms.isna().sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
      "text": [
       "<class 'pandas.core.frame.DataFrame'>\n",
       "RangeIndex: 168 entries, 0 to 167\n",
+      "Data columns (total 19 columns):\n",
       " #   Column                     Non-Null Count  Dtype         \n",
       "---  ------                     --------------  -----         \n",
       " 0   creationTimestamp          168 non-null    object        \n",
       " 15  first_token_perc           168 non-null    float64       \n",
       " 16  second_token_perc          168 non-null    float64       \n",
       " 17  mean_trade_size            84 non-null     float64       \n",
+      " 18  sample_datetime            168 non-null    datetime64[ns]\n",
+      "dtypes: bool(1), datetime64[ns](1), float64(6), int64(3), object(8)\n",
+      "memory usage: 23.9+ KB\n"
      ]
     }
    ],
   },
   {
    "cell_type": "code",
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
        "1722442019    42\n",
        "1722501882    42\n",
        "1722593849    42\n",
+       "1722852594    42\n",
        "Name: count, dtype: int64"
       ]
      },
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
        "Name: count, dtype: int64"
       ]
      },
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }

notebooks/research_on_specific_markets.ipynb ADDED Viewed

	@@ -0,0 +1,455 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "import gc"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>creationTimestamp</th>\n",
+       "      <th>id</th>\n",
+       "      <th>liquidityMeasure</th>\n",
+       "      <th>liquidityParameter</th>\n",
+       "      <th>openingTimestamp</th>\n",
+       "      <th>outcomeTokenAmounts</th>\n",
+       "      <th>title</th>\n",
+       "      <th>sample_timestamp</th>\n",
+       "      <th>open</th>\n",
+       "      <th>total_trades</th>\n",
+       "      <th>dist_gap_perc</th>\n",
+       "      <th>votes_first_outcome_perc</th>\n",
+       "      <th>votes_second_outcome_perc</th>\n",
+       "      <th>first_outcome</th>\n",
+       "      <th>second_outcome</th>\n",
+       "      <th>first_token_perc</th>\n",
+       "      <th>second_token_perc</th>\n",
+       "      <th>mean_trade_size</th>\n",
+       "      <th>sample_datetime</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1722127095</td>\n",
+       "      <td>0x18386924426f7c8ab7f5db4ad586c12dac5cd5e3</td>\n",
+       "      <td>4965258435682032297</td>\n",
+       "      <td>7000000000000000009</td>\n",
+       "      <td>1722556800</td>\n",
+       "      <td>[2912373242574997426, 16824766579944358195]</td>\n",
+       "      <td>Will the new AI-powered upgrade for the Philip...</td>\n",
+       "      <td>1722442019</td>\n",
+       "      <td>False</td>\n",
+       "      <td>29</td>\n",
+       "      <td>19.72</td>\n",
+       "      <td>65.52</td>\n",
+       "      <td>34.48</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>No</td>\n",
+       "      <td>85.24</td>\n",
+       "      <td>14.76</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2024-07-31 18:06:59</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>1722133525</td>\n",
+       "      <td>0x1f0f1fd3fcb3b49eeeb6197abcb5c44c1907dfbd</td>\n",
+       "      <td>6993447239584866547</td>\n",
+       "      <td>7000000000000000012</td>\n",
+       "      <td>1722556800</td>\n",
+       "      <td>[6703462178421126245, 7309655622095420488]</td>\n",
+       "      <td>Will Harvey Weinstein recover from Covid-19 an...</td>\n",
+       "      <td>1722442019</td>\n",
+       "      <td>False</td>\n",
+       "      <td>44</td>\n",
+       "      <td>11.48</td>\n",
+       "      <td>63.64</td>\n",
+       "      <td>36.36</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>No</td>\n",
+       "      <td>52.16</td>\n",
+       "      <td>47.84</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2024-07-31 18:06:59</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>1722132875</td>\n",
+       "      <td>0x3725b8f54cc53b468cdc165ee10218344b607158</td>\n",
+       "      <td>6932346630944751276</td>\n",
+       "      <td>7000000000000000011</td>\n",
+       "      <td>1722556800</td>\n",
+       "      <td>[6087978352168369108, 8048648856076756352]</td>\n",
+       "      <td>Will Tesla's net income increase in the third ...</td>\n",
+       "      <td>1722442019</td>\n",
+       "      <td>False</td>\n",
+       "      <td>44</td>\n",
+       "      <td>4.66</td>\n",
+       "      <td>52.27</td>\n",
+       "      <td>47.73</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>No</td>\n",
+       "      <td>56.93</td>\n",
+       "      <td>43.07</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2024-07-31 18:06:59</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1722300340</td>\n",
+       "      <td>0x38d2b80cbd152b93a8df640a21d80e4b9d75039a</td>\n",
+       "      <td>6533756051198779116</td>\n",
+       "      <td>7000000000000000009</td>\n",
+       "      <td>1722729600</td>\n",
+       "      <td>[4808284238922480369, 10190745298156651455]</td>\n",
+       "      <td>Will SpaceX launch Falcon 9 rocket on 3 August...</td>\n",
+       "      <td>1722442019</td>\n",
+       "      <td>False</td>\n",
+       "      <td>27</td>\n",
+       "      <td>4.98</td>\n",
+       "      <td>62.96</td>\n",
+       "      <td>37.04</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>No</td>\n",
+       "      <td>67.94</td>\n",
+       "      <td>32.06</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2024-07-31 18:06:59</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>1722125375</td>\n",
+       "      <td>0x39e657d48714c483b7ee2bc9314e6c7ad63d2d79</td>\n",
+       "      <td>6999859700819864416</td>\n",
+       "      <td>7000000000000000015</td>\n",
+       "      <td>1722556800</td>\n",
+       "      <td>[7044460134742943173, 6955820469241400760]</td>\n",
+       "      <td>Will the wildfire in California be under contr...</td>\n",
+       "      <td>1722442019</td>\n",
+       "      <td>False</td>\n",
+       "      <td>61</td>\n",
+       "      <td>2.14</td>\n",
+       "      <td>47.54</td>\n",
+       "      <td>52.46</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>No</td>\n",
+       "      <td>49.68</td>\n",
+       "      <td>50.32</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2024-07-31 18:06:59</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  creationTimestamp                                          id  \\\n",
+       "0        1722127095  0x18386924426f7c8ab7f5db4ad586c12dac5cd5e3   \n",
+       "1        1722133525  0x1f0f1fd3fcb3b49eeeb6197abcb5c44c1907dfbd   \n",
+       "2        1722132875  0x3725b8f54cc53b468cdc165ee10218344b607158   \n",
+       "3        1722300340  0x38d2b80cbd152b93a8df640a21d80e4b9d75039a   \n",
+       "4        1722125375  0x39e657d48714c483b7ee2bc9314e6c7ad63d2d79   \n",
+       "\n",
+       "      liquidityMeasure   liquidityParameter openingTimestamp  \\\n",
+       "0  4965258435682032297  7000000000000000009       1722556800   \n",
+       "1  6993447239584866547  7000000000000000012       1722556800   \n",
+       "2  6932346630944751276  7000000000000000011       1722556800   \n",
+       "3  6533756051198779116  7000000000000000009       1722729600   \n",
+       "4  6999859700819864416  7000000000000000015       1722556800   \n",
+       "\n",
+       "                           outcomeTokenAmounts  \\\n",
+       "0  [2912373242574997426, 16824766579944358195]   \n",
+       "1   [6703462178421126245, 7309655622095420488]   \n",
+       "2   [6087978352168369108, 8048648856076756352]   \n",
+       "3  [4808284238922480369, 10190745298156651455]   \n",
+       "4   [7044460134742943173, 6955820469241400760]   \n",
+       "\n",
+       "                                               title  sample_timestamp   open  \\\n",
+       "0  Will the new AI-powered upgrade for the Philip...        1722442019  False   \n",
+       "1  Will Harvey Weinstein recover from Covid-19 an...        1722442019  False   \n",
+       "2  Will Tesla's net income increase in the third ...        1722442019  False   \n",
+       "3  Will SpaceX launch Falcon 9 rocket on 3 August...        1722442019  False   \n",
+       "4  Will the wildfire in California be under contr...        1722442019  False   \n",
+       "\n",
+       "   total_trades  dist_gap_perc  votes_first_outcome_perc  \\\n",
+       "0            29          19.72                     65.52   \n",
+       "1            44          11.48                     63.64   \n",
+       "2            44           4.66                     52.27   \n",
+       "3            27           4.98                     62.96   \n",
+       "4            61           2.14                     47.54   \n",
+       "\n",
+       "   votes_second_outcome_perc first_outcome second_outcome  first_token_perc  \\\n",
+       "0                      34.48           Yes             No             85.24   \n",
+       "1                      36.36           Yes             No             52.16   \n",
+       "2                      47.73           Yes             No             56.93   \n",
+       "3                      37.04           Yes             No             67.94   \n",
+       "4                      52.46           Yes             No             49.68   \n",
+       "\n",
+       "   second_token_perc  mean_trade_size     sample_datetime  \n",
+       "0              14.76              NaN 2024-07-31 18:06:59  \n",
+       "1              47.84              NaN 2024-07-31 18:06:59  \n",
+       "2              43.07              NaN 2024-07-31 18:06:59  \n",
+       "3              32.06              NaN 2024-07-31 18:06:59  \n",
+       "4              50.32              NaN 2024-07-31 18:06:59  "
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "live_fpmms = pd.read_parquet('../live_data/markets_live_data.parquet')\n",
+    "live_fpmms.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 168 entries, 0 to 167\n",
+      "Data columns (total 19 columns):\n",
+      " #   Column                     Non-Null Count  Dtype         \n",
+      "---  ------                     --------------  -----         \n",
+      " 0   creationTimestamp          168 non-null    object        \n",
+      " 1   id                         168 non-null    object        \n",
+      " 2   liquidityMeasure           168 non-null    int64         \n",
+      " 3   liquidityParameter         168 non-null    object        \n",
+      " 4   openingTimestamp           168 non-null    object        \n",
+      " 5   outcomeTokenAmounts        168 non-null    object        \n",
+      " 6   title                      168 non-null    object        \n",
+      " 7   sample_timestamp           168 non-null    int64         \n",
+      " 8   open                       168 non-null    bool          \n",
+      " 9   total_trades               168 non-null    int64         \n",
+      " 10  dist_gap_perc              168 non-null    float64       \n",
+      " 11  votes_first_outcome_perc   168 non-null    float64       \n",
+      " 12  votes_second_outcome_perc  168 non-null    float64       \n",
+      " 13  first_outcome              168 non-null    object        \n",
+      " 14  second_outcome             168 non-null    object        \n",
+      " 15  first_token_perc           168 non-null    float64       \n",
+      " 16  second_token_perc          168 non-null    float64       \n",
+      " 17  mean_trade_size            84 non-null     float64       \n",
+      " 18  sample_datetime            126 non-null    datetime64[ns]\n",
+      "dtypes: bool(1), datetime64[ns](1), float64(6), int64(3), object(8)\n",
+      "memory usage: 23.9+ KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "live_fpmms.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "id = \"0xf2db83c7a5f926290fb93cebea810746cd674916\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "target_market = live_fpmms.loc[live_fpmms[\"id\"]==id]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>creationTimestamp</th>\n",
+       "      <th>id</th>\n",
+       "      <th>liquidityMeasure</th>\n",
+       "      <th>liquidityParameter</th>\n",
+       "      <th>openingTimestamp</th>\n",
+       "      <th>outcomeTokenAmounts</th>\n",
+       "      <th>title</th>\n",
+       "      <th>sample_timestamp</th>\n",
+       "      <th>open</th>\n",
+       "      <th>total_trades</th>\n",
+       "      <th>dist_gap_perc</th>\n",
+       "      <th>votes_first_outcome_perc</th>\n",
+       "      <th>votes_second_outcome_perc</th>\n",
+       "      <th>first_outcome</th>\n",
+       "      <th>second_outcome</th>\n",
+       "      <th>first_token_perc</th>\n",
+       "      <th>second_token_perc</th>\n",
+       "      <th>mean_trade_size</th>\n",
+       "      <th>sample_datetime</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>122</th>\n",
+       "      <td>1722571590</td>\n",
+       "      <td>0xf2db83c7a5f926290fb93cebea810746cd674916</td>\n",
+       "      <td>7000000000000000000</td>\n",
+       "      <td>7000000000000000000</td>\n",
+       "      <td>1722988800</td>\n",
+       "      <td>[7000000000000000000, 7000000000000000000]</td>\n",
+       "      <td>Will Donald Trump's campaign announce another ...</td>\n",
+       "      <td>1722593849</td>\n",
+       "      <td>True</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.00</td>\n",
+       "      <td>50.00</td>\n",
+       "      <td>50.00</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>No</td>\n",
+       "      <td>50.00</td>\n",
+       "      <td>50.00</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>2024-08-02 12:17:29</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>166</th>\n",
+       "      <td>1722571590</td>\n",
+       "      <td>0xf2db83c7a5f926290fb93cebea810746cd674916</td>\n",
+       "      <td>6949985446986235988</td>\n",
+       "      <td>7000000000000000011</td>\n",
+       "      <td>1722988800</td>\n",
+       "      <td>[6209077712260007050, 7891671238587987896]</td>\n",
+       "      <td>Will Donald Trump's campaign announce another ...</td>\n",
+       "      <td>1722847693</td>\n",
+       "      <td>True</td>\n",
+       "      <td>39</td>\n",
+       "      <td>13.26</td>\n",
+       "      <td>69.23</td>\n",
+       "      <td>30.77</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>No</td>\n",
+       "      <td>55.97</td>\n",
+       "      <td>44.03</td>\n",
+       "      <td>0.646436</td>\n",
+       "      <td>NaT</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    creationTimestamp                                          id  \\\n",
+       "122        1722571590  0xf2db83c7a5f926290fb93cebea810746cd674916   \n",
+       "166        1722571590  0xf2db83c7a5f926290fb93cebea810746cd674916   \n",
+       "\n",
+       "        liquidityMeasure   liquidityParameter openingTimestamp  \\\n",
+       "122  7000000000000000000  7000000000000000000       1722988800   \n",
+       "166  6949985446986235988  7000000000000000011       1722988800   \n",
+       "\n",
+       "                            outcomeTokenAmounts  \\\n",
+       "122  [7000000000000000000, 7000000000000000000]   \n",
+       "166  [6209077712260007050, 7891671238587987896]   \n",
+       "\n",
+       "                                                 title  sample_timestamp  \\\n",
+       "122  Will Donald Trump's campaign announce another ...        1722593849   \n",
+       "166  Will Donald Trump's campaign announce another ...        1722847693   \n",
+       "\n",
+       "     open  total_trades  dist_gap_perc  votes_first_outcome_perc  \\\n",
+       "122  True             0           0.00                     50.00   \n",
+       "166  True            39          13.26                     69.23   \n",
+       "\n",
+       "     votes_second_outcome_perc first_outcome second_outcome  first_token_perc  \\\n",
+       "122                      50.00           Yes             No             50.00   \n",
+       "166                      30.77           Yes             No             55.97   \n",
+       "\n",
+       "     second_token_perc  mean_trade_size     sample_datetime  \n",
+       "122              50.00         0.000000 2024-08-02 12:17:29  \n",
+       "166              44.03         0.646436                 NaT  "
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "target_market"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "hf_dashboards",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

scripts/live_markets_data.py CHANGED Viewed

@@ -232,6 +232,9 @@ def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int)
     fpmms["token_first_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[0]))
     fpmms["token_second_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[1]))
     fpmms["liquidityMeasure"] = fpmms["liquidityMeasure"].apply(lambda x: int(x))
     fpmms["total_tokens"] = fpmms.apply(
         lambda x: x.token_first_amount + x.token_second_amount, axis=1
     )

     fpmms["token_first_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[0]))
     fpmms["token_second_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[1]))
     fpmms["liquidityMeasure"] = fpmms["liquidityMeasure"].apply(lambda x: int(x))
+    fpmms["sample_datetime"] = fpmms["sample_timestamp"].apply(
+        lambda x: datetime.fromtimestamp(x)
+    )
     fpmms["total_tokens"] = fpmms.apply(
         lambda x: x.token_first_amount + x.token_second_amount, axis=1
     )

tabs/tokens_votes_dist.py CHANGED Viewed

@@ -4,6 +4,8 @@ import matplotlib.pyplot as plt
 import seaborn as sns
 from seaborn import FacetGrid
 import plotly.express as px
 def get_based_tokens_distribution(market_id: str, all_markets: pd.DataFrame):
@@ -58,3 +60,16 @@ def get_based_votes_distribution(market_id: str, all_markets: pd.DataFrame):
         labels=[first_outcome, second_outcome],
     )
     return gr.Plot(value=ax.figure)

 import seaborn as sns
 from seaborn import FacetGrid
 import plotly.express as px
+import logging
+from typing import Tuple
 def get_based_tokens_distribution(market_id: str, all_markets: pd.DataFrame):
         labels=[first_outcome, second_outcome],
     )
     return gr.Plot(value=ax.figure)
+def get_extreme_cases(live_fpmms: pd.DataFrame) -> Tuple:
+    """Function to return the id of the best and worst case according to the dist gap metric"""
+    # select markets with some trades
+    selected_markets = live_fpmms.loc[live_fpmms["total_trades"] > 0]
+    selected_markets.sort_values(by="dist_gap_perc", ascending=False, inplace=True)
+    return (
+        selected_markets.iloc[-1].id,
+        selected_markets.iloc[-1].dist_gap_perc,
+        selected_markets.iloc[0].id,
+        selected_markets.iloc[0].dist_gap_perc,
+    )