cyberosa committed on
Commit
52d1750
·
1 Parent(s): d41146f

Adding divergence graph

Browse files
app.py CHANGED
@@ -14,9 +14,11 @@ from tabs.trader_plots import (
14
  plot_trader_metrics_by_trader_type,
15
  default_trader_metric,
16
  trader_metric_choices,
17
- get_trader_metrics_text,
18
  )
19
 
 
 
20
 
21
  def get_logger():
22
  logger = logging.getLogger(__name__)
@@ -37,7 +39,7 @@ logger = get_logger()
37
 
38
  def get_all_data():
39
  """
40
- Get parquet file from weekly stats
41
  """
42
  logger.info("Getting traders data")
43
  con = duckdb.connect(":memory:")
@@ -49,14 +51,22 @@ def get_all_data():
49
  df1 = con.execute(query1).fetchdf()
50
  logger.info("Got all data from all_trades_profitability.parquet")
51
 
 
 
 
 
 
 
 
 
52
  con.close()
53
 
54
- return df1
55
 
56
 
57
  def prepare_data():
58
 
59
- all_trades = get_all_data()
60
 
61
  all_trades["creation_date"] = all_trades["creation_timestamp"].dt.date
62
 
@@ -81,10 +91,14 @@ def prepare_data():
81
  trader_agents_data["month_year_week"] = (
82
  trader_agents_data["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d")
83
  )
84
- return trader_agents_data
 
 
 
 
85
 
86
 
87
- trader_agents_data = prepare_data()
88
  print("trader agents data before computing metrics")
89
  print(trader_agents_data.head())
90
  demo = gr.Blocks()
@@ -122,7 +136,7 @@ with demo:
122
  traders_df=weekly_metrics_by_market_creator,
123
  )
124
  with gr.Column(scale=1):
125
- trade_details_text = get_trader_metrics_text()
126
 
127
  def update_trader_details(trader_detail):
128
  return plot_trader_metrics_by_market_creator(
@@ -154,7 +168,7 @@ with demo:
154
  traders_df=weekly_metrics_by_trader_type,
155
  )
156
  with gr.Column(scale=1):
157
- trader_metrics_text = get_trader_metrics_text()
158
 
159
  def update_trader_metric(trader_metric):
160
  return plot_trader_metrics_by_trader_type(
@@ -167,5 +181,19 @@ with demo:
167
  inputs=trader_metric_selector,
168
  outputs=trader_type_plot,
169
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
  demo.queue(default_concurrency_limit=40).launch()
 
14
  plot_trader_metrics_by_trader_type,
15
  default_trader_metric,
16
  trader_metric_choices,
17
+ get_metrics_text,
18
  )
19
 
20
+ from tabs.market_plots import plot_kl_div_per_market
21
+
22
 
23
  def get_logger():
24
  logger = logging.getLogger(__name__)
 
39
 
40
  def get_all_data():
41
  """
42
+ Get parquet files from weekly stats and new generated
43
  """
44
  logger.info("Getting traders data")
45
  con = duckdb.connect(":memory:")
 
51
  df1 = con.execute(query1).fetchdf()
52
  logger.info("Got all data from all_trades_profitability.parquet")
53
 
54
+ # Query to fetch data from closed_markets_div.parquet
55
+ query2 = f"""
56
+ SELECT *
57
+ FROM read_parquet('./data/closed_markets_div.parquet')
58
+ """
59
+ df2 = con.execute(query2).fetchdf()
60
+ logger.info("Got all data from closed_markets_div.parquet")
61
+
62
  con.close()
63
 
64
+ return df1, df2
65
 
66
 
67
  def prepare_data():
68
 
69
+ all_trades, closed_markets = get_all_data()
70
 
71
  all_trades["creation_date"] = all_trades["creation_timestamp"].dt.date
72
 
 
91
  trader_agents_data["month_year_week"] = (
92
  trader_agents_data["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d")
93
  )
94
+
95
+ closed_markets["month_year_week"] = (
96
+ closed_markets["opening_datetime"].dt.to_period("W").dt.strftime("%b-%d")
97
+ )
98
+ return trader_agents_data, closed_markets
99
 
100
 
101
+ trader_agents_data, closed_markets = prepare_data()
102
  print("trader agents data before computing metrics")
103
  print(trader_agents_data.head())
104
  demo = gr.Blocks()
 
136
  traders_df=weekly_metrics_by_market_creator,
137
  )
138
  with gr.Column(scale=1):
139
+ trade_details_text = get_metrics_text()
140
 
141
  def update_trader_details(trader_detail):
142
  return plot_trader_metrics_by_market_creator(
 
168
  traders_df=weekly_metrics_by_trader_type,
169
  )
170
  with gr.Column(scale=1):
171
+ trader_metrics_text = get_metrics_text()
172
 
173
  def update_trader_metric(trader_metric):
174
  return plot_trader_metrics_by_trader_type(
 
181
  inputs=trader_metric_selector,
182
  outputs=trader_type_plot,
183
  )
184
+ with gr.TabItem("📉Closed Markets Kullback–Leibler divergence"):
185
+ with gr.Row():
186
+ gr.Markdown(
187
+ "# Weekly Kullback–Leibler divergence computed for the closed markets"
188
+ )
189
+ with gr.Row():
190
+ gr.Markdown(
191
+ "This divergence is a type of statistical distance between two probability distributions P and Q. In our case P is the probability defined by the final liquidity distribution of the market. While Q is the distribution of the final outcome."
192
+ )
193
+ with gr.Row():
194
+ with gr.Column(scale=3):
195
+ kl_div_plot = plot_kl_div_per_market(closed_markets=closed_markets)
196
+ with gr.Column(scale=1):
197
+ metrics_text = get_metrics_text()
198
 
199
  demo.queue(default_concurrency_limit=40).launch()
data/closed_markets_div.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01028e48165f8e468cd377da59e13da584a0938cdc64549dee2a1c523d6e1b13
3
+ size 48695
data/fpmms.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86135bb64013c54d5180c31fca13235943eb39571e760a695dac2aaa1e9cb1ce
3
+ size 436427
notebooks/closed_markets.ipynb ADDED
@@ -0,0 +1,1481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd"
10
+ ]
11
+ },
12
+ {
13
+ "cell_type": "code",
14
+ "execution_count": 20,
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "try:\n",
19
+ " markets = pd.read_parquet(\"../data/fpmms.parquet\")\n",
20
+ "except Exception:\n",
21
+ " print(\"Error reading the parquet file\")\n",
22
+ "\n",
23
+ "markets[\"currentAnswer\"] = markets[\"currentAnswer\"].apply(lambda x: x.lower())\n",
24
+ "# filter only markets with yes, no answers\n",
25
+ "valid_answers = [\"yes\", \"no\"]\n",
26
+ "markets = markets.loc[markets[\"currentAnswer\"].isin(valid_answers)]"
27
+ ]
28
+ },
29
+ {
30
+ "cell_type": "code",
31
+ "execution_count": 3,
32
+ "metadata": {},
33
+ "outputs": [
34
+ {
35
+ "data": {
36
+ "text/plain": [
37
+ "4686"
38
+ ]
39
+ },
40
+ "execution_count": 3,
41
+ "metadata": {},
42
+ "output_type": "execute_result"
43
+ }
44
+ ],
45
+ "source": [
46
+ "len(markets)"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": 4,
52
+ "metadata": {},
53
+ "outputs": [
54
+ {
55
+ "data": {
56
+ "text/plain": [
57
+ "4686"
58
+ ]
59
+ },
60
+ "execution_count": 4,
61
+ "metadata": {},
62
+ "output_type": "execute_result"
63
+ }
64
+ ],
65
+ "source": [
66
+ "len(markets.id.unique())"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "execution_count": 5,
72
+ "metadata": {},
73
+ "outputs": [
74
+ {
75
+ "data": {
76
+ "text/html": [
77
+ "<div>\n",
78
+ "<style scoped>\n",
79
+ " .dataframe tbody tr th:only-of-type {\n",
80
+ " vertical-align: middle;\n",
81
+ " }\n",
82
+ "\n",
83
+ " .dataframe tbody tr th {\n",
84
+ " vertical-align: top;\n",
85
+ " }\n",
86
+ "\n",
87
+ " .dataframe thead th {\n",
88
+ " text-align: right;\n",
89
+ " }\n",
90
+ "</style>\n",
91
+ "<table border=\"1\" class=\"dataframe\">\n",
92
+ " <thead>\n",
93
+ " <tr style=\"text-align: right;\">\n",
94
+ " <th></th>\n",
95
+ " <th>currentAnswer</th>\n",
96
+ " <th>id</th>\n",
97
+ " <th>title</th>\n",
98
+ " <th>market_creator</th>\n",
99
+ " </tr>\n",
100
+ " </thead>\n",
101
+ " <tbody>\n",
102
+ " <tr>\n",
103
+ " <th>0</th>\n",
104
+ " <td>no</td>\n",
105
+ " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5</td>\n",
106
+ " <td>Will the first floating offshore wind research...</td>\n",
107
+ " <td>quickstart</td>\n",
108
+ " </tr>\n",
109
+ " <tr>\n",
110
+ " <th>1</th>\n",
111
+ " <td>no</td>\n",
112
+ " <td>0x0020d13c89140b47e10db54cbd53852b90bc1391</td>\n",
113
+ " <td>Will the Francis Scott Key Bridge in Baltimore...</td>\n",
114
+ " <td>quickstart</td>\n",
115
+ " </tr>\n",
116
+ " <tr>\n",
117
+ " <th>2</th>\n",
118
+ " <td>no</td>\n",
119
+ " <td>0x003ae5e007cc38b3f86b0ed7c82f938a1285ac07</td>\n",
120
+ " <td>Will FC Saarbrucken reach the final of the Ger...</td>\n",
121
+ " <td>quickstart</td>\n",
122
+ " </tr>\n",
123
+ " <tr>\n",
124
+ " <th>3</th>\n",
125
+ " <td>yes</td>\n",
126
+ " <td>0x004c8d4c619dc6b9caa940f5ea7ef699ae85359c</td>\n",
127
+ " <td>Will the pro-life activists convicted for 'con...</td>\n",
128
+ " <td>quickstart</td>\n",
129
+ " </tr>\n",
130
+ " <tr>\n",
131
+ " <th>4</th>\n",
132
+ " <td>yes</td>\n",
133
+ " <td>0x005e3f7a90585acbec807425a750fbba1d0c2b5c</td>\n",
134
+ " <td>Will Apple announce the release of a new M4 ch...</td>\n",
135
+ " <td>quickstart</td>\n",
136
+ " </tr>\n",
137
+ " </tbody>\n",
138
+ "</table>\n",
139
+ "</div>"
140
+ ],
141
+ "text/plain": [
142
+ " currentAnswer id \\\n",
143
+ "0 no 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 \n",
144
+ "1 no 0x0020d13c89140b47e10db54cbd53852b90bc1391 \n",
145
+ "2 no 0x003ae5e007cc38b3f86b0ed7c82f938a1285ac07 \n",
146
+ "3 yes 0x004c8d4c619dc6b9caa940f5ea7ef699ae85359c \n",
147
+ "4 yes 0x005e3f7a90585acbec807425a750fbba1d0c2b5c \n",
148
+ "\n",
149
+ " title market_creator \n",
150
+ "0 Will the first floating offshore wind research... quickstart \n",
151
+ "1 Will the Francis Scott Key Bridge in Baltimore... quickstart \n",
152
+ "2 Will FC Saarbrucken reach the final of the Ger... quickstart \n",
153
+ "3 Will the pro-life activists convicted for 'con... quickstart \n",
154
+ "4 Will Apple announce the release of a new M4 ch... quickstart "
155
+ ]
156
+ },
157
+ "execution_count": 5,
158
+ "metadata": {},
159
+ "output_type": "execute_result"
160
+ }
161
+ ],
162
+ "source": [
163
+ "markets.head()"
164
+ ]
165
+ },
166
+ {
167
+ "cell_type": "code",
168
+ "execution_count": null,
169
+ "metadata": {},
170
+ "outputs": [],
171
+ "source": []
172
+ },
173
+ {
174
+ "cell_type": "code",
175
+ "execution_count": 6,
176
+ "metadata": {},
177
+ "outputs": [],
178
+ "source": [
179
+ "trades = pd.read_parquet(\"../data/fpmmTrades.parquet\")"
180
+ ]
181
+ },
182
+ {
183
+ "cell_type": "code",
184
+ "execution_count": 7,
185
+ "metadata": {},
186
+ "outputs": [
187
+ {
188
+ "data": {
189
+ "text/html": [
190
+ "<div>\n",
191
+ "<style scoped>\n",
192
+ " .dataframe tbody tr th:only-of-type {\n",
193
+ " vertical-align: middle;\n",
194
+ " }\n",
195
+ "\n",
196
+ " .dataframe tbody tr th {\n",
197
+ " vertical-align: top;\n",
198
+ " }\n",
199
+ "\n",
200
+ " .dataframe thead th {\n",
201
+ " text-align: right;\n",
202
+ " }\n",
203
+ "</style>\n",
204
+ "<table border=\"1\" class=\"dataframe\">\n",
205
+ " <thead>\n",
206
+ " <tr style=\"text-align: right;\">\n",
207
+ " <th></th>\n",
208
+ " <th>collateralAmount</th>\n",
209
+ " <th>collateralAmountUSD</th>\n",
210
+ " <th>collateralToken</th>\n",
211
+ " <th>creationTimestamp</th>\n",
212
+ " <th>trader_address</th>\n",
213
+ " <th>feeAmount</th>\n",
214
+ " <th>id</th>\n",
215
+ " <th>oldOutcomeTokenMarginalPrice</th>\n",
216
+ " <th>outcomeIndex</th>\n",
217
+ " <th>outcomeTokenMarginalPrice</th>\n",
218
+ " <th>...</th>\n",
219
+ " <th>market_creator</th>\n",
220
+ " <th>fpmm.answerFinalizedTimestamp</th>\n",
221
+ " <th>fpmm.arbitrationOccurred</th>\n",
222
+ " <th>fpmm.currentAnswer</th>\n",
223
+ " <th>fpmm.id</th>\n",
224
+ " <th>fpmm.isPendingArbitration</th>\n",
225
+ " <th>fpmm.openingTimestamp</th>\n",
226
+ " <th>fpmm.outcomes</th>\n",
227
+ " <th>fpmm.title</th>\n",
228
+ " <th>fpmm.condition.id</th>\n",
229
+ " </tr>\n",
230
+ " </thead>\n",
231
+ " <tbody>\n",
232
+ " <tr>\n",
233
+ " <th>0</th>\n",
234
+ " <td>450426474650738688</td>\n",
235
+ " <td>0.4504269694034145716308073094168006</td>\n",
236
+ " <td>0xe91d153e0b41518a2ce8dd3d7944fa863463a97d</td>\n",
237
+ " <td>1724553455</td>\n",
238
+ " <td>0x022b36c50b85b8ae7addfb8a35d76c59d5814834</td>\n",
239
+ " <td>9008529493014773</td>\n",
240
+ " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x02...</td>\n",
241
+ " <td>0.592785210609610270634125335572129</td>\n",
242
+ " <td>1</td>\n",
243
+ " <td>0.6171295391012242250994586583534301</td>\n",
244
+ " <td>...</td>\n",
245
+ " <td>quickstart</td>\n",
246
+ " <td>1725071760</td>\n",
247
+ " <td>False</td>\n",
248
+ " <td>0x00000000000000000000000000000000000000000000...</td>\n",
249
+ " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5</td>\n",
250
+ " <td>False</td>\n",
251
+ " <td>1724976000</td>\n",
252
+ " <td>[Yes, No]</td>\n",
253
+ " <td>Will the first floating offshore wind research...</td>\n",
254
+ " <td>0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1...</td>\n",
255
+ " </tr>\n",
256
+ " <tr>\n",
257
+ " <th>1</th>\n",
258
+ " <td>610163214546941400</td>\n",
259
+ " <td>0.6101636232215150135654007337015298</td>\n",
260
+ " <td>0xe91d153e0b41518a2ce8dd3d7944fa863463a97d</td>\n",
261
+ " <td>1724811940</td>\n",
262
+ " <td>0x034c4ad84f7ac6638bf19300d5bbe7d9b981e736</td>\n",
263
+ " <td>12203264290938828</td>\n",
264
+ " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x03...</td>\n",
265
+ " <td>0.842992636523755061934822129394812</td>\n",
266
+ " <td>1</td>\n",
267
+ " <td>0.8523396372892128845826889719620915</td>\n",
268
+ " <td>...</td>\n",
269
+ " <td>quickstart</td>\n",
270
+ " <td>1725071760</td>\n",
271
+ " <td>False</td>\n",
272
+ " <td>0x00000000000000000000000000000000000000000000...</td>\n",
273
+ " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5</td>\n",
274
+ " <td>False</td>\n",
275
+ " <td>1724976000</td>\n",
276
+ " <td>[Yes, No]</td>\n",
277
+ " <td>Will the first floating offshore wind research...</td>\n",
278
+ " <td>0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1...</td>\n",
279
+ " </tr>\n",
280
+ " <tr>\n",
281
+ " <th>2</th>\n",
282
+ " <td>789065092332460672</td>\n",
283
+ " <td>0.7890644120527324071908793822796086</td>\n",
284
+ " <td>0xe91d153e0b41518a2ce8dd3d7944fa863463a97d</td>\n",
285
+ " <td>1724815755</td>\n",
286
+ " <td>0x09e9d42a029e8b0c2df3871709a762117a681d92</td>\n",
287
+ " <td>15781301846649213</td>\n",
288
+ " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x09...</td>\n",
289
+ " <td>0.7983775743712442891104598770339028</td>\n",
290
+ " <td>1</td>\n",
291
+ " <td>0.8152123711444691659642000374025623</td>\n",
292
+ " <td>...</td>\n",
293
+ " <td>quickstart</td>\n",
294
+ " <td>1725071760</td>\n",
295
+ " <td>False</td>\n",
296
+ " <td>0x00000000000000000000000000000000000000000000...</td>\n",
297
+ " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5</td>\n",
298
+ " <td>False</td>\n",
299
+ " <td>1724976000</td>\n",
300
+ " <td>[Yes, No]</td>\n",
301
+ " <td>Will the first floating offshore wind research...</td>\n",
302
+ " <td>0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1...</td>\n",
303
+ " </tr>\n",
304
+ " <tr>\n",
305
+ " <th>3</th>\n",
306
+ " <td>1000000000000000000</td>\n",
307
+ " <td>1.000000605383660329048491794939126</td>\n",
308
+ " <td>0xe91d153e0b41518a2ce8dd3d7944fa863463a97d</td>\n",
309
+ " <td>1724546620</td>\n",
310
+ " <td>0x09e9d42a029e8b0c2df3871709a762117a681d92</td>\n",
311
+ " <td>20000000000000000</td>\n",
312
+ " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x09...</td>\n",
313
+ " <td>0.5110745907733438805447072252622708</td>\n",
314
+ " <td>1</td>\n",
315
+ " <td>0.5746805204222762335911904727318937</td>\n",
316
+ " <td>...</td>\n",
317
+ " <td>quickstart</td>\n",
318
+ " <td>1725071760</td>\n",
319
+ " <td>False</td>\n",
320
+ " <td>0x00000000000000000000000000000000000000000000...</td>\n",
321
+ " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5</td>\n",
322
+ " <td>False</td>\n",
323
+ " <td>1724976000</td>\n",
324
+ " <td>[Yes, No]</td>\n",
325
+ " <td>Will the first floating offshore wind research...</td>\n",
326
+ " <td>0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1...</td>\n",
327
+ " </tr>\n",
328
+ " <tr>\n",
329
+ " <th>4</th>\n",
330
+ " <td>100000000000000000</td>\n",
331
+ " <td>0.1000004271262862419547394646567906</td>\n",
332
+ " <td>0xe91d153e0b41518a2ce8dd3d7944fa863463a97d</td>\n",
333
+ " <td>1724771260</td>\n",
334
+ " <td>0x0d049dcaece0ecb6fc81a460da7bcc2a4785d6e5</td>\n",
335
+ " <td>2000000000000000</td>\n",
336
+ " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x0d...</td>\n",
337
+ " <td>0.2713968218662319388988681987389408</td>\n",
338
+ " <td>0</td>\n",
339
+ " <td>0.2804586217805511523845593360379658</td>\n",
340
+ " <td>...</td>\n",
341
+ " <td>quickstart</td>\n",
342
+ " <td>1725071760</td>\n",
343
+ " <td>False</td>\n",
344
+ " <td>0x00000000000000000000000000000000000000000000...</td>\n",
345
+ " <td>0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5</td>\n",
346
+ " <td>False</td>\n",
347
+ " <td>1724976000</td>\n",
348
+ " <td>[Yes, No]</td>\n",
349
+ " <td>Will the first floating offshore wind research...</td>\n",
350
+ " <td>0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1...</td>\n",
351
+ " </tr>\n",
352
+ " </tbody>\n",
353
+ "</table>\n",
354
+ "<p>5 rows × 24 columns</p>\n",
355
+ "</div>"
356
+ ],
357
+ "text/plain": [
358
+ " collateralAmount collateralAmountUSD \\\n",
359
+ "0 450426474650738688 0.4504269694034145716308073094168006 \n",
360
+ "1 610163214546941400 0.6101636232215150135654007337015298 \n",
361
+ "2 789065092332460672 0.7890644120527324071908793822796086 \n",
362
+ "3 1000000000000000000 1.000000605383660329048491794939126 \n",
363
+ "4 100000000000000000 0.1000004271262862419547394646567906 \n",
364
+ "\n",
365
+ " collateralToken creationTimestamp \\\n",
366
+ "0 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1724553455 \n",
367
+ "1 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1724811940 \n",
368
+ "2 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1724815755 \n",
369
+ "3 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1724546620 \n",
370
+ "4 0xe91d153e0b41518a2ce8dd3d7944fa863463a97d 1724771260 \n",
371
+ "\n",
372
+ " trader_address feeAmount \\\n",
373
+ "0 0x022b36c50b85b8ae7addfb8a35d76c59d5814834 9008529493014773 \n",
374
+ "1 0x034c4ad84f7ac6638bf19300d5bbe7d9b981e736 12203264290938828 \n",
375
+ "2 0x09e9d42a029e8b0c2df3871709a762117a681d92 15781301846649213 \n",
376
+ "3 0x09e9d42a029e8b0c2df3871709a762117a681d92 20000000000000000 \n",
377
+ "4 0x0d049dcaece0ecb6fc81a460da7bcc2a4785d6e5 2000000000000000 \n",
378
+ "\n",
379
+ " id \\\n",
380
+ "0 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x02... \n",
381
+ "1 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x03... \n",
382
+ "2 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x09... \n",
383
+ "3 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x09... \n",
384
+ "4 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f50x0d... \n",
385
+ "\n",
386
+ " oldOutcomeTokenMarginalPrice outcomeIndex \\\n",
387
+ "0 0.592785210609610270634125335572129 1 \n",
388
+ "1 0.842992636523755061934822129394812 1 \n",
389
+ "2 0.7983775743712442891104598770339028 1 \n",
390
+ "3 0.5110745907733438805447072252622708 1 \n",
391
+ "4 0.2713968218662319388988681987389408 0 \n",
392
+ "\n",
393
+ " outcomeTokenMarginalPrice ... market_creator \\\n",
394
+ "0 0.6171295391012242250994586583534301 ... quickstart \n",
395
+ "1 0.8523396372892128845826889719620915 ... quickstart \n",
396
+ "2 0.8152123711444691659642000374025623 ... quickstart \n",
397
+ "3 0.5746805204222762335911904727318937 ... quickstart \n",
398
+ "4 0.2804586217805511523845593360379658 ... quickstart \n",
399
+ "\n",
400
+ " fpmm.answerFinalizedTimestamp fpmm.arbitrationOccurred \\\n",
401
+ "0 1725071760 False \n",
402
+ "1 1725071760 False \n",
403
+ "2 1725071760 False \n",
404
+ "3 1725071760 False \n",
405
+ "4 1725071760 False \n",
406
+ "\n",
407
+ " fpmm.currentAnswer \\\n",
408
+ "0 0x00000000000000000000000000000000000000000000... \n",
409
+ "1 0x00000000000000000000000000000000000000000000... \n",
410
+ "2 0x00000000000000000000000000000000000000000000... \n",
411
+ "3 0x00000000000000000000000000000000000000000000... \n",
412
+ "4 0x00000000000000000000000000000000000000000000... \n",
413
+ "\n",
414
+ " fpmm.id fpmm.isPendingArbitration \\\n",
415
+ "0 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 False \n",
416
+ "1 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 False \n",
417
+ "2 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 False \n",
418
+ "3 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 False \n",
419
+ "4 0x0017cd58d6a7ee1451388c7d5b1051b4c0a041f5 False \n",
420
+ "\n",
421
+ " fpmm.openingTimestamp fpmm.outcomes \\\n",
422
+ "0 1724976000 [Yes, No] \n",
423
+ "1 1724976000 [Yes, No] \n",
424
+ "2 1724976000 [Yes, No] \n",
425
+ "3 1724976000 [Yes, No] \n",
426
+ "4 1724976000 [Yes, No] \n",
427
+ "\n",
428
+ " fpmm.title \\\n",
429
+ "0 Will the first floating offshore wind research... \n",
430
+ "1 Will the first floating offshore wind research... \n",
431
+ "2 Will the first floating offshore wind research... \n",
432
+ "3 Will the first floating offshore wind research... \n",
433
+ "4 Will the first floating offshore wind research... \n",
434
+ "\n",
435
+ " fpmm.condition.id \n",
436
+ "0 0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1... \n",
437
+ "1 0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1... \n",
438
+ "2 0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1... \n",
439
+ "3 0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1... \n",
440
+ "4 0x0e940f12f30e928e4879c52d065d9da739a3d3f020d1... \n",
441
+ "\n",
442
+ "[5 rows x 24 columns]"
443
+ ]
444
+ },
445
+ "execution_count": 7,
446
+ "metadata": {},
447
+ "output_type": "execute_result"
448
+ }
449
+ ],
450
+ "source": [
451
+ "trades.head()"
452
+ ]
453
+ },
454
+ {
455
+ "cell_type": "code",
456
+ "execution_count": 9,
457
+ "metadata": {},
458
+ "outputs": [
459
+ {
460
+ "name": "stdout",
461
+ "output_type": "stream",
462
+ "text": [
463
+ "<class 'pandas.core.frame.DataFrame'>\n",
464
+ "RangeIndex: 26835 entries, 0 to 26834\n",
465
+ "Data columns (total 24 columns):\n",
466
+ " # Column Non-Null Count Dtype \n",
467
+ "--- ------ -------------- ----- \n",
468
+ " 0 collateralAmount 26835 non-null object\n",
469
+ " 1 collateralAmountUSD 26835 non-null object\n",
470
+ " 2 collateralToken 26835 non-null object\n",
471
+ " 3 creationTimestamp 26835 non-null object\n",
472
+ " 4 trader_address 26835 non-null object\n",
473
+ " 5 feeAmount 26835 non-null object\n",
474
+ " 6 id 26835 non-null object\n",
475
+ " 7 oldOutcomeTokenMarginalPrice 26835 non-null object\n",
476
+ " 8 outcomeIndex 26835 non-null object\n",
477
+ " 9 outcomeTokenMarginalPrice 26835 non-null object\n",
478
+ " 10 outcomeTokensTraded 26835 non-null object\n",
479
+ " 11 title 26835 non-null object\n",
480
+ " 12 transactionHash 26835 non-null object\n",
481
+ " 13 type 26835 non-null object\n",
482
+ " 14 market_creator 26835 non-null object\n",
483
+ " 15 fpmm.answerFinalizedTimestamp 24829 non-null object\n",
484
+ " 16 fpmm.arbitrationOccurred 26835 non-null bool \n",
485
+ " 17 fpmm.currentAnswer 24829 non-null object\n",
486
+ " 18 fpmm.id 26835 non-null object\n",
487
+ " 19 fpmm.isPendingArbitration 26835 non-null bool \n",
488
+ " 20 fpmm.openingTimestamp 26835 non-null object\n",
489
+ " 21 fpmm.outcomes 26835 non-null object\n",
490
+ " 22 fpmm.title 26835 non-null object\n",
491
+ " 23 fpmm.condition.id 26835 non-null object\n",
492
+ "dtypes: bool(2), object(22)\n",
493
+ "memory usage: 4.6+ MB\n"
494
+ ]
495
+ }
496
+ ],
497
+ "source": [
498
+ "trades.info()"
499
+ ]
500
+ },
501
+ {
502
+ "cell_type": "code",
503
+ "execution_count": 19,
504
+ "metadata": {},
505
+ "outputs": [
506
+ {
507
+ "data": {
508
+ "text/plain": [
509
+ "Index(['collateralAmount', 'collateralAmountUSD', 'collateralToken',\n",
510
+ " 'creationTimestamp', 'trader_address', 'feeAmount', 'id',\n",
511
+ " 'oldOutcomeTokenMarginalPrice', 'outcomeIndex',\n",
512
+ " 'outcomeTokenMarginalPrice', 'outcomeTokensTraded', 'title',\n",
513
+ " 'transactionHash', 'type', 'market_creator',\n",
514
+ " 'fpmm.answerFinalizedTimestamp', 'fpmm.arbitrationOccurred',\n",
515
+ " 'fpmm.currentAnswer', 'fpmm.id', 'fpmm.isPendingArbitration',\n",
516
+ " 'fpmm.openingTimestamp', 'fpmm.outcomes', 'fpmm.title',\n",
517
+ " 'fpmm.condition.id'],\n",
518
+ " dtype='object')"
519
+ ]
520
+ },
521
+ "execution_count": 19,
522
+ "metadata": {},
523
+ "output_type": "execute_result"
524
+ }
525
+ ],
526
+ "source": [
527
+ "trades.columns"
528
+ ]
529
+ },
530
+ {
531
+ "cell_type": "code",
532
+ "execution_count": 11,
533
+ "metadata": {},
534
+ "outputs": [],
535
+ "source": [
536
+ "markets = list(trades[\"fpmm.id\"].unique())"
537
+ ]
538
+ },
539
+ {
540
+ "cell_type": "code",
541
+ "execution_count": 12,
542
+ "metadata": {},
543
+ "outputs": [
544
+ {
545
+ "data": {
546
+ "text/plain": [
547
+ "803"
548
+ ]
549
+ },
550
+ "execution_count": 12,
551
+ "metadata": {},
552
+ "output_type": "execute_result"
553
+ }
554
+ ],
555
+ "source": [
556
+ "len(markets)"
557
+ ]
558
+ },
559
+ {
560
+ "cell_type": "code",
561
+ "execution_count": 50,
562
+ "metadata": {},
563
+ "outputs": [
564
+ {
565
+ "name": "stderr",
566
+ "output_type": "stream",
567
+ "text": [
568
+ "/var/folders/gp/02mb1d514ng739czlxw1lhh00000gn/T/ipykernel_3094/2495807215.py:12: SettingWithCopyWarning: \n",
569
+ "A value is trying to be set on a copy of a slice from a DataFrame\n",
570
+ "\n",
571
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
572
+ " trade_markets.rename(\n"
573
+ ]
574
+ }
575
+ ],
576
+ "source": [
577
+ "from datetime import datetime\n",
578
+ "INVALID_ANSWER_HEX = (\n",
579
+ " \"0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff\"\n",
580
+ ")\n",
581
+ "columns_of_interest = [\n",
582
+ " \"fpmm.currentAnswer\",\n",
583
+ " \"fpmm.id\",\n",
584
+ " \"fpmm.openingTimestamp\",\n",
585
+ " \"market_creator\",\n",
586
+ " ]\n",
587
+ "trade_markets = trades[columns_of_interest]\n",
588
+ "trade_markets.rename(\n",
589
+ " columns={\n",
590
+ " \"fpmm.currentAnswer\": \"currentAnswer\",\n",
591
+ " \"fpmm.openingTimestamp\": \"openingTimestamp\",\n",
592
+ " \"fpmm.id\": \"id\",\n",
593
+ " },\n",
594
+ " inplace=True,\n",
595
+ ")\n",
596
+ "trade_markets = trade_markets.drop_duplicates(subset=['id'], keep='last')\n",
597
+ "# remove invalid answers\n",
598
+ "\n",
599
+ "trade_markets = trade_markets.loc[trade_markets[\"currentAnswer\"]!= INVALID_ANSWER_HEX]\n",
600
+ "trade_markets[\"currentAnswer\"] = trade_markets[\"currentAnswer\"].apply(\n",
601
+ " lambda x: convert_hex_to_int(x)\n",
602
+ ")\n",
603
+ "trade_markets[\"opening_datetime\"] = trade_markets[\"openingTimestamp\"].apply(\n",
604
+ " lambda x: datetime.fromtimestamp(int(x))\n",
605
+ ")\n",
606
+ "trade_markets = trade_markets.sort_values(by=\"opening_datetime\", ascending=True)"
607
+ ]
608
+ },
609
+ {
610
+ "cell_type": "code",
611
+ "execution_count": 63,
612
+ "metadata": {},
613
+ "outputs": [
614
+ {
615
+ "data": {
616
+ "text/plain": [
617
+ "648"
618
+ ]
619
+ },
620
+ "execution_count": 63,
621
+ "metadata": {},
622
+ "output_type": "execute_result"
623
+ }
624
+ ],
625
+ "source": [
626
+ "len(trade_markets.id.unique())"
627
+ ]
628
+ },
629
+ {
630
+ "cell_type": "code",
631
+ "execution_count": 64,
632
+ "metadata": {},
633
+ "outputs": [
634
+ {
635
+ "data": {
636
+ "text/plain": [
637
+ "648"
638
+ ]
639
+ },
640
+ "execution_count": 64,
641
+ "metadata": {},
642
+ "output_type": "execute_result"
643
+ }
644
+ ],
645
+ "source": [
646
+ "len(trade_markets)"
647
+ ]
648
+ },
649
+ {
650
+ "cell_type": "code",
651
+ "execution_count": 52,
652
+ "metadata": {},
653
+ "outputs": [
654
+ {
655
+ "data": {
656
+ "text/html": [
657
+ "<div>\n",
658
+ "<style scoped>\n",
659
+ " .dataframe tbody tr th:only-of-type {\n",
660
+ " vertical-align: middle;\n",
661
+ " }\n",
662
+ "\n",
663
+ " .dataframe tbody tr th {\n",
664
+ " vertical-align: top;\n",
665
+ " }\n",
666
+ "\n",
667
+ " .dataframe thead th {\n",
668
+ " text-align: right;\n",
669
+ " }\n",
670
+ "</style>\n",
671
+ "<table border=\"1\" class=\"dataframe\">\n",
672
+ " <thead>\n",
673
+ " <tr style=\"text-align: right;\">\n",
674
+ " <th></th>\n",
675
+ " <th>currentAnswer</th>\n",
676
+ " <th>id</th>\n",
677
+ " <th>openingTimestamp</th>\n",
678
+ " <th>market_creator</th>\n",
679
+ " <th>opening_datetime</th>\n",
680
+ " </tr>\n",
681
+ " </thead>\n",
682
+ " <tbody>\n",
683
+ " <tr>\n",
684
+ " <th>15736</th>\n",
685
+ " <td>NaN</td>\n",
686
+ " <td>0x92ed80e541f642b564f992245abe640282dd273c</td>\n",
687
+ " <td>1727568000</td>\n",
688
+ " <td>quickstart</td>\n",
689
+ " <td>2024-09-29 02:00:00</td>\n",
690
+ " </tr>\n",
691
+ " <tr>\n",
692
+ " <th>6272</th>\n",
693
+ " <td>NaN</td>\n",
694
+ " <td>0x4002481fe7bc39c1baa4b5988c038da13ed05832</td>\n",
695
+ " <td>1727568000</td>\n",
696
+ " <td>quickstart</td>\n",
697
+ " <td>2024-09-29 02:00:00</td>\n",
698
+ " </tr>\n",
699
+ " <tr>\n",
700
+ " <th>24383</th>\n",
701
+ " <td>NaN</td>\n",
702
+ " <td>0xf820d06509027c309b00cd386055982d9bea0c10</td>\n",
703
+ " <td>1727568000</td>\n",
704
+ " <td>quickstart</td>\n",
705
+ " <td>2024-09-29 02:00:00</td>\n",
706
+ " </tr>\n",
707
+ " <tr>\n",
708
+ " <th>12418</th>\n",
709
+ " <td>NaN</td>\n",
710
+ " <td>0x74e0fa941341ebe980fbdcfa8b40244cb448eb56</td>\n",
711
+ " <td>1727568000</td>\n",
712
+ " <td>quickstart</td>\n",
713
+ " <td>2024-09-29 02:00:00</td>\n",
714
+ " </tr>\n",
715
+ " <tr>\n",
716
+ " <th>4754</th>\n",
717
+ " <td>NaN</td>\n",
718
+ " <td>0x2f44e179b5cc964e504046bac31d6945a0652af2</td>\n",
719
+ " <td>1727568000</td>\n",
720
+ " <td>quickstart</td>\n",
721
+ " <td>2024-09-29 02:00:00</td>\n",
722
+ " </tr>\n",
723
+ " </tbody>\n",
724
+ "</table>\n",
725
+ "</div>"
726
+ ],
727
+ "text/plain": [
728
+ " currentAnswer id \\\n",
729
+ "15736 NaN 0x92ed80e541f642b564f992245abe640282dd273c \n",
730
+ "6272 NaN 0x4002481fe7bc39c1baa4b5988c038da13ed05832 \n",
731
+ "24383 NaN 0xf820d06509027c309b00cd386055982d9bea0c10 \n",
732
+ "12418 NaN 0x74e0fa941341ebe980fbdcfa8b40244cb448eb56 \n",
733
+ "4754 NaN 0x2f44e179b5cc964e504046bac31d6945a0652af2 \n",
734
+ "\n",
735
+ " openingTimestamp market_creator opening_datetime \n",
736
+ "15736 1727568000 quickstart 2024-09-29 02:00:00 \n",
737
+ "6272 1727568000 quickstart 2024-09-29 02:00:00 \n",
738
+ "24383 1727568000 quickstart 2024-09-29 02:00:00 \n",
739
+ "12418 1727568000 quickstart 2024-09-29 02:00:00 \n",
740
+ "4754 1727568000 quickstart 2024-09-29 02:00:00 "
741
+ ]
742
+ },
743
+ "execution_count": 52,
744
+ "metadata": {},
745
+ "output_type": "execute_result"
746
+ }
747
+ ],
748
+ "source": [
749
+ "trade_markets.tail()"
750
+ ]
751
+ },
752
+ {
753
+ "cell_type": "code",
754
+ "execution_count": 53,
755
+ "metadata": {},
756
+ "outputs": [
757
+ {
758
+ "data": {
759
+ "text/plain": [
760
+ "719"
761
+ ]
762
+ },
763
+ "execution_count": 53,
764
+ "metadata": {},
765
+ "output_type": "execute_result"
766
+ }
767
+ ],
768
+ "source": [
769
+ "len(trade_markets)"
770
+ ]
771
+ },
772
+ {
773
+ "cell_type": "code",
774
+ "execution_count": 54,
775
+ "metadata": {},
776
+ "outputs": [],
777
+ "source": [
778
+ "trade_markets.dropna(inplace=True)"
779
+ ]
780
+ },
781
+ {
782
+ "cell_type": "code",
783
+ "execution_count": 55,
784
+ "metadata": {},
785
+ "outputs": [
786
+ {
787
+ "data": {
788
+ "text/plain": [
789
+ "648"
790
+ ]
791
+ },
792
+ "execution_count": 55,
793
+ "metadata": {},
794
+ "output_type": "execute_result"
795
+ }
796
+ ],
797
+ "source": [
798
+ "len(trade_markets)"
799
+ ]
800
+ },
801
+ {
802
+ "cell_type": "code",
803
+ "execution_count": 56,
804
+ "metadata": {},
805
+ "outputs": [
806
+ {
807
+ "data": {
808
+ "text/html": [
809
+ "<div>\n",
810
+ "<style scoped>\n",
811
+ " .dataframe tbody tr th:only-of-type {\n",
812
+ " vertical-align: middle;\n",
813
+ " }\n",
814
+ "\n",
815
+ " .dataframe tbody tr th {\n",
816
+ " vertical-align: top;\n",
817
+ " }\n",
818
+ "\n",
819
+ " .dataframe thead th {\n",
820
+ " text-align: right;\n",
821
+ " }\n",
822
+ "</style>\n",
823
+ "<table border=\"1\" class=\"dataframe\">\n",
824
+ " <thead>\n",
825
+ " <tr style=\"text-align: right;\">\n",
826
+ " <th></th>\n",
827
+ " <th>currentAnswer</th>\n",
828
+ " <th>id</th>\n",
829
+ " <th>openingTimestamp</th>\n",
830
+ " <th>market_creator</th>\n",
831
+ " <th>opening_datetime</th>\n",
832
+ " </tr>\n",
833
+ " </thead>\n",
834
+ " <tbody>\n",
835
+ " <tr>\n",
836
+ " <th>20792</th>\n",
837
+ " <td>1.0</td>\n",
838
+ " <td>0xcc9c26a86dd55aa04dcb0066c9b8fca2983f407d</td>\n",
839
+ " <td>1727136000</td>\n",
840
+ " <td>quickstart</td>\n",
841
+ " <td>2024-09-24 02:00:00</td>\n",
842
+ " </tr>\n",
843
+ " <tr>\n",
844
+ " <th>21130</th>\n",
845
+ " <td>1.0</td>\n",
846
+ " <td>0xd1bd18d7601d106639f922f1b5d2eda025c26be7</td>\n",
847
+ " <td>1727136000</td>\n",
848
+ " <td>quickstart</td>\n",
849
+ " <td>2024-09-24 02:00:00</td>\n",
850
+ " </tr>\n",
851
+ " <tr>\n",
852
+ " <th>7494</th>\n",
853
+ " <td>0.0</td>\n",
854
+ " <td>0x4eba0ec2464ec7c746e8872078165c8ad52d346f</td>\n",
855
+ " <td>1727136000</td>\n",
856
+ " <td>quickstart</td>\n",
857
+ " <td>2024-09-24 02:00:00</td>\n",
858
+ " </tr>\n",
859
+ " <tr>\n",
860
+ " <th>9911</th>\n",
861
+ " <td>1.0</td>\n",
862
+ " <td>0x61065f131e2ec851c40765bb0b078a318a36f53e</td>\n",
863
+ " <td>1727136000</td>\n",
864
+ " <td>quickstart</td>\n",
865
+ " <td>2024-09-24 02:00:00</td>\n",
866
+ " </tr>\n",
867
+ " <tr>\n",
868
+ " <th>26182</th>\n",
869
+ " <td>0.0</td>\n",
870
+ " <td>0x7e191324f0efb8aa20b8c702d95e812e55b4179c</td>\n",
871
+ " <td>1727136000</td>\n",
872
+ " <td>pearl</td>\n",
873
+ " <td>2024-09-24 02:00:00</td>\n",
874
+ " </tr>\n",
875
+ " </tbody>\n",
876
+ "</table>\n",
877
+ "</div>"
878
+ ],
879
+ "text/plain": [
880
+ " currentAnswer id \\\n",
881
+ "20792 1.0 0xcc9c26a86dd55aa04dcb0066c9b8fca2983f407d \n",
882
+ "21130 1.0 0xd1bd18d7601d106639f922f1b5d2eda025c26be7 \n",
883
+ "7494 0.0 0x4eba0ec2464ec7c746e8872078165c8ad52d346f \n",
884
+ "9911 1.0 0x61065f131e2ec851c40765bb0b078a318a36f53e \n",
885
+ "26182 0.0 0x7e191324f0efb8aa20b8c702d95e812e55b4179c \n",
886
+ "\n",
887
+ " openingTimestamp market_creator opening_datetime \n",
888
+ "20792 1727136000 quickstart 2024-09-24 02:00:00 \n",
889
+ "21130 1727136000 quickstart 2024-09-24 02:00:00 \n",
890
+ "7494 1727136000 quickstart 2024-09-24 02:00:00 \n",
891
+ "9911 1727136000 quickstart 2024-09-24 02:00:00 \n",
892
+ "26182 1727136000 pearl 2024-09-24 02:00:00 "
893
+ ]
894
+ },
895
+ "execution_count": 56,
896
+ "metadata": {},
897
+ "output_type": "execute_result"
898
+ }
899
+ ],
900
+ "source": [
901
+ "trade_markets.tail()"
902
+ ]
903
+ },
904
+ {
905
+ "cell_type": "code",
906
+ "execution_count": 51,
907
+ "metadata": {},
908
+ "outputs": [],
909
+ "source": [
910
+ "import math\n",
911
+ "def market_KL_divergence(market_row: pd.DataFrame) -> float:\n",
912
+ " \"\"\"Function to compute the divergence based on the formula\n",
913
+ " Formula in https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence\"\"\"\n",
914
+ " current_answer = market_row.currentAnswer # \"yes\", \"no\"\n",
915
+ " target_prob = 1 # = 100%\n",
916
+ " if current_answer == \"yes\":\n",
917
+ " candidate_prob = market_row.first_outcome_prob\n",
918
+ " else: # \"no\"\n",
919
+ " candidate_prob = market_row.second_outcome_prob\n",
920
+ "\n",
921
+ " # we have only one sample, the final probability based on tokens\n",
922
+ " kl_divergence = candidate_prob * round(math.log(candidate_prob / target_prob), 4)\n",
923
+ " return kl_divergence"
924
+ ]
925
+ },
926
+ {
927
+ "cell_type": "code",
928
+ "execution_count": null,
929
+ "metadata": {},
930
+ "outputs": [],
931
+ "source": []
932
+ },
933
+ {
934
+ "cell_type": "code",
935
+ "execution_count": null,
936
+ "metadata": {},
937
+ "outputs": [],
938
+ "source": []
939
+ },
940
+ {
941
+ "cell_type": "code",
942
+ "execution_count": 36,
943
+ "metadata": {},
944
+ "outputs": [
945
+ {
946
+ "data": {
947
+ "text/plain": [
948
+ "719"
949
+ ]
950
+ },
951
+ "execution_count": 36,
952
+ "metadata": {},
953
+ "output_type": "execute_result"
954
+ }
955
+ ],
956
+ "source": [
957
+ "len(trade_markets)"
958
+ ]
959
+ },
960
+ {
961
+ "cell_type": "code",
962
+ "execution_count": 37,
963
+ "metadata": {},
964
+ "outputs": [
965
+ {
966
+ "data": {
967
+ "text/plain": [
968
+ "719"
969
+ ]
970
+ },
971
+ "execution_count": 37,
972
+ "metadata": {},
973
+ "output_type": "execute_result"
974
+ }
975
+ ],
976
+ "source": [
977
+ "len(list(trade_markets.id.unique()))"
978
+ ]
979
+ },
980
+ {
981
+ "cell_type": "code",
982
+ "execution_count": 38,
983
+ "metadata": {},
984
+ "outputs": [
985
+ {
986
+ "data": {
987
+ "text/plain": [
988
+ "currentAnswer\n",
989
+ "0x0000000000000000000000000000000000000000000000000000000000000001 407\n",
990
+ "0x0000000000000000000000000000000000000000000000000000000000000000 241\n",
991
+ "Name: count, dtype: int64"
992
+ ]
993
+ },
994
+ "execution_count": 38,
995
+ "metadata": {},
996
+ "output_type": "execute_result"
997
+ }
998
+ ],
999
+ "source": [
1000
+ "trade_markets.currentAnswer.value_counts()"
1001
+ ]
1002
+ },
1003
+ {
1004
+ "cell_type": "code",
1005
+ "execution_count": 15,
1006
+ "metadata": {},
1007
+ "outputs": [],
1008
+ "source": [
1009
+ "INVALID_ANSWER_HEX = (\n",
1010
+ " \"0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff\"\n",
1011
+ ")"
1012
+ ]
1013
+ },
1014
+ {
1015
+ "cell_type": "code",
1016
+ "execution_count": 40,
1017
+ "metadata": {},
1018
+ "outputs": [],
1019
+ "source": [
1020
+ "import numpy as np\n",
1021
+ "def convert_hex_to_int(x):\n",
1022
+ " \"\"\"Convert hex to int\"\"\"\n",
1023
+ " if isinstance(x, float):\n",
1024
+ " return np.nan\n",
1025
+ " if isinstance(x, str):\n",
1026
+ " if x == INVALID_ANSWER_HEX:\n",
1027
+ " return -1\n",
1028
+ " answer = int(x, 16)\n",
1029
+ " return answer\n",
1030
+ " "
1031
+ ]
1032
+ },
1033
+ {
1034
+ "cell_type": "code",
1035
+ "execution_count": null,
1036
+ "metadata": {},
1037
+ "outputs": [],
1038
+ "source": [
1039
+ "market_ids = list(markets.id.unique())\n",
1040
+ "for i in range(len(trade_markets)):\n",
1041
+ " market = trade_markets.iloc[i]\n",
1042
+ " if market.id in market_ids:\n",
1043
+ " current_answer = convert_hex_to_int(market.currentAnswer)\n",
1044
+ " market_answer = markets.loc[markets[\"id\"]==market.id].currentAnswer.values[0]\n",
1045
+ " print(f\"current answer = {current_answer} and market answer {market_answer}\")\n",
1046
+ " trade_markets.at[i, \"currentAnswer\"] = market_answer"
1047
+ ]
1048
+ },
1049
+ {
1050
+ "cell_type": "code",
1051
+ "execution_count": 17,
1052
+ "metadata": {},
1053
+ "outputs": [],
1054
+ "source": [
1055
+ "markets[\"currentAnswer\"] = markets[\"currentAnswer\"].apply(lambda x: convert_hex_to_int(x))"
1056
+ ]
1057
+ },
1058
+ {
1059
+ "cell_type": "code",
1060
+ "execution_count": 18,
1061
+ "metadata": {},
1062
+ "outputs": [
1063
+ {
1064
+ "data": {
1065
+ "text/plain": [
1066
+ "currentAnswer\n",
1067
+ " 1.0 407\n",
1068
+ " 0.0 241\n",
1069
+ "-1.0 84\n",
1070
+ "Name: count, dtype: int64"
1071
+ ]
1072
+ },
1073
+ "execution_count": 18,
1074
+ "metadata": {},
1075
+ "output_type": "execute_result"
1076
+ }
1077
+ ],
1078
+ "source": [
1079
+ "markets.currentAnswer.value_counts()"
1080
+ ]
1081
+ },
1082
+ {
1083
+ "cell_type": "code",
1084
+ "execution_count": 70,
1085
+ "metadata": {},
1086
+ "outputs": [
1087
+ {
1088
+ "data": {
1089
+ "text/plain": [
1090
+ "0.0769610411361284"
1091
+ ]
1092
+ },
1093
+ "execution_count": 70,
1094
+ "metadata": {},
1095
+ "output_type": "execute_result"
1096
+ }
1097
+ ],
1098
+ "source": [
1099
+ "import math\n",
1100
+ "\n",
1101
+ "candidate_prob = 9/25\n",
1102
+ "target_prob = 1/3\n",
1103
+ "math.log(candidate_prob/target_prob)"
1104
+ ]
1105
+ },
1106
+ {
1107
+ "cell_type": "code",
1108
+ "execution_count": 72,
1109
+ "metadata": {},
1110
+ "outputs": [
1111
+ {
1112
+ "name": "stdout",
1113
+ "output_type": "stream",
1114
+ "text": [
1115
+ "KL divergence: 6.296890976997244\n"
1116
+ ]
1117
+ }
1118
+ ],
1119
+ "source": [
1120
+ "import numpy as np\n",
1121
+ "\n",
1122
+ "def kl_divergence(p, q):\n",
1123
+ " \"\"\"\n",
1124
+ " Compute KL divergence for a single sample with two probabilities.\n",
1125
+ " \n",
1126
+ " :param p: First probability (true distribution)\n",
1127
+ " :param q: Second probability (approximating distribution)\n",
1128
+ " :return: KL divergence value\n",
1129
+ " \"\"\"\n",
1130
+ " # Ensure probabilities sum to 1\n",
1131
+ " p = np.array([p, 1-p])\n",
1132
+ " q = np.array([q, 1-q])\n",
1133
+ " \n",
1134
+ " # Avoid division by zero\n",
1135
+ " epsilon = 1e-10\n",
1136
+ " q = np.clip(q, epsilon, 1-epsilon)\n",
1137
+ " \n",
1138
+ " # Compute KL divergence\n",
1139
+ " kl_div = np.sum(p * np.log(p / q))\n",
1140
+ " \n",
1141
+ " return kl_div\n",
1142
+ "\n",
1143
+ "# Example usage\n",
1144
+ "p = 0.7 # probability from true distribution\n",
1145
+ "q = 1.0 # probability from approximating distribution\n",
1146
+ "\n",
1147
+ "result = kl_divergence(p, q)\n",
1148
+ "print(f\"KL divergence: {result}\")"
1149
+ ]
1150
+ },
1151
+ {
1152
+ "cell_type": "code",
1153
+ "execution_count": 74,
1154
+ "metadata": {},
1155
+ "outputs": [
1156
+ {
1157
+ "name": "stdout",
1158
+ "output_type": "stream",
1159
+ "text": [
1160
+ "KL divergence: inf\n"
1161
+ ]
1162
+ }
1163
+ ],
1164
+ "source": [
1165
+ "from scipy.special import kl_div\n",
1166
+ "\n",
1167
+ "# For multiple probabilities\n",
1168
+ "p = np.array([0.3, 0.7])\n",
1169
+ "q = np.array([0.0, 1.0])\n",
1170
+ "\n",
1171
+ "kl = np.sum(kl_div(p, q))\n",
1172
+ "print(f\"KL divergence: {kl}\")"
1173
+ ]
1174
+ },
1175
+ {
1176
+ "cell_type": "markdown",
1177
+ "metadata": {},
1178
+ "source": [
1179
+ "The scipy `kl_div` function is not useful for our case: it returns inf when the target distribution contains an exact 0, as in the example above."
1180
+ ]
1181
+ },
1182
+ {
1183
+ "cell_type": "code",
1184
+ "execution_count": 75,
1185
+ "metadata": {},
1186
+ "outputs": [
1187
+ {
1188
+ "data": {
1189
+ "text/html": [
1190
+ "<div>\n",
1191
+ "<style scoped>\n",
1192
+ " .dataframe tbody tr th:only-of-type {\n",
1193
+ " vertical-align: middle;\n",
1194
+ " }\n",
1195
+ "\n",
1196
+ " .dataframe tbody tr th {\n",
1197
+ " vertical-align: top;\n",
1198
+ " }\n",
1199
+ "\n",
1200
+ " .dataframe thead th {\n",
1201
+ " text-align: right;\n",
1202
+ " }\n",
1203
+ "</style>\n",
1204
+ "<table border=\"1\" class=\"dataframe\">\n",
1205
+ " <thead>\n",
1206
+ " <tr style=\"text-align: right;\">\n",
1207
+ " <th></th>\n",
1208
+ " <th>currentAnswer</th>\n",
1209
+ " <th>id</th>\n",
1210
+ " <th>openingTimestamp</th>\n",
1211
+ " <th>market_creator</th>\n",
1212
+ " <th>opening_datetime</th>\n",
1213
+ " <th>first_outcome_prob</th>\n",
1214
+ " <th>second_outcome_prob</th>\n",
1215
+ " <th>kl_divergence</th>\n",
1216
+ " </tr>\n",
1217
+ " </thead>\n",
1218
+ " <tbody>\n",
1219
+ " <tr>\n",
1220
+ " <th>0</th>\n",
1221
+ " <td>yes</td>\n",
1222
+ " <td>0x67490193504b49a247d6a3ba7d441e9894d9615f</td>\n",
1223
+ " <td>1722470400</td>\n",
1224
+ " <td>quickstart</td>\n",
1225
+ " <td>2024-08-01 02:00:00</td>\n",
1226
+ " <td>0.8145</td>\n",
1227
+ " <td>0.1855</td>\n",
1228
+ " <td>3.791664</td>\n",
1229
+ " </tr>\n",
1230
+ " <tr>\n",
1231
+ " <th>1</th>\n",
1232
+ " <td>no</td>\n",
1233
+ " <td>0x17f2c97bf52a79671878201bf2995a3b6daba041</td>\n",
1234
+ " <td>1722470400</td>\n",
1235
+ " <td>quickstart</td>\n",
1236
+ " <td>2024-08-01 02:00:00</td>\n",
1237
+ " <td>0.1975</td>\n",
1238
+ " <td>0.8025</td>\n",
1239
+ " <td>4.050688</td>\n",
1240
+ " </tr>\n",
1241
+ " <tr>\n",
1242
+ " <th>2</th>\n",
1243
+ " <td>no</td>\n",
1244
+ " <td>0xbca6aa704a02a5c5a766ff829dacc81aee5547cf</td>\n",
1245
+ " <td>1722470400</td>\n",
1246
+ " <td>quickstart</td>\n",
1247
+ " <td>2024-08-01 02:00:00</td>\n",
1248
+ " <td>0.6969</td>\n",
1249
+ " <td>0.3031</td>\n",
1250
+ " <td>15.433247</td>\n",
1251
+ " </tr>\n",
1252
+ " <tr>\n",
1253
+ " <th>3</th>\n",
1254
+ " <td>no</td>\n",
1255
+ " <td>0x221c71bab604691b0b8805c1c433fc8e22123a67</td>\n",
1256
+ " <td>1722470400</td>\n",
1257
+ " <td>pearl</td>\n",
1258
+ " <td>2024-08-01 02:00:00</td>\n",
1259
+ " <td>0.4757</td>\n",
1260
+ " <td>0.5243</td>\n",
1261
+ " <td>10.261432</td>\n",
1262
+ " </tr>\n",
1263
+ " <tr>\n",
1264
+ " <th>4</th>\n",
1265
+ " <td>no</td>\n",
1266
+ " <td>0xe4d078b9be12319c0063f58dc10f19604a5df163</td>\n",
1267
+ " <td>1722470400</td>\n",
1268
+ " <td>quickstart</td>\n",
1269
+ " <td>2024-08-01 02:00:00</td>\n",
1270
+ " <td>0.3473</td>\n",
1271
+ " <td>0.6527</td>\n",
1272
+ " <td>7.351119</td>\n",
1273
+ " </tr>\n",
1274
+ " </tbody>\n",
1275
+ "</table>\n",
1276
+ "</div>"
1277
+ ],
1278
+ "text/plain": [
1279
+ " currentAnswer id openingTimestamp \\\n",
1280
+ "0 yes 0x67490193504b49a247d6a3ba7d441e9894d9615f 1722470400 \n",
1281
+ "1 no 0x17f2c97bf52a79671878201bf2995a3b6daba041 1722470400 \n",
1282
+ "2 no 0xbca6aa704a02a5c5a766ff829dacc81aee5547cf 1722470400 \n",
1283
+ "3 no 0x221c71bab604691b0b8805c1c433fc8e22123a67 1722470400 \n",
1284
+ "4 no 0xe4d078b9be12319c0063f58dc10f19604a5df163 1722470400 \n",
1285
+ "\n",
1286
+ " market_creator opening_datetime first_outcome_prob second_outcome_prob \\\n",
1287
+ "0 quickstart 2024-08-01 02:00:00 0.8145 0.1855 \n",
1288
+ "1 quickstart 2024-08-01 02:00:00 0.1975 0.8025 \n",
1289
+ "2 quickstart 2024-08-01 02:00:00 0.6969 0.3031 \n",
1290
+ "3 pearl 2024-08-01 02:00:00 0.4757 0.5243 \n",
1291
+ "4 quickstart 2024-08-01 02:00:00 0.3473 0.6527 \n",
1292
+ "\n",
1293
+ " kl_divergence \n",
1294
+ "0 3.791664 \n",
1295
+ "1 4.050688 \n",
1296
+ "2 15.433247 \n",
1297
+ "3 10.261432 \n",
1298
+ "4 7.351119 "
1299
+ ]
1300
+ },
1301
+ "execution_count": 75,
1302
+ "metadata": {},
1303
+ "output_type": "execute_result"
1304
+ }
1305
+ ],
1306
+ "source": [
1307
+ "markets_div = pd.read_parquet(\"../data/closed_markets_div.parquet\")\n",
1308
+ "markets_div.head()"
1309
+ ]
1310
+ },
1311
+ {
1312
+ "cell_type": "code",
1313
+ "execution_count": 76,
1314
+ "metadata": {},
1315
+ "outputs": [
1316
+ {
1317
+ "data": {
1318
+ "text/html": [
1319
+ "<div>\n",
1320
+ "<style scoped>\n",
1321
+ " .dataframe tbody tr th:only-of-type {\n",
1322
+ " vertical-align: middle;\n",
1323
+ " }\n",
1324
+ "\n",
1325
+ " .dataframe tbody tr th {\n",
1326
+ " vertical-align: top;\n",
1327
+ " }\n",
1328
+ "\n",
1329
+ " .dataframe thead th {\n",
1330
+ " text-align: right;\n",
1331
+ " }\n",
1332
+ "</style>\n",
1333
+ "<table border=\"1\" class=\"dataframe\">\n",
1334
+ " <thead>\n",
1335
+ " <tr style=\"text-align: right;\">\n",
1336
+ " <th></th>\n",
1337
+ " <th>currentAnswer</th>\n",
1338
+ " <th>id</th>\n",
1339
+ " <th>openingTimestamp</th>\n",
1340
+ " <th>market_creator</th>\n",
1341
+ " <th>opening_datetime</th>\n",
1342
+ " <th>first_outcome_prob</th>\n",
1343
+ " <th>second_outcome_prob</th>\n",
1344
+ " <th>kl_divergence</th>\n",
1345
+ " </tr>\n",
1346
+ " </thead>\n",
1347
+ " <tbody>\n",
1348
+ " <tr>\n",
1349
+ " <th>642</th>\n",
1350
+ " <td>yes</td>\n",
1351
+ " <td>0x4eba0ec2464ec7c746e8872078165c8ad52d346f</td>\n",
1352
+ " <td>1727136000</td>\n",
1353
+ " <td>quickstart</td>\n",
1354
+ " <td>2024-09-24 02:00:00</td>\n",
1355
+ " <td>0.5392</td>\n",
1356
+ " <td>0.4608</td>\n",
1357
+ " <td>9.920241</td>\n",
1358
+ " </tr>\n",
1359
+ " <tr>\n",
1360
+ " <th>643</th>\n",
1361
+ " <td>no</td>\n",
1362
+ " <td>0x3535b4cea3ea7b1862fbe1af5a458702cc1c0dad</td>\n",
1363
+ " <td>1727136000</td>\n",
1364
+ " <td>quickstart</td>\n",
1365
+ " <td>2024-09-24 02:00:00</td>\n",
1366
+ " <td>0.2812</td>\n",
1367
+ " <td>0.7188</td>\n",
1368
+ " <td>5.880786</td>\n",
1369
+ " </tr>\n",
1370
+ " <tr>\n",
1371
+ " <th>644</th>\n",
1372
+ " <td>yes</td>\n",
1373
+ " <td>0x7e191324f0efb8aa20b8c702d95e812e55b4179c</td>\n",
1374
+ " <td>1727136000</td>\n",
1375
+ " <td>pearl</td>\n",
1376
+ " <td>2024-09-24 02:00:00</td>\n",
1377
+ " <td>0.5000</td>\n",
1378
+ " <td>0.5000</td>\n",
1379
+ " <td>10.819778</td>\n",
1380
+ " </tr>\n",
1381
+ " <tr>\n",
1382
+ " <th>645</th>\n",
1383
+ " <td>no</td>\n",
1384
+ " <td>0xd1bd18d7601d106639f922f1b5d2eda025c26be7</td>\n",
1385
+ " <td>1727136000</td>\n",
1386
+ " <td>quickstart</td>\n",
1387
+ " <td>2024-09-24 02:00:00</td>\n",
1388
+ " <td>0.5000</td>\n",
1389
+ " <td>0.5000</td>\n",
1390
+ " <td>10.819778</td>\n",
1391
+ " </tr>\n",
1392
+ " <tr>\n",
1393
+ " <th>646</th>\n",
1394
+ " <td>no</td>\n",
1395
+ " <td>0x61065f131e2ec851c40765bb0b078a318a36f53e</td>\n",
1396
+ " <td>1727136000</td>\n",
1397
+ " <td>quickstart</td>\n",
1398
+ " <td>2024-09-24 02:00:00</td>\n",
1399
+ " <td>0.5000</td>\n",
1400
+ " <td>0.5000</td>\n",
1401
+ " <td>10.819778</td>\n",
1402
+ " </tr>\n",
1403
+ " </tbody>\n",
1404
+ "</table>\n",
1405
+ "</div>"
1406
+ ],
1407
+ "text/plain": [
1408
+ " currentAnswer id \\\n",
1409
+ "642 yes 0x4eba0ec2464ec7c746e8872078165c8ad52d346f \n",
1410
+ "643 no 0x3535b4cea3ea7b1862fbe1af5a458702cc1c0dad \n",
1411
+ "644 yes 0x7e191324f0efb8aa20b8c702d95e812e55b4179c \n",
1412
+ "645 no 0xd1bd18d7601d106639f922f1b5d2eda025c26be7 \n",
1413
+ "646 no 0x61065f131e2ec851c40765bb0b078a318a36f53e \n",
1414
+ "\n",
1415
+ " openingTimestamp market_creator opening_datetime first_outcome_prob \\\n",
1416
+ "642 1727136000 quickstart 2024-09-24 02:00:00 0.5392 \n",
1417
+ "643 1727136000 quickstart 2024-09-24 02:00:00 0.2812 \n",
1418
+ "644 1727136000 pearl 2024-09-24 02:00:00 0.5000 \n",
1419
+ "645 1727136000 quickstart 2024-09-24 02:00:00 0.5000 \n",
1420
+ "646 1727136000 quickstart 2024-09-24 02:00:00 0.5000 \n",
1421
+ "\n",
1422
+ " second_outcome_prob kl_divergence \n",
1423
+ "642 0.4608 9.920241 \n",
1424
+ "643 0.7188 5.880786 \n",
1425
+ "644 0.5000 10.819778 \n",
1426
+ "645 0.5000 10.819778 \n",
1427
+ "646 0.5000 10.819778 "
1428
+ ]
1429
+ },
1430
+ "execution_count": 76,
1431
+ "metadata": {},
1432
+ "output_type": "execute_result"
1433
+ }
1434
+ ],
1435
+ "source": [
1436
+ "markets_div.tail()"
1437
+ ]
1438
+ },
1439
+ {
1440
+ "cell_type": "code",
1441
+ "execution_count": 77,
1442
+ "metadata": {},
1443
+ "outputs": [
1444
+ {
1445
+ "data": {
1446
+ "text/plain": [
1447
+ "647"
1448
+ ]
1449
+ },
1450
+ "execution_count": 77,
1451
+ "metadata": {},
1452
+ "output_type": "execute_result"
1453
+ }
1454
+ ],
1455
+ "source": [
1456
+ "len(markets_div)"
1457
+ ]
1458
+ }
1459
+ ],
1460
+ "metadata": {
1461
+ "kernelspec": {
1462
+ "display_name": "hf_dashboards",
1463
+ "language": "python",
1464
+ "name": "python3"
1465
+ },
1466
+ "language_info": {
1467
+ "codemirror_mode": {
1468
+ "name": "ipython",
1469
+ "version": 3
1470
+ },
1471
+ "file_extension": ".py",
1472
+ "mimetype": "text/x-python",
1473
+ "name": "python",
1474
+ "nbconvert_exporter": "python",
1475
+ "pygments_lexer": "ipython3",
1476
+ "version": "3.12.2"
1477
+ }
1478
+ },
1479
+ "nbformat": 4,
1480
+ "nbformat_minor": 2
1481
+ }
scripts/closed_markets_divergence.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import os
3
+ import math
4
+ import pandas as pd
5
+ import numpy as np
6
+ from typing import Any, Union
7
+ from string import Template
8
+ import requests
9
+ import pickle
10
+ from concurrent.futures import ThreadPoolExecutor, as_completed
11
+ from tqdm import tqdm
12
+ import time
13
+ from datetime import datetime
14
+
15
# Size of the thread pool used to fetch liquidity info for the markets.
NUM_WORKERS = 10
# Pause (in seconds) applied after a successful liquidity request.
# NOTE(review): the name mentions IPFS, but in this script it only throttles
# the subgraph requests.
IPFS_POLL_INTERVAL = 0.07
# Raw `currentAnswer` value that marks a market as resolved "invalid".
INVALID_ANSWER_HEX = (
    "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
)
INVALID_ANSWER = -1
# Project layout: <root>/scripts/<this file> and <root>/data.
SCRIPTS_DIR = Path(__file__).parent
ROOT_DIR = SCRIPTS_DIR.parent
DATA_DIR = ROOT_DIR / "data"
# Read from the environment; when unset this is None and the literal text
# "None" is substituted into the subgraph URL below.
SUBGRAPH_API_KEY = os.environ.get("SUBGRAPH_API_KEY", None)
OMEN_SUBGRAPH_URL = Template(
    """https://gateway-arbitrum.network.thegraph.com/api/${subgraph_api_key}/subgraphs/id/9fUVQpFwzpdWS9bq5WkAnmKbNNcoBwatMR4yZq81pbbz"""
)
# GraphQL query returning the liquidity records of one market (`fpmm_id`)
# created by `fpmm_creator`, ordered by ascending creation timestamp.
get_token_amounts_query = Template(
    """
{

fpmmLiquidities(
where: {
fpmm_: {
creator: "${fpmm_creator}",
id: "${fpmm_id}",
},
id_gt: ""
}
orderBy: creationTimestamp
orderDirection: asc
)
{
id
outcomeTokenAmounts
creationTimestamp
additionalLiquidityParameter
}
}
"""
)
# Creator addresses of the two market families; they are lower-cased before
# being placed in the query.
CREATOR = "0x89c5cc945dd550BcFfb72Fe42BfF002429F46Fec"
PEARL_CREATOR = "0xFfc8029154ECD55ABED15BD428bA596E7D23f557"
# Maps the `market_creator` label stored in the trades data to its address.
market_creators_map = {"quickstart": CREATOR, "pearl": PEARL_CREATOR}
# HTTP headers sent with every subgraph request.
headers = {
    "Accept": "application/json, multipart/mixed",
    "Content-Type": "application/json",
}
59
+
60
+
61
+ def _to_content(q: str) -> dict[str, Any]:
62
+ """Convert the given query string to payload content, i.e., add it under a `queries` key and convert it to bytes."""
63
+ finalized_query = {
64
+ "query": q,
65
+ "variables": None,
66
+ "extensions": {"headers": None},
67
+ }
68
+ return finalized_query
69
+
70
+
71
def collect_liquidity_info(
    index: int, fpmm_id: str, market_creator: str, timeout: float = 30.0
) -> Union[dict[str, Any], None]:
    """Fetch the outcome token amounts of one market from the Omen subgraph.

    :param index: position of the market in the caller's dataframe; it is not
        used here and is kept only for interface compatibility
    :param fpmm_id: id of the market to query
    :param market_creator: key of ``market_creators_map`` ("quickstart" or "pearl")
    :param timeout: seconds to wait for the subgraph before giving up, so a
        stalled connection cannot block a worker thread forever
    :return: ``{fpmm_id: [amount, ...]}`` taken from the second liquidity
        record, or ``None`` when the subgraph returned fewer than two records
    :raises KeyError: if ``market_creator`` is not a known creator
    :raises requests.RequestException: on network failures or timeouts
    """
    omen_subgraph = OMEN_SUBGRAPH_URL.substitute(subgraph_api_key=SUBGRAPH_API_KEY)
    market_creator_id = market_creators_map[market_creator]
    query = get_token_amounts_query.substitute(
        fpmm_creator=market_creator_id.lower(),
        fpmm_id=fpmm_id,
    )
    content_json = _to_content(query)
    res = requests.post(
        omen_subgraph, headers=headers, json=content_json, timeout=timeout
    )
    result_json = res.json()
    # A GraphQL error response carries `"data": null`, so guard against None
    # as well as against a missing key.
    tokens_info = (result_json.get("data") or {}).get("fpmmLiquidities") or []
    # The second item is the final information of the market; with fewer than
    # two records there is nothing to read, so report "no info" instead of
    # failing with an IndexError.
    if len(tokens_info) < 2:
        return None

    final_info = tokens_info[1]
    token_amounts = [int(x) for x in final_info["outcomeTokenAmounts"]]
    time.sleep(IPFS_POLL_INTERVAL)
    return {fpmm_id: token_amounts}
93
+
94
+
95
def convert_hex_to_int(x: Union[str, float]) -> Union[str, float]:
    """Translate a raw hexadecimal ``currentAnswer`` into an outcome label.

    Despite its name, this function returns a label, not an integer.

    :param x: the answer as a hex string, or a non-string value such as NaN
        when the market has no answer
    :return: ``"invalid"`` for the invalid-answer marker, ``"yes"`` when the
        hex value equals 0, ``"no"`` for any other hex value, and NaN for any
        non-string input so that a later ``dropna`` removes the row
    :raises ValueError: if ``x`` is a string that is not valid hexadecimal
    """
    if not isinstance(x, str):
        # Missing answers (NaN, None, ...) are all reported as NaN.
        return np.nan
    if x == INVALID_ANSWER_HEX:
        return "invalid"
    return "yes" if int(x, 16) == 0 else "no"
103
+
104
+
105
def get_closed_markets() -> pd.DataFrame:
    """Build the table of closed markets from the trades parquet file.

    Reads ``fpmmTrades.parquet`` from ``DATA_DIR``, keeps one row per market
    id (the last one), drops markets whose answer is the invalid marker or is
    missing, converts the answer to a label and adds ``opening_datetime``.

    :return: dataframe with the columns ``currentAnswer``, ``id``,
        ``openingTimestamp``, ``market_creator`` and ``opening_datetime``,
        sorted by ascending ``opening_datetime``
    :raises Exception: whatever ``pd.read_parquet`` raised when the trades
        file cannot be read
    """
    print("Reading parquet file with closed markets data from trades")
    try:
        markets = pd.read_parquet(DATA_DIR / "fpmmTrades.parquet")
    except Exception:
        print("Error reading the parquet file")
        # Nothing below can work without the trades data; re-raise the real
        # error instead of failing later on an unbound `markets` variable.
        raise

    columns_of_interest = [
        "fpmm.currentAnswer",
        "fpmm.id",
        "fpmm.openingTimestamp",
        "market_creator",
    ]
    # rename() returns a new frame, which avoids modifying a slice in place.
    markets = markets[columns_of_interest].rename(
        columns={
            "fpmm.currentAnswer": "currentAnswer",
            "fpmm.openingTimestamp": "openingTimestamp",
            "fpmm.id": "id",
        }
    )
    markets = markets.drop_duplicates(subset=["id"], keep="last")
    # remove invalid answers
    markets = markets.loc[markets["currentAnswer"] != INVALID_ANSWER_HEX].copy()
    markets["currentAnswer"] = markets["currentAnswer"].apply(convert_hex_to_int)
    markets = markets.dropna()
    # NOTE: datetime.fromtimestamp without a tz argument converts to the local
    # timezone of the machine running the script.
    markets["opening_datetime"] = markets["openingTimestamp"].apply(
        lambda x: datetime.fromtimestamp(int(x))
    )
    return markets.sort_values(by="opening_datetime", ascending=True)
139
+
140
+
141
def kl_divergence(p, q):
    """
    Compute KL divergence for a single sample with two probabilities.

    Each probability is expanded to the two-outcome distribution
    ``[x, 1 - x]`` before the divergence ``sum(p * log(p / q))`` is computed.

    :param p: First probability (true distribution)
    :param q: Second probability (approximating distribution)
    :return: KL divergence value
    """
    # Ensure probabilities sum to 1
    p = np.array([p, 1 - p], dtype=float)
    q = np.array([q, 1 - q], dtype=float)

    # Avoid division by zero
    epsilon = 1e-10
    q = np.clip(q, epsilon, 1 - epsilon)

    # Outcomes with p == 0 contribute nothing to the sum (the limit of
    # x * log(x) at 0 is 0). Evaluating them directly gives 0 * log(0) = NaN,
    # so they are left out of the computation.
    support = p > 0
    return float(np.sum(p[support] * np.log(p[support] / q[support])))
161
+
162
+
163
def market_KL_divergence(market_row: pd.DataFrame) -> float:
    """Compute the KL divergence of one market against its resolved answer.

    Only the attributes ``currentAnswer`` and ``first_outcome_prob`` of
    ``market_row`` are read. The resolved answer is turned into a target
    probability for the first outcome: 0.0 when the answer is "no", 1.0 for
    any other answer.
    Formula in https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence
    """
    resolved_prob = 0.0 if market_row.currentAnswer == "no" else 1.0
    # we have only one sample, the final probability based on tokens
    return kl_divergence(market_row.first_outcome_prob, resolved_prob)
174
+
175
+
176
def compute_tokens_prob(token_amounts: list) -> list:
    """Turn the first two outcome token amounts into two probabilities.

    The first probability is one minus the first token's share of the two
    amounts (the share is rounded to 4 decimals); the second probability is
    one minus the first.

    :param token_amounts: token amounts; only the first two entries are read
    :return: ``[first_probability, second_probability]``
    """
    first_amount, second_amount = token_amounts[0], token_amounts[1]
    first_share = round(first_amount / (first_amount + second_amount), 4)
    first_token_prob = 1 - first_share
    return [first_token_prob, 1 - first_token_prob]
182
+
183
+
184
def prepare_closed_markets_data() -> None:
    """Build and store the closed-markets dataset with KL divergence values.

    Steps:
      1. load the closed markets,
      2. fetch the liquidity info of every market concurrently,
      3. derive the two outcome probabilities from the token amounts,
      4. drop the markets for which no liquidity info was found,
      5. add the ``kl_divergence`` column and write the result to
         ``closed_markets_div.parquet`` in ``DATA_DIR``.

    When some markets have no liquidity info, their positions in the
    dataframe are dumped to ``no_liq_info.pickle`` in the current working
    directory.
    """
    closed_markets = get_closed_markets()
    # -1.0 marks "no liquidity info yet"; those rows are filtered out below.
    closed_markets["first_outcome_prob"] = -1.0
    closed_markets["second_outcome_prob"] = -1.0
    markets_no_info = []
    markets_with_info = 0
    with ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
        # Remember which dataframe position each future belongs to: results
        # arrive in completion order, so the submission loop index cannot be
        # reused when reporting markets without info.
        future_to_index = {
            executor.submit(collect_liquidity_info, i, fpmm_id, market_creator): i
            for i, (fpmm_id, market_creator) in enumerate(
                zip(closed_markets["id"], closed_markets["market_creator"])
            )
        }
        for future in tqdm(
            as_completed(future_to_index),
            total=len(future_to_index),
            desc="Fetching Market liquidity info",
        ):
            token_amounts_dict = future.result()
            token_amounts = None
            if token_amounts_dict:
                fpmm_id, token_amounts = token_amounts_dict.popitem()
            if not token_amounts:
                tqdm.write("Skipping market with no liquidity info")
                markets_no_info.append(future_to_index[future])
                continue

            tokens_prob = compute_tokens_prob(token_amounts)
            is_market = closed_markets["id"] == fpmm_id
            closed_markets.loc[is_market, "first_outcome_prob"] = tokens_prob[0]
            closed_markets.loc[is_market, "second_outcome_prob"] = tokens_prob[1]
            markets_with_info += 1

    print(f"Markets with info = {markets_with_info}")
    # Removing markets with no liq info; copy so the new column added below
    # is written to an independent frame rather than to a slice.
    closed_markets = closed_markets.loc[
        closed_markets["first_outcome_prob"] != -1.0
    ].copy()
    print(
        f"Finished computing all markets liquidity info. Final length = {len(closed_markets)}"
    )
    if markets_no_info:
        print(
            f"There were {len(markets_no_info)} markets with no liquidity info. Printing some index of the dataframe"
        )
        with open("no_liq_info.pickle", "wb") as file:
            pickle.dump(markets_no_info, file)
        print(markets_no_info[:1])
    print(closed_markets.head())
    # Add the Kullback–Leibler divergence values
    print("Computing Kullback–Leibler (KL) divergence")
    closed_markets["kl_divergence"] = closed_markets.apply(
        market_KL_divergence, axis=1
    )
    closed_markets.to_parquet(DATA_DIR / "closed_markets_div.parquet", index=False)
    print("Finished preparing final dataset for visualization")
    print(closed_markets.head())
249
+
250
+
251
# Run the whole pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    prepare_closed_markets_data()
scripts/metrics.py CHANGED
@@ -69,9 +69,9 @@ def compute_trader_metrics_by_market_creator(
69
  if len(filtered_traders_data) == 0:
70
  tqdm.write(f"No data. Skipping market creator {market_creator}")
71
  return {} # No Data
72
- tqdm.write(
73
- f"Volume of data for trader {trader_address} and market creator {market_creator} = {len(filtered_traders_data)}"
74
- )
75
  metrics = compute_metrics(trader_address, filtered_traders_data)
76
  return metrics
77
 
 
69
  if len(filtered_traders_data) == 0:
70
  tqdm.write(f"No data. Skipping market creator {market_creator}")
71
  return {} # No Data
72
+ # tqdm.write(
73
+ # f"Volume of data for trader {trader_address} and market creator {market_creator} = {len(filtered_traders_data)}"
74
+ # )
75
  metrics = compute_metrics(trader_address, filtered_traders_data)
76
  return metrics
77
 
tabs/market_plots.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ import matplotlib.pyplot as plt
4
+ import seaborn as sns
5
+ from typing import Tuple
6
+ import plotly.express as px
7
+
8
+
9
def plot_kl_div_per_market(closed_markets: pd.DataFrame) -> gr.Plot:
    """Box plot of the KL divergence of the markets, one group per week.

    Every market appears twice in the plotted data: once under its own
    ``market_creator`` and once under the synthetic creator "all", which
    gives the overall distribution next to the per-creator ones.

    :param closed_markets: dataframe providing the columns ``market_creator``,
        ``opening_datetime``, ``month_year_week`` and ``kl_divergence``
    :return: a gradio plot component wrapping the plotly figure
    """
    # Stack the original rows and their "all" duplicates, oldest market first.
    plot_data = pd.concat(
        [closed_markets, closed_markets.assign(market_creator="all")],
        ignore_index=True,
    ).sort_values(by="opening_datetime", ascending=True)

    fig = px.box(
        plot_data,
        x="month_year_week",
        y="kl_divergence",
        color="market_creator",
        color_discrete_sequence=["purple", "goldenrod", "darkgreen"],
        category_orders={"market_creator": ["pearl", "quickstart", "all"]},
    )
    # Also draw the mean inside each box.
    fig.update_traces(boxmean=True)
    fig.update_layout(
        xaxis_title="Markets closing Week",
        yaxis_title="Kullback–Leibler divergence",
        legend=dict(yanchor="top", y=0.5),
    )
    fig.update_xaxes(tickformat="%b %d\n%Y")

    return gr.Plot(value=fig)
tabs/trader_plots.py CHANGED
@@ -12,7 +12,7 @@ trader_metric_choices = [
12
  default_trader_metric = "ROI"
13
 
14
 
15
- def get_trader_metrics_text() -> gr.Markdown:
16
  metric_text = """
17
  ## Description of the graph
18
  These metrics are computed weekly. The statistical measures are:
 
12
  default_trader_metric = "ROI"
13
 
14
 
15
+ def get_metrics_text() -> gr.Markdown:
16
  metric_text = """
17
  ## Description of the graph
18
  These metrics are computed weekly. The statistical measures are: