cyberosa committed on
Commit
ddd4c40
·
1 Parent(s): f842047

trades filter for live distribution

Browse files
app.py CHANGED
@@ -3,10 +3,11 @@ import gradio as gr
3
  import pandas as pd
4
  import duckdb
5
  import logging
6
- from typing import Tuple
7
  from tabs.tokens_votes_dist import (
8
  get_based_tokens_distribution,
9
  get_based_votes_distribution,
 
10
  )
11
  from tabs.dist_gap import (
12
  get_distribution_plot,
@@ -56,27 +57,6 @@ def prepare_data():
56
  return df
57
 
58
 
59
- def get_extreme_cases(live_fpmms: pd.DataFrame) -> Tuple:
60
- """Function to return the id of the best and worst case according to the dist gap metric"""
61
- # select markets with more than 1 sample
62
- samples_per_market = (
63
- live_fpmms[["id", "sample_timestamp"]].groupby("id").count().reset_index()
64
- )
65
- markets_with_multiple_samples = list(
66
- samples_per_market.loc[samples_per_market["sample_timestamp"] > 1, "id"].values
67
- )
68
- selected_markets = live_fpmms.loc[
69
- live_fpmms["id"].isin(markets_with_multiple_samples)
70
- ]
71
- selected_markets.sort_values(by="dist_gap_perc", ascending=False, inplace=True)
72
- return (
73
- selected_markets.iloc[-1].id,
74
- selected_markets.iloc[-1].dist_gap_perc,
75
- selected_markets.iloc[0].id,
76
- selected_markets.iloc[0].dist_gap_perc,
77
- )
78
-
79
-
80
  demo = gr.Blocks()
81
  markets_data = prepare_data()
82
  live_markets_data = markets_data.loc[markets_data["open"] == True]
 
3
  import pandas as pd
4
  import duckdb
5
  import logging
6
+
7
  from tabs.tokens_votes_dist import (
8
  get_based_tokens_distribution,
9
  get_based_votes_distribution,
10
+ get_extreme_cases,
11
  )
12
  from tabs.dist_gap import (
13
  get_distribution_plot,
 
57
  return df
58
 
59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  demo = gr.Blocks()
61
  markets_data = prepare_data()
62
  live_markets_data = markets_data.loc[markets_data["open"] == True]
live_data/markets_live_data.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ae654e0654ae908aff28be1cac0ebdff765f65e6c442dc94df1a2c801319c2a
3
- size 39132
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac46b671cb0a3663931568dbef2b5221ff3d7d3cd0c4648258032c871335bf3a
3
+ size 38288
live_data/markets_live_data_sample.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e0451385c1de2891fe1ab7fff8b3e5a5f797f351b2ca799266e65adec57a9b9
3
- size 34357
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac46b671cb0a3663931568dbef2b5221ff3d7d3cd0c4648258032c871335bf3a
3
+ size 38288
notebooks/analysis_of_markets_data.ipynb CHANGED
@@ -14,7 +14,7 @@
14
  },
15
  {
16
  "cell_type": "code",
17
- "execution_count": 3,
18
  "metadata": {},
19
  "outputs": [
20
  {
@@ -57,7 +57,6 @@
57
  " <th>second_token_perc</th>\n",
58
  " <th>mean_trade_size</th>\n",
59
  " <th>sample_datetime</th>\n",
60
- " <th>market_id</th>\n",
61
  " </tr>\n",
62
  " </thead>\n",
63
  " <tbody>\n",
@@ -82,7 +81,6 @@
82
  " <td>14.76</td>\n",
83
  " <td>NaN</td>\n",
84
  " <td>2024-07-31 18:06:59</td>\n",
85
- " <td>2.0</td>\n",
86
  " </tr>\n",
87
  " <tr>\n",
88
  " <th>1</th>\n",
@@ -105,7 +103,6 @@
105
  " <td>47.84</td>\n",
106
  " <td>NaN</td>\n",
107
  " <td>2024-07-31 18:06:59</td>\n",
108
- " <td>3.0</td>\n",
109
  " </tr>\n",
110
  " <tr>\n",
111
  " <th>2</th>\n",
@@ -128,7 +125,6 @@
128
  " <td>43.07</td>\n",
129
  " <td>NaN</td>\n",
130
  " <td>2024-07-31 18:06:59</td>\n",
131
- " <td>6.0</td>\n",
132
  " </tr>\n",
133
  " <tr>\n",
134
  " <th>3</th>\n",
@@ -151,7 +147,6 @@
151
  " <td>32.06</td>\n",
152
  " <td>NaN</td>\n",
153
  " <td>2024-07-31 18:06:59</td>\n",
154
- " <td>7.0</td>\n",
155
  " </tr>\n",
156
  " <tr>\n",
157
  " <th>4</th>\n",
@@ -174,7 +169,6 @@
174
  " <td>50.32</td>\n",
175
  " <td>NaN</td>\n",
176
  " <td>2024-07-31 18:06:59</td>\n",
177
- " <td>8.0</td>\n",
178
  " </tr>\n",
179
  " </tbody>\n",
180
  "</table>\n",
@@ -223,15 +217,15 @@
223
  "3 37.04 Yes No 67.94 \n",
224
  "4 52.46 Yes No 49.68 \n",
225
  "\n",
226
- " second_token_perc mean_trade_size sample_datetime market_id \n",
227
- "0 14.76 NaN 2024-07-31 18:06:59 2.0 \n",
228
- "1 47.84 NaN 2024-07-31 18:06:59 3.0 \n",
229
- "2 43.07 NaN 2024-07-31 18:06:59 6.0 \n",
230
- "3 32.06 NaN 2024-07-31 18:06:59 7.0 \n",
231
- "4 50.32 NaN 2024-07-31 18:06:59 8.0 "
232
  ]
233
  },
234
- "execution_count": 3,
235
  "metadata": {},
236
  "output_type": "execute_result"
237
  }
@@ -243,7 +237,46 @@
243
  },
244
  {
245
  "cell_type": "code",
246
- "execution_count": 4,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  "metadata": {},
248
  "outputs": [
249
  {
@@ -252,7 +285,7 @@
252
  "text": [
253
  "<class 'pandas.core.frame.DataFrame'>\n",
254
  "RangeIndex: 168 entries, 0 to 167\n",
255
- "Data columns (total 20 columns):\n",
256
  " # Column Non-Null Count Dtype \n",
257
  "--- ------ -------------- ----- \n",
258
  " 0 creationTimestamp 168 non-null object \n",
@@ -273,10 +306,9 @@
273
  " 15 first_token_perc 168 non-null float64 \n",
274
  " 16 second_token_perc 168 non-null float64 \n",
275
  " 17 mean_trade_size 84 non-null float64 \n",
276
- " 18 sample_datetime 126 non-null datetime64[ns]\n",
277
- " 19 market_id 126 non-null float64 \n",
278
- "dtypes: bool(1), datetime64[ns](1), float64(7), int64(3), object(8)\n",
279
- "memory usage: 25.2+ KB\n"
280
  ]
281
  }
282
  ],
@@ -286,7 +318,7 @@
286
  },
287
  {
288
  "cell_type": "code",
289
- "execution_count": 157,
290
  "metadata": {},
291
  "outputs": [
292
  {
@@ -296,10 +328,11 @@
296
  "1722442019 42\n",
297
  "1722501882 42\n",
298
  "1722593849 42\n",
 
299
  "Name: count, dtype: int64"
300
  ]
301
  },
302
- "execution_count": 157,
303
  "metadata": {},
304
  "output_type": "execute_result"
305
  }
@@ -310,7 +343,7 @@
310
  },
311
  {
312
  "cell_type": "code",
313
- "execution_count": 5,
314
  "metadata": {},
315
  "outputs": [
316
  {
@@ -322,7 +355,7 @@
322
  "Name: count, dtype: int64"
323
  ]
324
  },
325
- "execution_count": 5,
326
  "metadata": {},
327
  "output_type": "execute_result"
328
  }
 
14
  },
15
  {
16
  "cell_type": "code",
17
+ "execution_count": 17,
18
  "metadata": {},
19
  "outputs": [
20
  {
 
57
  " <th>second_token_perc</th>\n",
58
  " <th>mean_trade_size</th>\n",
59
  " <th>sample_datetime</th>\n",
 
60
  " </tr>\n",
61
  " </thead>\n",
62
  " <tbody>\n",
 
81
  " <td>14.76</td>\n",
82
  " <td>NaN</td>\n",
83
  " <td>2024-07-31 18:06:59</td>\n",
 
84
  " </tr>\n",
85
  " <tr>\n",
86
  " <th>1</th>\n",
 
103
  " <td>47.84</td>\n",
104
  " <td>NaN</td>\n",
105
  " <td>2024-07-31 18:06:59</td>\n",
 
106
  " </tr>\n",
107
  " <tr>\n",
108
  " <th>2</th>\n",
 
125
  " <td>43.07</td>\n",
126
  " <td>NaN</td>\n",
127
  " <td>2024-07-31 18:06:59</td>\n",
 
128
  " </tr>\n",
129
  " <tr>\n",
130
  " <th>3</th>\n",
 
147
  " <td>32.06</td>\n",
148
  " <td>NaN</td>\n",
149
  " <td>2024-07-31 18:06:59</td>\n",
 
150
  " </tr>\n",
151
  " <tr>\n",
152
  " <th>4</th>\n",
 
169
  " <td>50.32</td>\n",
170
  " <td>NaN</td>\n",
171
  " <td>2024-07-31 18:06:59</td>\n",
 
172
  " </tr>\n",
173
  " </tbody>\n",
174
  "</table>\n",
 
217
  "3 37.04 Yes No 67.94 \n",
218
  "4 52.46 Yes No 49.68 \n",
219
  "\n",
220
+ " second_token_perc mean_trade_size sample_datetime \n",
221
+ "0 14.76 NaN 2024-07-31 18:06:59 \n",
222
+ "1 47.84 NaN 2024-07-31 18:06:59 \n",
223
+ "2 43.07 NaN 2024-07-31 18:06:59 \n",
224
+ "3 32.06 NaN 2024-07-31 18:06:59 \n",
225
+ "4 50.32 NaN 2024-07-31 18:06:59 "
226
  ]
227
  },
228
+ "execution_count": 17,
229
  "metadata": {},
230
  "output_type": "execute_result"
231
  }
 
237
  },
238
  {
239
  "cell_type": "code",
240
+ "execution_count": 18,
241
+ "metadata": {},
242
+ "outputs": [
243
+ {
244
+ "data": {
245
+ "text/plain": [
246
+ "creationTimestamp 0\n",
247
+ "id 0\n",
248
+ "liquidityMeasure 0\n",
249
+ "liquidityParameter 0\n",
250
+ "openingTimestamp 0\n",
251
+ "outcomeTokenAmounts 0\n",
252
+ "title 0\n",
253
+ "sample_timestamp 0\n",
254
+ "open 0\n",
255
+ "total_trades 0\n",
256
+ "dist_gap_perc 0\n",
257
+ "votes_first_outcome_perc 0\n",
258
+ "votes_second_outcome_perc 0\n",
259
+ "first_outcome 0\n",
260
+ "second_outcome 0\n",
261
+ "first_token_perc 0\n",
262
+ "second_token_perc 0\n",
263
+ "mean_trade_size 84\n",
264
+ "sample_datetime 0\n",
265
+ "dtype: int64"
266
+ ]
267
+ },
268
+ "execution_count": 18,
269
+ "metadata": {},
270
+ "output_type": "execute_result"
271
+ }
272
+ ],
273
+ "source": [
274
+ "live_fpmms.isna().sum()"
275
+ ]
276
+ },
277
+ {
278
+ "cell_type": "code",
279
+ "execution_count": 19,
280
  "metadata": {},
281
  "outputs": [
282
  {
 
285
  "text": [
286
  "<class 'pandas.core.frame.DataFrame'>\n",
287
  "RangeIndex: 168 entries, 0 to 167\n",
288
+ "Data columns (total 19 columns):\n",
289
  " # Column Non-Null Count Dtype \n",
290
  "--- ------ -------------- ----- \n",
291
  " 0 creationTimestamp 168 non-null object \n",
 
306
  " 15 first_token_perc 168 non-null float64 \n",
307
  " 16 second_token_perc 168 non-null float64 \n",
308
  " 17 mean_trade_size 84 non-null float64 \n",
309
+ " 18 sample_datetime 168 non-null datetime64[ns]\n",
310
+ "dtypes: bool(1), datetime64[ns](1), float64(6), int64(3), object(8)\n",
311
+ "memory usage: 23.9+ KB\n"
 
312
  ]
313
  }
314
  ],
 
318
  },
319
  {
320
  "cell_type": "code",
321
+ "execution_count": 20,
322
  "metadata": {},
323
  "outputs": [
324
  {
 
328
  "1722442019 42\n",
329
  "1722501882 42\n",
330
  "1722593849 42\n",
331
+ "1722852594 42\n",
332
  "Name: count, dtype: int64"
333
  ]
334
  },
335
+ "execution_count": 20,
336
  "metadata": {},
337
  "output_type": "execute_result"
338
  }
 
343
  },
344
  {
345
  "cell_type": "code",
346
+ "execution_count": 21,
347
  "metadata": {},
348
  "outputs": [
349
  {
 
355
  "Name: count, dtype: int64"
356
  ]
357
  },
358
+ "execution_count": 21,
359
  "metadata": {},
360
  "output_type": "execute_result"
361
  }
notebooks/research_on_specific_markets.ipynb ADDED
@@ -0,0 +1,455 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import pandas as pd\n",
10
+ "import matplotlib.pyplot as plt\n",
11
+ "import seaborn as sns\n",
12
+ "import gc"
13
+ ]
14
+ },
15
+ {
16
+ "cell_type": "code",
17
+ "execution_count": 8,
18
+ "metadata": {},
19
+ "outputs": [
20
+ {
21
+ "data": {
22
+ "text/html": [
23
+ "<div>\n",
24
+ "<style scoped>\n",
25
+ " .dataframe tbody tr th:only-of-type {\n",
26
+ " vertical-align: middle;\n",
27
+ " }\n",
28
+ "\n",
29
+ " .dataframe tbody tr th {\n",
30
+ " vertical-align: top;\n",
31
+ " }\n",
32
+ "\n",
33
+ " .dataframe thead th {\n",
34
+ " text-align: right;\n",
35
+ " }\n",
36
+ "</style>\n",
37
+ "<table border=\"1\" class=\"dataframe\">\n",
38
+ " <thead>\n",
39
+ " <tr style=\"text-align: right;\">\n",
40
+ " <th></th>\n",
41
+ " <th>creationTimestamp</th>\n",
42
+ " <th>id</th>\n",
43
+ " <th>liquidityMeasure</th>\n",
44
+ " <th>liquidityParameter</th>\n",
45
+ " <th>openingTimestamp</th>\n",
46
+ " <th>outcomeTokenAmounts</th>\n",
47
+ " <th>title</th>\n",
48
+ " <th>sample_timestamp</th>\n",
49
+ " <th>open</th>\n",
50
+ " <th>total_trades</th>\n",
51
+ " <th>dist_gap_perc</th>\n",
52
+ " <th>votes_first_outcome_perc</th>\n",
53
+ " <th>votes_second_outcome_perc</th>\n",
54
+ " <th>first_outcome</th>\n",
55
+ " <th>second_outcome</th>\n",
56
+ " <th>first_token_perc</th>\n",
57
+ " <th>second_token_perc</th>\n",
58
+ " <th>mean_trade_size</th>\n",
59
+ " <th>sample_datetime</th>\n",
60
+ " </tr>\n",
61
+ " </thead>\n",
62
+ " <tbody>\n",
63
+ " <tr>\n",
64
+ " <th>0</th>\n",
65
+ " <td>1722127095</td>\n",
66
+ " <td>0x18386924426f7c8ab7f5db4ad586c12dac5cd5e3</td>\n",
67
+ " <td>4965258435682032297</td>\n",
68
+ " <td>7000000000000000009</td>\n",
69
+ " <td>1722556800</td>\n",
70
+ " <td>[2912373242574997426, 16824766579944358195]</td>\n",
71
+ " <td>Will the new AI-powered upgrade for the Philip...</td>\n",
72
+ " <td>1722442019</td>\n",
73
+ " <td>False</td>\n",
74
+ " <td>29</td>\n",
75
+ " <td>19.72</td>\n",
76
+ " <td>65.52</td>\n",
77
+ " <td>34.48</td>\n",
78
+ " <td>Yes</td>\n",
79
+ " <td>No</td>\n",
80
+ " <td>85.24</td>\n",
81
+ " <td>14.76</td>\n",
82
+ " <td>NaN</td>\n",
83
+ " <td>2024-07-31 18:06:59</td>\n",
84
+ " </tr>\n",
85
+ " <tr>\n",
86
+ " <th>1</th>\n",
87
+ " <td>1722133525</td>\n",
88
+ " <td>0x1f0f1fd3fcb3b49eeeb6197abcb5c44c1907dfbd</td>\n",
89
+ " <td>6993447239584866547</td>\n",
90
+ " <td>7000000000000000012</td>\n",
91
+ " <td>1722556800</td>\n",
92
+ " <td>[6703462178421126245, 7309655622095420488]</td>\n",
93
+ " <td>Will Harvey Weinstein recover from Covid-19 an...</td>\n",
94
+ " <td>1722442019</td>\n",
95
+ " <td>False</td>\n",
96
+ " <td>44</td>\n",
97
+ " <td>11.48</td>\n",
98
+ " <td>63.64</td>\n",
99
+ " <td>36.36</td>\n",
100
+ " <td>Yes</td>\n",
101
+ " <td>No</td>\n",
102
+ " <td>52.16</td>\n",
103
+ " <td>47.84</td>\n",
104
+ " <td>NaN</td>\n",
105
+ " <td>2024-07-31 18:06:59</td>\n",
106
+ " </tr>\n",
107
+ " <tr>\n",
108
+ " <th>2</th>\n",
109
+ " <td>1722132875</td>\n",
110
+ " <td>0x3725b8f54cc53b468cdc165ee10218344b607158</td>\n",
111
+ " <td>6932346630944751276</td>\n",
112
+ " <td>7000000000000000011</td>\n",
113
+ " <td>1722556800</td>\n",
114
+ " <td>[6087978352168369108, 8048648856076756352]</td>\n",
115
+ " <td>Will Tesla's net income increase in the third ...</td>\n",
116
+ " <td>1722442019</td>\n",
117
+ " <td>False</td>\n",
118
+ " <td>44</td>\n",
119
+ " <td>4.66</td>\n",
120
+ " <td>52.27</td>\n",
121
+ " <td>47.73</td>\n",
122
+ " <td>Yes</td>\n",
123
+ " <td>No</td>\n",
124
+ " <td>56.93</td>\n",
125
+ " <td>43.07</td>\n",
126
+ " <td>NaN</td>\n",
127
+ " <td>2024-07-31 18:06:59</td>\n",
128
+ " </tr>\n",
129
+ " <tr>\n",
130
+ " <th>3</th>\n",
131
+ " <td>1722300340</td>\n",
132
+ " <td>0x38d2b80cbd152b93a8df640a21d80e4b9d75039a</td>\n",
133
+ " <td>6533756051198779116</td>\n",
134
+ " <td>7000000000000000009</td>\n",
135
+ " <td>1722729600</td>\n",
136
+ " <td>[4808284238922480369, 10190745298156651455]</td>\n",
137
+ " <td>Will SpaceX launch Falcon 9 rocket on 3 August...</td>\n",
138
+ " <td>1722442019</td>\n",
139
+ " <td>False</td>\n",
140
+ " <td>27</td>\n",
141
+ " <td>4.98</td>\n",
142
+ " <td>62.96</td>\n",
143
+ " <td>37.04</td>\n",
144
+ " <td>Yes</td>\n",
145
+ " <td>No</td>\n",
146
+ " <td>67.94</td>\n",
147
+ " <td>32.06</td>\n",
148
+ " <td>NaN</td>\n",
149
+ " <td>2024-07-31 18:06:59</td>\n",
150
+ " </tr>\n",
151
+ " <tr>\n",
152
+ " <th>4</th>\n",
153
+ " <td>1722125375</td>\n",
154
+ " <td>0x39e657d48714c483b7ee2bc9314e6c7ad63d2d79</td>\n",
155
+ " <td>6999859700819864416</td>\n",
156
+ " <td>7000000000000000015</td>\n",
157
+ " <td>1722556800</td>\n",
158
+ " <td>[7044460134742943173, 6955820469241400760]</td>\n",
159
+ " <td>Will the wildfire in California be under contr...</td>\n",
160
+ " <td>1722442019</td>\n",
161
+ " <td>False</td>\n",
162
+ " <td>61</td>\n",
163
+ " <td>2.14</td>\n",
164
+ " <td>47.54</td>\n",
165
+ " <td>52.46</td>\n",
166
+ " <td>Yes</td>\n",
167
+ " <td>No</td>\n",
168
+ " <td>49.68</td>\n",
169
+ " <td>50.32</td>\n",
170
+ " <td>NaN</td>\n",
171
+ " <td>2024-07-31 18:06:59</td>\n",
172
+ " </tr>\n",
173
+ " </tbody>\n",
174
+ "</table>\n",
175
+ "</div>"
176
+ ],
177
+ "text/plain": [
178
+ " creationTimestamp id \\\n",
179
+ "0 1722127095 0x18386924426f7c8ab7f5db4ad586c12dac5cd5e3 \n",
180
+ "1 1722133525 0x1f0f1fd3fcb3b49eeeb6197abcb5c44c1907dfbd \n",
181
+ "2 1722132875 0x3725b8f54cc53b468cdc165ee10218344b607158 \n",
182
+ "3 1722300340 0x38d2b80cbd152b93a8df640a21d80e4b9d75039a \n",
183
+ "4 1722125375 0x39e657d48714c483b7ee2bc9314e6c7ad63d2d79 \n",
184
+ "\n",
185
+ " liquidityMeasure liquidityParameter openingTimestamp \\\n",
186
+ "0 4965258435682032297 7000000000000000009 1722556800 \n",
187
+ "1 6993447239584866547 7000000000000000012 1722556800 \n",
188
+ "2 6932346630944751276 7000000000000000011 1722556800 \n",
189
+ "3 6533756051198779116 7000000000000000009 1722729600 \n",
190
+ "4 6999859700819864416 7000000000000000015 1722556800 \n",
191
+ "\n",
192
+ " outcomeTokenAmounts \\\n",
193
+ "0 [2912373242574997426, 16824766579944358195] \n",
194
+ "1 [6703462178421126245, 7309655622095420488] \n",
195
+ "2 [6087978352168369108, 8048648856076756352] \n",
196
+ "3 [4808284238922480369, 10190745298156651455] \n",
197
+ "4 [7044460134742943173, 6955820469241400760] \n",
198
+ "\n",
199
+ " title sample_timestamp open \\\n",
200
+ "0 Will the new AI-powered upgrade for the Philip... 1722442019 False \n",
201
+ "1 Will Harvey Weinstein recover from Covid-19 an... 1722442019 False \n",
202
+ "2 Will Tesla's net income increase in the third ... 1722442019 False \n",
203
+ "3 Will SpaceX launch Falcon 9 rocket on 3 August... 1722442019 False \n",
204
+ "4 Will the wildfire in California be under contr... 1722442019 False \n",
205
+ "\n",
206
+ " total_trades dist_gap_perc votes_first_outcome_perc \\\n",
207
+ "0 29 19.72 65.52 \n",
208
+ "1 44 11.48 63.64 \n",
209
+ "2 44 4.66 52.27 \n",
210
+ "3 27 4.98 62.96 \n",
211
+ "4 61 2.14 47.54 \n",
212
+ "\n",
213
+ " votes_second_outcome_perc first_outcome second_outcome first_token_perc \\\n",
214
+ "0 34.48 Yes No 85.24 \n",
215
+ "1 36.36 Yes No 52.16 \n",
216
+ "2 47.73 Yes No 56.93 \n",
217
+ "3 37.04 Yes No 67.94 \n",
218
+ "4 52.46 Yes No 49.68 \n",
219
+ "\n",
220
+ " second_token_perc mean_trade_size sample_datetime \n",
221
+ "0 14.76 NaN 2024-07-31 18:06:59 \n",
222
+ "1 47.84 NaN 2024-07-31 18:06:59 \n",
223
+ "2 43.07 NaN 2024-07-31 18:06:59 \n",
224
+ "3 32.06 NaN 2024-07-31 18:06:59 \n",
225
+ "4 50.32 NaN 2024-07-31 18:06:59 "
226
+ ]
227
+ },
228
+ "execution_count": 8,
229
+ "metadata": {},
230
+ "output_type": "execute_result"
231
+ }
232
+ ],
233
+ "source": [
234
+ "live_fpmms = pd.read_parquet('../live_data/markets_live_data.parquet')\n",
235
+ "live_fpmms.head()"
236
+ ]
237
+ },
238
+ {
239
+ "cell_type": "code",
240
+ "execution_count": 9,
241
+ "metadata": {},
242
+ "outputs": [
243
+ {
244
+ "name": "stdout",
245
+ "output_type": "stream",
246
+ "text": [
247
+ "<class 'pandas.core.frame.DataFrame'>\n",
248
+ "RangeIndex: 168 entries, 0 to 167\n",
249
+ "Data columns (total 19 columns):\n",
250
+ " # Column Non-Null Count Dtype \n",
251
+ "--- ------ -------------- ----- \n",
252
+ " 0 creationTimestamp 168 non-null object \n",
253
+ " 1 id 168 non-null object \n",
254
+ " 2 liquidityMeasure 168 non-null int64 \n",
255
+ " 3 liquidityParameter 168 non-null object \n",
256
+ " 4 openingTimestamp 168 non-null object \n",
257
+ " 5 outcomeTokenAmounts 168 non-null object \n",
258
+ " 6 title 168 non-null object \n",
259
+ " 7 sample_timestamp 168 non-null int64 \n",
260
+ " 8 open 168 non-null bool \n",
261
+ " 9 total_trades 168 non-null int64 \n",
262
+ " 10 dist_gap_perc 168 non-null float64 \n",
263
+ " 11 votes_first_outcome_perc 168 non-null float64 \n",
264
+ " 12 votes_second_outcome_perc 168 non-null float64 \n",
265
+ " 13 first_outcome 168 non-null object \n",
266
+ " 14 second_outcome 168 non-null object \n",
267
+ " 15 first_token_perc 168 non-null float64 \n",
268
+ " 16 second_token_perc 168 non-null float64 \n",
269
+ " 17 mean_trade_size 84 non-null float64 \n",
270
+ " 18 sample_datetime 126 non-null datetime64[ns]\n",
271
+ "dtypes: bool(1), datetime64[ns](1), float64(6), int64(3), object(8)\n",
272
+ "memory usage: 23.9+ KB\n"
273
+ ]
274
+ }
275
+ ],
276
+ "source": [
277
+ "live_fpmms.info()"
278
+ ]
279
+ },
280
+ {
281
+ "cell_type": "code",
282
+ "execution_count": 4,
283
+ "metadata": {},
284
+ "outputs": [],
285
+ "source": [
286
+ "id = \"0xf2db83c7a5f926290fb93cebea810746cd674916\""
287
+ ]
288
+ },
289
+ {
290
+ "cell_type": "code",
291
+ "execution_count": 10,
292
+ "metadata": {},
293
+ "outputs": [],
294
+ "source": [
295
+ "target_market = live_fpmms.loc[live_fpmms[\"id\"]==id]"
296
+ ]
297
+ },
298
+ {
299
+ "cell_type": "code",
300
+ "execution_count": 11,
301
+ "metadata": {},
302
+ "outputs": [
303
+ {
304
+ "data": {
305
+ "text/html": [
306
+ "<div>\n",
307
+ "<style scoped>\n",
308
+ " .dataframe tbody tr th:only-of-type {\n",
309
+ " vertical-align: middle;\n",
310
+ " }\n",
311
+ "\n",
312
+ " .dataframe tbody tr th {\n",
313
+ " vertical-align: top;\n",
314
+ " }\n",
315
+ "\n",
316
+ " .dataframe thead th {\n",
317
+ " text-align: right;\n",
318
+ " }\n",
319
+ "</style>\n",
320
+ "<table border=\"1\" class=\"dataframe\">\n",
321
+ " <thead>\n",
322
+ " <tr style=\"text-align: right;\">\n",
323
+ " <th></th>\n",
324
+ " <th>creationTimestamp</th>\n",
325
+ " <th>id</th>\n",
326
+ " <th>liquidityMeasure</th>\n",
327
+ " <th>liquidityParameter</th>\n",
328
+ " <th>openingTimestamp</th>\n",
329
+ " <th>outcomeTokenAmounts</th>\n",
330
+ " <th>title</th>\n",
331
+ " <th>sample_timestamp</th>\n",
332
+ " <th>open</th>\n",
333
+ " <th>total_trades</th>\n",
334
+ " <th>dist_gap_perc</th>\n",
335
+ " <th>votes_first_outcome_perc</th>\n",
336
+ " <th>votes_second_outcome_perc</th>\n",
337
+ " <th>first_outcome</th>\n",
338
+ " <th>second_outcome</th>\n",
339
+ " <th>first_token_perc</th>\n",
340
+ " <th>second_token_perc</th>\n",
341
+ " <th>mean_trade_size</th>\n",
342
+ " <th>sample_datetime</th>\n",
343
+ " </tr>\n",
344
+ " </thead>\n",
345
+ " <tbody>\n",
346
+ " <tr>\n",
347
+ " <th>122</th>\n",
348
+ " <td>1722571590</td>\n",
349
+ " <td>0xf2db83c7a5f926290fb93cebea810746cd674916</td>\n",
350
+ " <td>7000000000000000000</td>\n",
351
+ " <td>7000000000000000000</td>\n",
352
+ " <td>1722988800</td>\n",
353
+ " <td>[7000000000000000000, 7000000000000000000]</td>\n",
354
+ " <td>Will Donald Trump's campaign announce another ...</td>\n",
355
+ " <td>1722593849</td>\n",
356
+ " <td>True</td>\n",
357
+ " <td>0</td>\n",
358
+ " <td>0.00</td>\n",
359
+ " <td>50.00</td>\n",
360
+ " <td>50.00</td>\n",
361
+ " <td>Yes</td>\n",
362
+ " <td>No</td>\n",
363
+ " <td>50.00</td>\n",
364
+ " <td>50.00</td>\n",
365
+ " <td>0.000000</td>\n",
366
+ " <td>2024-08-02 12:17:29</td>\n",
367
+ " </tr>\n",
368
+ " <tr>\n",
369
+ " <th>166</th>\n",
370
+ " <td>1722571590</td>\n",
371
+ " <td>0xf2db83c7a5f926290fb93cebea810746cd674916</td>\n",
372
+ " <td>6949985446986235988</td>\n",
373
+ " <td>7000000000000000011</td>\n",
374
+ " <td>1722988800</td>\n",
375
+ " <td>[6209077712260007050, 7891671238587987896]</td>\n",
376
+ " <td>Will Donald Trump's campaign announce another ...</td>\n",
377
+ " <td>1722847693</td>\n",
378
+ " <td>True</td>\n",
379
+ " <td>39</td>\n",
380
+ " <td>13.26</td>\n",
381
+ " <td>69.23</td>\n",
382
+ " <td>30.77</td>\n",
383
+ " <td>Yes</td>\n",
384
+ " <td>No</td>\n",
385
+ " <td>55.97</td>\n",
386
+ " <td>44.03</td>\n",
387
+ " <td>0.646436</td>\n",
388
+ " <td>NaT</td>\n",
389
+ " </tr>\n",
390
+ " </tbody>\n",
391
+ "</table>\n",
392
+ "</div>"
393
+ ],
394
+ "text/plain": [
395
+ " creationTimestamp id \\\n",
396
+ "122 1722571590 0xf2db83c7a5f926290fb93cebea810746cd674916 \n",
397
+ "166 1722571590 0xf2db83c7a5f926290fb93cebea810746cd674916 \n",
398
+ "\n",
399
+ " liquidityMeasure liquidityParameter openingTimestamp \\\n",
400
+ "122 7000000000000000000 7000000000000000000 1722988800 \n",
401
+ "166 6949985446986235988 7000000000000000011 1722988800 \n",
402
+ "\n",
403
+ " outcomeTokenAmounts \\\n",
404
+ "122 [7000000000000000000, 7000000000000000000] \n",
405
+ "166 [6209077712260007050, 7891671238587987896] \n",
406
+ "\n",
407
+ " title sample_timestamp \\\n",
408
+ "122 Will Donald Trump's campaign announce another ... 1722593849 \n",
409
+ "166 Will Donald Trump's campaign announce another ... 1722847693 \n",
410
+ "\n",
411
+ " open total_trades dist_gap_perc votes_first_outcome_perc \\\n",
412
+ "122 True 0 0.00 50.00 \n",
413
+ "166 True 39 13.26 69.23 \n",
414
+ "\n",
415
+ " votes_second_outcome_perc first_outcome second_outcome first_token_perc \\\n",
416
+ "122 50.00 Yes No 50.00 \n",
417
+ "166 30.77 Yes No 55.97 \n",
418
+ "\n",
419
+ " second_token_perc mean_trade_size sample_datetime \n",
420
+ "122 50.00 0.000000 2024-08-02 12:17:29 \n",
421
+ "166 44.03 0.646436 NaT "
422
+ ]
423
+ },
424
+ "execution_count": 11,
425
+ "metadata": {},
426
+ "output_type": "execute_result"
427
+ }
428
+ ],
429
+ "source": [
430
+ "target_market"
431
+ ]
432
+ }
433
+ ],
434
+ "metadata": {
435
+ "kernelspec": {
436
+ "display_name": "hf_dashboards",
437
+ "language": "python",
438
+ "name": "python3"
439
+ },
440
+ "language_info": {
441
+ "codemirror_mode": {
442
+ "name": "ipython",
443
+ "version": 3
444
+ },
445
+ "file_extension": ".py",
446
+ "mimetype": "text/x-python",
447
+ "name": "python",
448
+ "nbconvert_exporter": "python",
449
+ "pygments_lexer": "ipython3",
450
+ "version": "3.12.2"
451
+ }
452
+ },
453
+ "nbformat": 4,
454
+ "nbformat_minor": 2
455
+ }
scripts/live_markets_data.py CHANGED
@@ -232,6 +232,9 @@ def transform_fpmms(fpmms: pd.DataFrame, filename: str, current_timestamp: int)
232
  fpmms["token_first_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[0]))
233
  fpmms["token_second_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[1]))
234
  fpmms["liquidityMeasure"] = fpmms["liquidityMeasure"].apply(lambda x: int(x))
 
 
 
235
  fpmms["total_tokens"] = fpmms.apply(
236
  lambda x: x.token_first_amount + x.token_second_amount, axis=1
237
  )
 
232
  fpmms["token_first_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[0]))
233
  fpmms["token_second_amount"] = fpmms.outcomeTokenAmounts.apply(lambda x: int(x[1]))
234
  fpmms["liquidityMeasure"] = fpmms["liquidityMeasure"].apply(lambda x: int(x))
235
+ fpmms["sample_datetime"] = fpmms["sample_timestamp"].apply(
236
+ lambda x: datetime.fromtimestamp(x)
237
+ )
238
  fpmms["total_tokens"] = fpmms.apply(
239
  lambda x: x.token_first_amount + x.token_second_amount, axis=1
240
  )
tabs/tokens_votes_dist.py CHANGED
@@ -4,6 +4,8 @@ import matplotlib.pyplot as plt
4
  import seaborn as sns
5
  from seaborn import FacetGrid
6
  import plotly.express as px
 
 
7
 
8
 
9
  def get_based_tokens_distribution(market_id: str, all_markets: pd.DataFrame):
@@ -58,3 +60,16 @@ def get_based_votes_distribution(market_id: str, all_markets: pd.DataFrame):
58
  labels=[first_outcome, second_outcome],
59
  )
60
  return gr.Plot(value=ax.figure)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  import seaborn as sns
5
  from seaborn import FacetGrid
6
  import plotly.express as px
7
+ import logging
8
+ from typing import Tuple
9
 
10
 
11
  def get_based_tokens_distribution(market_id: str, all_markets: pd.DataFrame):
 
60
  labels=[first_outcome, second_outcome],
61
  )
62
  return gr.Plot(value=ax.figure)
63
+
64
+
65
def get_extreme_cases(live_fpmms: pd.DataFrame) -> Tuple:
    """Return the ids and gap values of the two extreme markets by dist gap.

    Only rows with at least one trade (``total_trades > 0``) are considered.
    The remaining rows are ranked by ``dist_gap_perc``.

    Args:
        live_fpmms: market samples containing at least the ``id``,
            ``total_trades`` and ``dist_gap_perc`` columns.

    Returns:
        A 4-tuple ``(lowest_id, lowest_gap, highest_id, highest_gap)``: the
        id and ``dist_gap_perc`` of the row with the smallest gap, followed
        by the id and ``dist_gap_perc`` of the row with the largest gap.

    Raises:
        IndexError: if no row has ``total_trades > 0``.
    """
    # select markets with some trades
    traded = live_fpmms.loc[live_fpmms["total_trades"] > 0]
    # Sort into a new frame: an in-place sort on a filtered selection
    # triggers pandas' SettingWithCopyWarning and mutates a temporary.
    ranked = traded.sort_values(by="dist_gap_perc", ascending=False)
    highest = ranked.iloc[0]
    lowest = ranked.iloc[-1]
    return (
        lowest["id"],
        lowest["dist_gap_perc"],
        highest["id"],
        highest["dist_gap_perc"],
    )