cyberosa committed on
Commit
12536a4
·
1 Parent(s): dc11fb3

Adjusting divergence graph

Browse files
app.py CHANGED
@@ -20,7 +20,7 @@ from tabs.trader_plots import (
20
  get_interpretation_text,
21
  )
22
 
23
- from tabs.market_plots import plot_kl_div_per_market
24
 
25
 
26
  def get_logger():
@@ -194,19 +194,12 @@ with demo:
194
  )
195
  with gr.Row():
196
  gr.Markdown(
197
- "The accuracy is measured as the difference between the distribution of the final outcome P = [prob(yes), prob(no)] and the final liquidity distribution of the market for the yes and no responses."
198
  )
199
  with gr.Row():
200
- gr.Markdown(
201
- "There are some edge cases in the Kullback-Leibler divergence formula that can lead to extreme or infinite values, particularly when the predicted probabilities are very different from the actual outcome. To handle these cases, we capped the maximum divergence value at 20. A high divergence value (close to or at 20) indicates markets where the predicted probabilities were significantly different from the real-world outcome, potentially even opposite to what actually occurred."
202
- )
203
  with gr.Row():
204
- metrics_text = get_metrics_text()
205
- with gr.Row():
206
- with gr.Column(scale=2):
207
- kl_div_plot = plot_kl_div_per_market(closed_markets=closed_markets)
208
- with gr.Column(scale=1):
209
- interpretation_text = get_interpretation_text()
210
 
211
  with gr.TabItem("🎖️Weekly winning trades % per trader"):
212
  with gr.Row():
 
20
  get_interpretation_text,
21
  )
22
 
23
+ from tabs.market_plots import plot_kl_div_per_market, plot_kl_div_per_market2
24
 
25
 
26
  def get_logger():
 
194
  )
195
  with gr.Row():
196
  gr.Markdown(
197
+ "Aka, how much off is the market prediction’s accuracy from the real outcome of the event. Values capped at 20 for market outcomes completely opposite to the real outcome."
198
  )
199
  with gr.Row():
200
+ trade_details_text = get_metrics_text()
 
 
201
  with gr.Row():
202
+ kl_div_plot = plot_kl_div_per_market2(closed_markets=closed_markets)
 
 
 
 
 
203
 
204
  with gr.TabItem("🎖️Weekly winning trades % per trader"):
205
  with gr.Row():
data/closed_markets_div.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abdc271bdfd214e6497de73c32adeada50b680d847b1ad9b0bdf2c2fc442394b
3
- size 48595
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1e7aeecfea8085d6b11fe66ef37ddee2b5f35b88a8503230b829c4b4501d1de
3
+ size 52381
scripts/closed_markets_divergence.py CHANGED
@@ -177,6 +177,20 @@ def market_KL_divergence(market_row: pd.DataFrame) -> float:
177
  return kl_divergence(P, Q)
178
 
179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  def compute_tokens_prob(token_amounts: list) -> list:
181
  first_token_amounts = token_amounts[0]
182
  second_token_amounts = token_amounts[1]
@@ -247,6 +261,9 @@ def prepare_closed_markets_data():
247
  closed_markets["kl_divergence"] = closed_markets.apply(
248
  lambda x: market_KL_divergence(x), axis=1
249
  )
 
 
 
250
  closed_markets.to_parquet(DATA_DIR / "closed_markets_div.parquet", index=False)
251
  print("Finished preparing final dataset for visualization")
252
  print(closed_markets.head())
 
177
  return kl_divergence(P, Q)
178
 
179
 
180
def off_by_values(market_row: pd.Series) -> float:
    """Return how far the market's final "yes" probability is from the real outcome.

    The result is the absolute difference, expressed in percentage points,
    between the true probability of the "yes" outcome (1.0 or 0.0) and the
    market's ``first_outcome_prob``, which is treated here as the predicted
    probability of "yes".

    Args:
        market_row: One row of the closed-markets table, as passed by
            ``DataFrame.apply(..., axis=1)``. Must expose ``currentAnswer``
            and ``first_outcome_prob``.

    Returns:
        A value in percentage points; 0.0 means the market matched the
        outcome, 100.0 means it predicted the exact opposite (assuming
        ``first_outcome_prob`` lies in [0, 1]).
    """
    # Only an explicit "no" flips the true "yes" probability to 0; any other
    # answer is handled as "yes", exactly as before.
    true_prob = 0.0 if market_row.currentAnswer == "no" else 1.0

    # For a binary market the "no" components mirror the "yes" ones, so the
    # distance on the first component alone is the whole answer.
    return float(abs(true_prob - market_row.first_outcome_prob) * 100.0)
192
+
193
+
194
  def compute_tokens_prob(token_amounts: list) -> list:
195
  first_token_amounts = token_amounts[0]
196
  second_token_amounts = token_amounts[1]
 
261
  closed_markets["kl_divergence"] = closed_markets.apply(
262
  lambda x: market_KL_divergence(x), axis=1
263
  )
264
+ closed_markets["off_by_perc"] = closed_markets.apply(
265
+ lambda x: off_by_values(x), axis=1
266
+ )
267
  closed_markets.to_parquet(DATA_DIR / "closed_markets_div.parquet", index=False)
268
  print("Finished preparing final dataset for visualization")
269
  print(closed_markets.head())
tabs/market_plots.py CHANGED
@@ -1,9 +1,8 @@
1
  import pandas as pd
2
  import gradio as gr
3
- import matplotlib.pyplot as plt
4
- import seaborn as sns
5
- from typing import Tuple
6
  import plotly.express as px
 
 
7
 
8
 
9
  def plot_kl_div_per_market(closed_markets: pd.DataFrame) -> gr.Plot:
@@ -35,3 +34,87 @@ def plot_kl_div_per_market(closed_markets: pd.DataFrame) -> gr.Plot:
35
  return gr.Plot(
36
  value=fig,
37
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import pandas as pd
2
  import gradio as gr
 
 
 
3
  import plotly.express as px
4
+ import plotly.graph_objects as go
5
+ from plotly.subplots import make_subplots
6
 
7
 
8
  def plot_kl_div_per_market(closed_markets: pd.DataFrame) -> gr.Plot:
 
34
  return gr.Plot(
35
  value=fig,
36
  )
37
+
38
+
39
def plot_kl_div_per_market2(closed_markets: pd.DataFrame) -> gr.Plot:
    """Build grouped box plots of KL divergence and off-by percentage per week.

    Every market is plotted twice: once under its own ``market_creator`` and
    once under the synthetic creator ``"all"``. For each of the creators
    "pearl", "quickstart" and "all", two box traces are added against
    ``month_year_week``: ``kl_divergence`` on the left y-axis and
    ``off_by_perc`` on the right (secondary) y-axis.

    Args:
        closed_markets: Table with at least the columns ``market_creator``,
            ``opening_datetime``, ``month_year_week``, ``kl_divergence`` and
            ``off_by_perc``. It is not modified.

    Returns:
        A ``gr.Plot`` wrapping the resulting plotly figure.
    """
    # Box colours per creator: darker shades for KL divergence, lighter
    # shades for the off-by percentage.
    kl_palette = {
        "pearl": "purple",
        "quickstart": "goldenrod",
        "all": "darkgreen",
    }
    off_by_palette = {
        "pearl": "pink",
        "quickstart": "yellow",
        "all": "lightgreen",
    }

    # Duplicate the data under the "all" creator so the aggregate is drawn
    # as a third group next to the individual creators.
    totals = closed_markets.copy(deep=True)
    totals["market_creator"] = "all"
    combined = pd.concat([closed_markets, totals], ignore_index=True)
    combined = combined.sort_values(by="opening_datetime", ascending=True)

    # One plot area with a secondary y-axis for the off-by percentage.
    figure = make_subplots(specs=[[{"secondary_y": True}]])

    for creator in ("pearl", "quickstart", "all"):
        creator_rows = combined[combined["market_creator"] == creator]
        weeks = creator_rows["month_year_week"]

        kl_box = go.Box(
            x=weeks,
            y=creator_rows["kl_divergence"],
            name=f"{creator} kl-div",
            boxmean=True,
            marker_color=kl_palette[creator],
        )
        figure.add_trace(kl_box, secondary_y=False)

        off_by_box = go.Box(
            x=weeks,
            y=creator_rows["off_by_perc"],
            name=f"{creator} off by",
            boxmean=True,
            marker_color=off_by_palette[creator],
        )
        figure.add_trace(off_by_box, secondary_y=True)

    figure.update_layout(
        xaxis_title="Markets closing Week",
        legend=dict(yanchor="top", y=0.5, xanchor="left", x=-0.5),
        boxmode="group",
        width=1000,  # Adjusted for better fit on laptop screens
        height=600,  # Adjusted for better fit on laptop screens
    )

    # Left axis: KL divergence; right axis: off-by percentage.
    figure.update_yaxes(
        title_text="Kullback–Leibler divergence", secondary_y=False, side="left"
    )
    figure.update_yaxes(
        title_text="Off by percentage between probabilities",
        secondary_y=True,
        side="right",
    )

    figure.update_xaxes(tickformat="%b %d\n%Y")

    return gr.Plot(value=figure)