cyberosa
committed on
Commit
·
12536a4
1
Parent(s):
dc11fb3
Adjusting divergence graph
Browse files- app.py +4 -11
- data/closed_markets_div.parquet +2 -2
- scripts/closed_markets_divergence.py +17 -0
- tabs/market_plots.py +86 -3
app.py
CHANGED
@@ -20,7 +20,7 @@ from tabs.trader_plots import (
|
|
20 |
get_interpretation_text,
|
21 |
)
|
22 |
|
23 |
-
from tabs.market_plots import plot_kl_div_per_market
|
24 |
|
25 |
|
26 |
def get_logger():
|
@@ -194,19 +194,12 @@ with demo:
|
|
194 |
)
|
195 |
with gr.Row():
|
196 |
gr.Markdown(
|
197 |
-
"
|
198 |
)
|
199 |
with gr.Row():
|
200 |
-
|
201 |
-
"There are some edge cases in the Kullback-Leibler divergence formula that can lead to extreme or infinite values, particularly when the predicted probabilities are very different from the actual outcome. To handle these cases, we capped the maximum divergence value at 20. A high divergence value (close to or at 20) indicates markets where the predicted probabilities were significantly different from the real-world outcome, potentially even opposite to what actually occurred."
|
202 |
-
)
|
203 |
with gr.Row():
|
204 |
-
|
205 |
-
with gr.Row():
|
206 |
-
with gr.Column(scale=2):
|
207 |
-
kl_div_plot = plot_kl_div_per_market(closed_markets=closed_markets)
|
208 |
-
with gr.Column(scale=1):
|
209 |
-
interpretation_text = get_interpretation_text()
|
210 |
|
211 |
with gr.TabItem("🎖️Weekly winning trades % per trader"):
|
212 |
with gr.Row():
|
|
|
20 |
get_interpretation_text,
|
21 |
)
|
22 |
|
23 |
+
from tabs.market_plots import plot_kl_div_per_market, plot_kl_div_per_market2
|
24 |
|
25 |
|
26 |
def get_logger():
|
|
|
194 |
)
|
195 |
with gr.Row():
|
196 |
gr.Markdown(
|
197 |
+
"Aka, how much off is the market prediction’s accuracy from the real outcome of the event. Values capped at 20 for market outcomes completely opposite to the real outcome."
|
198 |
)
|
199 |
with gr.Row():
|
200 |
+
trade_details_text = get_metrics_text()
|
|
|
|
|
201 |
with gr.Row():
|
202 |
+
kl_div_plot = plot_kl_div_per_market2(closed_markets=closed_markets)
|
|
|
|
|
|
|
|
|
|
|
203 |
|
204 |
with gr.TabItem("🎖️Weekly winning trades % per trader"):
|
205 |
with gr.Row():
|
data/closed_markets_div.parquet
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1e7aeecfea8085d6b11fe66ef37ddee2b5f35b88a8503230b829c4b4501d1de
|
3 |
+
size 52381
|
scripts/closed_markets_divergence.py
CHANGED
@@ -177,6 +177,20 @@ def market_KL_divergence(market_row: pd.DataFrame) -> float:
|
|
177 |
return kl_divergence(P, Q)
|
178 |
|
179 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
def compute_tokens_prob(token_amounts: list) -> list:
|
181 |
first_token_amounts = token_amounts[0]
|
182 |
second_token_amounts = token_amounts[1]
|
@@ -247,6 +261,9 @@ def prepare_closed_markets_data():
|
|
247 |
closed_markets["kl_divergence"] = closed_markets.apply(
|
248 |
lambda x: market_KL_divergence(x), axis=1
|
249 |
)
|
|
|
|
|
|
|
250 |
closed_markets.to_parquet(DATA_DIR / "closed_markets_div.parquet", index=False)
|
251 |
print("Finished preparing final dataset for visualization")
|
252 |
print(closed_markets.head())
|
|
|
177 |
return kl_divergence(P, Q)
|
178 |
|
179 |
|
180 |
+
def off_by_values(market_row: pd.Series) -> float:
    """Return how far the market's final probability was from the real outcome.

    The result is the absolute gap between the true probability of the
    first outcome (1.0 or 0.0) and the market's final probability for that
    outcome, expressed in percentage points (0.0 - 100.0 for probabilities
    inside [0, 1]).

    Args:
        market_row: A single market record exposing ``currentAnswer`` and
            ``first_outcome_prob`` as attributes. When invoked through
            ``DataFrame.apply(..., axis=1)`` this is a row ``Series``.

    Returns:
        ``abs(true_prob - first_outcome_prob) * 100.0``.
    """
    # Only the literal answer "no" maps to a true probability of 0.0 for the
    # first outcome; every other value is scored as a "yes" outcome (1.0).
    # NOTE(review): assumes currentAnswer is always lower-case "yes"/"no" -
    # confirm against the data producer.
    true_prob = 0.0 if market_row.currentAnswer == "no" else 1.0

    # There is a single sample per market: the final token-based probability.
    approx_prob = market_row.first_outcome_prob

    return abs(true_prob - approx_prob) * 100.0
|
192 |
+
|
193 |
+
|
194 |
def compute_tokens_prob(token_amounts: list) -> list:
|
195 |
first_token_amounts = token_amounts[0]
|
196 |
second_token_amounts = token_amounts[1]
|
|
|
261 |
closed_markets["kl_divergence"] = closed_markets.apply(
|
262 |
lambda x: market_KL_divergence(x), axis=1
|
263 |
)
|
264 |
+
closed_markets["off_by_perc"] = closed_markets.apply(
|
265 |
+
lambda x: off_by_values(x), axis=1
|
266 |
+
)
|
267 |
closed_markets.to_parquet(DATA_DIR / "closed_markets_div.parquet", index=False)
|
268 |
print("Finished preparing final dataset for visualization")
|
269 |
print(closed_markets.head())
|
tabs/market_plots.py
CHANGED
@@ -1,9 +1,8 @@
|
|
1 |
import pandas as pd
|
2 |
import gradio as gr
|
3 |
-
import matplotlib.pyplot as plt
|
4 |
-
import seaborn as sns
|
5 |
-
from typing import Tuple
|
6 |
import plotly.express as px
|
|
|
|
|
7 |
|
8 |
|
9 |
def plot_kl_div_per_market(closed_markets: pd.DataFrame) -> gr.Plot:
|
@@ -35,3 +34,87 @@ def plot_kl_div_per_market(closed_markets: pd.DataFrame) -> gr.Plot:
|
|
35 |
return gr.Plot(
|
36 |
value=fig,
|
37 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import pandas as pd
|
2 |
import gradio as gr
|
|
|
|
|
|
|
3 |
import plotly.express as px
|
4 |
+
import plotly.graph_objects as go
|
5 |
+
from plotly.subplots import make_subplots
|
6 |
|
7 |
|
8 |
def plot_kl_div_per_market(closed_markets: pd.DataFrame) -> gr.Plot:
|
|
|
34 |
return gr.Plot(
|
35 |
value=fig,
|
36 |
)
|
37 |
+
|
38 |
+
|
39 |
+
def plot_kl_div_per_market2(closed_markets: pd.DataFrame) -> gr.Plot:
    """Build a grouped box plot of both divergence metrics per week.

    For each market creator ("pearl", "quickstart") and for the pooled set
    of all markets ("all"), two box traces are drawn against
    ``month_year_week``: ``kl_divergence`` on the primary (left) y-axis and
    ``off_by_perc`` on the secondary (right) y-axis.

    Args:
        closed_markets: Frame providing the columns ``market_creator``,
            ``opening_datetime``, ``month_year_week``, ``kl_divergence`` and
            ``off_by_perc``. It is not modified.

    Returns:
        A ``gr.Plot`` wrapping the plotly figure.
    """
    # Box colours per creator, one palette per metric.
    kl_div_colors = {
        "pearl": "purple",
        "quickstart": "goldenrod",
        "all": "darkgreen",
    }
    off_by_colors = {
        "pearl": "pink",
        "quickstart": "yellow",
        "all": "lightgreen",
    }

    # Duplicate every market under the synthetic creator "all" so the pooled
    # distribution is plotted next to the per-creator ones.
    pooled = closed_markets.copy(deep=True)
    pooled["market_creator"] = "all"
    final_markets = pd.concat([closed_markets, pooled], ignore_index=True)
    final_markets = final_markets.sort_values(by="opening_datetime", ascending=True)

    # Single subplot with a secondary y-axis for the second metric.
    fig = make_subplots(specs=[[{"secondary_y": True}]])

    for creator in ["pearl", "quickstart", "all"]:
        # Filter once per creator; both traces share the same rows.
        subset = final_markets[final_markets["market_creator"] == creator]
        weeks = subset["month_year_week"]

        kl_div_box = go.Box(
            x=weeks,
            y=subset["kl_divergence"],
            name=f"{creator} kl-div",
            boxmean=True,
            marker_color=kl_div_colors[creator],
        )
        fig.add_trace(kl_div_box, secondary_y=False)

        off_by_box = go.Box(
            x=weeks,
            y=subset["off_by_perc"],
            name=f"{creator} off by",
            boxmean=True,
            marker_color=off_by_colors[creator],
        )
        fig.add_trace(off_by_box, secondary_y=True)

    fig.update_layout(
        xaxis_title="Markets closing Week",
        legend=dict(yanchor="top", y=0.5, xanchor="left", x=-0.5),
        boxmode="group",
        width=1000,  # Adjusted for better fit on laptop screens
        height=600,  # Adjusted for better fit on laptop screens
    )

    # No explicit range is set on either y-axis.
    fig.update_yaxes(
        title_text="Kullback–Leibler divergence", secondary_y=False, side="left"
    )
    fig.update_yaxes(
        title_text="Off by percentage between probabilities",
        secondary_y=True,
        side="right",
    )

    fig.update_xaxes(tickformat="%b %d\n%Y")

    return gr.Plot(value=fig)
|