cyberosa committed · Commit 1c9dfec
1 parent: ee5e1cf

corrections due to week format issues

Files changed:
- app.py +20 -6
- data/weekly_mech_calls.parquet +2 -2
- notebooks/retention_metrics.ipynb +0 -0
- scripts/num_mech_calls.py +6 -2
- scripts/retention_metrics.py +128 -45
- tabs/daily_graphs.py +2 -11
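All of the Python files below converge on the same `month_year_week` labelling. As a minimal standalone sketch (illustrative only, not code from this repo), the pattern maps each timestamp to its weekly period and then labels it with the period's start date:

import pandas as pd

# Two timestamps that fall in consecutive weeks.
ts = pd.Series(pd.to_datetime(["2024-11-06 10:00", "2024-11-12 08:30"]))

month_year_week = (
    ts
    .dt.to_period("W")             # weekly period containing each timestamp
    .dt.start_time                 # start of that week (a Monday for the default "W" anchor)
    .dt.strftime("%b-%d-%Y")       # label format used across the scripts
)
print(month_year_week.tolist())    # ['Nov-04-2024', 'Nov-11-2024']

Formatting the period's start_time, rather than the period itself, keeps every label anchored to the same weekday; the diffs below apply this consistently.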
app.py
CHANGED
@@ -129,28 +129,38 @@ def prepare_data():
     traders_data["trader_family"] = traders_data.apply(
         lambda x: get_traders_family(x), axis=1
     )
-    print(traders_data.head())
+    # print(traders_data.head())

     traders_data = traders_data.sort_values(by="creation_timestamp", ascending=True)
     unknown_traders = unknown_traders.sort_values(
         by="creation_timestamp", ascending=True
     )
     traders_data["month_year_week"] = (
-        traders_data["creation_timestamp"]
+        traders_data["creation_timestamp"]
+        .dt.to_period("W")
+        .dt.start_time.dt.strftime("%b-%d-%Y")
     )
     unknown_traders["month_year_week"] = (
-        unknown_traders["creation_timestamp"]
+        unknown_traders["creation_timestamp"]
+        .dt.to_period("W")
+        .dt.start_time.dt.strftime("%b-%d-%Y")
     )
     closed_markets["month_year_week"] = (
-        closed_markets["opening_datetime"]
+        closed_markets["opening_datetime"]
+        .dt.to_period("W")
+        .dt.start_time.dt.strftime("%b-%d-%Y")
     )
     return traders_data, closed_markets, daily_info, unknown_traders, retention_df


-traders_data, closed_markets, daily_info, unknown_traders,
+traders_data, closed_markets, daily_info, unknown_traders, raw_retention_df = (
+    prepare_data()
+)
 retention_df = prepare_retention_dataset(
-    retention_df=
+    retention_df=raw_retention_df, unknown_df=unknown_traders
 )
+print("max date of retention df")
+print(max(retention_df.creation_timestamp))

 demo = gr.Blocks()
 # get weekly metrics by market creator: qs, pearl or all.
@@ -443,6 +453,10 @@ with demo:

     with gr.Row():
         gr.Markdown("# Cohort retention in pearl traders")
+    with gr.Row():
+        gr.Markdown(
+            "The Cohort groups are organized by cohort weeks. A trader is part of a cohort group/week where it was detected the FIRST activity ever of that trader."
+        )
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("## Cohort retention of 🌊 Olas traders")
data/weekly_mech_calls.parquet
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:012e218c54998d9070cc6cbd301ef17188708c37653ec22a6fbff3634d04293d
+size 54922
notebooks/retention_metrics.ipynb
CHANGED
The diff for this file is too large to render.
scripts/num_mech_calls.py
CHANGED
@@ -60,7 +60,9 @@ def compute_total_mech_calls():
         tools["request_date"] = tools["request_time"].dt.date
         tools = tools.sort_values(by="request_time", ascending=True)
         tools["month_year_week"] = (
-            tools["request_time"]
+            tools["request_time"]
+            .dt.to_period("W")
+            .dt.start_time.dt.strftime("%b-%d-%Y")
         )

     except Exception as e:
@@ -79,7 +81,9 @@ def compute_total_mech_calls():
         fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
         fpmmTrades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)
         fpmmTrades["month_year_week"] = (
-            fpmmTrades["creation_timestamp"]
+            fpmmTrades["creation_timestamp"]
+            .dt.to_period("W")
+            .dt.start_time.dt.strftime("%b-%d-%Y")
         )

         nr_traders = len(fpmmTrades["trader_address"].unique())
scripts/retention_metrics.py
CHANGED
@@ -10,12 +10,21 @@ def calculate_wow_retention_by_type(
     filtered_df = df.loc[df["market_creator"] == market_creator]
     # Get unique traders per week and type
     weekly_traders = (
-        filtered_df.groupby(["month_year_week", "trader_type"])[
+        filtered_df.groupby(["month_year_week", "trader_type"], sort=False)[
+            "trader_address"
+        ]
         .nunique()
         .reset_index()
     )
-    weekly_traders = weekly_traders.sort_values([
+    # weekly_traders = weekly_traders.sort_values(['trader_type', 'month_year_week'])
+    # Get ordered list of unique weeks - converting to datetime for proper sorting
+    all_weeks = filtered_df["month_year_week"].unique()
+    weeks_datetime = pd.to_datetime(all_weeks)
+    sorted_weeks_idx = weeks_datetime.argsort()
+    all_weeks = all_weeks[sorted_weeks_idx]

+    # Create mapping from week string to numeric index
+    week_to_number = {week: idx for idx, week in enumerate(all_weeks)}
     # Calculate retention
     retention = []
     # Iterate through each trader type
@@ -25,7 +34,20 @@ def calculate_wow_retention_by_type(
         # Calculate retention for each week within this trader type
         for i in range(1, len(type_data)):
             current_week = type_data.iloc[i]["month_year_week"]
-
+            # print(f"current_week={current_week}")
+            week_number = week_to_number[current_week]
+            if week_to_number == 0:
+                # no previous week info
+                continue
+            previous_week_number = week_number - 1
+            # this should give only one value
+            previous_week = [
+                key
+                for key in week_to_number.keys()
+                if week_to_number[key] == previous_week_number
+            ][0]
+            # previous_week = type_data.iloc[i-1]['month_year_week']
+            # print(f"previous week = {previous_week}")

             # Get traders in both weeks for this type
             current_traders = set(
@@ -62,7 +84,23 @@ def calculate_wow_retention_by_type(
     return pd.DataFrame(retention)


-
+def create_retention_matrix(cohort_retention_df: pd.DataFrame) -> pd.DataFrame:
+    # Pivot the data to create the retention matrix
+    retention_matrix = cohort_retention_df.pivot(
+        index="cohort_week", columns="weeks_since_cohort", values="retention_rate"
+    )
+
+    # Sort index chronologically
+    retention_matrix.index = pd.to_datetime(retention_matrix.index)
+    retention_matrix = retention_matrix.sort_index()
+
+    # Rename columns to show week numbers
+    # retention_matrix.columns = [f"Week {i}" for i in retention_matrix.columns]
+
+    return retention_matrix
+
+
+# Wow Retention at the cohort level
 def calculate_cohort_retention(
     df: pd.DataFrame, market_creator: str, trader_type: str
 ) -> pd.DataFrame:
@@ -70,7 +108,7 @@ def calculate_cohort_retention(
         (df["market_creator"] == market_creator) & (df["trader_type"] == trader_type)
     ]
     df_filtered = df_filtered.sort_values(by="creation_timestamp", ascending=True)
-    # Get first week for each trader
+    # Get first week of activity for each trader
     first_activity = (
        df_filtered.groupby("trader_address")
        .agg({"creation_timestamp": "min", "month_year_week": "first"})
@@ -94,54 +132,82 @@ def calculate_cohort_retention(
         on="trader_address",
     )

-    #
-    [... removed lines of the old implementation not preserved in this view ...]
-    )
-    all_traders = all_traders.sort_values(by="creation_timestamp", ascending=True)
-    all_traders["month_year_week"] = (
-        all_traders["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d-%Y")
-    )
-    return all_traders
+    # Get all unique weeks and cohorts
+    all_cohorts = cohort_data["cohort_week"].unique()
+    # print(f"all cohorts = {all_cohorts}")
+    retention_data = []
+    max_weeks = 8
+    for cohort in all_cohorts:
+        # print(f"analyzing cohort {cohort}")
+        # Get all traders in this cohort
+        cohort_traders = set(
+            cohort_data[cohort_data["cohort_week"] == cohort]["trader_address"]
+        )
+        cohort_size = len(cohort_traders)
+        # print(f"cohort size = {cohort_size}")

+        if cohort_size == 0:
+            print(f"NO new traders for cohort week={cohort}")
+            continue

+        # Calculate retention for each week after the cohort week
+        for week_idx, week in enumerate(all_weeks):
+            # print(f"Analyzing week = {week}")
+            weeks_since_cohort = week_idx - week_to_number[cohort]
+            if weeks_since_cohort < 0 or weeks_since_cohort > 8:
+                continue
+            # Get active traders from the cohort in current week
+            current_traders = set(
+                cohort_data[
+                    (cohort_data["cohort_week"] == cohort)
+                    & (cohort_data["month_year_week"] == week)
+                ]["trader_address"]
+            )

+            # Get active traders from the cohort in previous week
+            if week == cohort:
+                # For the first week, retention is 100% by definition
+                retained = len(current_traders)
+                retention_rate = 100 if len(current_traders) > 0 else 0

+            elif week_idx > 0:
+                previous_week = all_weeks[week_idx - 1]
+                previous_traders = set(
+                    cohort_data[
+                        (cohort_data["cohort_week"] == cohort)
+                        & (cohort_data["month_year_week"] == previous_week)
+                    ]["trader_address"]
+                )
+                retained = len(current_traders.intersection(previous_traders))
+                retention_rate = (
+                    (retained / len(previous_traders)) * 100
+                    if len(previous_traders) > 0
+                    else 0
+                )

+            retention_data.append(
+                {
+                    "cohort_week": cohort,
+                    "week": week,
+                    "weeks_since_cohort": weeks_since_cohort,
+                    "cohort_size": cohort_size,
+                    "active_traders": len(current_traders),
+                    "retained_traders": retained,
+                    "previous_traders": (
+                        len(previous_traders) if week_idx > 0 else cohort_size
+                    ),
+                    "retention_rate": round(retention_rate, 2),
+                }
+            )

+    retention_matrix = create_retention_matrix(pd.DataFrame(retention_data))
+    return retention_matrix


 def prepare_retention_dataset(
     retention_df: pd.DataFrame, unknown_df: pd.DataFrame
 ) -> pd.DataFrame:
-
+    print("Preparing retention dataset")
     retention_df["trader_type"] = retention_df["staking"].apply(
         lambda x: "non_Olas" if x == "non_Olas" else "Olas"
     )
@@ -158,11 +224,28 @@ def prepare_retention_dataset(
     all_traders["creation_timestamp"] = pd.to_datetime(
         all_traders["creation_timestamp"]
     )
+
     all_traders = all_traders.sort_values(by="creation_timestamp", ascending=True)
-
-
+
+    # Remove data from current week and onwards
+    now = datetime.now()
+
+    # Get start of the current week (Monday)
+    start_of_week = now - timedelta(days=(now.weekday()))
+    start_of_week = start_of_week.replace(hour=0, minute=0, second=0, microsecond=0)
+
+    all_traders["creation_date"] = all_traders["creation_timestamp"].dt.date
+    all_traders["creation_date"] = pd.to_datetime(all_traders["creation_date"])
+    # Filter the dataframe
+    filtered_traders = all_traders[all_traders["creation_date"] < start_of_week]
+    filtered_traders["month_year_week"] = (
+        filtered_traders["creation_timestamp"]
+        .dt.to_period("W")
+        .dt.start_time.dt.strftime("%b-%d-%Y")
     )
-
+
+    print(filtered_traders.month_year_week.unique())
+    return filtered_traders


 if __name__ == "__main__":
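For orientation, a toy sketch (made-up numbers, not repo data) of the pivot the new create_retention_matrix performs: one row per cohort week, one column per weeks_since_cohort value, with retention_rate as the cell value.

import pandas as pd

# Toy records in the shape produced by calculate_cohort_retention.
retention_data = pd.DataFrame(
    {
        "cohort_week": ["Nov-04-2024", "Nov-04-2024", "Nov-11-2024"],
        "weeks_since_cohort": [0, 1, 0],
        "retention_rate": [100.0, 42.5, 100.0],
    }
)

# Pivot into a cohort-by-week matrix and sort the cohort rows chronologically.
matrix = retention_data.pivot(
    index="cohort_week", columns="weeks_since_cohort", values="retention_rate"
)
matrix.index = pd.to_datetime(matrix.index)
matrix = matrix.sort_index()
print(matrix)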
tabs/daily_graphs.py
CHANGED
@@ -19,15 +19,6 @@ color_mapping = [
 ]


-def plot_daily_trades(trades_df: pd.DataFrame) -> gr.Plot:
-    # get daily trades
-    daily_trades_count = (
-        trades_df.groupby("month_year_week").size().reset_index(name="trades")
-    )
-    daily_trades_count.columns = daily_trades_count.columns.astype(str)
-    print("WIP")
-
-
 def get_current_week_data(trades_df: pd.DataFrame) -> pd.DataFrame:
     # Get current date
     now = datetime.now()
@@ -35,12 +26,12 @@ def get_current_week_data(trades_df: pd.DataFrame) -> pd.DataFrame:
     # Get start of the current week (Monday)
     start_of_week = now - timedelta(days=now.weekday())
     start_of_week = start_of_week.replace(hour=0, minute=0, second=0, microsecond=0)
-    print(f"start of the week = {start_of_week}")
+    # print(f"start of the week = {start_of_week}")

     # Get end of the current week (Sunday)
     end_of_week = start_of_week + timedelta(days=6)
     end_of_week = end_of_week.replace(hour=23, minute=59, second=59, microsecond=999999)
-    print(f"end of the week = {end_of_week}")
+    # print(f"end of the week = {end_of_week}")
     trades_df["creation_date"] = pd.to_datetime(trades_df["creation_date"])
     # Filter the dataframe
     return trades_df[
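get_current_week_data above and the new filtering in prepare_retention_dataset both anchor the week to Monday via weekday(). A small standalone sketch (toy data, not from the repo) of that boundary and the completed-weeks filter:

from datetime import datetime, timedelta

import pandas as pd

now = datetime.now()
# Monday 00:00 of the current week; weekday() is 0 for Monday.
start_of_week = (now - timedelta(days=now.weekday())).replace(
    hour=0, minute=0, second=0, microsecond=0
)
# Sunday 23:59:59.999999 of the same week.
end_of_week = (start_of_week + timedelta(days=6)).replace(
    hour=23, minute=59, second=59, microsecond=999999
)

# Toy frame: one old trade and one from today.
trades = pd.DataFrame(
    {"creation_date": pd.to_datetime(["2024-11-04", now.date().isoformat()])}
)
# Keep only rows from fully completed weeks, as the retention prep now does.
completed_weeks = trades[trades["creation_date"] < start_of_week]
print(start_of_week, end_of_week)
print(completed_weeks)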