cyberosa committed on
Commit
1c9dfec
·
1 Parent(s): ee5e1cf

corrections due to week format issues

Browse files
app.py CHANGED
@@ -129,28 +129,38 @@ def prepare_data():
129
  traders_data["trader_family"] = traders_data.apply(
130
  lambda x: get_traders_family(x), axis=1
131
  )
132
- print(traders_data.head())
133
 
134
  traders_data = traders_data.sort_values(by="creation_timestamp", ascending=True)
135
  unknown_traders = unknown_traders.sort_values(
136
  by="creation_timestamp", ascending=True
137
  )
138
  traders_data["month_year_week"] = (
139
- traders_data["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d-%Y")
 
 
140
  )
141
  unknown_traders["month_year_week"] = (
142
- unknown_traders["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d-%Y")
 
 
143
  )
144
  closed_markets["month_year_week"] = (
145
- closed_markets["opening_datetime"].dt.to_period("W").dt.strftime("%b-%d-%Y")
 
 
146
  )
147
  return traders_data, closed_markets, daily_info, unknown_traders, retention_df
148
 
149
 
150
- traders_data, closed_markets, daily_info, unknown_traders, retention_df = prepare_data()
 
 
151
  retention_df = prepare_retention_dataset(
152
- retention_df=retention_df, unknown_df=unknown_traders
153
  )
 
 
154
 
155
  demo = gr.Blocks()
156
  # get weekly metrics by market creator: qs, pearl or all.
@@ -443,6 +453,10 @@ with demo:
443
 
444
  with gr.Row():
445
  gr.Markdown("# Cohort retention in pearl traders")
 
 
 
 
446
  with gr.Row():
447
  with gr.Column(scale=1):
448
  gr.Markdown("## Cohort retention of 🌊 Olas traders")
 
129
  traders_data["trader_family"] = traders_data.apply(
130
  lambda x: get_traders_family(x), axis=1
131
  )
132
+ # print(traders_data.head())
133
 
134
  traders_data = traders_data.sort_values(by="creation_timestamp", ascending=True)
135
  unknown_traders = unknown_traders.sort_values(
136
  by="creation_timestamp", ascending=True
137
  )
138
  traders_data["month_year_week"] = (
139
+ traders_data["creation_timestamp"]
140
+ .dt.to_period("W")
141
+ .dt.start_time.dt.strftime("%b-%d-%Y")
142
  )
143
  unknown_traders["month_year_week"] = (
144
+ unknown_traders["creation_timestamp"]
145
+ .dt.to_period("W")
146
+ .dt.start_time.dt.strftime("%b-%d-%Y")
147
  )
148
  closed_markets["month_year_week"] = (
149
+ closed_markets["opening_datetime"]
150
+ .dt.to_period("W")
151
+ .dt.start_time.dt.strftime("%b-%d-%Y")
152
  )
153
  return traders_data, closed_markets, daily_info, unknown_traders, retention_df
154
 
155
 
156
+ traders_data, closed_markets, daily_info, unknown_traders, raw_retention_df = (
157
+ prepare_data()
158
+ )
159
  retention_df = prepare_retention_dataset(
160
+ retention_df=raw_retention_df, unknown_df=unknown_traders
161
  )
162
+ print("max date of retention df")
163
+ print(max(retention_df.creation_timestamp))
164
 
165
  demo = gr.Blocks()
166
  # get weekly metrics by market creator: qs, pearl or all.
 
453
 
454
  with gr.Row():
455
  gr.Markdown("# Cohort retention in pearl traders")
456
+ with gr.Row():
457
+ gr.Markdown(
458
+ "The Cohort groups are organized by cohort weeks. A trader is part of a cohort group/week where it was detected the FIRST activity ever of that trader."
459
+ )
460
  with gr.Row():
461
  with gr.Column(scale=1):
462
  gr.Markdown("## Cohort retention of 🌊 Olas traders")
data/weekly_mech_calls.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39b4cf253e9fffc20d584d25410efb8428612413a9f002ba0314216dc56b0ca2
3
- size 54927
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:012e218c54998d9070cc6cbd301ef17188708c37653ec22a6fbff3634d04293d
3
+ size 54922
notebooks/retention_metrics.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
scripts/num_mech_calls.py CHANGED
@@ -60,7 +60,9 @@ def compute_total_mech_calls():
60
  tools["request_date"] = tools["request_time"].dt.date
61
  tools = tools.sort_values(by="request_time", ascending=True)
62
  tools["month_year_week"] = (
63
- tools["request_time"].dt.to_period("W").dt.strftime("%b-%d-%Y")
 
 
64
  )
65
 
66
  except Exception as e:
@@ -79,7 +81,9 @@ def compute_total_mech_calls():
79
  fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
80
  fpmmTrades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)
81
  fpmmTrades["month_year_week"] = (
82
- fpmmTrades["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d-%Y")
 
 
83
  )
84
 
85
  nr_traders = len(fpmmTrades["trader_address"].unique())
 
60
  tools["request_date"] = tools["request_time"].dt.date
61
  tools = tools.sort_values(by="request_time", ascending=True)
62
  tools["month_year_week"] = (
63
+ tools["request_time"]
64
+ .dt.to_period("W")
65
+ .dt.start_time.dt.strftime("%b-%d-%Y")
66
  )
67
 
68
  except Exception as e:
 
81
  fpmmTrades["creation_date"] = fpmmTrades["creation_timestamp"].dt.date
82
  fpmmTrades = fpmmTrades.sort_values(by="creation_timestamp", ascending=True)
83
  fpmmTrades["month_year_week"] = (
84
+ fpmmTrades["creation_timestamp"]
85
+ .dt.to_period("W")
86
+ .dt.start_time.dt.strftime("%b-%d-%Y")
87
  )
88
 
89
  nr_traders = len(fpmmTrades["trader_address"].unique())
scripts/retention_metrics.py CHANGED
@@ -10,12 +10,21 @@ def calculate_wow_retention_by_type(
10
  filtered_df = df.loc[df["market_creator"] == market_creator]
11
  # Get unique traders per week and type
12
  weekly_traders = (
13
- filtered_df.groupby(["month_year_week", "trader_type"])["trader_address"]
 
 
14
  .nunique()
15
  .reset_index()
16
  )
17
- weekly_traders = weekly_traders.sort_values(["trader_type", "month_year_week"])
 
 
 
 
 
18
 
 
 
19
  # Calculate retention
20
  retention = []
21
  # Iterate through each trader type
@@ -25,7 +34,20 @@ def calculate_wow_retention_by_type(
25
  # Calculate retention for each week within this trader type
26
  for i in range(1, len(type_data)):
27
  current_week = type_data.iloc[i]["month_year_week"]
28
- previous_week = type_data.iloc[i - 1]["month_year_week"]
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
  # Get traders in both weeks for this type
31
  current_traders = set(
@@ -62,7 +84,23 @@ def calculate_wow_retention_by_type(
62
  return pd.DataFrame(retention)
63
 
64
 
65
- # Cohort Retention
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  def calculate_cohort_retention(
67
  df: pd.DataFrame, market_creator: str, trader_type: str
68
  ) -> pd.DataFrame:
@@ -70,7 +108,7 @@ def calculate_cohort_retention(
70
  (df["market_creator"] == market_creator) & (df["trader_type"] == trader_type)
71
  ]
72
  df_filtered = df_filtered.sort_values(by="creation_timestamp", ascending=True)
73
- # Get first week for each trader
74
  first_activity = (
75
  df_filtered.groupby("trader_address")
76
  .agg({"creation_timestamp": "min", "month_year_week": "first"})
@@ -94,54 +132,82 @@ def calculate_cohort_retention(
94
  on="trader_address",
95
  )
96
 
97
- # Calculate week number since first activity
98
- cohort_data["cohort_number"] = cohort_data["cohort_week"].map(week_to_number)
99
- cohort_data["activity_number"] = cohort_data["month_year_week"].map(week_to_number)
100
- cohort_data["week_number"] = (
101
- cohort_data["activity_number"] - cohort_data["cohort_number"]
102
- )
103
-
104
- # Calculate retention by cohort
105
- cohort_sizes = cohort_data.groupby("cohort_week")["trader_address"].nunique()
106
- retention_matrix = cohort_data.groupby(["cohort_week", "week_number"])[
107
- "trader_address"
108
- ].nunique()
109
- retention_matrix = retention_matrix.unstack(fill_value=0)
110
 
111
- # Convert to percentages
112
- retention_matrix = retention_matrix.div(cohort_sizes, axis=0) * 100
 
113
 
114
- # Sort index (cohort_week) chronologically
115
- retention_matrix.index = pd.to_datetime(retention_matrix.index)
116
- retention_matrix = retention_matrix.sort_index()
117
-
118
- return retention_matrix.round(2)
 
 
 
 
 
 
 
 
119
 
 
 
 
 
 
120
 
121
- def merge_retention_dataset(
122
- traders_df: pd.DataFrame, unknown_df: pd.DataFrame
123
- ) -> pd.DataFrame:
 
 
 
 
 
 
 
 
 
 
 
124
 
125
- traders_df["trader_type"] = traders_df["staking"].apply(
126
- lambda x: "non_Olas" if x == "non_Olas" else "Olas"
127
- )
128
- unknown_df["trader_type"] = "unclassified"
129
- all_traders = pd.concat([traders_df, unknown_df], ignore_index=True)
 
 
 
 
 
 
 
 
 
130
 
131
- all_traders["creation_timestamp"] = pd.to_datetime(
132
- all_traders["creation_timestamp"]
133
- )
134
- all_traders = all_traders.sort_values(by="creation_timestamp", ascending=True)
135
- all_traders["month_year_week"] = (
136
- all_traders["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d-%Y")
137
- )
138
- return all_traders
139
 
140
 
141
  def prepare_retention_dataset(
142
  retention_df: pd.DataFrame, unknown_df: pd.DataFrame
143
  ) -> pd.DataFrame:
144
-
145
  retention_df["trader_type"] = retention_df["staking"].apply(
146
  lambda x: "non_Olas" if x == "non_Olas" else "Olas"
147
  )
@@ -158,11 +224,28 @@ def prepare_retention_dataset(
158
  all_traders["creation_timestamp"] = pd.to_datetime(
159
  all_traders["creation_timestamp"]
160
  )
 
161
  all_traders = all_traders.sort_values(by="creation_timestamp", ascending=True)
162
- all_traders["month_year_week"] = (
163
- all_traders["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d-%Y")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  )
165
- return all_traders
 
 
166
 
167
 
168
  if __name__ == "__main__":
 
10
  filtered_df = df.loc[df["market_creator"] == market_creator]
11
  # Get unique traders per week and type
12
  weekly_traders = (
13
+ filtered_df.groupby(["month_year_week", "trader_type"], sort=False)[
14
+ "trader_address"
15
+ ]
16
  .nunique()
17
  .reset_index()
18
  )
19
+ # weekly_traders = weekly_traders.sort_values(['trader_type', 'month_year_week'])
20
+ # Get ordered list of unique weeks - converting to datetime for proper sorting
21
+ all_weeks = filtered_df["month_year_week"].unique()
22
+ weeks_datetime = pd.to_datetime(all_weeks)
23
+ sorted_weeks_idx = weeks_datetime.argsort()
24
+ all_weeks = all_weeks[sorted_weeks_idx]
25
 
26
+ # Create mapping from week string to numeric index
27
+ week_to_number = {week: idx for idx, week in enumerate(all_weeks)}
28
  # Calculate retention
29
  retention = []
30
  # Iterate through each trader type
 
34
  # Calculate retention for each week within this trader type
35
  for i in range(1, len(type_data)):
36
  current_week = type_data.iloc[i]["month_year_week"]
37
+ # print(f"current_week={current_week}")
38
+ week_number = week_to_number[current_week]
39
+ if week_to_number == 0:
40
+ # no previous week info
41
+ continue
42
+ previous_week_number = week_number - 1
43
+ # this should give only one value
44
+ previous_week = [
45
+ key
46
+ for key in week_to_number.keys()
47
+ if week_to_number[key] == previous_week_number
48
+ ][0]
49
+ # previous_week = type_data.iloc[i-1]['month_year_week']
50
+ # print(f"previous week = {previous_week}")
51
 
52
  # Get traders in both weeks for this type
53
  current_traders = set(
 
84
  return pd.DataFrame(retention)
85
 
86
 
87
def create_retention_matrix(cohort_retention_df: pd.DataFrame) -> pd.DataFrame:
    """Pivot long-format cohort retention rows into a cohort-by-week matrix.

    Args:
        cohort_retention_df: One row per (cohort week, weeks elapsed) pair,
            with the columns "cohort_week", "weeks_since_cohort" and
            "retention_rate".

    Returns:
        A DataFrame indexed by cohort week (parsed to datetime and sorted
        ascending), with one column per "weeks_since_cohort" value and the
        matching "retention_rate" as cell values. Combinations that are not
        present in the input are NaN. An empty input yields an empty matrix.
    """
    # A frame built from an empty list of records has no columns at all, so
    # pivot() would raise KeyError; return an empty matrix of the same layout.
    if cohort_retention_df.empty:
        return pd.DataFrame(
            index=pd.DatetimeIndex([], name="cohort_week"),
            columns=pd.Index([], name="weeks_since_cohort"),
        )

    # Pivot the data to create the retention matrix
    retention_matrix = cohort_retention_df.pivot(
        index="cohort_week", columns="weeks_since_cohort", values="retention_rate"
    )

    # The week labels are strings; sorting them as text would be alphabetical
    # by month name, so convert to datetime before ordering chronologically.
    retention_matrix.index = pd.to_datetime(retention_matrix.index)
    return retention_matrix.sort_index()
101
+
102
+
103
+ # Week-over-week (WoW) retention at the cohort level
104
  def calculate_cohort_retention(
105
  df: pd.DataFrame, market_creator: str, trader_type: str
106
  ) -> pd.DataFrame:
 
108
  (df["market_creator"] == market_creator) & (df["trader_type"] == trader_type)
109
  ]
110
  df_filtered = df_filtered.sort_values(by="creation_timestamp", ascending=True)
111
+ # Get first week of activity for each trader
112
  first_activity = (
113
  df_filtered.groupby("trader_address")
114
  .agg({"creation_timestamp": "min", "month_year_week": "first"})
 
132
  on="trader_address",
133
  )
134
 
135
+ # Get all unique weeks and cohorts
136
+ all_cohorts = cohort_data["cohort_week"].unique()
137
+ # print(f"all cohorts = {all_cohorts}")
138
+ retention_data = []
139
+ max_weeks = 8
140
+ for cohort in all_cohorts:
141
+ # print(f"analyzing cohort {cohort}")
142
+ # Get all traders in this cohort
143
+ cohort_traders = set(
144
+ cohort_data[cohort_data["cohort_week"] == cohort]["trader_address"]
145
+ )
146
+ cohort_size = len(cohort_traders)
147
+ # print(f"cohort size = {cohort_size}")
148
 
149
+ if cohort_size == 0:
150
+ print(f"NO new traders for cohort week={cohort}")
151
+ continue
152
 
153
+ # Calculate retention for each week after the cohort week
154
+ for week_idx, week in enumerate(all_weeks):
155
+ # print(f"Analyzing week = {week}")
156
+ weeks_since_cohort = week_idx - week_to_number[cohort]
157
+ if weeks_since_cohort < 0 or weeks_since_cohort > 8:
158
+ continue
159
+ # Get active traders from the cohort in current week
160
+ current_traders = set(
161
+ cohort_data[
162
+ (cohort_data["cohort_week"] == cohort)
163
+ & (cohort_data["month_year_week"] == week)
164
+ ]["trader_address"]
165
+ )
166
 
167
+ # Get active traders from the cohort in previous week
168
+ if week == cohort:
169
+ # For the first week, retention is 100% by definition
170
+ retained = len(current_traders)
171
+ retention_rate = 100 if len(current_traders) > 0 else 0
172
 
173
+ elif week_idx > 0:
174
+ previous_week = all_weeks[week_idx - 1]
175
+ previous_traders = set(
176
+ cohort_data[
177
+ (cohort_data["cohort_week"] == cohort)
178
+ & (cohort_data["month_year_week"] == previous_week)
179
+ ]["trader_address"]
180
+ )
181
+ retained = len(current_traders.intersection(previous_traders))
182
+ retention_rate = (
183
+ (retained / len(previous_traders)) * 100
184
+ if len(previous_traders) > 0
185
+ else 0
186
+ )
187
 
188
+ retention_data.append(
189
+ {
190
+ "cohort_week": cohort,
191
+ "week": week,
192
+ "weeks_since_cohort": weeks_since_cohort,
193
+ "cohort_size": cohort_size,
194
+ "active_traders": len(current_traders),
195
+ "retained_traders": retained,
196
+ "previous_traders": (
197
+ len(previous_traders) if week_idx > 0 else cohort_size
198
+ ),
199
+ "retention_rate": round(retention_rate, 2),
200
+ }
201
+ )
202
 
203
+ retention_matrix = create_retention_matrix(pd.DataFrame(retention_data))
204
+ return retention_matrix
 
 
 
 
 
 
205
 
206
 
207
  def prepare_retention_dataset(
208
  retention_df: pd.DataFrame, unknown_df: pd.DataFrame
209
  ) -> pd.DataFrame:
210
+ print("Preparing retention dataset")
211
  retention_df["trader_type"] = retention_df["staking"].apply(
212
  lambda x: "non_Olas" if x == "non_Olas" else "Olas"
213
  )
 
224
  all_traders["creation_timestamp"] = pd.to_datetime(
225
  all_traders["creation_timestamp"]
226
  )
227
+
228
  all_traders = all_traders.sort_values(by="creation_timestamp", ascending=True)
229
+
230
+ # Remove data from current week and onwards
231
+ now = datetime.now()
232
+
233
+ # Get start of the current week (Monday)
234
+ start_of_week = now - timedelta(days=(now.weekday()))
235
+ start_of_week = start_of_week.replace(hour=0, minute=0, second=0, microsecond=0)
236
+
237
+ all_traders["creation_date"] = all_traders["creation_timestamp"].dt.date
238
+ all_traders["creation_date"] = pd.to_datetime(all_traders["creation_date"])
239
+ # Filter the dataframe
240
+ filtered_traders = all_traders[all_traders["creation_date"] < start_of_week]
241
+ filtered_traders["month_year_week"] = (
242
+ filtered_traders["creation_timestamp"]
243
+ .dt.to_period("W")
244
+ .dt.start_time.dt.strftime("%b-%d-%Y")
245
  )
246
+
247
+ print(filtered_traders.month_year_week.unique())
248
+ return filtered_traders
249
 
250
 
251
  if __name__ == "__main__":
tabs/daily_graphs.py CHANGED
@@ -19,15 +19,6 @@ color_mapping = [
19
  ]
20
 
21
 
22
- def plot_daily_trades(trades_df: pd.DataFrame) -> gr.Plot:
23
- # get daily trades
24
- daily_trades_count = (
25
- trades_df.groupby("month_year_week").size().reset_index(name="trades")
26
- )
27
- daily_trades_count.columns = daily_trades_count.columns.astype(str)
28
- print("WIP")
29
-
30
-
31
  def get_current_week_data(trades_df: pd.DataFrame) -> pd.DataFrame:
32
  # Get current date
33
  now = datetime.now()
@@ -35,12 +26,12 @@ def get_current_week_data(trades_df: pd.DataFrame) -> pd.DataFrame:
35
  # Get start of the current week (Monday)
36
  start_of_week = now - timedelta(days=now.weekday())
37
  start_of_week = start_of_week.replace(hour=0, minute=0, second=0, microsecond=0)
38
- print(f"start of the week = {start_of_week}")
39
 
40
  # Get end of the current week (Sunday)
41
  end_of_week = start_of_week + timedelta(days=6)
42
  end_of_week = end_of_week.replace(hour=23, minute=59, second=59, microsecond=999999)
43
- print(f"end of the week = {end_of_week}")
44
  trades_df["creation_date"] = pd.to_datetime(trades_df["creation_date"])
45
  # Filter the dataframe
46
  return trades_df[
 
19
  ]
20
 
21
 
 
 
 
 
 
 
 
 
 
22
  def get_current_week_data(trades_df: pd.DataFrame) -> pd.DataFrame:
23
  # Get current date
24
  now = datetime.now()
 
26
  # Get start of the current week (Monday)
27
  start_of_week = now - timedelta(days=now.weekday())
28
  start_of_week = start_of_week.replace(hour=0, minute=0, second=0, microsecond=0)
29
+ # print(f"start of the week = {start_of_week}")
30
 
31
  # Get end of the current week (Sunday)
32
  end_of_week = start_of_week + timedelta(days=6)
33
  end_of_week = end_of_week.replace(hour=23, minute=59, second=59, microsecond=999999)
34
+ # print(f"end of the week = {end_of_week}")
35
  trades_df["creation_date"] = pd.to_datetime(trades_df["creation_date"])
36
  # Filter the dataframe
37
  return trades_df[