cyberosa committed on
Commit
f26bf5c
·
1 Parent(s): 8a73d91

daily data of current week

Browse files
data/closed_markets_div.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6b6ff11a79e22ddcfcc3e24305ed6a0892e06888254b6360e89e2ad956529a1
3
- size 52009
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99b7ff8fafd742e4c7b4601adb95bd42cf560d6a81ac97819ea5748a6ba4b900
3
+ size 50378
data/unknown_daily_traders.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1480859c96b05ac94e54f2a69c0ad46d3b840c37111051bb2a0174aa60d90a73
3
- size 25003
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c74aad2a1150163d78233e9a54ffa262d195e43b68b9aba1e33536671075ef57
3
+ size 163247
data/unknown_traders.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3620eaba76778060f41059fb2b6ff6e92a6000eedfd9a9119b703f84cdda11ff
3
- size 194084
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9be58c1de361e7c9df25ae05c54b77f6a6417e58e19d5d6ef8bd37516da1f70e
3
+ size 198407
data/weekly_mech_calls.parquet CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04536d62dc403476e4b21032b86e781f38aab5c08d69ebc9ccc917e005b9af95
3
- size 50976
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a91eccf4392fc1cbdbb911fa2163a5aff3f4483692ab059083492047c3a4f55a
3
+ size 50574
scripts/wow_retentions.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from datetime import datetime, timedelta
3
+ from utils import DATA_DIR
4
+
5
+
6
+ # Basic Week over Week Retention
7
def calculate_wow_retention(
    traders_df: pd.DataFrame, trader_filter: str
) -> pd.DataFrame:
    """Compute the week-over-week retention of traders.

    For every pair of consecutive weeks found in the filtered data, the
    retention rate is the percentage of the previous week's distinct traders
    that also appear in the current week.

    Args:
        traders_df: Frame with at least the columns ``staking``,
            ``month_year_week`` and ``trader_address``.
        trader_filter: ``"Olas"`` keeps rows whose ``staking`` value is not
            ``"non_Olas"``; ``"non_Olas"`` keeps rows whose ``staking`` value
            is ``"non_Olas"``. Any other value is not implemented.

    Returns:
        A frame with one row per week (except the first one) and the columns
        ``month_year_week``, ``retained_traders``, ``previous_traders`` and
        ``retention_rate`` (a percentage). The frame is empty, but still has
        these columns, when fewer than two weeks are available or when the
        filter is not implemented.
    """
    result_columns = [
        "month_year_week",
        "retained_traders",
        "previous_traders",
        "retention_rate",
    ]

    if trader_filter == "Olas":
        df = traders_df.loc[traders_df["staking"] != "non_Olas"]
    elif trader_filter == "non_Olas":
        df = traders_df.loc[traders_df["staking"] == "non_Olas"]
    else:
        # unknown traders: nothing can be computed, so return an empty result
        # instead of failing later on an undefined frame
        print("Not implemented yet")
        return pd.DataFrame(columns=result_columns)

    # Build the set of traders of every week once, instead of re-filtering
    # the whole frame inside the loop
    traders_by_week = {
        week: set(addresses)
        for week, addresses in df.groupby("month_year_week")["trader_address"]
    }
    weeks = list(traders_by_week)

    # groupby orders the week labels lexically, which is not chronological
    # for labels such as "Apr-07-2025" / "Mar-31-2025". Order them as dates
    # whenever every label can be parsed; otherwise keep the groupby order.
    try:
        parsed_weeks = pd.to_datetime(
            pd.Series(weeks, dtype="object"), errors="coerce"
        )
        parseable = bool(parsed_weeks.notna().all())
    except (TypeError, ValueError):
        parseable = False
    if parseable:
        ordered_pairs = sorted(zip(parsed_weeks, weeks), key=lambda pair: pair[0])
        weeks = [week for _, week in ordered_pairs]

    # Calculate retention between each week and the one right before it
    retention = []
    for previous_week, current_week in zip(weeks, weeks[1:]):
        previous_traders = traders_by_week[previous_week]
        current_traders = traders_by_week[current_week]

        retained = len(current_traders & previous_traders)
        retention.append(
            {
                "month_year_week": current_week,
                "retained_traders": retained,
                "previous_traders": len(previous_traders),
                "retention_rate": (retained / len(previous_traders)) * 100,
            }
        )

    return pd.DataFrame(retention, columns=result_columns)
50
+
51
+
52
+ # N-Week Rolling Retention
53
def calculate_nweek_retention(df: pd.DataFrame, n_weeks=4):
    """Compute how many traders stayed active for at least ``n_weeks`` weeks.

    A trader's activity span is the time between their earliest and latest
    ``creation_timestamp``, measured in whole days and divided by 7.

    Args:
        df: Frame with at least the columns ``trader_address`` and
            ``creation_timestamp``.
        n_weeks: Minimum activity span, in weeks, for a trader to count as
            retained.

    Returns:
        A dict with the keys ``total_traders``, ``"<n_weeks>_week_retained"``
        and ``retention_rate`` (a percentage). All values are zero when there
        are no traders.
    """
    retained_key = f"{n_weeks}_week_retained"
    no_traders = {"total_traders": 0, retained_key: 0, "retention_rate": 0.0}
    if df.empty:
        return no_traders

    # Get first and last trade for each trader
    trader_activity = (
        df.groupby("trader_address")
        .agg({"creation_timestamp": ["min", "max"]})
        .reset_index()
    )
    trader_activity.columns = ["trader_address", "first_trade", "last_trade"]

    total_traders = len(trader_activity)
    if total_traders == 0:
        # e.g. every trader_address is missing: avoid dividing by zero
        return no_traders

    trader_activity["weeks_active"] = (
        pd.to_datetime(trader_activity["last_trade"])
        - pd.to_datetime(trader_activity["first_trade"])
    ).dt.days / 7

    retained_traders = int((trader_activity["weeks_active"] >= n_weeks).sum())

    return {
        "total_traders": total_traders,
        retained_key: retained_traders,
        "retention_rate": (retained_traders / total_traders) * 100,
    }
78
+
79
+
80
+ # Cohort Retention
81
def calculate_cohort_retention(df, max_weeks=12):
    """Build a cohort retention matrix of traders.

    Each trader belongs to the cohort of the week of their earliest trade.
    The result holds, per cohort and per number of weeks elapsed since the
    cohort week, the percentage of the cohort's traders that traded in that
    week.

    Args:
        df: Frame with at least the columns ``trader_address``,
            ``creation_timestamp`` and ``month_year_week``. The week labels
            must be parseable by ``pd.to_datetime``.
        max_weeks: Maximum number of week-offset columns to keep; ``None``
            keeps all of them.

    Returns:
        A frame indexed by ``cohort_week`` (in chronological order) with one
        column per week offset, holding percentages rounded to 2 decimals.
    """
    # Get first week for each trader. "first" takes the first row of each
    # group, so the rows are ordered by timestamp beforehand to make sure it
    # is the week of the earliest trade whatever the input order is.
    chronological = df.sort_values("creation_timestamp", kind="stable")
    first_trades = (
        chronological.groupby("trader_address")
        .agg({"creation_timestamp": "min", "month_year_week": "first"})
        .reset_index()
    )
    first_trades.columns = ["trader_address", "first_trade", "cohort_week"]

    # Get ordered list of unique weeks - converting to datetime for proper sorting
    all_weeks = df["month_year_week"].unique()
    weeks_datetime = pd.to_datetime(all_weeks)
    sorted_weeks_idx = weeks_datetime.argsort()
    all_weeks = all_weeks[sorted_weeks_idx]

    # Create mapping from week string to numeric index
    week_to_number = {week: idx for idx, week in enumerate(all_weeks)}

    # Merge back to get all activities
    cohort_data = pd.merge(
        df, first_trades[["trader_address", "cohort_week"]], on="trader_address"
    )

    # Calculate week number since first activity
    cohort_data["cohort_number"] = cohort_data["cohort_week"].map(week_to_number)
    cohort_data["activity_number"] = cohort_data["month_year_week"].map(week_to_number)
    cohort_data["week_number"] = (
        cohort_data["activity_number"] - cohort_data["cohort_number"]
    )

    # Calculate retention by cohort
    cohort_sizes = cohort_data.groupby("cohort_week")["trader_address"].nunique()
    retention_matrix = cohort_data.groupby(["cohort_week", "week_number"])[
        "trader_address"
    ].nunique()
    retention_matrix = retention_matrix.unstack(fill_value=0)

    # Convert to percentages
    retention_matrix = retention_matrix.div(cohort_sizes, axis=0) * 100

    # groupby sorts the cohort labels lexically; put the rows back in
    # chronological order using the already sorted list of weeks
    cohort_order = [week for week in all_weeks if week in retention_matrix.index]
    retention_matrix = retention_matrix.loc[cohort_order]

    # Limit to max_weeks if specified
    if max_weeks is not None and max_weeks < retention_matrix.shape[1]:
        retention_matrix = retention_matrix.iloc[:, :max_weeks]

    return retention_matrix.round(2)
127
+
128
+
129
if __name__ == "__main__":
    # Load the trades dataset from the data directory
    trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")

    # Make sure the timestamps are datetimes and order the trades by them
    trades_df["creation_timestamp"] = pd.to_datetime(trades_df["creation_timestamp"])
    trades_df = trades_df.sort_values(by="creation_timestamp", ascending=True)

    # Label every trade with its weekly period, formatted as e.g. "Jan-06-2025"
    week_periods = trades_df["creation_timestamp"].dt.to_period("W")
    trades_df["month_year_week"] = week_periods.dt.strftime("%b-%d-%Y")

    # Usage example of the three retention computations
    wow_retention = calculate_wow_retention(trades_df, trader_filter="Olas")
    rolling_retention = calculate_nweek_retention(trades_df, n_weeks=4)
    cohort_retention = calculate_cohort_retention(trades_df)