cyberosa
committed on
Commit
·
f26bf5c
1
Parent(s):
8a73d91
daily data of current week
Browse files
data/closed_markets_div.parquet
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99b7ff8fafd742e4c7b4601adb95bd42cf560d6a81ac97819ea5748a6ba4b900
|
3 |
+
size 50378
|
data/unknown_daily_traders.parquet
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c74aad2a1150163d78233e9a54ffa262d195e43b68b9aba1e33536671075ef57
|
3 |
+
size 163247
|
data/unknown_traders.parquet
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9be58c1de361e7c9df25ae05c54b77f6a6417e58e19d5d6ef8bd37516da1f70e
|
3 |
+
size 198407
|
data/weekly_mech_calls.parquet
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a91eccf4392fc1cbdbb911fa2163a5aff3f4483692ab059083492047c3a4f55a
|
3 |
+
size 50574
|
scripts/wow_retentions.py
ADDED
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from datetime import datetime, timedelta
|
3 |
+
from utils import DATA_DIR
|
4 |
+
|
5 |
+
|
6 |
+
# Basic Week over Week Retention
def calculate_wow_retention(
    traders_df: pd.DataFrame, trader_filter: str
) -> pd.DataFrame:
    """Compute week-over-week trader retention.

    Args:
        traders_df: trades with at least the columns ``trader_address``,
            ``staking`` and ``month_year_week``.
        trader_filter: ``"Olas"`` keeps rows with ``staking != "non_Olas"``;
            ``"non_Olas"`` keeps rows with ``staking == "non_Olas"``.
            Any other value raises ``NotImplementedError``.

    Returns:
        One row per week (except the first) with ``retained_traders``,
        ``previous_traders`` and ``retention_rate`` (a percentage).

    Raises:
        NotImplementedError: for an unsupported ``trader_filter`` value.
    """
    if trader_filter == "Olas":
        df = traders_df.loc[traders_df["staking"] != "non_Olas"]
    elif trader_filter == "non_Olas":
        df = traders_df.loc[traders_df["staking"] == "non_Olas"]
    else:
        # unknown traders: the old code only printed a message and left
        # ``df`` unbound, crashing below with UnboundLocalError — fail loudly.
        raise NotImplementedError(
            f"trader_filter={trader_filter!r} is not supported yet"
        )

    # Unique traders per week. NOTE(review): groupby sorts the week labels
    # lexicographically; with "%b-%d-%Y" labels that is not chronological —
    # callers should verify week label ordering.
    weekly_traders = (
        df.groupby("month_year_week")["trader_address"].nunique().reset_index()
    )

    # Compare each week's trader set with the previous week's.
    retention = []
    for i in range(1, len(weekly_traders)):
        current_week = weekly_traders.iloc[i]["month_year_week"]
        previous_week = weekly_traders.iloc[i - 1]["month_year_week"]

        # Get traders in both weeks
        current_traders = set(
            df[df["month_year_week"] == current_week]["trader_address"]
        )
        previous_traders = set(
            df[df["month_year_week"] == previous_week]["trader_address"]
        )

        retained = len(current_traders.intersection(previous_traders))
        # Guard against an (unlikely) empty previous week.
        retention_rate = (
            (retained / len(previous_traders)) * 100 if previous_traders else 0.0
        )

        retention.append(
            {
                "month_year_week": current_week,
                "retained_traders": retained,
                "previous_traders": len(previous_traders),
                "retention_rate": retention_rate,
            }
        )

    return pd.DataFrame(retention)
|
50 |
+
|
51 |
+
|
52 |
+
# N-Week Rolling Retention
def calculate_nweek_retention(df: pd.DataFrame, n_weeks: int = 4) -> dict:
    """Compute the share of traders active for at least ``n_weeks`` weeks.

    A trader's active span is the number of days between their first and
    last ``creation_timestamp``, divided by 7.

    Args:
        df: trades with ``trader_address`` and ``creation_timestamp`` columns.
        n_weeks: minimum active span (in weeks) to count as retained.

    Returns:
        Dict with ``total_traders``, ``"{n_weeks}_week_retained"`` and
        ``retention_rate`` (a percentage).
    """
    # Empty input used to raise ZeroDivisionError in the rate computation.
    if df.empty:
        return {
            "total_traders": 0,
            f"{n_weeks}_week_retained": 0,
            "retention_rate": 0.0,
        }

    # First and last trade per trader.
    trader_activity = (
        df.groupby("trader_address")
        .agg({"creation_timestamp": ["min", "max"]})
        .reset_index()
    )
    trader_activity.columns = ["trader_address", "first_trade", "last_trade"]

    trader_activity["weeks_active"] = (
        pd.to_datetime(trader_activity["last_trade"])
        - pd.to_datetime(trader_activity["first_trade"])
    ).dt.days / 7

    # Compute the retained count once instead of three times.
    total = len(trader_activity)
    retained = int((trader_activity["weeks_active"] >= n_weeks).sum())

    return {
        "total_traders": total,
        f"{n_weeks}_week_retained": retained,
        "retention_rate": (retained / total) * 100,
    }
|
78 |
+
|
79 |
+
|
80 |
+
# Cohort Retention
def calculate_cohort_retention(df: pd.DataFrame, max_weeks: int = 12) -> pd.DataFrame:
    """Build a cohort retention matrix.

    Rows are cohort weeks (the week of each trader's first trade), columns
    are week offsets since the cohort week, values are the percentage of the
    cohort still active that many weeks later.

    Args:
        df: trades with ``trader_address``, ``creation_timestamp`` and
            ``month_year_week`` columns; week labels must be parseable by
            ``pd.to_datetime`` for chronological ordering.
        max_weeks: truncate the matrix to this many week-offset columns
            (``None`` keeps them all).

    Returns:
        Retention matrix rounded to 2 decimals.
    """
    # Cohort week = week of each trader's earliest trade. Using idxmin on
    # the timestamp (instead of groupby "first") makes this independent of
    # row order — resolves the old TODO about unsorted input.
    first_idx = df.groupby("trader_address")["creation_timestamp"].idxmin()
    first_trades = df.loc[first_idx, ["trader_address", "month_year_week"]].rename(
        columns={"month_year_week": "cohort_week"}
    )

    # Ordered list of unique weeks — parse to datetime for proper sorting.
    all_weeks = df["month_year_week"].unique()
    weeks_datetime = pd.to_datetime(all_weeks)
    all_weeks = all_weeks[weeks_datetime.argsort()]

    # Map each week label to its chronological index.
    week_to_number = {week: idx for idx, week in enumerate(all_weeks)}

    # Attach every activity row to its trader's cohort week.
    cohort_data = pd.merge(df, first_trades, on="trader_address")

    # Week offset of each activity relative to the cohort week.
    cohort_data["cohort_number"] = cohort_data["cohort_week"].map(week_to_number)
    cohort_data["activity_number"] = cohort_data["month_year_week"].map(week_to_number)
    cohort_data["week_number"] = (
        cohort_data["activity_number"] - cohort_data["cohort_number"]
    )

    # Unique traders per (cohort, offset), normalised by the cohort size.
    cohort_sizes = cohort_data.groupby("cohort_week")["trader_address"].nunique()
    retention_matrix = (
        cohort_data.groupby(["cohort_week", "week_number"])["trader_address"]
        .nunique()
        .unstack(fill_value=0)
    )

    # Convert to percentages
    retention_matrix = retention_matrix.div(cohort_sizes, axis=0) * 100

    # Limit to max_weeks if specified
    if max_weeks is not None and max_weeks < retention_matrix.shape[1]:
        retention_matrix = retention_matrix.iloc[:, :max_weeks]

    return retention_matrix.round(2)
|
127 |
+
|
128 |
+
|
129 |
+
if __name__ == "__main__":
    # Load the trades dataset produced upstream.
    trades_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")

    # Normalise timestamps, order chronologically, then derive the week
    # label (e.g. "Nov-04-2024") that all retention metrics group by.
    trades_df["creation_timestamp"] = pd.to_datetime(trades_df["creation_timestamp"])
    trades_df = trades_df.sort_values(by="creation_timestamp", ascending=True)
    week_label = (
        trades_df["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d-%Y")
    )
    trades_df["month_year_week"] = week_label

    # Example usage of the three retention metrics:
    wow_retention = calculate_wow_retention(trades_df, trader_filter="Olas")
    rolling_retention = calculate_nweek_retention(trades_df, n_weeks=4)
    cohort_retention = calculate_cohort_retention(trades_df)
|