File size: 10,185 Bytes
f26bf5c dff5e35 8834fdb f26bf5c 1c9dfec 8834fdb f26bf5c 1c9dfec f26bf5c 1c9dfec f26bf5c 8834fdb 1c9dfec 8834fdb dff5e35 8834fdb dff5e35 8834fdb f26bf5c 1c9dfec dff5e35 3058723 dff5e35 63c3662 211cb3f 3058723 1c9dfec 3058723 dff5e35 f26bf5c 3058723 f26bf5c 63c3662 f26bf5c 63c3662 3058723 63c3662 f26bf5c 1c9dfec 3d497f3 1c9dfec 3d497f3 1c9dfec f26bf5c 1c9dfec 3d497f3 1c9dfec f26bf5c 1c9dfec f26bf5c 1c9dfec 355fb10 1c9dfec 8834fdb 1c9dfec 8834fdb dff5e35 1c9dfec dff5e35 1c9dfec dff5e35 1c9dfec dff5e35 1c9dfec dff5e35 8834fdb 2628969 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 |
import pandas as pd
from datetime import datetime, timedelta
# Basic Week over Week Retention
def calculate_wow_retention_by_type(
df: pd.DataFrame, market_creator: str
) -> pd.DataFrame:
filtered_df = df.loc[df["market_creator"] == market_creator]
# Get unique traders per week and type
weekly_traders = (
filtered_df.groupby(["month_year_week", "trader_type"], sort=False)[
"trader_address"
]
.nunique()
.reset_index()
)
# weekly_traders = weekly_traders.sort_values(['trader_type', 'month_year_week'])
# Get ordered list of unique weeks - converting to datetime for proper sorting
all_weeks = filtered_df["month_year_week"].unique()
weeks_datetime = pd.to_datetime(all_weeks)
sorted_weeks_idx = weeks_datetime.argsort()
all_weeks = all_weeks[sorted_weeks_idx]
# Create mapping from week string to numeric index
week_to_number = {week: idx for idx, week in enumerate(all_weeks)}
# Calculate retention
retention = []
# Iterate through each trader type
for trader_type in weekly_traders["trader_type"].unique():
type_data = weekly_traders[weekly_traders["trader_type"] == trader_type]
# Calculate retention for each week within this trader type
for i in range(1, len(type_data)):
current_week = type_data.iloc[i]["month_year_week"]
# print(f"current_week={current_week}")
week_number = week_to_number[current_week]
if week_to_number == 0:
# no previous week info
continue
previous_week_number = week_number - 1
# this should give only one value
previous_week = [
key
for key in week_to_number.keys()
if week_to_number[key] == previous_week_number
][0]
# print(f"previous week = {previous_week}")
# Get traders in both weeks for this type
current_traders = set(
filtered_df[
(filtered_df["month_year_week"] == current_week)
& (filtered_df["trader_type"] == trader_type)
]["trader_address"]
)
previous_traders = set(
filtered_df[
(filtered_df["month_year_week"] == previous_week)
& (filtered_df["trader_type"] == trader_type)
]["trader_address"]
)
retained = len(current_traders.intersection(previous_traders))
retention_rate = (
(retained / len(previous_traders)) * 100
if len(previous_traders) > 0
else 0
)
retention.append(
{
"trader_type": trader_type,
"week": current_week,
"retained_traders": retained,
"previous_traders": len(previous_traders),
"retention_rate": round(retention_rate, 2),
}
)
return pd.DataFrame(retention)
def create_retention_matrix(cohort_retention_df: pd.DataFrame) -> pd.DataFrame:
# Pivot the data to create the retention matrix
retention_matrix = cohort_retention_df.pivot(
index="cohort_week", columns="weeks_since_cohort", values="retention_rate"
)
# Sort index chronologically
retention_matrix.index = pd.to_datetime(retention_matrix.index)
retention_matrix = retention_matrix.sort_index()
# Rename columns to show week numbers
# retention_matrix.columns = [f"Week {i}" for i in retention_matrix.columns]
return retention_matrix
# Wow Retention at the cohort level
def calculate_cohort_retention(
df: pd.DataFrame, market_creator: str, trader_type: str
) -> pd.DataFrame:
df_filtered = df.loc[
(df["market_creator"] == market_creator) & (df["trader_type"] == trader_type)
]
if len(df_filtered) == 0:
return pd.DataFrame()
df_filtered = df_filtered.sort_values(by="creation_timestamp", ascending=True)
# Get first week of activity for each trader
first_activity = (
df_filtered.groupby("trader_address")
.agg({"creation_timestamp": "min", "month_year_week": "first"})
.reset_index()
)
first_activity.columns = ["trader_address", "first_activity", "cohort_week"]
# Get ordered list of unique weeks - converting to datetime for proper sorting
all_weeks = df_filtered["month_year_week"].unique()
weeks_datetime = pd.to_datetime(all_weeks)
sorted_weeks_idx = weeks_datetime.argsort()
all_weeks = all_weeks[sorted_weeks_idx]
# Create mapping from week string to numeric index
week_to_number = {week: idx for idx, week in enumerate(all_weeks)}
# Merge back to get all activities
cohort_data = pd.merge(
df_filtered,
first_activity[["trader_address", "cohort_week"]],
on="trader_address",
)
# Get all unique weeks and cohorts
all_cohorts = cohort_data["cohort_week"].unique()
# extend the cohort
# print(f"all cohorts = {all_cohorts}")
retention_data = []
max_weeks = 8
# for cohort in all_cohorts:
for cohort_week_idx, cohort in enumerate(all_weeks):
# print(f"analyzing cohort {cohort}")
# Get all traders in this cohort
cohort_traders = set(
cohort_data[cohort_data["cohort_week"] == cohort]["trader_address"]
)
cohort_size = len(cohort_traders)
# print(f"cohort size = {cohort_size}")
# Calculate retention for each week after the cohort week
for week_idx, week in enumerate(all_weeks):
# print(f"Analyzing week = {week}")
weeks_since_cohort = week_idx - cohort_week_idx
if weeks_since_cohort < 0 or weeks_since_cohort > max_weeks:
continue
if cohort_size == 0:
print(f"NO new traders for cohort week={cohort}")
retention_data.append(
{
"cohort_week": cohort,
"week": week,
"weeks_since_cohort": weeks_since_cohort,
"cohort_size": cohort_size,
"active_traders": 0,
"retained_traders": 0,
"previous_traders": 0,
"retention_rate": round(0, 2),
}
)
continue
# Get active traders from the cohort in current week
current_traders = set(
cohort_data[
(cohort_data["cohort_week"] == cohort)
& (cohort_data["month_year_week"] == week)
]["trader_address"]
)
# Get active traders from the cohort in previous week
if week == cohort:
# For the first week, retention is 100% by definition
retained = len(current_traders)
retention_rate = 100 if len(current_traders) > 0 else 0
elif week_idx > 0:
previous_week = all_weeks[week_idx - 1]
previous_traders = set(
cohort_data[
(cohort_data["cohort_week"] == cohort)
& (cohort_data["month_year_week"] == previous_week)
]["trader_address"]
)
retained = len(current_traders.intersection(previous_traders))
retention_rate = (
(retained / len(previous_traders)) * 100
if len(previous_traders) > 0
else 0
)
retention_data.append(
{
"cohort_week": cohort,
"week": week,
"weeks_since_cohort": weeks_since_cohort,
"cohort_size": cohort_size,
"active_traders": len(current_traders),
"retained_traders": retained,
"previous_traders": (
len(previous_traders) if week_idx > 0 else cohort_size
),
"retention_rate": round(retention_rate, 2),
}
)
retention_matrix = create_retention_matrix(pd.DataFrame(retention_data))
return retention_matrix
def prepare_retention_dataset(
retention_df: pd.DataFrame, unknown_df: pd.DataFrame
) -> pd.DataFrame:
print("Preparing retention dataset")
retention_df["trader_type"] = retention_df["staking"].apply(
lambda x: "non_Olas" if x == "non_Olas" else "Olas"
)
retention_df.rename(columns={"request_time": "creation_timestamp"}, inplace=True)
retention_df = retention_df[
["trader_type", "market_creator", "trader_address", "creation_timestamp"]
]
unknown_df["trader_type"] = "unclassified"
unknown_df = unknown_df[
["trader_type", "market_creator", "trader_address", "creation_timestamp"]
]
all_traders = pd.concat([retention_df, unknown_df], ignore_index=True)
all_traders["creation_timestamp"] = pd.to_datetime(
all_traders["creation_timestamp"]
)
all_traders = all_traders.sort_values(by="creation_timestamp", ascending=True)
# Remove data from current week and onwards
now = datetime.now()
# Get start of the current week (Monday)
start_of_week = now - timedelta(days=(now.weekday()))
start_of_week = start_of_week.replace(hour=0, minute=0, second=0, microsecond=0)
all_traders["creation_date"] = all_traders["creation_timestamp"].dt.date
all_traders["creation_date"] = pd.to_datetime(all_traders["creation_date"])
# Filter the dataframe
filtered_traders = all_traders[all_traders["creation_date"] < start_of_week]
filtered_traders["month_year_week"] = (
filtered_traders["creation_timestamp"]
.dt.to_period("W")
.dt.start_time.dt.strftime("%b-%d-%Y")
)
print(filtered_traders.month_year_week.unique())
return filtered_traders
if __name__ == "__main__":
print("WIP")
|