|
import pandas as pd |
|
from datetime import datetime, timedelta |
|
|
|
|
|
|
|
def calculate_wow_retention_by_type( |
|
df: pd.DataFrame, market_creator: str |
|
) -> pd.DataFrame: |
|
filtered_df = df.loc[df["market_creator"] == market_creator] |
|
|
|
weekly_traders = ( |
|
filtered_df.groupby(["month_year_week", "trader_type"], sort=False)[ |
|
"trader_address" |
|
] |
|
.nunique() |
|
.reset_index() |
|
) |
|
|
|
|
|
all_weeks = filtered_df["month_year_week"].unique() |
|
weeks_datetime = pd.to_datetime(all_weeks) |
|
sorted_weeks_idx = weeks_datetime.argsort() |
|
all_weeks = all_weeks[sorted_weeks_idx] |
|
|
|
|
|
week_to_number = {week: idx for idx, week in enumerate(all_weeks)} |
|
|
|
retention = [] |
|
|
|
for trader_type in weekly_traders["trader_type"].unique(): |
|
type_data = weekly_traders[weekly_traders["trader_type"] == trader_type] |
|
|
|
|
|
for i in range(1, len(type_data)): |
|
current_week = type_data.iloc[i]["month_year_week"] |
|
|
|
week_number = week_to_number[current_week] |
|
if week_to_number == 0: |
|
|
|
continue |
|
previous_week_number = week_number - 1 |
|
|
|
previous_week = [ |
|
key |
|
for key in week_to_number.keys() |
|
if week_to_number[key] == previous_week_number |
|
][0] |
|
|
|
|
|
|
|
current_traders = set( |
|
filtered_df[ |
|
(filtered_df["month_year_week"] == current_week) |
|
& (filtered_df["trader_type"] == trader_type) |
|
]["trader_address"] |
|
) |
|
|
|
previous_traders = set( |
|
filtered_df[ |
|
(filtered_df["month_year_week"] == previous_week) |
|
& (filtered_df["trader_type"] == trader_type) |
|
]["trader_address"] |
|
) |
|
|
|
retained = len(current_traders.intersection(previous_traders)) |
|
retention_rate = ( |
|
(retained / len(previous_traders)) * 100 |
|
if len(previous_traders) > 0 |
|
else 0 |
|
) |
|
|
|
retention.append( |
|
{ |
|
"trader_type": trader_type, |
|
"week": current_week, |
|
"retained_traders": retained, |
|
"previous_traders": len(previous_traders), |
|
"retention_rate": round(retention_rate, 2), |
|
} |
|
) |
|
|
|
return pd.DataFrame(retention) |
|
|
|
|
|
def create_retention_matrix(cohort_retention_df: pd.DataFrame) -> pd.DataFrame: |
|
|
|
retention_matrix = cohort_retention_df.pivot( |
|
index="cohort_week", columns="weeks_since_cohort", values="retention_rate" |
|
) |
|
|
|
|
|
retention_matrix.index = pd.to_datetime(retention_matrix.index) |
|
retention_matrix = retention_matrix.sort_index() |
|
|
|
|
|
|
|
|
|
return retention_matrix |
|
|
|
|
|
|
|
def calculate_cohort_retention( |
|
df: pd.DataFrame, market_creator: str, trader_type: str |
|
) -> pd.DataFrame: |
|
df_filtered = df.loc[ |
|
(df["market_creator"] == market_creator) & (df["trader_type"] == trader_type) |
|
] |
|
if len(df_filtered) == 0: |
|
return pd.DataFrame() |
|
df_filtered = df_filtered.sort_values(by="creation_timestamp", ascending=True) |
|
|
|
first_activity = ( |
|
df_filtered.groupby("trader_address") |
|
.agg({"creation_timestamp": "min", "month_year_week": "first"}) |
|
.reset_index() |
|
) |
|
first_activity.columns = ["trader_address", "first_activity", "cohort_week"] |
|
|
|
|
|
all_weeks = df_filtered["month_year_week"].unique() |
|
weeks_datetime = pd.to_datetime(all_weeks) |
|
sorted_weeks_idx = weeks_datetime.argsort() |
|
all_weeks = all_weeks[sorted_weeks_idx] |
|
|
|
|
|
week_to_number = {week: idx for idx, week in enumerate(all_weeks)} |
|
|
|
|
|
cohort_data = pd.merge( |
|
df_filtered, |
|
first_activity[["trader_address", "cohort_week"]], |
|
on="trader_address", |
|
) |
|
|
|
|
|
all_cohorts = cohort_data["cohort_week"].unique() |
|
|
|
|
|
retention_data = [] |
|
max_weeks = 8 |
|
|
|
for cohort_week_idx, cohort in enumerate(all_weeks): |
|
|
|
|
|
cohort_traders = set( |
|
cohort_data[cohort_data["cohort_week"] == cohort]["trader_address"] |
|
) |
|
cohort_size = len(cohort_traders) |
|
|
|
|
|
|
|
for week_idx, week in enumerate(all_weeks): |
|
|
|
weeks_since_cohort = week_idx - cohort_week_idx |
|
if weeks_since_cohort < 0 or weeks_since_cohort > max_weeks: |
|
continue |
|
if cohort_size == 0: |
|
print(f"NO new traders for cohort week={cohort}") |
|
retention_data.append( |
|
{ |
|
"cohort_week": cohort, |
|
"week": week, |
|
"weeks_since_cohort": weeks_since_cohort, |
|
"cohort_size": cohort_size, |
|
"active_traders": 0, |
|
"retained_traders": 0, |
|
"previous_traders": 0, |
|
"retention_rate": round(0, 2), |
|
} |
|
) |
|
continue |
|
|
|
current_traders = set( |
|
cohort_data[ |
|
(cohort_data["cohort_week"] == cohort) |
|
& (cohort_data["month_year_week"] == week) |
|
]["trader_address"] |
|
) |
|
|
|
|
|
if week == cohort: |
|
|
|
retained = len(current_traders) |
|
retention_rate = 100 if len(current_traders) > 0 else 0 |
|
|
|
elif week_idx > 0: |
|
previous_week = all_weeks[week_idx - 1] |
|
previous_traders = set( |
|
cohort_data[ |
|
(cohort_data["cohort_week"] == cohort) |
|
& (cohort_data["month_year_week"] == previous_week) |
|
]["trader_address"] |
|
) |
|
retained = len(current_traders.intersection(previous_traders)) |
|
retention_rate = ( |
|
(retained / len(previous_traders)) * 100 |
|
if len(previous_traders) > 0 |
|
else 0 |
|
) |
|
|
|
retention_data.append( |
|
{ |
|
"cohort_week": cohort, |
|
"week": week, |
|
"weeks_since_cohort": weeks_since_cohort, |
|
"cohort_size": cohort_size, |
|
"active_traders": len(current_traders), |
|
"retained_traders": retained, |
|
"previous_traders": ( |
|
len(previous_traders) if week_idx > 0 else cohort_size |
|
), |
|
"retention_rate": round(retention_rate, 2), |
|
} |
|
) |
|
|
|
retention_matrix = create_retention_matrix(pd.DataFrame(retention_data)) |
|
return retention_matrix |
|
|
|
|
|
def prepare_retention_dataset( |
|
retention_df: pd.DataFrame, unknown_df: pd.DataFrame |
|
) -> pd.DataFrame: |
|
print("Preparing retention dataset") |
|
retention_df["trader_type"] = retention_df["staking"].apply( |
|
lambda x: "non_Olas" if x == "non_Olas" else "Olas" |
|
) |
|
retention_df.rename(columns={"request_time": "creation_timestamp"}, inplace=True) |
|
retention_df = retention_df[ |
|
["trader_type", "market_creator", "trader_address", "creation_timestamp"] |
|
] |
|
unknown_df["trader_type"] = "unclassified" |
|
unknown_df = unknown_df[ |
|
["trader_type", "market_creator", "trader_address", "creation_timestamp"] |
|
] |
|
all_traders = pd.concat([retention_df, unknown_df], ignore_index=True) |
|
|
|
all_traders["creation_timestamp"] = pd.to_datetime( |
|
all_traders["creation_timestamp"] |
|
) |
|
|
|
all_traders = all_traders.sort_values(by="creation_timestamp", ascending=True) |
|
|
|
|
|
now = datetime.now() |
|
|
|
|
|
start_of_week = now - timedelta(days=(now.weekday())) |
|
start_of_week = start_of_week.replace(hour=0, minute=0, second=0, microsecond=0) |
|
|
|
all_traders["creation_date"] = all_traders["creation_timestamp"].dt.date |
|
all_traders["creation_date"] = pd.to_datetime(all_traders["creation_date"]) |
|
|
|
filtered_traders = all_traders[all_traders["creation_date"] < start_of_week] |
|
filtered_traders["month_year_week"] = ( |
|
filtered_traders["creation_timestamp"] |
|
.dt.to_period("W") |
|
.dt.start_time.dt.strftime("%b-%d-%Y") |
|
) |
|
|
|
print(filtered_traders.month_year_week.unique()) |
|
return filtered_traders |
|
|
|
|
|
if __name__ == "__main__": |
|
print("WIP") |
|
|