trader_agents_performance / scripts /retention_metrics.py
cyberosa
trying to fix error with loading files
2628969
import pandas as pd
from datetime import datetime, timedelta
# Basic Week over Week Retention
def calculate_wow_retention_by_type(
df: pd.DataFrame, market_creator: str
) -> pd.DataFrame:
filtered_df = df.loc[df["market_creator"] == market_creator]
# Get unique traders per week and type
weekly_traders = (
filtered_df.groupby(["month_year_week", "trader_type"], sort=False)[
"trader_address"
]
.nunique()
.reset_index()
)
# weekly_traders = weekly_traders.sort_values(['trader_type', 'month_year_week'])
# Get ordered list of unique weeks - converting to datetime for proper sorting
all_weeks = filtered_df["month_year_week"].unique()
weeks_datetime = pd.to_datetime(all_weeks)
sorted_weeks_idx = weeks_datetime.argsort()
all_weeks = all_weeks[sorted_weeks_idx]
# Create mapping from week string to numeric index
week_to_number = {week: idx for idx, week in enumerate(all_weeks)}
# Calculate retention
retention = []
# Iterate through each trader type
for trader_type in weekly_traders["trader_type"].unique():
type_data = weekly_traders[weekly_traders["trader_type"] == trader_type]
# Calculate retention for each week within this trader type
for i in range(1, len(type_data)):
current_week = type_data.iloc[i]["month_year_week"]
# print(f"current_week={current_week}")
week_number = week_to_number[current_week]
if week_to_number == 0:
# no previous week info
continue
previous_week_number = week_number - 1
# this should give only one value
previous_week = [
key
for key in week_to_number.keys()
if week_to_number[key] == previous_week_number
][0]
# print(f"previous week = {previous_week}")
# Get traders in both weeks for this type
current_traders = set(
filtered_df[
(filtered_df["month_year_week"] == current_week)
& (filtered_df["trader_type"] == trader_type)
]["trader_address"]
)
previous_traders = set(
filtered_df[
(filtered_df["month_year_week"] == previous_week)
& (filtered_df["trader_type"] == trader_type)
]["trader_address"]
)
retained = len(current_traders.intersection(previous_traders))
retention_rate = (
(retained / len(previous_traders)) * 100
if len(previous_traders) > 0
else 0
)
retention.append(
{
"trader_type": trader_type,
"week": current_week,
"retained_traders": retained,
"previous_traders": len(previous_traders),
"retention_rate": round(retention_rate, 2),
}
)
return pd.DataFrame(retention)
def create_retention_matrix(cohort_retention_df: pd.DataFrame) -> pd.DataFrame:
# Pivot the data to create the retention matrix
retention_matrix = cohort_retention_df.pivot(
index="cohort_week", columns="weeks_since_cohort", values="retention_rate"
)
# Sort index chronologically
retention_matrix.index = pd.to_datetime(retention_matrix.index)
retention_matrix = retention_matrix.sort_index()
# Rename columns to show week numbers
# retention_matrix.columns = [f"Week {i}" for i in retention_matrix.columns]
return retention_matrix
# Wow Retention at the cohort level
def calculate_cohort_retention(
df: pd.DataFrame, market_creator: str, trader_type: str
) -> pd.DataFrame:
df_filtered = df.loc[
(df["market_creator"] == market_creator) & (df["trader_type"] == trader_type)
]
if len(df_filtered) == 0:
return pd.DataFrame()
df_filtered = df_filtered.sort_values(by="creation_timestamp", ascending=True)
# Get first week of activity for each trader
first_activity = (
df_filtered.groupby("trader_address")
.agg({"creation_timestamp": "min", "month_year_week": "first"})
.reset_index()
)
first_activity.columns = ["trader_address", "first_activity", "cohort_week"]
# Get ordered list of unique weeks - converting to datetime for proper sorting
all_weeks = df_filtered["month_year_week"].unique()
weeks_datetime = pd.to_datetime(all_weeks)
sorted_weeks_idx = weeks_datetime.argsort()
all_weeks = all_weeks[sorted_weeks_idx]
# Create mapping from week string to numeric index
week_to_number = {week: idx for idx, week in enumerate(all_weeks)}
# Merge back to get all activities
cohort_data = pd.merge(
df_filtered,
first_activity[["trader_address", "cohort_week"]],
on="trader_address",
)
# Get all unique weeks and cohorts
all_cohorts = cohort_data["cohort_week"].unique()
# extend the cohort
# print(f"all cohorts = {all_cohorts}")
retention_data = []
max_weeks = 8
# for cohort in all_cohorts:
for cohort_week_idx, cohort in enumerate(all_weeks):
# print(f"analyzing cohort {cohort}")
# Get all traders in this cohort
cohort_traders = set(
cohort_data[cohort_data["cohort_week"] == cohort]["trader_address"]
)
cohort_size = len(cohort_traders)
# print(f"cohort size = {cohort_size}")
# Calculate retention for each week after the cohort week
for week_idx, week in enumerate(all_weeks):
# print(f"Analyzing week = {week}")
weeks_since_cohort = week_idx - cohort_week_idx
if weeks_since_cohort < 0 or weeks_since_cohort > max_weeks:
continue
if cohort_size == 0:
print(f"NO new traders for cohort week={cohort}")
retention_data.append(
{
"cohort_week": cohort,
"week": week,
"weeks_since_cohort": weeks_since_cohort,
"cohort_size": cohort_size,
"active_traders": 0,
"retained_traders": 0,
"previous_traders": 0,
"retention_rate": round(0, 2),
}
)
continue
# Get active traders from the cohort in current week
current_traders = set(
cohort_data[
(cohort_data["cohort_week"] == cohort)
& (cohort_data["month_year_week"] == week)
]["trader_address"]
)
# Get active traders from the cohort in previous week
if week == cohort:
# For the first week, retention is 100% by definition
retained = len(current_traders)
retention_rate = 100 if len(current_traders) > 0 else 0
elif week_idx > 0:
previous_week = all_weeks[week_idx - 1]
previous_traders = set(
cohort_data[
(cohort_data["cohort_week"] == cohort)
& (cohort_data["month_year_week"] == previous_week)
]["trader_address"]
)
retained = len(current_traders.intersection(previous_traders))
retention_rate = (
(retained / len(previous_traders)) * 100
if len(previous_traders) > 0
else 0
)
retention_data.append(
{
"cohort_week": cohort,
"week": week,
"weeks_since_cohort": weeks_since_cohort,
"cohort_size": cohort_size,
"active_traders": len(current_traders),
"retained_traders": retained,
"previous_traders": (
len(previous_traders) if week_idx > 0 else cohort_size
),
"retention_rate": round(retention_rate, 2),
}
)
retention_matrix = create_retention_matrix(pd.DataFrame(retention_data))
return retention_matrix
def prepare_retention_dataset(
retention_df: pd.DataFrame, unknown_df: pd.DataFrame
) -> pd.DataFrame:
print("Preparing retention dataset")
retention_df["trader_type"] = retention_df["staking"].apply(
lambda x: "non_Olas" if x == "non_Olas" else "Olas"
)
retention_df.rename(columns={"request_time": "creation_timestamp"}, inplace=True)
retention_df = retention_df[
["trader_type", "market_creator", "trader_address", "creation_timestamp"]
]
unknown_df["trader_type"] = "unclassified"
unknown_df = unknown_df[
["trader_type", "market_creator", "trader_address", "creation_timestamp"]
]
all_traders = pd.concat([retention_df, unknown_df], ignore_index=True)
all_traders["creation_timestamp"] = pd.to_datetime(
all_traders["creation_timestamp"]
)
all_traders = all_traders.sort_values(by="creation_timestamp", ascending=True)
# Remove data from current week and onwards
now = datetime.now()
# Get start of the current week (Monday)
start_of_week = now - timedelta(days=(now.weekday()))
start_of_week = start_of_week.replace(hour=0, minute=0, second=0, microsecond=0)
all_traders["creation_date"] = all_traders["creation_timestamp"].dt.date
all_traders["creation_date"] = pd.to_datetime(all_traders["creation_date"])
# Filter the dataframe
filtered_traders = all_traders[all_traders["creation_date"] < start_of_week]
filtered_traders["month_year_week"] = (
filtered_traders["creation_timestamp"]
.dt.to_period("W")
.dt.start_time.dt.strftime("%b-%d-%Y")
)
print(filtered_traders.month_year_week.unique())
return filtered_traders
if __name__ == "__main__":
print("WIP")