import pandas as pd from datetime import datetime, timedelta # Basic Week over Week Retention def calculate_wow_retention_by_type( df: pd.DataFrame, market_creator: str ) -> pd.DataFrame: filtered_df = df.loc[df["market_creator"] == market_creator] # Get unique traders per week and type weekly_traders = ( filtered_df.groupby(["month_year_week", "trader_type"], sort=False)[ "trader_address" ] .nunique() .reset_index() ) # weekly_traders = weekly_traders.sort_values(['trader_type', 'month_year_week']) # Get ordered list of unique weeks - converting to datetime for proper sorting all_weeks = filtered_df["month_year_week"].unique() weeks_datetime = pd.to_datetime(all_weeks) sorted_weeks_idx = weeks_datetime.argsort() all_weeks = all_weeks[sorted_weeks_idx] # Create mapping from week string to numeric index week_to_number = {week: idx for idx, week in enumerate(all_weeks)} # Calculate retention retention = [] # Iterate through each trader type for trader_type in weekly_traders["trader_type"].unique(): type_data = weekly_traders[weekly_traders["trader_type"] == trader_type] # Calculate retention for each week within this trader type for i in range(1, len(type_data)): current_week = type_data.iloc[i]["month_year_week"] # print(f"current_week={current_week}") week_number = week_to_number[current_week] if week_to_number == 0: # no previous week info continue previous_week_number = week_number - 1 # this should give only one value previous_week = [ key for key in week_to_number.keys() if week_to_number[key] == previous_week_number ][0] # print(f"previous week = {previous_week}") # Get traders in both weeks for this type current_traders = set( filtered_df[ (filtered_df["month_year_week"] == current_week) & (filtered_df["trader_type"] == trader_type) ]["trader_address"] ) previous_traders = set( filtered_df[ (filtered_df["month_year_week"] == previous_week) & (filtered_df["trader_type"] == trader_type) ]["trader_address"] ) retained = len(current_traders.intersection(previous_traders)) retention_rate = ( (retained / len(previous_traders)) * 100 if len(previous_traders) > 0 else 0 ) retention.append( { "trader_type": trader_type, "week": current_week, "retained_traders": retained, "previous_traders": len(previous_traders), "retention_rate": round(retention_rate, 2), } ) return pd.DataFrame(retention) def create_retention_matrix(cohort_retention_df: pd.DataFrame) -> pd.DataFrame: # Pivot the data to create the retention matrix retention_matrix = cohort_retention_df.pivot( index="cohort_week", columns="weeks_since_cohort", values="retention_rate" ) # Sort index chronologically retention_matrix.index = pd.to_datetime(retention_matrix.index) retention_matrix = retention_matrix.sort_index() # Rename columns to show week numbers # retention_matrix.columns = [f"Week {i}" for i in retention_matrix.columns] return retention_matrix # Wow Retention at the cohort level def calculate_cohort_retention( df: pd.DataFrame, market_creator: str, trader_type: str ) -> pd.DataFrame: df_filtered = df.loc[ (df["market_creator"] == market_creator) & (df["trader_type"] == trader_type) ] if len(df_filtered) == 0: return pd.DataFrame() df_filtered = df_filtered.sort_values(by="creation_timestamp", ascending=True) # Get first week of activity for each trader first_activity = ( df_filtered.groupby("trader_address") .agg({"creation_timestamp": "min", "month_year_week": "first"}) .reset_index() ) first_activity.columns = ["trader_address", "first_activity", "cohort_week"] # Get ordered list of unique weeks - converting to datetime for proper sorting all_weeks = df_filtered["month_year_week"].unique() weeks_datetime = pd.to_datetime(all_weeks) sorted_weeks_idx = weeks_datetime.argsort() all_weeks = all_weeks[sorted_weeks_idx] # Create mapping from week string to numeric index week_to_number = {week: idx for idx, week in enumerate(all_weeks)} # Merge back to get all activities cohort_data = pd.merge( df_filtered, first_activity[["trader_address", "cohort_week"]], on="trader_address", ) # Get all unique weeks and cohorts all_cohorts = cohort_data["cohort_week"].unique() # extend the cohort # print(f"all cohorts = {all_cohorts}") retention_data = [] max_weeks = 8 # for cohort in all_cohorts: for cohort_week_idx, cohort in enumerate(all_weeks): # print(f"analyzing cohort {cohort}") # Get all traders in this cohort cohort_traders = set( cohort_data[cohort_data["cohort_week"] == cohort]["trader_address"] ) cohort_size = len(cohort_traders) # print(f"cohort size = {cohort_size}") # Calculate retention for each week after the cohort week for week_idx, week in enumerate(all_weeks): # print(f"Analyzing week = {week}") weeks_since_cohort = week_idx - cohort_week_idx if weeks_since_cohort < 0 or weeks_since_cohort > max_weeks: continue if cohort_size == 0: print(f"NO new traders for cohort week={cohort}") retention_data.append( { "cohort_week": cohort, "week": week, "weeks_since_cohort": weeks_since_cohort, "cohort_size": cohort_size, "active_traders": 0, "retained_traders": 0, "previous_traders": 0, "retention_rate": round(0, 2), } ) continue # Get active traders from the cohort in current week current_traders = set( cohort_data[ (cohort_data["cohort_week"] == cohort) & (cohort_data["month_year_week"] == week) ]["trader_address"] ) # Get active traders from the cohort in previous week if week == cohort: # For the first week, retention is 100% by definition retained = len(current_traders) retention_rate = 100 if len(current_traders) > 0 else 0 elif week_idx > 0: previous_week = all_weeks[week_idx - 1] previous_traders = set( cohort_data[ (cohort_data["cohort_week"] == cohort) & (cohort_data["month_year_week"] == previous_week) ]["trader_address"] ) retained = len(current_traders.intersection(previous_traders)) retention_rate = ( (retained / len(previous_traders)) * 100 if len(previous_traders) > 0 else 0 ) retention_data.append( { "cohort_week": cohort, "week": week, "weeks_since_cohort": weeks_since_cohort, "cohort_size": cohort_size, "active_traders": len(current_traders), "retained_traders": retained, "previous_traders": ( len(previous_traders) if week_idx > 0 else cohort_size ), "retention_rate": round(retention_rate, 2), } ) retention_matrix = create_retention_matrix(pd.DataFrame(retention_data)) return retention_matrix def prepare_retention_dataset( retention_df: pd.DataFrame, unknown_df: pd.DataFrame ) -> pd.DataFrame: print("Preparing retention dataset") retention_df["trader_type"] = retention_df["staking"].apply( lambda x: "non_Olas" if x == "non_Olas" else "Olas" ) retention_df.rename(columns={"request_time": "creation_timestamp"}, inplace=True) retention_df = retention_df[ ["trader_type", "market_creator", "trader_address", "creation_timestamp"] ] unknown_df["trader_type"] = "unclassified" unknown_df = unknown_df[ ["trader_type", "market_creator", "trader_address", "creation_timestamp"] ] all_traders = pd.concat([retention_df, unknown_df], ignore_index=True) all_traders["creation_timestamp"] = pd.to_datetime( all_traders["creation_timestamp"] ) all_traders = all_traders.sort_values(by="creation_timestamp", ascending=True) # Remove data from current week and onwards now = datetime.now() # Get start of the current week (Monday) start_of_week = now - timedelta(days=(now.weekday())) start_of_week = start_of_week.replace(hour=0, minute=0, second=0, microsecond=0) all_traders["creation_date"] = all_traders["creation_timestamp"].dt.date all_traders["creation_date"] = pd.to_datetime(all_traders["creation_date"]) # Filter the dataframe filtered_traders = all_traders[all_traders["creation_date"] < start_of_week] filtered_traders["month_year_week"] = ( filtered_traders["creation_timestamp"] .dt.to_period("W") .dt.start_time.dt.strftime("%b-%d-%Y") ) print(filtered_traders.month_year_week.unique()) return filtered_traders if __name__ == "__main__": print("WIP")