cyberosa committed on
Commit
dff5e35
·
1 Parent(s): 5f3c579

corrected version of wow retention

Browse files
app.py CHANGED
@@ -93,14 +93,22 @@ def get_all_data():
93
  FROM read_parquet('./data/unknown_traders.parquet')
94
  """
95
  df4 = con.execute(query4).fetchdf()
96
- con.close()
97
 
98
- return df1, df2, df3, df4
 
 
 
 
 
 
 
99
 
100
 
101
  def prepare_data():
102
 
103
- all_trades, closed_markets, daily_info, unknown_traders = get_all_data()
 
 
104
 
105
  all_trades["creation_date"] = all_trades["creation_timestamp"].dt.date
106
 
@@ -135,12 +143,12 @@ def prepare_data():
135
  closed_markets["month_year_week"] = (
136
  closed_markets["opening_datetime"].dt.to_period("W").dt.strftime("%b-%d-%Y")
137
  )
138
- return traders_data, closed_markets, daily_info, unknown_traders
139
 
140
 
141
- traders_data, closed_markets, daily_info, unknown_traders = prepare_data()
142
  retention_df = prepare_retention_dataset(
143
- traders_df=traders_data, unknown_df=unknown_traders
144
  )
145
 
146
  demo = gr.Blocks()
@@ -406,17 +414,37 @@ with demo:
406
  with gr.Row():
407
  gr.Markdown("# Wow retention by trader type")
408
  with gr.Row():
409
- wow_retention = calculate_wow_retention_by_type(retention_df)
410
- wow_retention_plot = plot_wow_retention_by_type(
411
- wow_retention=wow_retention
412
- )
413
- with gr.Row():
414
- gr.Markdown("# Cohort retention")
415
- with gr.Row():
416
- cohort_retention = calculate_cohort_retention(df=retention_df)
417
- cohort_retention_plot = plot_cohort_retention_heatmap(
418
- retention_matrix=cohort_retention
419
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
 
421
  with gr.TabItem("⚙️ Active traders"):
422
  with gr.Row():
 
93
  FROM read_parquet('./data/unknown_traders.parquet')
94
  """
95
  df4 = con.execute(query4).fetchdf()
 
96
 
97
+ # Query to fetch retention activity data
98
+ query5 = f"""
99
+ SELECT *
100
+ FROM read_parquet('./data/retention_activity.parquet')
101
+ """
102
+ df5 = con.execute(query5).fetchdf()
103
+ con.close()
104
+ return df1, df2, df3, df4, df5
105
 
106
 
107
  def prepare_data():
108
 
109
+ all_trades, closed_markets, daily_info, unknown_traders, retention_df = (
110
+ get_all_data()
111
+ )
112
 
113
  all_trades["creation_date"] = all_trades["creation_timestamp"].dt.date
114
 
 
143
  closed_markets["month_year_week"] = (
144
  closed_markets["opening_datetime"].dt.to_period("W").dt.strftime("%b-%d-%Y")
145
  )
146
+ return traders_data, closed_markets, daily_info, unknown_traders, retention_df
147
 
148
 
149
+ traders_data, closed_markets, daily_info, unknown_traders, retention_df = prepare_data()
150
  retention_df = prepare_retention_dataset(
151
+ retention_df=retention_df, unknown_df=unknown_traders
152
  )
153
 
154
  demo = gr.Blocks()
 
414
  with gr.Row():
415
  gr.Markdown("# Wow retention by trader type")
416
  with gr.Row():
417
+ with gr.Column(scale=1):
418
+ gr.Markdown("## Wow retention in Pearl markets")
419
+ wow_retention = calculate_wow_retention_by_type(
420
+ retention_df, market_creator="pearl"
421
+ )
422
+ wow_retention_plot = plot_wow_retention_by_type(
423
+ wow_retention=wow_retention
424
+ )
425
+ with gr.Column(scale=1):
426
+ gr.Markdown("## Wow retention in Quickstart markets")
427
+ wow_retention = calculate_wow_retention_by_type(
428
+ retention_df, market_creator="quickstart"
429
+ )
430
+ wow_retention_plot = plot_wow_retention_by_type(
431
+ wow_retention=wow_retention
432
+ )
433
+
434
+ # with gr.Row():
435
+ # gr.Markdown("# Cohort retention in pearl traders")
436
+ # with gr.Row():
437
+ # cohort_retention = calculate_cohort_retention(df=retention_df)
438
+ # cohort_retention_plot = plot_cohort_retention_heatmap(
439
+ # retention_matrix=cohort_retention
440
+ # )
441
+ # with gr.Row():
442
+ # gr.Markdown("# Cohort retention in qs traders")
443
+ # with gr.Row():
444
+ # cohort_retention = calculate_cohort_retention(df=retention_df)
445
+ # cohort_retention_plot = plot_cohort_retention_heatmap(
446
+ # retention_matrix=cohort_retention
447
+ # )
448
 
449
  with gr.TabItem("⚙️ Active traders"):
450
  with gr.Row():
notebooks/retention_metrics.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 2,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
@@ -12,6 +12,178 @@
12
  "import gc"
13
  ]
14
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  {
16
  "cell_type": "markdown",
17
  "metadata": {},
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 10,
6
  "metadata": {},
7
  "outputs": [],
8
  "source": [
 
12
  "import gc"
13
  ]
14
  },
15
+ {
16
+ "cell_type": "markdown",
17
+ "metadata": {},
18
+ "source": [
19
+ "# Get all activity info from tools.parquet"
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": 11,
25
+ "metadata": {},
26
+ "outputs": [],
27
+ "source": [
28
+ "retention_df = pd.read_parquet(\"../data/retention_activity.parquet\")"
29
+ ]
30
+ },
31
+ {
32
+ "cell_type": "code",
33
+ "execution_count": 12,
34
+ "metadata": {},
35
+ "outputs": [
36
+ {
37
+ "data": {
38
+ "text/plain": [
39
+ "Index(['trader_address', 'request_time', 'market_creator', 'request_date',\n",
40
+ " 'staking', 'month_year_week'],\n",
41
+ " dtype='object')"
42
+ ]
43
+ },
44
+ "execution_count": 12,
45
+ "metadata": {},
46
+ "output_type": "execute_result"
47
+ }
48
+ ],
49
+ "source": [
50
+ "retention_df.columns"
51
+ ]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": 13,
56
+ "metadata": {},
57
+ "outputs": [
58
+ {
59
+ "data": {
60
+ "text/html": [
61
+ "<div>\n",
62
+ "<style scoped>\n",
63
+ " .dataframe tbody tr th:only-of-type {\n",
64
+ " vertical-align: middle;\n",
65
+ " }\n",
66
+ "\n",
67
+ " .dataframe tbody tr th {\n",
68
+ " vertical-align: top;\n",
69
+ " }\n",
70
+ "\n",
71
+ " .dataframe thead th {\n",
72
+ " text-align: right;\n",
73
+ " }\n",
74
+ "</style>\n",
75
+ "<table border=\"1\" class=\"dataframe\">\n",
76
+ " <thead>\n",
77
+ " <tr style=\"text-align: right;\">\n",
78
+ " <th></th>\n",
79
+ " <th>trader_address</th>\n",
80
+ " <th>request_time</th>\n",
81
+ " <th>market_creator</th>\n",
82
+ " <th>request_date</th>\n",
83
+ " <th>staking</th>\n",
84
+ " <th>month_year_week</th>\n",
85
+ " </tr>\n",
86
+ " </thead>\n",
87
+ " <tbody>\n",
88
+ " <tr>\n",
89
+ " <th>0</th>\n",
90
+ " <td>0x721de88cee9be146c8f0c7ef1a4188bee36494d6</td>\n",
91
+ " <td>2024-10-25 00:00:20+00:00</td>\n",
92
+ " <td>quickstart</td>\n",
93
+ " <td>2024-10-25</td>\n",
94
+ " <td>non_staking</td>\n",
95
+ " <td>Oct-25-2024</td>\n",
96
+ " </tr>\n",
97
+ " <tr>\n",
98
+ " <th>1</th>\n",
99
+ " <td>0x8a1d5f22b5a3bea34697b85e7b4ad894bf9ee36a</td>\n",
100
+ " <td>2024-10-25 00:00:25+00:00</td>\n",
101
+ " <td>quickstart</td>\n",
102
+ " <td>2024-10-25</td>\n",
103
+ " <td>non_staking</td>\n",
104
+ " <td>Oct-25-2024</td>\n",
105
+ " </tr>\n",
106
+ " <tr>\n",
107
+ " <th>2</th>\n",
108
+ " <td>0xf839eaf4b42eadd917b46d7b6da0dd0e1fd6f684</td>\n",
109
+ " <td>2024-10-25 00:00:55+00:00</td>\n",
110
+ " <td>quickstart</td>\n",
111
+ " <td>2024-10-25</td>\n",
112
+ " <td>non_staking</td>\n",
113
+ " <td>Oct-25-2024</td>\n",
114
+ " </tr>\n",
115
+ " <tr>\n",
116
+ " <th>3</th>\n",
117
+ " <td>0x01274796ce41aa8e8312e05a427ffb4b0d2148f6</td>\n",
118
+ " <td>2024-10-25 00:00:55+00:00</td>\n",
119
+ " <td>quickstart</td>\n",
120
+ " <td>2024-10-25</td>\n",
121
+ " <td>non_staking</td>\n",
122
+ " <td>Oct-25-2024</td>\n",
123
+ " </tr>\n",
124
+ " <tr>\n",
125
+ " <th>4</th>\n",
126
+ " <td>0xc20678890f94d0162593c46fe5da67d9a4b7a6fb</td>\n",
127
+ " <td>2024-10-25 00:01:05+00:00</td>\n",
128
+ " <td>quickstart</td>\n",
129
+ " <td>2024-10-25</td>\n",
130
+ " <td>non_staking</td>\n",
131
+ " <td>Oct-25-2024</td>\n",
132
+ " </tr>\n",
133
+ " </tbody>\n",
134
+ "</table>\n",
135
+ "</div>"
136
+ ],
137
+ "text/plain": [
138
+ " trader_address request_time \\\n",
139
+ "0 0x721de88cee9be146c8f0c7ef1a4188bee36494d6 2024-10-25 00:00:20+00:00 \n",
140
+ "1 0x8a1d5f22b5a3bea34697b85e7b4ad894bf9ee36a 2024-10-25 00:00:25+00:00 \n",
141
+ "2 0xf839eaf4b42eadd917b46d7b6da0dd0e1fd6f684 2024-10-25 00:00:55+00:00 \n",
142
+ "3 0x01274796ce41aa8e8312e05a427ffb4b0d2148f6 2024-10-25 00:00:55+00:00 \n",
143
+ "4 0xc20678890f94d0162593c46fe5da67d9a4b7a6fb 2024-10-25 00:01:05+00:00 \n",
144
+ "\n",
145
+ " market_creator request_date staking month_year_week \n",
146
+ "0 quickstart 2024-10-25 non_staking Oct-25-2024 \n",
147
+ "1 quickstart 2024-10-25 non_staking Oct-25-2024 \n",
148
+ "2 quickstart 2024-10-25 non_staking Oct-25-2024 \n",
149
+ "3 quickstart 2024-10-25 non_staking Oct-25-2024 \n",
150
+ "4 quickstart 2024-10-25 non_staking Oct-25-2024 "
151
+ ]
152
+ },
153
+ "execution_count": 13,
154
+ "metadata": {},
155
+ "output_type": "execute_result"
156
+ }
157
+ ],
158
+ "source": [
159
+ "retention_df.head()"
160
+ ]
161
+ },
162
+ {
163
+ "cell_type": "code",
164
+ "execution_count": 14,
165
+ "metadata": {},
166
+ "outputs": [
167
+ {
168
+ "data": {
169
+ "text/plain": [
170
+ "staking\n",
171
+ "non_Olas 764956\n",
172
+ "non_staking 275246\n",
173
+ "pearl 56487\n",
174
+ "quickstart 48511\n",
175
+ "Name: count, dtype: int64"
176
+ ]
177
+ },
178
+ "execution_count": 14,
179
+ "metadata": {},
180
+ "output_type": "execute_result"
181
+ }
182
+ ],
183
+ "source": [
184
+ "retention_df.staking.value_counts()"
185
+ ]
186
+ },
187
  {
188
  "cell_type": "markdown",
189
  "metadata": {},
scripts/retention_metrics.py CHANGED
@@ -4,10 +4,13 @@ from scripts.utils import DATA_DIR
4
 
5
 
6
  # Basic Week over Week Retention
7
- def calculate_wow_retention_by_type(df: pd.DataFrame) -> pd.DataFrame:
 
 
 
8
  # Get unique traders per week and type
9
  weekly_traders = (
10
- df.groupby(["month_year_week", "trader_type"])["trader_address"]
11
  .nunique()
12
  .reset_index()
13
  )
@@ -26,16 +29,16 @@ def calculate_wow_retention_by_type(df: pd.DataFrame) -> pd.DataFrame:
26
 
27
  # Get traders in both weeks for this type
28
  current_traders = set(
29
- df[
30
- (df["month_year_week"] == current_week)
31
- & (df["trader_type"] == trader_type)
32
  ]["trader_address"]
33
  )
34
 
35
  previous_traders = set(
36
- df[
37
- (df["month_year_week"] == previous_week)
38
- & (df["trader_type"] == trader_type)
39
  ]["trader_address"]
40
  )
41
 
@@ -60,10 +63,13 @@ def calculate_wow_retention_by_type(df: pd.DataFrame) -> pd.DataFrame:
60
 
61
 
62
  # Cohort Retention
63
- def calculate_cohort_retention(df, max_weeks=12) -> pd.DataFrame:
 
 
 
64
  # Get first week for each trader
65
  first_trades = (
66
- df.groupby("trader_address")
67
  .agg({"creation_timestamp": "min", "month_year_week": "first"})
68
  .reset_index()
69
  )
@@ -111,7 +117,7 @@ def calculate_cohort_retention(df, max_weeks=12) -> pd.DataFrame:
111
  return retention_matrix.round(2)
112
 
113
 
114
- def prepare_retention_dataset(
115
  traders_df: pd.DataFrame, unknown_df: pd.DataFrame
116
  ) -> pd.DataFrame:
117
 
@@ -131,6 +137,33 @@ def prepare_retention_dataset(
131
  return all_traders
132
 
133
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  if __name__ == "__main__":
135
  # read all datasets
136
  traders_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
 
4
 
5
 
6
  # Basic Week over Week Retention
7
+ def calculate_wow_retention_by_type(
8
+ df: pd.DataFrame, market_creator: str
9
+ ) -> pd.DataFrame:
10
+ filtered_df = df.loc[df["market_creator"] == market_creator]
11
  # Get unique traders per week and type
12
  weekly_traders = (
13
+ filtered_df.groupby(["month_year_week", "trader_type"])["trader_address"]
14
  .nunique()
15
  .reset_index()
16
  )
 
29
 
30
  # Get traders in both weeks for this type
31
  current_traders = set(
32
+ filtered_df[
33
+ (filtered_df["month_year_week"] == current_week)
34
+ & (filtered_df["trader_type"] == trader_type)
35
  ]["trader_address"]
36
  )
37
 
38
  previous_traders = set(
39
+ filtered_df[
40
+ (filtered_df["month_year_week"] == previous_week)
41
+ & (filtered_df["trader_type"] == trader_type)
42
  ]["trader_address"]
43
  )
44
 
 
63
 
64
 
65
  # Cohort Retention
66
+ def calculate_cohort_retention(
67
+ df: pd.DataFrame, trader_type: str, max_weeks=12
68
+ ) -> pd.DataFrame:
69
+ df_filtered = df.loc[df["trader_type"] == trader_type]
70
  # Get first week for each trader
71
  first_trades = (
72
+ df_filtered.groupby("trader_address")
73
  .agg({"creation_timestamp": "min", "month_year_week": "first"})
74
  .reset_index()
75
  )
 
117
  return retention_matrix.round(2)
118
 
119
 
120
+ def merge_retention_dataset(
121
  traders_df: pd.DataFrame, unknown_df: pd.DataFrame
122
  ) -> pd.DataFrame:
123
 
 
137
  return all_traders
138
 
139
 
140
def prepare_retention_dataset(
    retention_df: pd.DataFrame, unknown_df: pd.DataFrame
) -> pd.DataFrame:
    """Combine classified and unclassified trader activity for retention metrics.

    Neither input frame is modified; the result is built from copies.

    Args:
        retention_df: Activity rows. The columns read here are ``staking``,
            ``request_time``, ``market_creator`` and ``trader_address``.
        unknown_df: Rows for traders that could not be classified. The columns
            read here are ``market_creator``, ``trader_address`` and
            ``creation_timestamp``.

    Returns:
        A new DataFrame sorted by ``creation_timestamp`` (ascending) with the
        columns ``trader_type``, ``market_creator``, ``trader_address``,
        ``creation_timestamp`` and ``month_year_week``. ``trader_type`` is
        ``"non_Olas"`` when ``staking`` equals ``"non_Olas"``, ``"Olas"`` for
        every other ``staking`` value, and ``"unclassified"`` for all rows
        coming from ``unknown_df``.
    """
    selected_columns = [
        "trader_type",
        "market_creator",
        "trader_address",
        "creation_timestamp",
    ]

    # assign() and rename() return new frames, so the caller's DataFrames keep
    # their original columns (no added "trader_type", no renamed "request_time").
    classified = retention_df.assign(
        trader_type=retention_df["staking"].apply(
            lambda x: "non_Olas" if x == "non_Olas" else "Olas"
        )
    ).rename(columns={"request_time": "creation_timestamp"})[selected_columns]

    unclassified = unknown_df.assign(trader_type="unclassified")[selected_columns]

    all_traders = pd.concat([classified, unclassified], ignore_index=True)

    all_traders["creation_timestamp"] = pd.to_datetime(
        all_traders["creation_timestamp"]
    )
    all_traders = all_traders.sort_values(by="creation_timestamp", ascending=True)
    # Weekly label used as the grouping key by the retention calculations.
    all_traders["month_year_week"] = (
        all_traders["creation_timestamp"].dt.to_period("W").dt.strftime("%b-%d-%Y")
    )
    return all_traders
165
+
166
+
167
  if __name__ == "__main__":
168
  # read all datasets
169
  traders_df = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
tabs/retention_plots.py CHANGED
@@ -53,7 +53,8 @@ def plot_wow_retention_by_type(wow_retention):
53
  )
54
 
55
 
56
- def plot_cohort_retention_heatmap(retention_matrix):
 
57
  # Create a copy of the matrix to avoid modifying the original
58
  retention_matrix = retention_matrix.copy()
59
 
 
53
  )
54
 
55
 
56
+ def plot_cohort_retention_heatmap(retention_matrix: pd.DataFrame):
57
+
58
  # Create a copy of the matrix to avoid modifying the original
59
  retention_matrix = retention_matrix.copy()
60