ksitarz-sigmoidal committed on
Commit
c04e197
·
verified ·
1 Parent(s): fab68f7

feat: add versions of files without duplicates

Browse files
Files changed (1) hide show
  1. app.py +13 -4
app.py CHANGED
@@ -36,7 +36,6 @@ if uploaded_file is not None and new_website_id:
36
 
37
  # Create DataFrame for website_event data
38
  df_website_event = df[website_event_columns]
39
- df_website_event.to_csv('website_event.csv', index=False)
40
  st.download_button(label="Download Website Event CSV", data=df_website_event.to_csv(index=False), file_name='website_event.csv', mime='text/csv')
41
 
42
  # Define the columns for the session table
@@ -47,8 +46,18 @@ if uploaded_file is not None and new_website_id:
47
  ]
48
 
49
  # Create DataFrame for session data
50
- df_session = df[session_columns].drop_duplicates('session_id')
51
- df_session.to_csv('session.csv', index=False)
52
  st.download_button(label="Download Session CSV", data=df_session.to_csv(index=False), file_name='session.csv', mime='text/csv')
53
 
54
- st.success("Successfully generated website_event.csv and session.csv")
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  # Create DataFrame for website_event data
38
  df_website_event = df[website_event_columns]
 
39
  st.download_button(label="Download Website Event CSV", data=df_website_event.to_csv(index=False), file_name='website_event.csv', mime='text/csv')
40
 
41
  # Define the columns for the session table
 
46
  ]
47
 
48
  # Create DataFrame for session data
49
+ df_session = df[session_columns]
 
50
  st.download_button(label="Download Session CSV", data=df_session.to_csv(index=False), file_name='session.csv', mime='text/csv')
51
 
52
+ # Versions of files without duplicates
53
+ df_website_event_nodup = df[website_event_columns].drop_duplicates('event_id')
54
+ df_session_nodup = df[session_columns].drop_duplicates('session_id')
55
+ st.text(f"Versions with duplicates removed ({len(df_website_event) - len(df_website_event_nodup)} events removed, {len(df_session) - len(df_session_nodup)} session entries removed):")
56
+
57
+ # Download button for website_event data without duplicates
58
+ st.download_button(label="Download Website Event CSV (No Duplicates)", data=df_website_event_nodup.to_csv(index=False), file_name='website_event_no_duplicates.csv', mime='text/csv')
59
+
60
+ # Download button for session data without duplicates
61
+ st.download_button(label="Download Session CSV (No Duplicates)", data=df_session_nodup.to_csv(index=False), file_name='session_no_duplicates.csv', mime='text/csv')
62
+
63
+ st.success("Successfully generated csv files")