Spaces:
Sleeping
Sleeping
ksitarz-sigmoidal
committed on
feat: add versions of files without duplicates
Browse files
app.py
CHANGED
@@ -36,7 +36,6 @@ if uploaded_file is not None and new_website_id:
|
|
36 |
|
37 |
# Create DataFrame for website_event data
|
38 |
df_website_event = df[website_event_columns]
|
39 |
-
df_website_event.to_csv('website_event.csv', index=False)
|
40 |
st.download_button(label="Download Website Event CSV", data=df_website_event.to_csv(index=False), file_name='website_event.csv', mime='text/csv')
|
41 |
|
42 |
# Define the columns for the session table
|
@@ -47,8 +46,18 @@ if uploaded_file is not None and new_website_id:
|
|
47 |
]
|
48 |
|
49 |
# Create DataFrame for session data
|
50 |
-
df_session = df[session_columns]
|
51 |
-
df_session.to_csv('session.csv', index=False)
|
52 |
st.download_button(label="Download Session CSV", data=df_session.to_csv(index=False), file_name='session.csv', mime='text/csv')
|
53 |
|
54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
# Create DataFrame for website_event data
|
38 |
df_website_event = df[website_event_columns]
|
|
|
39 |
st.download_button(label="Download Website Event CSV", data=df_website_event.to_csv(index=False), file_name='website_event.csv', mime='text/csv')
|
40 |
|
41 |
# Define the columns for the session table
|
|
|
46 |
]
|
47 |
|
48 |
# Create DataFrame for session data
|
49 |
+
df_session = df[session_columns]
|
|
|
50 |
st.download_button(label="Download Session CSV", data=df_session.to_csv(index=False), file_name='session.csv', mime='text/csv')
|
51 |
|
52 |
+
# Versions of files without duplicates
|
53 |
+
df_website_event_nodup = df[website_event_columns].drop_duplicates('event_id')
|
54 |
+
df_session_nodup = df[session_columns].drop_duplicates('session_id')
|
55 |
+
st.text(f"Versions with duplicates removed ({len(df_website_event) - len(df_website_event_nodup)} events removed, {len(df_session) - len(df_session_nodup)} session entries removed):")
|
56 |
+
|
57 |
+
# Offer the de-duplicated website_event data as a CSV download
|
58 |
+
st.download_button(label="Download Website Event CSV (No Duplicates)", data=df_website_event_nodup.to_csv(index=False), file_name='website_event_no_duplicates.csv', mime='text/csv')
|
59 |
+
|
60 |
+
# Offer the de-duplicated session data as a CSV download
|
61 |
+
st.download_button(label="Download Session CSV (No Duplicates)", data=df_session_nodup.to_csv(index=False), file_name='session_no_duplicates.csv', mime='text/csv')
|
62 |
+
|
63 |
+
st.success("Successfully generated csv files")
|