Spaces:

Alealejandrooo
/

MindBody_VS_Medserv

Running

App Files Files Community

Alealejandrooo committed on May 12, 2024

Commit

18c6efd

verified ·

1 Parent(s): 4a989bd

First commit

Browse files

Files changed (2) hide show

app.py +99 -0
process.py +121 -0

app.py ADDED Viewed

	@@ -0,0 +1,99 @@

+import gradio as gr
+import os
+from process import process_data
+def makeButtonClickableFiles(files):
+    """Enable the process button only when every uploaded file is an Excel workbook.
+
+    Args:
+        files (list | None): Uploaded files, given either as objects exposing
+            their path through a ``name`` attribute or as plain path strings.
+
+    Returns:
+        gr.Button: A non-interactive button when nothing is uploaded,
+            otherwise an interactive one.
+
+    Raises:
+        gr.Error: If any file name does not end in ``.xls`` or ``.xlsx``.
+    """
+    if not files:
+        return gr.Button(interactive=False)
+    allowed_extensions = (".xls", ".xlsx")
+    for file in files:
+        # Accept plain path strings as well as file objects carrying ``.name``.
+        path = file if isinstance(file, str) else file.name
+        base_name = os.path.basename(path)
+        # splitext yields "" for a name without a dot, so a file literally
+        # called "xlsx" is rejected instead of being mistaken for a workbook.
+        extension = os.path.splitext(base_name)[1].lower()
+        if extension not in allowed_extensions:
+            raise gr.Error(f"Unsupported file: {base_name}. Allowed extensions: .xls .xlsx")
+    return gr.Button(interactive=True)
+def _refresh_process_button(mindbody_files, medserv_files):
+    """Return the process button state for the current content of BOTH uploaders.
+
+    Each list is validated with makeButtonClickableFiles (which raises on a
+    wrong extension); the button is interactive only when both sides hold files,
+    because process_data needs a MindBody and a Medserv upload to compare.
+    """
+    makeButtonClickableFiles(mindbody_files)
+    makeButtonClickableFiles(medserv_files)
+    both_uploaded = bool(mindbody_files) and bool(medserv_files)
+    return gr.Button(interactive=both_uploaded)
+
+
+# Define a Gradio interface
+with gr.Blocks() as demo:
+    with gr.Row():
+        header = gr.Markdown(("<h1>MindBody VS. Medserv Checker </h1>"))
+    with gr.Row():
+        with gr.Column():
+            file_uploader_mindbody = gr.Files(
+                label=("Upload MindBody"),
+                file_count="multiple",
+                file_types=[".xlsx", '.xls'],
+                container=True,
+                interactive=True,
+                scale=1,
+            )
+        with gr.Column():
+            file_uploader_medserv = gr.Files(
+                label=("Upload Medserv"),
+                file_count="multiple",
+                file_types=[".xlsx", '.xls'],
+                container=True,
+                interactive=True,
+                scale=1,
+            )
+    with gr.Row():
+        # The variable keeps its original spelling; only the user-facing text is corrected.
+        tollerance = gr.Slider(
+            0, 7, value=1, step=1, interactive=True, label="Days Tolerance",
+            info="Set the number of days of tolerance to match the sale dates between MindBody and Medserv (0 = no tolerance / exact match).",
+        )
+    with gr.Row():
+        # Starts disabled; enabled by _refresh_process_button once both uploads are valid.
+        file_process_button = gr.Button(
+            value="PROCESS FILES",
+            interactive=False,
+        )
+    with gr.Row():
+        processed_file = gr.Files(
+            label=("Output File"),
+            file_count="single",
+            interactive=False,
+            elem_classes="gradio-file",
+        )
+    # Re-evaluate the button from both uploaders whenever either one changes,
+    # so uploading only one side can no longer enable processing.
+    for uploader in (file_uploader_mindbody, file_uploader_medserv):
+        uploader.change(
+            fn=_refresh_process_button,
+            inputs=[file_uploader_mindbody, file_uploader_medserv],
+            outputs=[file_process_button],
+        )
+    file_process_button.click(
+        fn=process_data,
+        inputs=[file_uploader_mindbody, file_uploader_medserv, tollerance],
+        outputs=processed_file,
+    )
+if __name__ == "__main__":
+    demo.queue().launch()

process.py ADDED Viewed

	@@ -0,0 +1,121 @@

+import pandas as pd
+import gradio as gr
+import re
+from datetime import timedelta
+def process_data(files_mindbody, files_medserv, tollerance, progress=gr.Progress()):
+    """Write the MindBody sales that have no Medserv counterpart to an Excel file.
+
+    A MindBody row counts as matched when at least one Medserv row has a 'DOS'
+    date within ``tollerance`` days (inclusive, in either direction) AND shares
+    either the first name or the last name parsed from the 'Client' column.
+
+    Args:
+        files_mindbody: Uploaded MindBody workbooks, passed to load_data.
+        files_medserv: Uploaded Medserv workbooks, passed to load_data.
+        tollerance: Number of days of tolerance around each MindBody date.
+        progress: Gradio progress tracker used to report loop progress.
+
+    Returns:
+        str: Path of the Excel file listing the unmatched MindBody rows.
+
+    Raises:
+        gr.Error: If either upload is missing, or the 'DOS' / 'Client' column
+            could not be detected in one of the data sets.
+    """
+    if not files_mindbody or not files_medserv:
+        raise gr.Error("Please upload both MindBody and Medserv files before processing.")
+    mindbody = load_data(files_mindbody)
+    medserv = load_data(files_medserv)
+    for source_name, frame in (("MindBody", mindbody), ("Medserv", medserv)):
+        missing = [column for column in ('DOS', 'Client') if column not in frame.columns]
+        if missing:
+            raise gr.Error(
+                f"Could not detect the {' and '.join(missing)} column(s) in the {source_name} files."
+            )
+        # Split on the FIRST comma only and pick the pieces by position, so a
+        # name with no comma or several commas cannot break the assignment.
+        # Surrounding whitespace is stripped so "Doe,John" equals "Doe, John".
+        name_parts = frame['Client'].str.split(',', n=1)
+        frame['Last Name'] = name_parts.str[0].str.strip()
+        frame['First Name'] = name_parts.str[1].str.strip()
+    window = timedelta(days=tollerance)
+    medserv_dates = medserv['DOS']
+    medserv_first = medserv['First Name']
+    medserv_last = medserv['Last Name']
+    # One flag per MindBody row, True when no Medserv row matches it
+    unmatched_flags = []
+    for idx in progress.tqdm(range(len(mindbody)), desc='Analyzing files...'):
+        date = mindbody['DOS'].iloc[idx]
+        first_name = mindbody['First Name'].iloc[idx]
+        last_name = mindbody['Last Name'].iloc[idx]
+        # Accept every date inside the tolerance window, not only its two end
+        # points and the exact date.
+        in_window = medserv_dates.between(date - window, date + window)
+        # Either name component agreeing is enough to count as the same client.
+        same_client = (medserv_first == first_name) | (medserv_last == last_name)
+        unmatched_flags.append(not (in_window & same_client).any())
+    # Boolean selection keeps the original dtypes, so the .dt accessor below
+    # still works when the result is empty (every row matched).
+    unmatched_mask = pd.Series(unmatched_flags, index=mindbody.index, dtype=bool)
+    unmatched_df = mindbody[unmatched_mask].copy()
+    # Specify the columns to include in the output Excel file
+    columns_to_include = ['DOS', 'Client ID', 'Client', 'Sale ID', 'Item name', 'Location']
+    # Format the 'DOS' column to remove time part
+    unmatched_df['DOS'] = unmatched_df['DOS'].dt.strftime('%d-%m-%Y')
+    output_file_path = 'Comparison Results.xlsx'
+    unmatched_df[columns_to_include].to_excel(output_file_path, index=False)
+    return output_file_path
+def load_data(files):
+    """Read the uploaded Excel files into one DataFrame with normalised column names.
+
+    All workbooks are read with pandas and concatenated. The column detected by
+    find_date_column is renamed to 'DOS' and the one detected by
+    find_name_column to 'Client'; a column that cannot be detected is left as is.
+
+    Args:
+        files (list): Uploaded files, given either as objects exposing their
+            path through a ``name`` attribute or as plain path strings.
+
+    Returns:
+        pandas.DataFrame: The combined rows of all files.
+
+    Raises:
+        gr.Error: If no file is given or a file is not ``.xls`` / ``.xlsx``.
+    """
+    if not files:
+        raise gr.Error("No files provided: Please upload at least one .xls or .xlsx file")
+    dfs = []
+    for file in files:
+        filepath = file if isinstance(file, str) else file.name
+        # Compare case-insensitively, consistent with the upload-time check,
+        # so e.g. "REPORT.XLSX" is not rejected here after being accepted there.
+        if not filepath.lower().endswith(('.xlsx', '.xls')):
+            raise gr.Error("Unsupported file format: Please provide a .xls or .xlsx file")
+        dfs.append(pd.read_excel(filepath))
+    # Concatenate dataframes if more than one file is provided
+    df = pd.concat(dfs, ignore_index=True) if len(dfs) > 1 else dfs[0]
+    # Find and rename the date column to 'DOS'
+    date_column = find_date_column(df)
+    if date_column:
+        df.rename(columns={date_column: 'DOS'}, inplace=True)
+    # Find and rename the name column to 'Client'
+    name_column = find_name_column(df)
+    if name_column:
+        df.rename(columns={name_column: 'Client'}, inplace=True)
+    return df
+def find_name_column(df):
+    """Return the column whose values most often look like "Last, First [Middle ...]".
+
+    Every column is converted to text and each value is tested against the name
+    pattern from the start of the string. The first column with the strictly
+    highest number of matches wins; None is returned when nothing matches.
+    """
+    # Last name, a comma, one whitespace character, then one or more given names
+    name_regex = re.compile(r"^[A-Za-z'-]+,\s[A-Za-z'-]+(?:\s[A-Za-z'-]+)*$")
+    best_column = None
+    best_hits = 0
+    for column in df.columns:
+        as_text = df[column].astype(str)
+        hits = sum(1 for value in as_text if name_regex.match(value))
+        # Strict comparison: on a tie the earlier column is kept
+        if hits <= best_hits:
+            continue
+        best_column, best_hits = column, hits
+    return best_column
+def find_date_column(df):
+    """Return the column whose values most often contain a date-like substring.
+
+    Every column is converted to text and searched for the date pattern. The
+    first column with the strictly highest number of hits wins; None is returned
+    when no column contains a single hit.
+    """
+    # Two to four digits, a '-' or '/', one or two digits, a '-' or '/', two to four digits
+    date_pattern = r"\b\d{2,4}[-/]\d{1,2}[-/]\d{2,4}\b"
+    winner = None
+    winner_hits = 0
+    for column in df.columns:
+        as_text = df[column].astype(str)
+        contains_date = as_text.str.contains(date_pattern, na=False)
+        hits = contains_date.sum()
+        # Strict comparison: on a tie the earlier column is kept
+        if hits <= winner_hits:
+            continue
+        winner, winner_hits = column, hits
+    return winner