Alealejandrooo committed on
Commit
18c6efd
·
verified ·
1 Parent(s): 4a989bd

First commit

Browse files
Files changed (2) hide show
  1. app.py +99 -0
  2. process.py +121 -0
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+
4
+ from process import process_data
5
+
6
def makeButtonClickableFiles(files):
    """Return the process-button state for a list of uploaded files.

    The button is made interactive only when at least one file is present and
    every file carries an Excel extension.

    Args:
        files (list): Uploaded files. Each item is either an object exposing
            its path as ``name`` or a plain path string. May be ``None`` or
            empty.

    Returns:
        gr.Button: A non-interactive button when nothing is uploaded,
        otherwise an interactive one.

    Raises:
        gr.Error: If any file does not end in ``.xls`` or ``.xlsx``
            (case-insensitive).
    """
    if not files:
        return gr.Button(interactive=False)

    allowed_extensions = {".xls", ".xlsx"}
    for file in files:
        # Fall back to the item itself when it is already a path string.
        base_name = os.path.basename(getattr(file, "name", file))
        # splitext returns "" for names without a dot, so an extension-less
        # file that happens to be called "xlsx" is rejected instead of accepted.
        extension = os.path.splitext(base_name)[1].lower()
        if extension not in allowed_extensions:
            raise gr.Error(
                f"Unsupported file: {base_name}. Allowed extensions: .xls .xlsx"
            )

    return gr.Button(interactive=True)
26
+
27
+
28
# Define the Gradio interface.


def _update_process_button(mindbody_files, medserv_files):
    """Enable the process button only when BOTH uploads are present and valid.

    Each non-empty upload is validated with ``makeButtonClickableFiles`` (which
    raises ``gr.Error`` on an unsupported extension). Wiring both uploaders into
    one callback prevents the button from becoming clickable while one of the
    two inputs is still empty.
    """
    for uploaded in (mindbody_files, medserv_files):
        if uploaded:
            makeButtonClickableFiles(uploaded)
    return gr.Button(interactive=bool(mindbody_files and medserv_files))


with gr.Blocks() as demo:

    with gr.Row():
        header = gr.Markdown(("<h1>MindBody VS. Medserv Checker </h1>"))

    with gr.Row():

        with gr.Column():
            file_uploader_mindbody = gr.Files(
                label=("Upload MindBody"),
                file_count="multiple",
                file_types=[".xlsx", '.xls'],
                container=True,
                interactive=True,
                scale=1,
            )

        with gr.Column():
            file_uploader_medserv = gr.Files(
                label=("Upload Medserv"),
                file_count="multiple",
                file_types=[".xlsx", '.xls'],
                container=True,
                interactive=True,
                scale=1,
            )

    with gr.Row():
        # Module-level name kept as-is; only the user-facing text is corrected.
        tollerance = gr.Slider(
            0, 7, value=1, step=1, interactive=True, label="Days Tolerance",
            info="Set the number of days of tolerance to match the sale dates between MindBody and Medserv (0 = no tolerance / exact match).",
        )

    with gr.Row():
        # Starts disabled; enabled by _update_process_button once both uploads exist.
        file_process_button = gr.Button(
            value="PROCESS FILES",
            interactive=False,
        )

    with gr.Row():
        processed_file = gr.Files(
            label=("Output File"),
            file_count="single",
            interactive=False,
            elem_classes="gradio-file",
        )

    # Re-evaluate the button state whenever either upload changes, always
    # looking at both uploaders together.
    file_uploader_mindbody.change(
        fn=_update_process_button,
        inputs=[file_uploader_mindbody, file_uploader_medserv],
        outputs=[file_process_button])

    file_uploader_medserv.change(
        fn=_update_process_button,
        inputs=[file_uploader_mindbody, file_uploader_medserv],
        outputs=[file_process_button])

    file_process_button.click(
        fn=process_data,
        inputs=[file_uploader_mindbody, file_uploader_medserv, tollerance],
        outputs=processed_file)


if __name__ == "__main__":
    demo.queue().launch()
process.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import gradio as gr
3
+ import re
4
+ from datetime import timedelta
5
+
6
+
7
def process_data(files_mindbody, files_medserv, tollerance, progress=gr.Progress()):
    """Write the MindBody rows that have no counterpart in Medserv to Excel.

    A MindBody row counts as matched when Medserv contains a row whose 'DOS'
    lies within ``tollerance`` days (inclusive, either direction) of the
    MindBody 'DOS' and whose first name or last name is equal.

    Args:
        files_mindbody: Uploaded MindBody files, forwarded to ``load_data``.
        files_medserv: Uploaded Medserv files, forwarded to ``load_data``.
        tollerance: Number of days of tolerance on the date comparison
            (0 means the dates must be equal).
        progress: Gradio progress tracker used to report loop progress.

    Returns:
        str: Path of the generated Excel file ('Comparison Results.xlsx').

    Raises:
        gr.Error: If the 'DOS' or 'Client' column could not be detected in
            either input.

    Note:
        The date arithmetic assumes both 'DOS' columns are datetime-like as
        returned by ``pd.read_excel`` -- TODO confirm for text-formatted dates.
    """
    mindbody = load_data(files_mindbody)
    medserv = load_data(files_medserv)

    # Fail with a readable message instead of a bare KeyError further down.
    for source_name, frame in (("MindBody", mindbody), ("Medserv", medserv)):
        missing = [col for col in ("DOS", "Client") if col not in frame.columns]
        if missing:
            raise gr.Error(
                f"Could not detect column(s) {', '.join(missing)} in the {source_name} file(s)."
            )

    # Split 'Client' ("Last, First") into name components for both DataFrames.
    # n=1 plus reindex guarantees exactly two parts whatever the comma count;
    # stripping makes "Smith, John" and "Smith,John" compare equal.
    for frame in (mindbody, medserv):
        parts = (
            frame['Client'].str.split(',', n=1, expand=True)
            .reindex(columns=[0, 1])
            .astype(object)
        )
        frame['Last Name'] = parts[0].str.strip()
        frame['First Name'] = parts[1].str.strip()

    # Hoist loop invariants.
    window = timedelta(days=tollerance)
    medserv_dates = medserv['DOS']
    medserv_first = medserv['First Name']
    medserv_last = medserv['Last Name']
    mindbody_dates = mindbody['DOS']
    mindbody_first = mindbody['First Name']
    mindbody_last = mindbody['Last Name']

    unmatched_flags = []
    for idx in progress.tqdm(range(len(mindbody)), desc='Analyzing files...'):
        date = mindbody_dates.iloc[idx]

        # Every day inside the window counts, not only the two end points.
        in_window = medserv_dates.between(date - window, date + window)

        # NOTE(review): a match needs only the first OR the last name to be
        # equal, as in the original logic -- confirm this leniency is intended.
        same_name = ((medserv_first == mindbody_first.iloc[idx]) |
                     (medserv_last == mindbody_last.iloc[idx]))

        unmatched_flags.append(not (in_window & same_name).any())

    # Select by mask so column dtypes survive even when nothing is unmatched;
    # rebuilding from a list of rows loses the datetime dtype on an empty
    # result and makes the .dt accessor below fail.
    unmatched_mask = pd.Series(unmatched_flags, index=mindbody.index, dtype=bool)
    unmatched_df = mindbody[unmatched_mask].copy()

    # Specify the columns to include in the output Excel file
    columns_to_include = ['DOS', 'Client ID', 'Client', 'Sale ID', 'Item name', 'Location']

    # Format the 'DOS' column to remove the time part
    unmatched_df['DOS'] = unmatched_df['DOS'].dt.strftime('%d-%m-%Y')

    output_file_path = 'Comparison Results.xlsx'
    unmatched_df[columns_to_include].to_excel(output_file_path, index=False)

    return output_file_path
52
+
53
+
54
+
55
def load_data(files):
    """Load one or more Excel files into a single normalized DataFrame.

    All files are read with ``pd.read_excel`` and concatenated. The column
    detected by ``find_date_column`` is renamed to 'DOS' and the column
    detected by ``find_name_column`` is renamed to 'Client'; when a detector
    returns nothing, the corresponding rename is skipped.

    Args:
        files: Iterable of uploaded files. Each item is either an object
            exposing its path as ``name`` or a plain path string.

    Returns:
        pandas.DataFrame: The concatenated data with normalized column names.

    Raises:
        gr.Error: If no file is provided or a file is not ``.xls``/``.xlsx``.
    """
    if not files:
        raise gr.Error("No files provided: Please provide a .xls or .xlsx file")

    # Fall back to the item itself when it is already a path string.
    filepaths = [getattr(file, "name", file) for file in files]

    dfs = []
    for filepath in filepaths:
        # Case-insensitive so that e.g. "REPORT.XLSX" is accepted, consistent
        # with the upload validation, which lowercases the extension.
        if not filepath.lower().endswith(('.xlsx', '.xls')):
            raise gr.Error("Unsupported file format: Please provide a .xls or .xlsx file")
        dfs.append(pd.read_excel(filepath))

    # Concatenate dataframes if more than one file is provided
    df = pd.concat(dfs, ignore_index=True) if len(dfs) > 1 else dfs[0]

    # Find and rename the date column to 'DOS'
    date_column = find_date_column(df)
    if date_column:
        df.rename(columns={date_column: 'DOS'}, inplace=True)

    # Find and rename the name column to 'Client'
    name_column = find_name_column(df)
    if name_column:
        df.rename(columns={name_column: 'Client'}, inplace=True)

    return df
84
+
85
+
86
def find_name_column(df):
    """Return the label of the column that looks most like "Last, First" names.

    Every cell is converted to text and tested against a "last name, first
    name(s)" pattern. The column with the most matching cells wins, and ties
    go to the earlier column.

    Args:
        df (pandas.DataFrame): Table to inspect.

    Returns:
        The label of the best-matching column, or ``None`` when no cell in
        any column matches.
    """
    # One name token: letters (any script, so accented names such as
    # "Peña, José" count), apostrophes and hyphens.
    token = r"(?:[^\W\d_]|['-])+"
    # Last name, a comma, one whitespace character, then one or more
    # whitespace-separated first names. Used with fullmatch, so no anchors.
    name_regex = re.compile(rf"{token},\s{token}(?:\s{token})*")

    max_count = 0
    name_column = None

    for column in df.columns:
        # Count the cells whose whole text is a valid name.
        valid_count = sum(
            1 for value in df[column].astype(str) if name_regex.fullmatch(value)
        )

        # Keep the column with the highest count (strict ">" keeps the first on ties).
        if valid_count > max_count:
            max_count = valid_count
            name_column = column

    return name_column
103
+
104
+
105
def find_date_column(df):
    """Pick the column whose cells most often contain a date-like substring.

    Each column is cast to text and searched for the pattern
    digits-separator-digits-separator-digits (separator ``-`` or ``/``).

    Args:
        df (pandas.DataFrame): Table to inspect.

    Returns:
        The label of the column with the most hits (the earliest such column
        on a tie), or ``None`` when no cell in any column contains a match.
    """
    date_pattern = r"\b\d{2,4}[-/]\d{1,2}[-/]\d{2,4}\b"  # Regex pattern for common date formats

    # Number of date-like cells per column, in column order.
    hits_per_column = {}
    for label in df.columns:
        as_text = df[label].astype(str)
        hits_per_column[label] = as_text.str.contains(date_pattern, na=False).sum()

    best_hits = max(hits_per_column.values(), default=0)
    if best_hits == 0:
        # No column contained anything that looks like a date.
        return None

    # Dicts keep insertion order, so this yields the first column reaching the maximum.
    for label, hits in hits_per_column.items():
        if hits == best_hits:
            return label