Alealejandrooo
committed on
First commit
Browse files- app.py +99 -0
- process.py +121 -0
app.py
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import os
|
3 |
+
|
4 |
+
from process import process_data
|
5 |
+
|
6 |
+
def makeButtonClickableFiles(files):
    """Enable the process button only when every uploaded file is an Excel workbook.

    Args:
        files (list | None): Uploaded file entries. Each entry is expected to
            expose its path as ``.name``; plain path strings are accepted too.

    Returns:
        gr.Button: A non-interactive button when ``files`` is empty or None,
        otherwise an interactive button.

    Raises:
        gr.Error: If any file does not have a ``.xls`` or ``.xlsx`` extension.
    """
    if not files:
        return gr.Button(interactive=False)

    allowed_extensions = {".xls", ".xlsx"}
    for file in files:
        base_name = os.path.basename(str(getattr(file, "name", file)))
        # splitext only reports a real suffix, so a dot-less file that is
        # literally named "xlsx" is rejected instead of slipping through.
        extension = os.path.splitext(base_name)[1].lower()
        if extension not in allowed_extensions:
            raise gr.Error(f"Unsupported file: {base_name}. Allowed extensions: .xls .xlsx")

    return gr.Button(interactive=True)
|
26 |
+
|
27 |
+
|
28 |
+
# Define a Gradio interface


def _update_process_button(mindbody_files, medserv_files):
    """Enable the process button only once BOTH uploaders hold valid files.

    Validating a single uploader at a time would enable the button as soon as
    one side has files, and processing would then receive ``None`` for the
    other side.
    """
    # Validate whichever side already has files so a bad extension is still
    # reported immediately (makeButtonClickableFiles raises gr.Error).
    for uploaded in (mindbody_files, medserv_files):
        if uploaded:
            makeButtonClickableFiles(uploaded)

    both_present = bool(mindbody_files) and bool(medserv_files)
    return gr.Button(interactive=both_present)


with gr.Blocks() as demo:

    with gr.Row():
        header = gr.Markdown(("<h1>MindBody VS. Medserv Checker </h1>"))

    # Two side-by-side uploaders, one per data source.
    with gr.Row():

        with gr.Column():
            file_uploader_mindbody = gr.Files(
                label=("Upload MindBody"),
                file_count="multiple",
                file_types=[".xlsx", '.xls'],
                container=True,
                interactive=True,
                scale=1,
            )

        with gr.Column():
            file_uploader_medserv = gr.Files(
                label=("Upload Medserv"),
                file_count="multiple",
                file_types=[".xlsx", '.xls'],
                container=True,
                interactive=True,
                scale=1,
            )

    # Number of days of slack allowed when matching sale dates.
    with gr.Row():
        tollerance = gr.Slider(0, 7, value=1, step=1, interactive=True, label="Days Tollerance",
                               info="Set the number of days of tolerance to match the sale dates between MindBody and Medserve (0 = no tolerance / exact match).")

    with gr.Row():

        # Starts disabled; enabled by _update_process_button.
        file_process_button = gr.Button(
            value="PROCESS FILES",
            interactive=False,
        )

    with gr.Row():
        processed_file = gr.Files(
            label=("Output File"),
            file_count="single",
            interactive=False,
            elem_classes="gradio-file",
        )

    # Re-evaluate the button whenever either uploader changes, always looking
    # at the state of both uploaders.
    for uploader in (file_uploader_mindbody, file_uploader_medserv):
        uploader.change(
            fn=_update_process_button,
            inputs=[file_uploader_mindbody, file_uploader_medserv],
            outputs=[file_process_button])

    file_process_button.click(
        fn=process_data,
        inputs=[file_uploader_mindbody, file_uploader_medserv, tollerance],
        outputs=processed_file)


if __name__ == "__main__":
    demo.queue().launch()
|
process.py
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import gradio as gr
|
3 |
+
import re
|
4 |
+
from datetime import timedelta
|
5 |
+
|
6 |
+
|
7 |
+
def _split_client_name(client):
    """Split a ``'Last, First'`` value into stripped ``(last, first)`` parts.

    Missing or empty parts are returned as None so they never compare equal
    to another row's name.
    """
    if not isinstance(client, str):
        return None, None
    last, _, first = client.partition(',')
    return last.strip() or None, first.strip() or None


def _require_columns(frame, required, source):
    """Raise a readable gr.Error when ``frame`` lacks any of ``required``."""
    missing = [column for column in required if column not in frame.columns]
    if missing:
        raise gr.Error(f"{source} data is missing required column(s): {', '.join(missing)}")


def process_data(files_mindbody, files_medserv, tollerance, progress=gr.Progress()):
    """Write an Excel report of MindBody rows that have no Medserv counterpart.

    A MindBody row counts as matched when some Medserv row has a 'DOS' within
    ``tollerance`` days (inclusive, in either direction) and shares either the
    first name or the last name parsed from the 'Client' column.

    Args:
        files_mindbody (list): Uploaded MindBody workbooks, passed to ``load_data``.
        files_medserv (list): Uploaded Medserv workbooks, passed to ``load_data``.
        tollerance (int | float): Number of days of slack allowed on the date.
        progress (gr.Progress): Gradio progress tracker used for the row loop.

    Returns:
        str: Path of the written workbook ('Comparison Results.xlsx').

    Raises:
        gr.Error: If a required column is missing from either data set.
    """
    mindbody = load_data(files_mindbody)
    medserv = load_data(files_medserv)

    # Columns written to the output Excel file.
    columns_to_include = ['DOS', 'Client ID', 'Client', 'Sale ID', 'Item name', 'Location']

    # Fail fast with a readable message rather than a KeyError after the loop.
    _require_columns(mindbody, columns_to_include, "MindBody")
    _require_columns(medserv, ['DOS', 'Client'], "Medserv")

    # Split 'Client' into last/first name. Done per value so names with extra
    # commas, or no comma at all, cannot break the column assignment.
    for frame in (mindbody, medserv):
        name_parts = [_split_client_name(client) for client in frame['Client']]
        frame['Last Name'] = [last for last, _ in name_parts]
        frame['First Name'] = [first for _, first in name_parts]

    window = timedelta(days=tollerance)
    lookup_keys = list(zip(mindbody['DOS'], mindbody['First Name'], mindbody['Last Name']))

    unmatched_flags = []
    for date, first_name, last_name in progress.tqdm(lookup_keys, desc='Analyzing files...'):
        # Accept every date inside the window, not only its two endpoints.
        in_window = medserv['DOS'].between(date - window, date + window)
        # NOTE(review): names match on first OR last name; confirm this
        # leniency is intended rather than requiring both.
        same_name = ((medserv['First Name'] == first_name) |
                     (medserv['Last Name'] == last_name))
        unmatched_flags.append(not (in_window & same_name).any())

    # Select with a boolean mask so column dtypes survive even when nothing
    # is unmatched (an empty result must still support the .dt accessor).
    unmatched_mask = pd.Series(unmatched_flags, index=mindbody.index, dtype=bool)
    unmatched_df = mindbody.loc[unmatched_mask, columns_to_include].copy()

    # Format the 'DOS' column to remove the time part.
    unmatched_df['DOS'] = unmatched_df['DOS'].dt.strftime('%d-%m-%Y')

    output_file_path = 'Comparison Results.xlsx'
    unmatched_df.to_excel(output_file_path, index=False)

    return output_file_path
|
52 |
+
|
53 |
+
|
54 |
+
|
55 |
+
def load_data(files):
    """Load one or more Excel workbooks into a single DataFrame.

    Each workbook is read with ``pandas.read_excel``; several workbooks are
    stacked row-wise with a fresh index. The column detected by
    ``find_date_column`` is renamed to 'DOS' and the column detected by
    ``find_name_column`` is renamed to 'Client'. When no column is detected
    the corresponding rename is skipped.

    Args:
        files (list): Uploaded file entries exposing their path as ``.name``
            (plain path strings are accepted too).

    Returns:
        pandas.DataFrame: The combined data with the renamed columns.

    Raises:
        gr.Error: If no files are given or a file is not ``.xls``/``.xlsx``.
    """
    if not files:
        # A missing upload would otherwise surface as an opaque TypeError or IndexError.
        raise gr.Error("No files provided: Please upload at least one .xls or .xlsx file")

    filepaths = [str(getattr(file, 'name', file)) for file in files]

    # Read every workbook, rejecting anything that is not an Excel file.
    dfs = []
    for filepath in filepaths:
        # Case-insensitive so that e.g. "REPORT.XLSX" is accepted.
        if not filepath.lower().endswith(('.xlsx', '.xls')):
            raise gr.Error("Unsupported file format: Please provide a .xls or .xlsx file")
        dfs.append(pd.read_excel(filepath))

    # Concatenate dataframes if more than one file is provided
    if len(dfs) > 1:
        df = pd.concat(dfs, ignore_index=True)
    else:
        df = dfs[0]

    # Find and rename the date column to 'DOS'. Compare against None so a
    # falsy column label (such as 0) is still renamed.
    date_column = find_date_column(df)
    if date_column is not None:
        df.rename(columns={date_column: 'DOS'}, inplace=True)

    # Find and rename the name column to 'Client'
    name_column = find_name_column(df)
    if name_column is not None:
        df.rename(columns={name_column: 'Client'}, inplace=True)

    return df
|
84 |
+
|
85 |
+
|
86 |
+
def find_name_column(df):
    """Return the column whose values most often look like ``Last, First``.

    Every cell is converted to text and tested against a pattern for
    "last name, first name(s)". The column with the highest number of
    matching cells wins; on a tie the earliest column is kept. Returns None
    when no cell in any column matches.
    """
    # Regex pattern for last name, first name(s)
    name_regex = re.compile(r"^[A-Za-z'-]+,\s[A-Za-z'-]+(?:\s[A-Za-z'-]+)*$")

    best_column, best_hits = None, 0
    for candidate in df.columns:
        cells = df[candidate].astype(str)
        hits = sum(1 for cell in cells if name_regex.match(cell))

        # Strictly greater, so the first column reaching a count keeps it.
        if hits > best_hits:
            best_column, best_hits = candidate, hits

    return best_column
|
103 |
+
|
104 |
+
|
105 |
+
def find_date_column(df):
    """Return the column whose values most often contain a date-like token.

    Every column is converted to text and searched for digit groups joined
    by ``-`` or ``/``. The column with the most matching cells wins; on a
    tie the earliest column is kept. Returns None when nothing matches in
    any column.
    """
    # Regex pattern for common date formats
    date_regex = r"\b\d{2,4}[-/]\d{1,2}[-/]\d{2,4}\b"

    winner = None
    top_hits = 0
    for candidate in df.columns:
        as_text = df[candidate].astype(str)
        hits = as_text.str.contains(date_regex, na=False).sum()

        # Only a strictly higher count replaces the current winner.
        if hits <= top_hits:
            continue
        top_hits = hits
        winner = candidate

    return winner
|