Alealejandrooo committed on
Commit
62af0a0
·
verified ·
1 Parent(s): 3cd2d59

updated process.py

Browse files
Files changed (1) hide show
  1. process.py +78 -56
process.py CHANGED
@@ -4,71 +4,93 @@ import re
4
  from datetime import timedelta
5
 
6
 
7
- def process_data(files_mindbody, files_medserv, tollerance, progress=gr.Progress()):
8
-
9
- mindbody = load_data(files_mindbody)
10
- medserv = load_data(files_medserv)
11
-
12
- # Split 'Client' names into first name and last name components for both DataFrames
13
- medserv[['Last Name', 'First Name']] = medserv['Client'].str.split(',', expand=True)
14
- mindbody[['Last Name', 'First Name']] = mindbody['Client'].str.split(',', expand=True)
15
-
16
- mindbody['DOS'] = pd.to_datetime(mindbody['DOS'], format='%d/%m/%Y')
17
-
18
-
19
- # Split dates if they contain commas in the 'DOS' column of medserv
20
- medserv['DOS'] = medserv['DOS'].astype(str)
21
- medserv['DOS'] = medserv['DOS'].str.split(',')
22
- medserv = medserv.explode('DOS')
23
-
24
- # Attempt to convert dates using multiple formats
25
- formats_to_try = ['%d/%m/%Y', '%Y-%m-%d'] # Add more formats as needed
26
- for format_to_try in formats_to_try:
27
- try:
28
- medserv['DOS'] = pd.to_datetime(medserv['DOS'].str.strip(), format=format_to_try)
29
- break # Break out of loop if conversion succeeds
30
- except ValueError:
31
- continue # Continue to next format if conversion fails
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- # Initialize an empty list to store unmatched rows
34
  unmatched_rows = []
35
 
36
- rows = len(mindbody)
 
37
 
38
- # Iterate through each row in the mindbody DataFrame
39
- for idx in progress.tqdm(range(rows), desc='Analyzing files...'):
40
- # Extract relevant information from the current row
41
- date = mindbody.iloc[idx]['DOS']
42
- first_name = mindbody.iloc[idx]['First Name']
43
- last_name = mindbody.iloc[idx]['Last Name']
44
-
45
- # Define the range of dates to search for a match in medserv
46
- date_range = [date - timedelta(days=i) for i in range(tollerance, -tollerance-1, -1)]
47
- # Remove the time component from the dates in date_range
48
- date_range = [d.date() for d in date_range]
49
-
50
- # Filter medserv based on the date range and name criteria
51
- matches = medserv[((medserv['DOS'].dt.date.isin(date_range)) &
52
- ((medserv['First Name'].str.lower() == first_name.lower()) |
53
- (medserv['Last Name'].str.lower() == last_name.lower())))]
54
-
55
- # If no match is found, append the row to the unmatched_rows list
56
- if matches.empty:
57
- unmatched_rows.append(mindbody.iloc[idx])
58
 
59
- # Create a DataFrame from the unmatched_rows list
60
- unmatched_df = pd.DataFrame(unmatched_rows, columns=mindbody.columns)
 
 
61
 
62
- # Specify the columns to include in the output Excel file
63
- columns_to_include = ['DOS', 'Client ID', 'Client', 'Sale ID', 'Item name', 'Location', 'Item Total']
 
 
 
64
 
65
- # Format the 'DOS' column to remove time part
66
- unmatched_df['DOS'] = unmatched_df['DOS'].dt.strftime('%d-%m-%Y')
 
67
 
68
- output_file_path = 'Comparison Results.xlsx'
69
- unmatched_df[columns_to_include].to_excel(output_file_path, index=False)
70
 
71
- return output_file_path
 
 
 
 
 
 
 
 
 
72
 
73
 
74
 
 
4
  from datetime import timedelta
5
 
6
 
7
def _split_client_names(frame):
    """Add 'Last Name' and 'First Name' columns parsed from the 'Client' column.

    'Client' is expected to look like 'Last, First'.  Runs of commas are
    collapsed to a single comma (the cleaned value is written back to
    'Client'), then the text is split on the FIRST comma only, so a name with
    no comma or with extra commas still yields exactly two parts.
    Surrounding whitespace is stripped from both parts.
    """
    frame['Client'] = frame['Client'].str.replace(r',+', ',', regex=True)
    # partition() always returns three columns (before, separator, after),
    # unlike split(expand=True) whose column count depends on the data.
    parts = frame['Client'].str.partition(',')
    frame['Last Name'] = parts[0].str.strip()
    frame['First Name'] = parts[2].str.strip()


def _parse_dates(values, formats):
    """Parse a Series of date strings, trying each format in `formats` per value.

    Returns a datetime Series aligned with `values`.  Blank / missing tokens
    become NaT.  Raises ValueError if any other value matches none of the
    formats, so an unparsed column is never passed on silently.
    """
    text = values.astype(str).str.strip()
    parsed = None
    for fmt in formats:
        attempt = pd.to_datetime(text, format=fmt, errors='coerce')
        parsed = attempt if parsed is None else parsed.fillna(attempt)

    missing_tokens = ['', 'nan', 'nat', 'none']
    unparsed = parsed.isna() & ~text.str.lower().isin(missing_tokens)
    if unparsed.any():
        examples = text[unparsed].unique()[:5].tolist()
        raise ValueError(f"unrecognised date value(s): {examples}")
    return parsed


def _name_equals(column, name):
    """Return a boolean Series: which entries of `column` equal `name`.

    A missing or empty `name` matches nothing, so two blank names are never
    treated as the same person.
    """
    if not isinstance(name, str) or not name:
        return pd.Series(False, index=column.index)
    return column == name


def process_data(files_mindbody, files_medserv, tolerance, progress=gr.tqdm):
    """Report mindbody rows that have no counterpart in medserv.

    A mindbody row counts as matched when some medserv row has a 'DOS' within
    `tolerance` days (inclusive, either direction) of the mindbody 'DOS' AND
    the same first name OR the same last name (case-insensitive).  Unmatched
    rows are written to 'Comparison Results.xlsx'.

    Args:
        files_mindbody: passed to `load_data` to build the mindbody frame.
        files_medserv: passed to `load_data` to build the medserv frame.
        tolerance: number of days of slack allowed between the two dates.
        progress: callable wrapping an iterable, called as
            `progress(iterable, desc=...)`.

    Returns:
        The output file path, or None if any stage failed (the error is
        printed).  A failed stage aborts the run so that a partial or
        misleading report is never written.
    """
    # NOTE: `except Exception` is kept deliberately broad in each stage: the
    # function reports the failing stage and returns None rather than raising.
    try:
        mindbody = load_data(files_mindbody)
        medserv = load_data(files_medserv)
    except Exception as e:
        print(f"An error occurred while loading data: {e}")
        return None

    try:
        _split_client_names(medserv)
        _split_client_names(mindbody)
    except Exception as e:
        print(f"An error occurred while processing client names: {e}")
        return None

    try:
        mindbody['DOS'] = pd.to_datetime(mindbody['DOS'], format='%d/%m/%Y')
    except Exception as e:
        print(f"An error occurred while converting dates in mindbody: {e}")
        return None

    try:
        # A medserv cell may hold several comma-separated dates: one row each.
        medserv['DOS'] = medserv['DOS'].astype(str).str.split(',')
        # Reset the index: explode() repeats labels, which breaks alignment.
        medserv = medserv.explode('DOS').reset_index(drop=True)
        medserv['DOS'] = _parse_dates(medserv['DOS'], ['%d/%m/%Y', '%Y-%m-%d'])
    except Exception as e:
        print(f"An error occurred while processing dates in medserv: {e}")
        return None

    unmatched_positions = []

    try:
        # Loop-invariant medserv views, computed once instead of per row.
        medserv_days = medserv['DOS'].dt.normalize()
        medserv_first = medserv['First Name'].str.lower()
        medserv_last = medserv['Last Name'].str.lower()

        days = mindbody['DOS'].dt.normalize().tolist()
        first_names = mindbody['First Name'].str.lower().tolist()
        last_names = mindbody['Last Name'].str.lower().tolist()

        # timedelta accepts floats, so a float tolerance from the UI works.
        window = timedelta(days=tolerance)

        for idx in progress(range(len(mindbody)), desc='Analyzing files...'):
            day = days[idx]
            if pd.isna(day):
                # No date to compare against: report the row as unmatched.
                unmatched_positions.append(idx)
                continue

            in_window = (medserv_days >= day - window) & (medserv_days <= day + window)
            same_name = (_name_equals(medserv_first, first_names[idx]) |
                         _name_equals(medserv_last, last_names[idx]))

            if not (in_window & same_name).any():
                unmatched_positions.append(idx)
    except Exception as e:
        print(f"An error occurred while analyzing files: {e}")
        return None

    try:
        columns_to_include = ['DOS', 'Client ID', 'Client', 'Sale ID', 'Item name', 'Location', 'Item Total']

        # Selecting by position keeps the datetime dtype even when nothing is
        # unmatched, so the .dt accessor below works on an empty result too.
        unmatched_df = mindbody.iloc[unmatched_positions][columns_to_include].copy()

        # Format the 'DOS' column to drop the time part.
        unmatched_df['DOS'] = unmatched_df['DOS'].dt.strftime('%d-%m-%Y')

        output_file_path = 'Comparison Results.xlsx'
        unmatched_df.to_excel(output_file_path, index=False)

        return output_file_path
    except Exception as e:
        print(f"An error occurred while creating the output file: {e}")
        return None
94
 
95
 
96