Hemasagar committed on
Commit
8512dad
·
verified ·
1 Parent(s): 127478b

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +25 -25
utils.py CHANGED
@@ -48,38 +48,38 @@ def create_docs(user_pdf_list):
48
 
49
 
50
 
51
- for filename in user_pdf_list:
52
 
53
- print(filename)
54
- raw_data=get_pdf_text(filename)
55
- print("pdf_Data",raw_data)
56
- # print("extracted raw data")
57
 
58
- llm_extracted_data=extracted_data(raw_data)
59
- print("llm_extracted_data",llm_extracted_data)
60
- #print(llm_extracted_data)
61
- #print("llm extracted data")
62
- #Adding items to our list - Adding data & its metadata
63
 
64
- pattern = r'{(.+)}'
65
- match = re.search(pattern, llm_extracted_data, re.DOTALL)
66
 
67
-
68
 
69
- if match:
70
- extracted_text = match.group(1)
71
- # Converting the extracted text to a dictionary
72
- data_dict = eval('{' + extracted_text + '}')
73
- print(data_dict)
74
- else:
75
- print("No match found.")
76
- # Initialize data_dict
77
- data_dict = {}
78
 
79
 
80
- df=df.append([data_dict], ignore_index=True)
81
  print("********************DONE***************")
82
  # df=df.append(save_to_dataframe(llm_extracted_data), ignore_index=True)
83
 
84
- df.head()
85
- return df
 
48
 
49
 
50
 
51
+ # for filename in user_pdf_list:
52
 
53
+ print(filename)
54
+ raw_data=get_pdf_text(filename)
55
+ print("pdf_Data",raw_data)
56
+ # print("extracted raw data")
57
 
58
+ llm_extracted_data=extracted_data(raw_data)
59
+ print("llm_extracted_data",llm_extracted_data)
60
+ #print(llm_extracted_data)
61
+ #print("llm extracted data")
62
+ #Adding items to our list - Adding data & its metadata
63
 
64
+ pattern = r'{(.+)}'
65
+ match = re.search(pattern, llm_extracted_data, re.DOTALL)
66
 
67
+
68
 
69
+ if match:
70
+ extracted_text = match.group(1)
71
+ # Converting the extracted text to a dictionary
72
+ data_dict = eval('{' + extracted_text + '}')
73
+ print(data_dict)
74
+ else:
75
+ print("No match found.")
76
+ # Initialize data_dict
77
+ data_dict = {}
78
 
79
 
80
+ # df=df.append([data_dict], ignore_index=True)
81
  print("********************DONE***************")
82
  # df=df.append(save_to_dataframe(llm_extracted_data), ignore_index=True)
83
 
84
+ llm_extracted_data
85
+ return llm_extracted_data