Alastair Jepps committed
Commit 2ac1ad7 · 1 Parent(s): d12863a

type hinting

Files changed (2)
  1. environment.yml +0 -2
  2. index.py +33 -57
environment.yml CHANGED
@@ -4,7 +4,6 @@ channels:
   - defaults
 dependencies:
   - python-docx
-  - pypdf2
   - python=3.11
   - gradio=4.29.0
   - python-dotenv
@@ -14,6 +13,5 @@ dependencies:
   - langchain
   - langsmith
   - langchainhub
-  - pdfminer.six
   - pytesseract
   - pdf2image
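With pypdf2 and pdfminer.six dropped, PDF handling in index.py relies entirely on the pytesseract/pdf2image OCR path; the PyPDF2-based pdf_to_text and pdfminer-based pdf_to_text_miner helpers are deleted in the index.py diff below. One leftover worth noting: the new index.py still imports extract_text from pdfminer.high_level, which would raise ModuleNotFoundError at startup once pdfminer.six is no longer installed. A minimal sketch of the PDF-related imports that would match this environment, assuming nothing else in the project uses pdfminer:

    import pytesseract                        # OCR on rendered page images
    from pdf2image import convert_from_path   # renders PDF pages to PIL images (needs poppler)
    # from pdfminer.high_level import extract_text   # unused after this commit; its package was removed above

Either deleting that import or keeping pdfminer.six in environment.yml would resolve the mismatch.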
index.py CHANGED
@@ -3,19 +3,17 @@ import os
 import json
 import time
 import io
+from typing import Tuple, Optional, List
 from dotenv import load_dotenv
 from docx import Document
-import PyPDF2
 from langchain_anthropic import ChatAnthropic
 from pdfminer.high_level import extract_text
 import re
 from langchain_core.output_parsers import StrOutputParser
-from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
 from langchain import hub
 import pytesseract
 from pdf2image import convert_from_path
-import io
 import logging
 
 # Set up logging
@@ -25,17 +23,17 @@ logging.basicConfig(level=logging.ERROR)
 load_dotenv()
 
 try:
-    model = ChatAnthropic(model="claude-3-5-sonnet-20240620", api_key=os.getenv("ANTHROPIC_API_KEY"))
-    hub_prompt = hub.pull("talent_assistant")
+    model: Optional[ChatAnthropic] = ChatAnthropic(model="claude-3-5-sonnet-20240620", api_key=os.getenv("ANTHROPIC_API_KEY"))
+    hub_prompt: Optional[ChatPromptTemplate] = hub.pull("talent_assistant")
 except Exception as e:
     logging.error(f"Error initializing ChatAnthropic or pulling hub prompt: {str(e)}")
     model = None
     hub_prompt = None
 
-def check_password(username, password):
+def check_password(username: str, password: str) -> bool:
     return username == os.getenv("GRADIO_USERNAME") and password == os.getenv("GRADIO_PASSWORD")
 
-def extract_human_message_template(chat_prompt):
+def extract_human_message_template(chat_prompt: ChatPromptTemplate) -> Optional[HumanMessagePromptTemplate]:
     try:
         for message in chat_prompt.messages:
             if isinstance(message, HumanMessagePromptTemplate):
@@ -44,7 +42,8 @@ def extract_human_message_template(chat_prompt):
         logging.error(f"Error extracting human message template: {str(e)}")
         return None
 
-def clean_bullet_points(text):
+
+def clean_bullet_points(text: str) -> str:
     try:
         text = re.sub(r'(?m)^e\s', '• ', text)
         text = re.sub(r'(?m)^eo\s', ' ◦ ', text)
@@ -53,12 +52,12 @@ def clean_bullet_points(text):
         logging.error(f"Error cleaning bullet points: {str(e)}")
     return text
 
-def pdf_to_text_ocr(file_path):
+def pdf_to_text_ocr(file_path: str) -> str:
     try:
-        images = convert_from_path(file_path)
-        text = ""
+        images: List[Image.Image] = convert_from_path(file_path)
+        text: str = ""
         for image in images:
-            page_text = pytesseract.image_to_string(image, config='--psm 6')
+            page_text: str = pytesseract.image_to_string(image, config='--psm 6')
             try:
                 page_text = page_text.encode('utf-8', errors='ignore').decode('utf-8')
             except UnicodeEncodeError:
@@ -76,64 +75,41 @@ def pdf_to_text_ocr(file_path):
         text = ""
     return text
 
-def process_questions(*args):
+def process_questions(*args: str) -> str:
     return "hubba hubba hubba"
 
-def process_match(*args):
+def process_match(*args: str) -> str:
     try:
         global hub_prompt
-        prompt = extract_human_message_template(hub_prompt)
+        prompt: Optional[HumanMessagePromptTemplate] = extract_human_message_template(hub_prompt)
         if prompt:
             prompt.template = prompt.template.replace('{{CV}}', '{CV}')
             prompt.template = prompt.template.replace('{{JOB_DESCRIPTION}}', '{JOB_DESCRIPTION}')
 
         chain = prompt | model | StrOutputParser()
-        response = chain.invoke({"JOB_DESCRIPTION": args[1], "CV": args[0]})
+        response: str = chain.invoke({"JOB_DESCRIPTION": args[1], "CV": args[0]})
     except Exception as e:
         logging.error(f"Error in process_match: {str(e)}")
         response = "An error occurred while processing the match."
     return response
 
-def pdf_to_text_miner(file_path):
-    try:
-        text = extract_text(file_path)
-        text = re.sub(r'\n\s*\n', '\n\n', text)
-        text = re.sub(r'([A-Z]+)(\n|.)*?:', r'\n\1:\n', text)
-        text = text.strip()
-    except Exception as e:
-        logging.error(f"Error in pdf_to_text_miner: {str(e)}")
-        text = ""
-    return text
-
-def pdf_to_text(file_path):
-    try:
-        text = ""
-        with open(file_path, "rb") as file:
-            reader = PyPDF2.PdfFileReader(file)
-            for page in range(reader.getNumPages()):
-                text += reader.getPage(page).extract_text() + "\n"
-    except Exception as e:
-        logging.error(f"Error in pdf_to_text: {str(e)}")
-        text = ""
-    return text
-
-def wrapper_function(cv, jd):
+def wrapper_function(cv: str, jd: str) -> Tuple[str, str]:
     try:
-        score = process_match(cv, jd)
-        questions = process_questions(cv, jd)
+        score: str = process_match(cv, jd)
+        questions: str = process_questions(cv, jd)
     except Exception as e:
         logging.error(f"Error in wrapper_function: {str(e)}")
         score = "An error occurred while processing the match."
         questions = "An error occurred while generating questions."
     return score, questions
 
-def create_app():
+def create_app() -> gr.Blocks:
     with gr.Blocks() as app:
         gr.Markdown("# Kingmakers Talent Prototype")
 
-        active_tab = gr.State("CV/JD Match")
+        active_tab: gr.State = gr.State("CV/JD Match")
 
-        def file_process(file):
+        def file_process(file: str) -> str:
             try:
                 if file.endswith('.pdf'):
                     return pdf_to_text_ocr(file)
@@ -143,7 +119,7 @@ def create_app():
                 logging.error(f"Error in file_process: {str(e)}")
                 return "An error occurred while processing the file."
 
-        def update_active_tab(tab_name):
+        def update_active_tab(tab_name: str) -> str:
            return tab_name
 
        with gr.Tabs() as generation_mode_tabs:
@@ -152,20 +128,20 @@ def create_app():
            with gr.Column(scale=1):
                with gr.Tabs() as mode_tabs:
                    with gr.TabItem("CV/JD Match") as text_to_image_tab:
-                        jd = gr.Textbox(label="Job Description")
-                        jd_file = gr.File(label=".pdf, .doc or .txt" , file_types=[".pdf", ".doc", ".txt"])
-                        jd_file.change(fn=file_process, inputs=jd_file,outputs=jd)
+                        jd: gr.Textbox = gr.Textbox(label="Job Description")
+                        jd_file: gr.File = gr.File(label=".pdf, .doc or .txt" , file_types=[".pdf", ".doc", ".txt"])
+                        jd_file.change(fn=file_process, inputs=jd_file, outputs=jd)
 
-                        cv = gr.Textbox(label="CV")
-                        cv_file = gr.File(label=".pdf, .doc or .txt" , file_types=[".pdf", ".doc", ".txt"])
-                        cv_file.change(fn=file_process,inputs=cv_file,outputs=cv)
+                        cv: gr.Textbox = gr.Textbox(label="CV")
+                        cv_file: gr.File = gr.File(label=".pdf, .doc or .txt" , file_types=[".pdf", ".doc", ".txt"])
+                        cv_file.change(fn=file_process, inputs=cv_file, outputs=cv)
 
-                        generate_btn = gr.Button("Generate")
+                        generate_btn: gr.Button = gr.Button("Generate")
 
            with gr.Column(scale=1):
-                score = gr.Textbox(label="Score")
-                questions = gr.Textbox(label="Questions")
-                save_btn = gr.Button("Send to Greenhouse")
+                score: gr.Textbox = gr.Textbox(label="Score")
+                questions: gr.Textbox = gr.Textbox(label="Questions")
+                save_btn: gr.Button = gr.Button("Send to Greenhouse")
 
            generate_btn.click(
                fn=wrapper_function,
@@ -177,7 +153,7 @@ def create_app():
 
 if __name__ == "__main__":
     try:
-        app = create_app()
+        app: gr.Blocks = create_app()
         app.launch(debug=True) # auth=check_password Added share=True to create a public link
     except Exception as e:
         logging.error(f"Error launching the app: {str(e)}")
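The new List[Image.Image] annotations in pdf_to_text_ocr refer to PIL's Image class, which does not appear in the import block above. Local-variable annotations are never evaluated at runtime, so the code still runs, but a static checker such as mypy would flag Image as an undefined name. A minimal sketch of the extra import plus a tiny usage check, assuming explicit annotations are kept rather than string forward references (the page_count helper is hypothetical, not part of the commit):

    from typing import List

    from PIL import Image                      # pdf2image returns PIL Image objects
    from pdf2image import convert_from_path

    def page_count(file_path: str) -> int:
        # With Image imported, the annotation below resolves for type checkers.
        images: List[Image.Image] = convert_from_path(file_path)
        return len(images)

Adding from PIL import Image next to the new typing import would keep the annotations checkable without any behavioural change.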