Spaces:

amj808
/

talent

Sleeping

App Files Files Community

Alastair Jepps committed on Jul 21, 2024

Commit

2ac1ad7

1 Parent(s): d12863a

type hinting

Browse files

Files changed (2) hide show

environment.yml +0 -2
index.py +33 -57

environment.yml CHANGED Viewed

@@ -4,7 +4,6 @@ channels:
   - defaults
 dependencies:
   - python-docx
-  - pypdf2
   - python=3.11
   - gradio=4.29.0
   - python-dotenv
@@ -14,6 +13,5 @@ dependencies:
     - langchain
     - langsmith
     - langchainhub
-    - pdfminer.six
     - pytesseract
     - pdf2image

   - defaults
 dependencies:
   - python-docx
   - python=3.11
   - gradio=4.29.0
   - python-dotenv
     - langchain
     - langsmith
     - langchainhub
     - pytesseract
     - pdf2image

index.py CHANGED Viewed

@@ -3,19 +3,17 @@ import os
 import json
 import time
 import io
 from dotenv import load_dotenv
 from docx import Document
-import PyPDF2
 from langchain_anthropic import ChatAnthropic
 from pdfminer.high_level import extract_text
 import re
 from langchain_core.output_parsers import StrOutputParser
-from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
 from langchain import hub
 import pytesseract
 from pdf2image import convert_from_path
-import io
 import logging
 # Set up logging
@@ -25,17 +23,17 @@ logging.basicConfig(level=logging.ERROR)
 load_dotenv()
 try:
-    model = ChatAnthropic(model="claude-3-5-sonnet-20240620",  api_key=os.getenv("ANTHROPIC_API_KEY"))
-    hub_prompt = hub.pull("talent_assistant")
 except Exception as e:
     logging.error(f"Error initializing ChatAnthropic or pulling hub prompt: {str(e)}")
     model = None
     hub_prompt = None
-def check_password(username, password):
     return username == os.getenv("GRADIO_USERNAME") and password == os.getenv("GRADIO_PASSWORD")
-def extract_human_message_template(chat_prompt):
     try:
         for message in chat_prompt.messages:
             if isinstance(message, HumanMessagePromptTemplate):
@@ -44,7 +42,8 @@ def extract_human_message_template(chat_prompt):
         logging.error(f"Error extracting human message template: {str(e)}")
     return None
-def clean_bullet_points(text):
     try:
         text = re.sub(r'(?m)^e\s', '• ', text)
         text = re.sub(r'(?m)^eo\s', '  ◦ ', text)
@@ -53,12 +52,12 @@ def clean_bullet_points(text):
         logging.error(f"Error cleaning bullet points: {str(e)}")
     return text
-def pdf_to_text_ocr(file_path):
     try:
-        images = convert_from_path(file_path)
-        text = ""
         for image in images:
-            page_text = pytesseract.image_to_string(image, config='--psm 6')
             try:
                 page_text = page_text.encode('utf-8', errors='ignore').decode('utf-8')
             except UnicodeEncodeError:
@@ -76,64 +75,41 @@ def pdf_to_text_ocr(file_path):
         text = ""
     return text
-def process_questions(*args):
     return "hubba hubba hubba"
-def process_match(*args):
     try:
         global hub_prompt
-        prompt = extract_human_message_template(hub_prompt)
         if prompt:
             prompt.template = prompt.template.replace('{{CV}}', '{CV}')
             prompt.template = prompt.template.replace('{{JOB_DESCRIPTION}}', '{JOB_DESCRIPTION}')
         chain = prompt | model | StrOutputParser()
-        response = chain.invoke({"JOB_DESCRIPTION": args[1], "CV": args[0]})
     except Exception as e:
         logging.error(f"Error in process_match: {str(e)}")
         response = "An error occurred while processing the match."
     return response
-def pdf_to_text_miner(file_path):
-    try:
-        text = extract_text(file_path)
-        text = re.sub(r'\n\s*\n', '\n\n', text)
-        text = re.sub(r'([A-Z]+)(\n|.)*?:', r'\n\1:\n', text)
-        text = text.strip()
-    except Exception as e:
-        logging.error(f"Error in pdf_to_text_miner: {str(e)}")
-        text = ""
-    return text
-def pdf_to_text(file_path):
-    try:
-        text = ""
-        with open(file_path, "rb") as file:
-            reader = PyPDF2.PdfFileReader(file)
-            for page in range(reader.getNumPages()):
-                text += reader.getPage(page).extract_text() + "\n"
-    except Exception as e:
-        logging.error(f"Error in pdf_to_text: {str(e)}")
-        text = ""
-    return text
-def wrapper_function(cv, jd):
     try:
-        score = process_match(cv, jd)
-        questions = process_questions(cv, jd)
     except Exception as e:
         logging.error(f"Error in wrapper_function: {str(e)}")
         score = "An error occurred while processing the match."
         questions = "An error occurred while generating questions."
     return score, questions
-def create_app():
     with gr.Blocks() as app:
         gr.Markdown("# Kingmakers Talent Prototype")
-        active_tab = gr.State("CV/JD Match")
-        def file_process(file):
             try:
                 if file.endswith('.pdf'):
                     return pdf_to_text_ocr(file)
@@ -143,7 +119,7 @@ def create_app():
                 logging.error(f"Error in file_process: {str(e)}")
                 return "An error occurred while processing the file."
-        def update_active_tab(tab_name):
             return tab_name
         with gr.Tabs() as generation_mode_tabs:
@@ -152,20 +128,20 @@ def create_app():
                     with gr.Column(scale=1):
                         with gr.Tabs() as mode_tabs:
                             with gr.TabItem("CV/JD Match") as text_to_image_tab:
-                                jd = gr.Textbox(label="Job Description")
-                                jd_file = gr.File(label=".pdf, .doc or .txt" , file_types=[".pdf", ".doc", ".txt"])
-                                jd_file.change(fn=file_process, inputs=jd_file,outputs=jd)
-                                cv = gr.Textbox(label="CV")
-                                cv_file = gr.File(label=".pdf, .doc or .txt" , file_types=[".pdf", ".doc", ".txt"])
-                                cv_file.change(fn=file_process,inputs=cv_file,outputs=cv)
-                        generate_btn = gr.Button("Generate")
                     with gr.Column(scale=1):
-                        score = gr.Textbox(label="Score")
-                        questions = gr.Textbox(label="Questions")
-                        save_btn = gr.Button("Send to Greenhouse")
                 generate_btn.click(
                     fn=wrapper_function,
@@ -177,7 +153,7 @@ def create_app():
 if __name__ == "__main__":
     try:
-        app = create_app()
         app.launch(debug=True)  # auth=check_password Added share=True to create a public link
     except Exception as e:
         logging.error(f"Error launching the app: {str(e)}")

 import json
 import time
 import io
+from typing import Tuple, Optional, List
 from dotenv import load_dotenv
 from docx import Document
 from langchain_anthropic import ChatAnthropic
 from pdfminer.high_level import extract_text
 import re
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
 from langchain import hub
 import pytesseract
 from pdf2image import convert_from_path
 import logging
 # Set up logging
 load_dotenv()
 try:
+    model: Optional[ChatAnthropic] = ChatAnthropic(model="claude-3-5-sonnet-20240620", api_key=os.getenv("ANTHROPIC_API_KEY"))
+    hub_prompt: Optional[ChatPromptTemplate] = hub.pull("talent_assistant")
 except Exception as e:
     logging.error(f"Error initializing ChatAnthropic or pulling hub prompt: {str(e)}")
     model = None
     hub_prompt = None
+def check_password(username: str, password: str) -> bool:
     return username == os.getenv("GRADIO_USERNAME") and password == os.getenv("GRADIO_PASSWORD")
+def extract_human_message_template(chat_prompt: ChatPromptTemplate) -> Optional[HumanMessagePromptTemplate]:
     try:
         for message in chat_prompt.messages:
             if isinstance(message, HumanMessagePromptTemplate):
         logging.error(f"Error extracting human message template: {str(e)}")
     return None
+def clean_bullet_points(text: str) -> str:
     try:
         text = re.sub(r'(?m)^e\s', '• ', text)
         text = re.sub(r'(?m)^eo\s', '  ◦ ', text)
         logging.error(f"Error cleaning bullet points: {str(e)}")
     return text
+def pdf_to_text_ocr(file_path: str) -> str:
     try:
+        images: List[Image.Image] = convert_from_path(file_path)
+        text: str = ""
         for image in images:
+            page_text: str = pytesseract.image_to_string(image, config='--psm 6')
             try:
                 page_text = page_text.encode('utf-8', errors='ignore').decode('utf-8')
             except UnicodeEncodeError:
         text = ""
     return text
+def process_questions(*args: str) -> str:
     return "hubba hubba hubba"
+def process_match(*args: str) -> str:
     try:
         global hub_prompt
+        prompt: Optional[HumanMessagePromptTemplate] = extract_human_message_template(hub_prompt)
         if prompt:
             prompt.template = prompt.template.replace('{{CV}}', '{CV}')
             prompt.template = prompt.template.replace('{{JOB_DESCRIPTION}}', '{JOB_DESCRIPTION}')
         chain = prompt | model | StrOutputParser()
+        response: str = chain.invoke({"JOB_DESCRIPTION": args[1], "CV": args[0]})
     except Exception as e:
         logging.error(f"Error in process_match: {str(e)}")
         response = "An error occurred while processing the match."
     return response
+def wrapper_function(cv: str, jd: str) -> Tuple[str, str]:
     try:
+        score: str = process_match(cv, jd)
+        questions: str = process_questions(cv, jd)
     except Exception as e:
         logging.error(f"Error in wrapper_function: {str(e)}")
         score = "An error occurred while processing the match."
         questions = "An error occurred while generating questions."
     return score, questions
+def create_app() -> gr.Blocks:
     with gr.Blocks() as app:
         gr.Markdown("# Kingmakers Talent Prototype")
+        active_tab: gr.State = gr.State("CV/JD Match")
+        def file_process(file: str) -> str:
             try:
                 if file.endswith('.pdf'):
                     return pdf_to_text_ocr(file)
                 logging.error(f"Error in file_process: {str(e)}")
                 return "An error occurred while processing the file."
+        def update_active_tab(tab_name: str) -> str:
             return tab_name
         with gr.Tabs() as generation_mode_tabs:
                     with gr.Column(scale=1):
                         with gr.Tabs() as mode_tabs:
                             with gr.TabItem("CV/JD Match") as text_to_image_tab:
+                                jd: gr.Textbox = gr.Textbox(label="Job Description")
+                                jd_file: gr.File = gr.File(label=".pdf, .doc or .txt" , file_types=[".pdf", ".doc", ".txt"])
+                                jd_file.change(fn=file_process, inputs=jd_file, outputs=jd)
+                                cv: gr.Textbox = gr.Textbox(label="CV")
+                                cv_file: gr.File = gr.File(label=".pdf, .doc or .txt" , file_types=[".pdf", ".doc", ".txt"])
+                                cv_file.change(fn=file_process, inputs=cv_file, outputs=cv)
+                        generate_btn: gr.Button = gr.Button("Generate")
                     with gr.Column(scale=1):
+                        score: gr.Textbox = gr.Textbox(label="Score")
+                        questions: gr.Textbox = gr.Textbox(label="Questions")
+                        save_btn: gr.Button = gr.Button("Send to Greenhouse")
                 generate_btn.click(
                     fn=wrapper_function,
 if __name__ == "__main__":
     try:
+        app: gr.Blocks = create_app()
         app.launch(debug=True)  # auth=check_password Added share=True to create a public link
     except Exception as e:
         logging.error(f"Error launching the app: {str(e)}")