import gradio as gr
import os
import json
import time
import io
import re
import logging
from typing import Tuple, Optional, List

from dotenv import load_dotenv
from docx import Document
from PIL import Image
from pdfminer.high_level import extract_text
import pytesseract
from pdf2image import convert_from_path
from langchain import hub
from langchain_anthropic import ChatAnthropic
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    PromptTemplate,
    SystemMessagePromptTemplate,
)

# Set up logging
logging.basicConfig(level=logging.ERROR)

# Load environment variables from .env file
load_dotenv()

try:
    model: Optional[ChatAnthropic] = ChatAnthropic(
        model="claude-3-5-sonnet-20240620",
        api_key=os.getenv("ANTHROPIC_API_KEY"),
    )
    hub_prompt: Optional[ChatPromptTemplate] = hub.pull("talent_assistant")
except Exception as e:
    logging.error(f"Error initializing ChatAnthropic or pulling hub prompt: {str(e)}")
    model = None
    hub_prompt = None


def check_password(username: str, password: str) -> bool:
    """Check Gradio login credentials against environment variables."""
    return username == os.getenv("GRADIO_USERNAME") and password == os.getenv("GRADIO_PASSWORD")


def extract_human_message_template(chat_prompt: ChatPromptTemplate) -> Optional[PromptTemplate]:
    """Return the prompt of the first human message in a chat prompt, if any."""
    try:
        for message in chat_prompt.messages:
            if isinstance(message, HumanMessagePromptTemplate):
                return message.prompt
    except Exception as e:
        logging.error(f"Error extracting human message template: {str(e)}")
    return None


def clean_bullet_points(text: str) -> str:
    """Normalise OCR artefacts ('e', 'eo', '+') at line starts into bullet characters."""
    try:
        text = re.sub(r'(?m)^e\s', '• ', text)
        text = re.sub(r'(?m)^eo\s', ' ◦ ', text)
        text = re.sub(r'(?m)^\+\s', '• ', text)
    except Exception as e:
        logging.error(f"Error cleaning bullet points: {str(e)}")
    return text


def pdf_to_text_ocr(file_path: str) -> str:
    """OCR a PDF page by page with Tesseract and return cleaned plain text."""
    try:
        images: List[Image.Image] = convert_from_path(file_path)
        text: str = ""
        for image in images:
            page_text: str = pytesseract.image_to_string(image, config='--psm 6')
            try:
                page_text = page_text.encode('utf-8', errors='ignore').decode('utf-8')
            except UnicodeEncodeError:
                page_text = page_text.encode('iso-8859-1', errors='ignore').decode('iso-8859-1')
            text += page_text + "\n\n"
        # Re-join hyphenated line breaks and collapse excess whitespace
        text = text.replace('-\n', '')
        text = re.sub(r' +', ' ', text)
        text = re.sub(r'\n{3,}', '\n\n', text)
        # Drop non-printable characters left over from OCR
        text = re.sub(r'[^\x20-\x7E\n]', '', text)
        text = text.strip()
        text = clean_bullet_points(text)
    except Exception as e:
        logging.error(f"Error in pdf_to_text_ocr: {str(e)}")
        text = ""
    return text


def process_questions(*args: str) -> str:
    # Placeholder until question generation is implemented
    return "hubba hubba hubba"


def process_match(*args: str) -> str:
    """Score a CV against a job description using the hub prompt and Claude."""
    response: str = "An error occurred while processing the match."
    try:
        prompt: Optional[PromptTemplate] = extract_human_message_template(hub_prompt)
        if prompt:
            # The hub prompt uses double-brace placeholders; convert them to
            # single braces so LangChain treats them as input variables.
            prompt.template = prompt.template.replace('{{CV}}', '{CV}')
            prompt.template = prompt.template.replace('{{JOB_DESCRIPTION}}', '{JOB_DESCRIPTION}')
            chain = prompt | model | StrOutputParser()
            response = chain.invoke({"JOB_DESCRIPTION": args[1], "CV": args[0]})
    except Exception as e:
        logging.error(f"Error in process_match: {str(e)}")
        response = "An error occurred while processing the match."
    return response


def wrapper_function(cv: str, jd: str) -> Tuple[str, str]:
    """Run match scoring and question generation for a CV / job description pair."""
    try:
        score: str = process_match(cv, jd)
        questions: str = process_questions(cv, jd)
    except Exception as e:
        logging.error(f"Error in wrapper_function: {str(e)}")
        score = "An error occurred while processing the match."
        questions = "An error occurred while generating questions."
    return score, questions


def create_app() -> gr.Blocks:
    """Build the Gradio Blocks UI for the CV / job description matcher."""
    with gr.Blocks() as app:
        gr.Markdown("# Kingmakers Talent Prototype")
        active_tab: gr.State = gr.State("CV/JD Match")

        def file_process(file: str) -> str:
            """Extract text from an uploaded file: OCR for PDFs, plain read otherwise."""
            try:
                if file.endswith('.pdf'):
                    return pdf_to_text_ocr(file)
                with open(file, 'r') as f:
                    return f.read()
            except Exception as e:
                logging.error(f"Error in file_process: {str(e)}")
                return "An error occurred while processing the file."

        def update_active_tab(tab_name: str) -> str:
            return tab_name

        with gr.Tabs() as generation_mode_tabs:
            with gr.TabItem("Generate"):
                with gr.Row():
                    with gr.Column(scale=1):
                        with gr.Tabs() as mode_tabs:
                            with gr.TabItem("CV/JD Match") as text_to_image_tab:
                                jd: gr.Textbox = gr.Textbox(label="Job Description")
                                jd_file: gr.File = gr.File(label=".pdf, .doc or .txt",
                                                           file_types=[".pdf", ".doc", ".txt"])
                                jd_file.change(fn=file_process, inputs=jd_file, outputs=jd)
                                cv: gr.Textbox = gr.Textbox(label="CV")
                                cv_file: gr.File = gr.File(label=".pdf, .doc or .txt",
                                                           file_types=[".pdf", ".doc", ".txt"])
                                cv_file.change(fn=file_process, inputs=cv_file, outputs=cv)
                                generate_btn: gr.Button = gr.Button("Generate")
                    with gr.Column(scale=1):
                        score: gr.Textbox = gr.Textbox(label="Score")
                        questions: gr.Textbox = gr.Textbox(label="Questions")
                        save_btn: gr.Button = gr.Button("Send to Greenhouse")

        generate_btn.click(
            fn=wrapper_function,
            inputs=[cv, jd],
            outputs=[score, questions]
        )
    return app


if __name__ == "__main__":
    try:
        app: gr.Blocks = create_app()
        # Pass auth=check_password to require login, or share=True to create a public link.
        app.launch(debug=True)
    except Exception as e:
        logging.error(f"Error launching the app: {str(e)}")