Spaces:

DreamStream-1
/

HR-1

Sleeping

App Files Files Community

DreamStream-1 committed on Oct 24, 2024

Commit

95ac724

verified ·

1 Parent(s): 1991a27

Update app.py

Browse files

Files changed (1) hide show

app.py +99 -90

app.py CHANGED Viewed

@@ -1,10 +1,9 @@
 import os
 import re
-import io
 from datetime import datetime
-import PyPDF2
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM
 from groq import Groq
 import gradio as gr
 from docxtpl import DocxTemplate
@@ -48,49 +47,64 @@ def extract_skills_llama(text):
     except Exception as e:
         raise RuntimeError(f"Error during skill extraction: {e}")
-# --- Job Description Processing --- #
def process_job_description(job_description_text):
    """Clean a job description and return the skills extracted from it.

    The text is first passed through ``preprocess_text``; the cleaned result
    is then handed to ``extract_skills_llama`` and its return value is
    returned unchanged.
    """
    cleaned_description = preprocess_text(job_description_text)
    extracted_skills = extract_skills_llama(cleaned_description)
    return extracted_skills
-# --- Text Preprocessing --- #
def preprocess_text(text):
    """Normalise text for analysis.

    Lowercases the input, removes every character that is neither a word
    character nor whitespace, collapses whitespace runs to a single space and
    trims both ends.
    """
    lowered = text.lower()
    without_punctuation = re.sub(r'[^\w\s]', '', lowered)
    single_spaced = re.sub(r'\s+', ' ', without_punctuation)
    return single_spaced.strip()
-# --- Resume Similarity Calculation --- #
def calculate_resume_similarity(resume_text, job_description_text):
    """Score how closely a resume matches a job description.

    Both texts are tokenised together as one pair and fed to the
    ``cross-encoder/stsb-roberta-base`` sequence-classification model. The
    sigmoid of the resulting logit is returned as a Python float.
    """
    checkpoint = "cross-encoder/stsb-roberta-base"
    pair_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    pair_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    encoded_pair = pair_tokenizer(
        resume_text,
        job_description_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    # Inference only, so gradient tracking is switched off.
    with torch.no_grad():
        logits = pair_model(**encoded_pair).logits
        score = torch.sigmoid(logits).item()
    return score
-# --- Communication Generation --- #
-def communication_generator(resume_skills, job_description_skills, similarity_score, max_length=150):
-    """Generates a communication response based on the extracted skills from the resume and job description."""
     model_name = "google/flan-t5-base"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-    # Assess candidate fit based on similarity score
-    fit_status = "fit for the job" if similarity_score >= 0.7 else "not a fit for the job"
-    # Create a more detailed communication message
     message = (
-        f"After a thorough review of the candidate's resume, we found a significant alignment "
-        f"between their skills and the job description requirements. The candidate possesses the following "
-        f"key skills: {', '.join(resume_skills)}. These align well with the job requirements, particularly in areas such as "
-        f"{', '.join(job_description_skills)}. The candidate’s diverse expertise suggests they would make a valuable addition to our team. "
-        f"We believe the candidate is {fit_status}. If further evaluation is needed, please let us know how we can assist."
     )
     inputs = tokenizer(message, return_tensors="pt", padding=True, truncation=True)
@@ -111,70 +125,65 @@ def sentiment_analysis(text):
         predicted_sentiment = torch.argmax(outputs.logits).item()
     return ["Negative", "Neutral", "Positive"][predicted_sentiment]
-# --- Resume Analysis Function --- #
 def analyze_resume(resume_file, job_description_file):
-    """Analyzes the resume and job description, returning similarity score, skills, and communication response."""
-    # Extract resume text based on file type
     try:
         resume_text = extract_text_from_file(resume_file.name)
         job_description_text = extract_text_from_file(job_description_file.name)
     except ValueError as ve:
         return str(ve)
-    # Analyze texts
-    job_description_skills = process_job_description(job_description_text)
     resume_skills = extract_skills_llama(resume_text)
-    similarity_score = calculate_resume_similarity(resume_text, job_description_text)
-    communication_response = communication_generator(resume_skills, job_description_skills, similarity_score)
     sentiment = sentiment_analysis(resume_text)
     return (
-        f"Similarity Score: {similarity_score:.2f}",
         communication_response,
         f"Sentiment: {sentiment}",
-        ", ".join(resume_skills),
-        ", ".join(job_description_skills),
     )
-# --- Offer Letter Generation --- #
def generate_offer_letter(template_file, candidate_name, role, start_date, hours):
    """Render an offer letter from a .docx template and save it to disk.

    Args:
        template_file: Template passed straight to ``DocxTemplate``.
        candidate_name: Rendered into the letter and used to derive the
            output file name.
        role: Rendered into the letter as ``role``.
        start_date: Start date as a ``YYYY-MM-DD`` string; it is rendered in
            ``"%B %d, %Y"`` form.
        hours: Rendered into the letter as ``hours``.

    Returns:
        The path of the saved ``.docx`` file, or the message
        ``"Invalid date format. Please use YYYY-MM-DD."`` when ``start_date``
        cannot be parsed.
    """
    try:
        start_date = datetime.strptime(start_date, "%Y-%m-%d").strftime("%B %d, %Y")
    except (ValueError, TypeError):
        # TypeError covers a missing / non-string date, which is just as
        # unusable to the caller as a malformed one.
        return "Invalid date format. Please use YYYY-MM-DD."
    context = {
        'candidate_name': candidate_name,
        'role': role,
        'start_date': start_date,
        'hours': hours,
    }
    doc = DocxTemplate(template_file)
    doc.render(context)
    # The name is user-supplied: reduce it to a plain file-name stem so path
    # separators or ".." cannot redirect where the letter is written.
    safe_name = re.sub(r'[^\w.-]+', '_', candidate_name.strip()).strip('._') or 'candidate'
    offer_letter_path = f"{safe_name}_offer_letter.docx"
    doc.save(offer_letter_path)
    return offer_letter_path
 # --- Gradio Interface --- #
# Wire analyze_resume to two file uploads and five text boxes, then serve it.
_upload_inputs = [
    gr.File(label="Upload Resume (PDF/TXT)"),
    gr.File(label="Upload Job Description (PDF/TXT)"),
]
_result_outputs = [
    gr.Textbox(label=caption)
    for caption in (
        "Similarity Score",
        "Communication Response",
        "Sentiment Analysis",
        "Extracted Resume Skills",
        "Extracted Job Description Skills",
    )
]
iface = gr.Interface(
    fn=analyze_resume,
    inputs=_upload_inputs,
    outputs=_result_outputs,
    title="Resume and Job Description Analyzer",
    description="This tool analyzes a resume against a job description to extract skills, calculate similarity, and generate communication responses.",
)
iface.launch()

 import os
 import re
 from datetime import datetime
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForSeq2SeqLM
+from sentence_transformers import SentenceTransformer, util
 from groq import Groq
 import gradio as gr
 from docxtpl import DocxTemplate
     except Exception as e:
         raise RuntimeError(f"Error during skill extraction: {e}")
+# --- Qualification and Experience Extraction --- #
def extract_qualifications(text):
    """Return the qualification keywords mentioned in *text*.

    Looks for the whole words bachelor, master, phd, certified and degree
    (case-insensitive, optionally followed by a plural or possessive "s").
    Matches are lowercased and de-duplicated in order of first appearance, so
    "PhD ... phd" yields a single entry and "mastered" is not mistaken for
    "master".

    Args:
        text: Free text of a resume or job description.

    Returns:
        A list of lowercase keywords, or the one-element list
        ``['No specific qualifications found']`` when nothing matches.
    """
    pattern = r"\b(bachelor|master|phd|certified|degree)(?:['’]?s)?\b"
    found = re.findall(pattern, text, re.IGNORECASE)
    # dict.fromkeys keeps first-seen order while dropping repeats.
    unique = list(dict.fromkeys(keyword.lower() for keyword in found))
    return unique if unique else ['No specific qualifications found']
def extract_experience(text):
    """Extract stated years of experience and known job titles from *text*.

    Year counts are recognised in phrases such as "3 years of experience",
    "1 year of experience", "5+ years of experience" and "10 yrs experience".
    Job titles are limited to the whole-word, case-insensitive terms
    software engineer, developer, manager and analyst.

    Args:
        text: Free text of a resume or job description.

    Returns:
        A ``(years, titles)`` tuple: ``years`` is a list of ints in order of
        appearance, ``titles`` is a list of the matched title strings exactly
        as they appear in the text.
    """
    year_matches = re.findall(
        r'(\d+)\s*\+?\s*(?:years?|yrs?)(?:\s+of)?\s+experience',
        text,
        re.IGNORECASE,
    )
    job_titles = re.findall(
        r'\b(software engineer|developer|manager|analyst)\b',
        text,
        re.IGNORECASE,
    )
    # A single capture group means findall yields plain digit strings.
    experience_years = [int(years) for years in year_matches]
    return experience_years, job_titles
+# --- Matching Function using Semantic Similarity --- #
# Loaded SentenceTransformer instances keyed by model name, so the weights are
# read once per process instead of once per comparison.
_SENTENCE_MODELS = {}


def _load_sentence_model(name='paraphrase-MiniLM-L6-v2'):
    """Return the cached SentenceTransformer for *name*, loading it on first use."""
    if name not in _SENTENCE_MODELS:
        _SENTENCE_MODELS[name] = SentenceTransformer(name)
    return _SENTENCE_MODELS[name]


def calculate_semantic_similarity(text1, text2):
    """Return the semantic similarity of two texts as a percentage.

    Both texts are embedded with the ``paraphrase-MiniLM-L6-v2`` sentence
    transformer and compared by cosine similarity.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        A float in ``[0.0, 100.0]``. ``0.0`` is returned without loading the
        model when either text is empty or whitespace-only: two blank strings
        would otherwise embed identically and be reported as a 100% match
        even though there is nothing to compare.
    """
    if not text1 or not text1.strip() or not text2 or not text2.strip():
        return 0.0
    model = _load_sentence_model()
    embeddings1 = model.encode(text1, convert_to_tensor=True)
    embeddings2 = model.encode(text2, convert_to_tensor=True)
    similarity_score = util.pytorch_cos_sim(embeddings1, embeddings2).item()
    # Cosine similarity lies in [-1, 1]; clamp so the value is a valid percentage.
    return max(0.0, min(100.0, similarity_score * 100))
+# --- Thresholds --- #
def categorize_similarity(score):
    """Map a percentage score to a match label.

    Scores of 80 or more are "High Match", scores of 50 or more are
    "Moderate Match", and anything else is "Low Match".
    """
    bands = ((80, "High Match"), (50, "Moderate Match"))
    for floor, label in bands:
        if score >= floor:
            return label
    return "Low Match"
+# --- Communication Generation with Enhanced Response --- #
+def communication_generator(resume_skills, job_description_skills, skills_similarity, qualifications_similarity, experience_similarity, max_length=200):
+    """Generates a more detailed communication response based on similarity scores."""
     model_name = "google/flan-t5-base"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+    # Assess candidate fit based on similarity scores
+    fit_status = "strong fit" if skills_similarity >= 80 and qualifications_similarity >= 80 and experience_similarity >= 80 else \
+                 "moderate fit" if skills_similarity >= 50 else "weak fit"
+    # Create a detailed communication message based on match levels
     message = (
+        f"After a detailed analysis of the candidate's resume, we found the following insights:\n\n"
+        f"- **Skills Match**: {skills_similarity:.2f}% ({categorize_similarity(skills_similarity)})\n"
+        f"- **Qualifications Match**: {qualifications_similarity:.2f}% ({categorize_similarity(qualifications_similarity)})\n"
+        f"- **Experience Match**: {experience_similarity:.2f}% ({categorize_similarity(experience_similarity)})\n\n"
+        f"The overall assessment indicates that the candidate is a {fit_status} for the role. "
+        f"Skills such as {', '.join(resume_skills)} align {categorize_similarity(skills_similarity).lower()} with the job's requirements of {', '.join(job_description_skills)}. "
+        f"In terms of qualifications and experience, the candidate shows a {categorize_similarity(qualifications_similarity).lower()} match with the role's needs. "
+        f"Based on these findings, we believe the candidate could potentially excel in the role, "
+        f"but additional evaluation or interviews are recommended for further clarification."
     )
     inputs = tokenizer(message, return_tensors="pt", padding=True, truncation=True)
         predicted_sentiment = torch.argmax(outputs.logits).item()
     return ["Negative", "Neutral", "Positive"][predicted_sentiment]
+# --- Updated Resume Analysis Function --- #
 def analyze_resume(resume_file, job_description_file):
+    """Analyzes the resume and job description, returning similarity score, skills, qualifications, and experience matching."""
+    # Extract resume and job description text
     try:
         resume_text = extract_text_from_file(resume_file.name)
         job_description_text = extract_text_from_file(job_description_file.name)
     except ValueError as ve:
         return str(ve)
+    # Extract skills, qualifications, and experience
     resume_skills = extract_skills_llama(resume_text)
+    job_description_skills = process_job_description(job_description_text)
+    resume_qualifications = extract_qualifications(resume_text)
+    job_description_qualifications = extract_qualifications(job_description_text)
+    resume_experience, resume_job_titles = extract_experience(resume_text)
+    job_description_experience, job_description_titles = extract_experience(job_description_text)
+    # Calculate semantic similarity for different sections in percentages
+    skills_similarity = calculate_semantic_similarity(' '.join(resume_skills), ' '.join(job_description_skills))
+    qualifications_similarity = calculate_semantic_similarity(' '.join(resume_qualifications), ' '.join(job_description_qualifications))
+    experience_similarity = calculate_semantic_similarity(' '.join([str(e) for e in resume_experience]), ' '.join([str(e) for e in job_description_experience]))
+    # Generate a communication response based on the similarity percentages
+    communication_response = communication_generator(
+        resume_skills, job_description_skills,
+        skills_similarity, qualifications_similarity, experience_similarity
+    )
+    # Perform Sentiment Analysis
     sentiment = sentiment_analysis(resume_text)
+    # Return the results including thresholds and percentage scores
     return (
+        f"Skills Similarity: {skills_similarity:.2f}% ({categorize_similarity(skills_similarity)})",
+        f"Qualifications Similarity: {qualifications_similarity:.2f}% ({categorize_similarity(qualifications_similarity)})",
+        f"Experience Similarity: {experience_similarity:.2f}% ({categorize_similarity(experience_similarity)})",
         communication_response,
         f"Sentiment: {sentiment}",
+        f"Resume Skills: {', '.join(resume_skills)}",
+        f"Job Description Skills: {', '.join(job_description_skills)}",
+        f"Resume Qualifications: {', '.join(resume_qualifications)}",
+        f"Job Description Qualifications: {', '.join(job_description_qualifications)}",
+        f"Resume Experience: {', '.join([f'{y} years' for y, _ in resume_experience])}",
+        f"Job Description Experience: {', '.join([f'{y} years' for y, _ in job_description_experience])}"
     )
 # --- Gradio Interface --- #
def process_job_description(job_description_text, skills=None):
    """Return the known skill names that occur in a job description.

    Args:
        job_description_text: Free text of the job description.
        skills: Optional iterable of skill names to look for. Defaults to
            Python, AWS, Machine Learning, Deep Learning, NLP, Docker and
            Kubernetes. Names are matched literally, case-insensitively and
            on word boundaries, so names that begin or end with punctuation
            (for example "C++") will not match reliably.

    Returns:
        A list of matches in order of appearance, spelled as they appear in
        the text. Empty when the text or the skill list is empty.
    """
    if skills is None:
        skills = (
            'Python', 'AWS', 'Machine Learning', 'Deep Learning',
            'NLP', 'Docker', 'Kubernetes',
        )
    skills = tuple(skills)
    # An empty alternation would match the empty string at every word
    # boundary, so bail out before building the pattern.
    if not job_description_text or not skills:
        return []
    alternatives = '|'.join(re.escape(skill) for skill in skills)
    return re.findall(rf'\b({alternatives})\b', job_description_text, re.IGNORECASE)
def gradio_app(resume_file, job_description_file):
    """Gradio callback: analyse the two uploads and return one text report.

    Args:
        resume_file: Uploaded resume, or ``None`` when nothing was uploaded.
        job_description_file: Uploaded job description, or ``None`` when
            nothing was uploaded.

    Returns:
        A single string for the interface's lone text output: a prompt to
        upload both files, the error message from ``analyze_resume``, or the
        analysis results joined one per line.
    """
    # Submitting without an upload passes None, and analyze_resume would fail
    # on ``.name``; answer with a readable message instead.
    if resume_file is None or job_description_file is None:
        return "Please upload both a resume and a job description."
    result = analyze_resume(resume_file, job_description_file)
    if isinstance(result, str):
        return result
    # analyze_resume returns a tuple of lines but the interface has a single
    # text output, so flatten it into one readable block.
    return "\n".join(str(line) for line in result)
+# --- Launch Gradio App --- #
# Two file uploads in, one text box out; the app is served as soon as the
# script runs.
demo = gr.Interface(
    fn=gradio_app,
    inputs=["file", "file"],
    outputs="text",
    title="Resume and Job Description Matching Tool",
    description="Upload a resume and a job description to assess the matching scores for skills, qualifications, and experience.",
)
demo.launch()