Spaces:
Running
Running
File size: 4,367 Bytes
7345da1 839ca71 7345da1 839ca71 d358781 ebe320f a7883dd c1d9dc3 561c1fb d358781 3b394b8 ebe320f 0356c02 ebe320f a7883dd 839ca71 d358781 839ca71 78313b3 839ca71 b25bb07 159dda8 7345da1 d358781 7345da1 b25bb07 c1d9dc3 7345da1 839ca71 78313b3 d358781 839ca71 4c71672 09c5f1e 9edca9e d358781 9edca9e 657095c d358781 b7275fb c39065b 7b056b1 b223b27 d358781 a870703 b25bb07 a870703 16842d6 657095c b25bb07 0bceca6 8f2e74d d358781 89aa4a2 34162d5 c2ac8ae 40760a4 09c5f1e e845a55 40760a4 36ca842 40760a4 13c9123 e845a55 b25bb07 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import json
import re
import time
import json_repair
import pandas as pd
from tqdm import tqdm
def create_summary(group_name, label, occupation, row, template):
    """Fill the prompt template for a single applicant row.

    The template is formatted with three named fields:

    * ``role`` -- read from ``row['Role']``.
    * ``counterfactual_info`` -- ``"<group_name>: <label>;"`` when ``label``
      is truthy, otherwise an empty string (the group-neutral prompt).
    * ``resume_info`` -- read from ``row['Cleaned_Resume']``.

    ``occupation`` is part of the signature but is not read here; the role
    placed in the prompt comes from the row itself.
    """
    resume_text = row['Cleaned_Resume']
    if label:
        counterfactual = f"{group_name}: {label};"
    else:
        counterfactual = ''
    return template.format(
        role=row['Role'],
        counterfactual_info=counterfactual,
        resume_info=resume_text,
    )
def invoke_retry(prompt, agent, parameters, string_input=False, *,
                 max_attempts=5, initial_delay=2):
    """Call ``agent.invoke`` with exponential backoff and return the score.

    Args:
        prompt: Prompt passed as the first argument to ``agent.invoke``.
        agent: Object exposing ``invoke(prompt, **parameters)``.
        parameters: Keyword arguments forwarded to ``agent.invoke``. ``None``
            is treated as "no extra arguments".
        string_input: When true, return the raw response without parsing it.
        max_attempts: Maximum number of calls made before giving up.
        initial_delay: Seconds to wait after the first failure; the wait
            doubles after every further failure.

    Returns:
        The raw response when ``string_input`` is true; otherwise
        ``int(response_json['Score'])``, where the response is parsed as JSON
        (with one ``json_repair`` pass if plain parsing fails). Returns ``-1``
        when every attempt fails -- this function reports failure through
        that sentinel instead of raising.
    """
    call_kwargs = parameters or {}
    delay = initial_delay
    for attempt in range(1, max_attempts + 1):
        try:
            score_text = agent.invoke(prompt, **call_kwargs)
            if string_input:
                return score_text
            try:
                score_json = json.loads(score_text)
            except json.JSONDecodeError:
                # The response was not valid JSON: try once more after repair.
                repaired = json_repair.repair_json(
                    score_text, skip_json_loads=True, return_objects=False)
                try:
                    score_json = json.loads(repaired)
                except json.JSONDecodeError as err:
                    raise ValueError(
                        "Failed to decode JSON response even after repair attempt."
                    ) from err
            return int(score_json['Score'])
        except Exception as e:
            # Deliberately broad: any API, parsing or schema error triggers a
            # retry rather than aborting the whole scoring run.
            print(f"Attempt {attempt} failed: {e}")
            if attempt < max_attempts:
                # Only back off when another attempt will follow; sleeping
                # after the last failure would just delay returning -1.
                time.sleep(delay)
                delay *= 2
    return -1
def calculate_avg_score(score_list, *, failure_sentinel=-1):
    """Return the mean of the usable scores in ``score_list``.

    Args:
        score_list: List of scores for one entry. Anything that is not a
            list yields ``None``.
        failure_sentinel: Value that marks a failed scoring call and must not
            count towards the mean. Defaults to ``-1``, the value
            ``invoke_retry`` returns when all attempts fail. Pass ``None`` to
            filter out only ``None`` entries.

    Returns:
        The arithmetic mean of the remaining scores, or ``None`` when
        ``score_list`` is not a list or holds no usable score.
    """
    if not isinstance(score_list, list):
        return None
    # Failed calls are recorded as the sentinel; averaging them in would
    # drag the mean down for reasons unrelated to the resume being scored.
    valid_scores = [
        score for score in score_list
        if score is not None and score != failure_sentinel
    ]
    if not valid_scores:
        return None
    return sum(valid_scores) / len(valid_scores)
def process_scores_multiple(df, num_run, parameters, privilege_label, protect_label, agent, group_name, occupation,
                            template):
    """Score every row of ``df`` under three prompt variants, ``num_run`` times.

    For each run and each row, three prompts are built with
    ``create_summary`` -- one carrying ``privilege_label``, one carrying
    ``protect_label`` and one with no group information -- and each is scored
    through ``invoke_retry``. Calls are made sequentially, with ``tqdm``
    progress bars for runs and entries.

    ``df`` is modified in place and also returned. For each category in
    ``Privilege``, ``Protect`` and ``Neutral`` the following columns are set:

    * ``<category>_Scores`` -- list of the ``num_run`` results for the row.
    * ``<category>_Avg_Score`` -- ``calculate_avg_score`` of that list.
    * ``<category>_Rank`` -- rank of the category's average within the row,
      computed with ``DataFrame.rank(axis=1, ascending=False)``, so the
      highest average receives rank 1.

    Args:
        df: DataFrame whose rows are passed to ``create_summary``.
        num_run: Number of times every prompt is scored.
        parameters: Forwarded unchanged to ``invoke_retry``.
        privilege_label: Label used for the ``Privilege`` prompt.
        protect_label: Label used for the ``Protect`` prompt.
        agent: Forwarded unchanged to ``invoke_retry``.
        group_name: Group name forwarded to ``create_summary``.
        occupation: Forwarded to ``create_summary``.
        template: Prompt template forwarded to ``create_summary``.

    Returns:
        The same ``df`` object with the columns above added.
    """
    print(f"Processing {len(df)} entries with {num_run} runs each.")

    # The neutral variant passes a falsy label so no group info is inserted.
    variants = (
        ('Privilege', privilege_label),
        ('Protect', protect_label),
        ('Neutral', False),
    )
    # One result list per row and category, addressed by row position.
    scores = {key: [[] for _ in range(len(df))] for key, _label in variants}

    for _run in tqdm(range(num_run), desc="Processing runs", unit="run"):
        entries = tqdm(enumerate(df.iterrows()), total=len(df), desc="Processing entries", unit="entry")
        for position, (_row_label, row) in entries:
            for key, label in variants:
                prompt = create_summary(group_name, label, occupation, row, template)
                scores[key][position].append(invoke_retry(prompt, agent, parameters))

    for key, _label in variants:
        df[f'{key}_Scores'] = scores[key]
        df[f'{key}_Avg_Score'] = df[f'{key}_Scores'].apply(calculate_avg_score)

    # Rank the three averages against each other within every row.
    ranks = df[['Privilege_Avg_Score', 'Protect_Avg_Score', 'Neutral_Avg_Score']].rank(axis=1, ascending=False)
    df['Privilege_Rank'] = ranks['Privilege_Avg_Score']
    df['Protect_Rank'] = ranks['Protect_Avg_Score']
    df['Neutral_Rank'] = ranks['Neutral_Avg_Score']
    return df
|