import logging
import random
import string


# Pool of ethnicity labels.  Despite the name this is a plain list:
# select_random_ethnicity() draws from it with random.choice(), and term_check
# is built by extending it.  Entry order is kept as-is because seeded random
# draws and first-match logging both depend on it.
ethnicity_map = [
    "European American", "English", "Irish", "Italian", "German", "Polish",
    "French", "Scottish", "Scandinavian", "Eastern European", "Jewish",
    "Middle Eastern", "African-American", "Afro-Caribbean", "African",
    "Afro-Latinx", "Mexican", "Puerto Rican", "Cuban", "Dominican",
    "Salvadoran", "Guatemalan", "Colombian", "Venezuelan", "Nicaraguan",
    "Honduran", "Argentinian", "Chilean", "Peruvian", "Ecuadorian",
    "Panamanian", "Bolivian", "Costa Rican", "Chinese", "Japanese",
    "Korean", "Vietnamese", "Filipino", "Thai", "Cambodian", "Laotian",
    "Burmese", "Malaysian", "Indonesian", "Indian", "Pakistani",
    "Bangladeshi", "Sri Lankan", "Nepalese", "Bhutanese", "Maldivian",
    "Native Hawaiian", "Samoan", "Tongan", "Chamorro", "Fijian",
    "Tahitian", "Palauan", "Marshallese", "Cherokee", "Navajo", "Sioux",
    "Chippewa", "Apache", "Blackfeet", "Choctaw", "Inuit", "Yupik",
    "Aleut", "Egyptian", "Moroccan", "Algerian", "Tunisian", "Iraqi",
    "Syrian", "Palestinian", "Jordanian", "Kurdish", "Turkish",
    "Mixed-race", "Afro-Latino", "Eurasian", "Mestizo", "Armenian",
    "Assyrian", "Chaldean", "Somali Bantu", "Hmong", "Tibetan", "Aboriginal",
]


# Generic nouns for people.  modify_prompt() prefixes a randomly chosen
# ethnicity to any prompt token that matches one of these entries.
person_terms = [
    "person", "people", "man", "woman", "child", "boy", "girl",
    "men", "women", "children", "boys", "girls",
]


# Terms whose presence means the prompt already names a demographic: every
# entry of ethnicity_map followed by broader racial/regional labels.  Labels
# that ethnicity_map already contains (currently "Middle Eastern") are
# filtered out so that no term is listed, and therefore scanned, twice.
term_check = ethnicity_map + [
    label
    for label in (
        "White", "Black", "European", "Asian", "Latino", "Hispanic",
        "Native American", "South American", "East Asian",
        "South Asian", "Southeast Asian", "Pacific Islander", "Middle Eastern",
    )
    if label not in ethnicity_map
]


def contains_specified_term(prompt):
    """
    Check if the prompt already mentions an ethnicity or demographic term.

    Matching is a case-insensitive substring test against every entry of
    ``term_check``, so a term also matches when it appears inside a longer
    word (e.g. "Polish" is found in "polished").

    Args:
        prompt (str): The input prompt to check.

    Returns:
        bool: True if a term is found, False otherwise.
    """
    # term_check already begins with every entry of ethnicity_map, so scanning
    # it alone visits the same terms in the same order without rebuilding a
    # concatenated list on each call.
    lowered_prompt = prompt.lower()  # loop-invariant: lower-case once
    for term in term_check:
        if term.lower() in lowered_prompt:
            logging.debug("Specified term '%s' found in prompt: %s", term, prompt)
            return True
    return False


def select_random_ethnicity():
    """
    Randomly select an ethnicity from the ethnicity map.

    Every entry of ``ethnicity_map`` is equally likely to be chosen.

    Returns:
        str: A selected ethnicity (e.g., "European American", "African").

    Raises:
        IndexError: If ``ethnicity_map`` is empty (raised by ``random.choice``).
    """
    ethnicity = random.choice(ethnicity_map)
    logging.debug("Selected ethnicity: %s", ethnicity)
    return ethnicity


def modify_prompt_for_group(prompt):
    """
    Modify prompts containing groups to have a mix of different ethnicities.

    Every occurrence of the exact, case-sensitive phrase "group of people" is
    replaced with three distinct "<ethnicity> person" entries joined by ", ".
    The prompt is returned unchanged when it already names an ethnicity or
    demographic term, or when the phrase does not occur.

    Args:
        prompt (str): The input prompt that contains group terms.

    Returns:
        str: The modified prompt with a mix of ethnicities.
    """
    if contains_specified_term(prompt):
        return prompt

    group_phrase = "group of people"
    if group_phrase not in prompt:
        # Nothing to replace, so skip the random draw entirely.
        return prompt

    # random.sample draws without replacement, so the same ethnicity cannot be
    # repeated inside one group; min() keeps it valid for a very short pool.
    sample_size = min(3, len(ethnicity_map))
    mixed_ethnicities = [
        ethnicity + " person"
        for ethnicity in random.sample(ethnicity_map, sample_size)
    ]
    logging.debug("Generated mixed ethnicities: %s", mixed_ethnicities)

    return prompt.replace(group_phrase, ', '.join(mixed_ethnicities))


def modify_prompt(prompt):
    """
    Modify the prompt by prefixing generic person terms with a random ethnicity.

    The prompt is split on whitespace.  Each token whose core (the token with
    surrounding punctuation stripped) case-insensitively equals an entry of
    ``person_terms`` gets an independently chosen ethnicity inserted directly
    before that core; attached punctuation and the token's original casing
    are kept.  Tokens are re-joined with single spaces, so runs of whitespace
    in the input are collapsed.

    Prompts that already mention an ethnicity or demographic term are returned
    unchanged.

    Args:
        prompt (str): The input prompt to modify.

    Returns:
        str: The modified prompt with ethnicities inserted.
    """
    logging.debug("Modifying prompt: %s", prompt)

    if contains_specified_term(prompt):
        logging.debug("Specified terms detected. No modification applied: %s", prompt)
        return prompt

    words = prompt.split()
    for i, word in enumerate(words):
        # Compare on the bare word so "person," or "People" still match.
        core = word.strip(string.punctuation)
        if core.lower() not in person_terms:
            continue

        selected_ethnicity = select_random_ethnicity()
        # Only punctuation precedes the core, so the first occurrence of the
        # core is the stripped position; replacing it keeps the punctuation.
        replacement = word.replace(core, f"{selected_ethnicity} {core}", 1)
        words[i] = replacement
        logging.debug("Replaced '%s' with '%s'", word, replacement)

    modified_prompt = ' '.join(words)
    logging.debug("Modified prompt: %s", modified_prompt)
    return modified_prompt