"""Insert randomly chosen ethnicities into prompts that do not name one."""

import logging
import random
import re
import string

# Ethnicities that can be randomly inserted into a prompt.
ethnicity_map = [
    "European American", "English", "Irish", "Italian", "German", "Polish",
    "French", "Scottish", "Scandinavian", "Eastern European", "Jewish",
    "Middle Eastern", "African-American", "Afro-Caribbean", "African",
    "Afro-Latinx", "Mexican", "Puerto Rican", "Cuban", "Dominican",
    "Salvadoran", "Guatemalan", "Colombian", "Venezuelan", "Nicaraguan",
    "Honduran", "Argentinian", "Chilean", "Peruvian", "Ecuadorian",
    "Panamanian", "Bolivian", "Costa Rican", "Chinese", "Japanese", "Korean",
    "Vietnamese", "Filipino", "Thai", "Cambodian", "Laotian", "Burmese",
    "Malaysian", "Indonesian", "Indian", "Pakistani", "Bangladeshi",
    "Sri Lankan", "Nepalese", "Bhutanese", "Maldivian", "Native Hawaiian",
    "Samoan", "Tongan", "Chamorro", "Fijian", "Tahitian", "Palauan",
    "Marshallese", "Cherokee", "Navajo", "Sioux", "Chippewa", "Apache",
    "Blackfeet", "Choctaw", "Inuit", "Yupik", "Aleut", "Egyptian", "Moroccan",
    "Algerian", "Tunisian", "Iraqi", "Syrian", "Palestinian", "Jordanian",
    "Kurdish", "Turkish", "Mixed-race", "Afro-Latino", "Eurasian", "Mestizo",
    "Armenian", "Assyrian", "Chaldean", "Somali Bantu", "Hmong", "Tibetan",
    "Aboriginal",
]

# Person-related terms (singular and plural) that can receive an ethnicity.
# All entries are lowercase; modify_prompt() compares lowercased tokens.
person_terms = [
    "person", "people", "man", "woman", "child", "boy", "girl",
    "men", "women", "children", "boys", "girls",
]

# Every term whose presence means the prompt already specifies a demographic:
# the full ethnicity_map plus broader labels that are not in ethnicity_map.
term_check = ethnicity_map + [
    "White", "Black", "European", "Asian", "Latino", "Hispanic",
    "Native American", "South American", "East Asian", "South Asian",
    "Southeast Asian", "Pacific Islander",
]


def contains_specified_term(prompt):
    """Check whether the prompt already contains an ethnicity-related term.

    Matching is a case-insensitive *substring* test against ``term_check``,
    so a term embedded in a longer word also counts (for example
    "whiteboard" matches "White").

    Args:
        prompt (str): The input prompt to check.

    Returns:
        bool: True if a term is found, False otherwise.
    """
    # Lowercase once instead of once per term.
    lowered_prompt = prompt.lower()
    # term_check already begins with every ethnicity_map entry, so scanning
    # it alone covers both lists.
    for term in term_check:
        if term.lower() in lowered_prompt:
            logging.debug(
                "Specified term '%s' found in prompt: %s", term, prompt
            )
            return True
    return False


def select_random_ethnicity():
    """Randomly select an ethnicity from ``ethnicity_map``.

    Returns:
        str: A selected ethnicity (e.g., "European American", "African").
    """
    ethnicity = random.choice(ethnicity_map)
    logging.debug("Selected ethnicity: %s", ethnicity)
    return ethnicity


def modify_prompt_for_group(prompt):
    """Replace "group of people" with a mix of distinct ethnicities.

    The prompt is returned unchanged when it already contains a term from
    ``term_check`` or when it does not contain the phrase "group of people"
    (matched case-insensitively).

    Args:
        prompt (str): The input prompt that contains group terms.

    Returns:
        str: The prompt with every "group of people" occurrence replaced by
        a comma-separated list of "<ethnicity> person" entries.
    """
    if contains_specified_term(prompt):
        return prompt

    # Sample without replacement so the group never repeats an ethnicity.
    # Adjust the count for more diversity in the group.
    group_size = min(3, len(ethnicity_map))
    mixed_ethnicities = [
        f"{ethnicity} person"
        for ethnicity in random.sample(ethnicity_map, group_size)
    ]
    logging.debug("Generated mixed ethnicities: %s", mixed_ethnicities)

    replacement = ", ".join(mixed_ethnicities)
    # A callable replacement keeps the text literal (no backslash escapes).
    return re.sub(
        r"group of people",
        lambda _match: replacement,
        prompt,
        flags=re.IGNORECASE,
    )


def modify_prompt(prompt):
    """Prefix each generic person term in the prompt with a random ethnicity.

    The prompt is returned untouched when it already contains a term from
    ``term_check``. Otherwise it is split on whitespace, every token that is
    a person term (ignoring case and surrounding punctuation) gets an
    independently chosen ethnicity placed in front of it, and the tokens are
    re-joined with single spaces.

    Args:
        prompt (str): The input prompt to modify.

    Returns:
        str: The modified prompt with ethnicities inserted.
    """
    logging.debug("Modifying prompt: %s", prompt)

    if contains_specified_term(prompt):
        logging.debug(
            "Specified terms detected. No modification applied: %s", prompt
        )
        return prompt

    words = prompt.split()
    for i, word in enumerate(words):
        # Compare the bare word so "Woman", "person." and "(girl)" match.
        core = word.strip(string.punctuation)
        if core.lower() not in person_terms:
            continue

        selected_ethnicity = select_random_ethnicity()
        # Keep leading punctuation in front of the inserted ethnicity,
        # e.g. "(girl)" -> "(Korean girl)".
        lead = word[: len(word) - len(word.lstrip(string.punctuation))]
        replacement = f"{lead}{selected_ethnicity} {word[len(lead):]}"
        logging.debug("Replaced '%s' with '%s'", word, replacement)
        words[i] = replacement

    modified_prompt = " ".join(words)
    logging.debug("Modified prompt: %s", modified_prompt)
    return modified_prompt