# File size: 4,359 Bytes
# c4b197e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import random
import logging

# Pool of ethnicity labels used for random selection. Entry order is kept
# stable; the section comments below are only a reading aid.
ethnicity_map = [
    # European-descent, Jewish and Middle Eastern
    "European American", "English", "Irish", "Italian",
    "German", "Polish", "French", "Scottish",
    "Scandinavian", "Eastern European", "Jewish", "Middle Eastern",
    # African and African-diaspora
    "African-American", "Afro-Caribbean", "African", "Afro-Latinx",
    # Latin American
    "Mexican", "Puerto Rican", "Cuban", "Dominican",
    "Salvadoran", "Guatemalan", "Colombian", "Venezuelan",
    "Nicaraguan", "Honduran", "Argentinian", "Chilean",
    "Peruvian", "Ecuadorian", "Panamanian", "Bolivian",
    "Costa Rican",
    # East and Southeast Asian
    "Chinese", "Japanese", "Korean", "Vietnamese",
    "Filipino", "Thai", "Cambodian", "Laotian",
    "Burmese", "Malaysian", "Indonesian",
    # South Asian
    "Indian", "Pakistani", "Bangladeshi", "Sri Lankan",
    "Nepalese", "Bhutanese", "Maldivian",
    # Pacific Islander
    "Native Hawaiian", "Samoan", "Tongan", "Chamorro",
    "Fijian", "Tahitian", "Palauan", "Marshallese",
    # Native American and Alaska Native
    "Cherokee", "Navajo", "Sioux", "Chippewa",
    "Apache", "Blackfeet", "Choctaw", "Inuit",
    "Yupik", "Aleut",
    # North African, Middle Eastern and Turkish
    "Egyptian", "Moroccan", "Algerian", "Tunisian",
    "Iraqi", "Syrian", "Palestinian", "Jordanian",
    "Kurdish", "Turkish",
    # Mixed heritage and additional groups
    "Mixed-race", "Afro-Latino", "Eurasian", "Mestizo",
    "Armenian", "Assyrian", "Chaldean", "Somali Bantu",
    "Hmong", "Tibetan", "Aboriginal",
]

# Generic words for people (singular and plural forms) that are eligible
# to receive an ethnicity prefix.
person_terms = (
    ["person", "people"]
    # singular forms
    + ["man", "woman", "child", "boy", "girl"]
    # plural forms
    + ["men", "women", "children", "boys", "girls"]
)

# Terms whose presence in a prompt means an ethnicity/race is already
# specified: every entry of ethnicity_map plus broader descriptors that are
# not in that list. "Middle Eastern" is intentionally not repeated here
# because ethnicity_map already contains it.
term_check = ethnicity_map + [
    "White", "Black", "European", "Asian", "Latino", "Hispanic",
    "Native American", "South American", "East Asian",
    "South Asian", "Southeast Asian", "Pacific Islander",
]

def contains_specified_term(prompt):
    """
    Check if the prompt already mentions an ethnicity or related term.

    Matching is a case-insensitive substring test against every entry of
    term_check, so a term also matches when it appears inside a longer word
    (e.g. "Asian" matches within "Caucasian").

    Args:
    prompt (str): The input prompt to check.

    Returns:
    bool: True if a term is found, False otherwise.
    """
    # Lowercase the prompt once instead of once per candidate term.
    lowered_prompt = prompt.lower()

    # term_check is built as ethnicity_map + extra descriptors, so scanning it
    # alone already covers every ethnicity; appending ethnicity_map again
    # would only re-test the same terms.
    for term in term_check:
        if term.lower() in lowered_prompt:
            logging.debug(f"Specified term '{term}' found in prompt: {prompt}")
            return True
    return False

def select_random_ethnicity():
    """
    Pick a single entry from ethnicity_map at random and log the pick.

    Returns:
    str: The chosen ethnicity label (e.g., "European American", "African").
    """
    chosen = random.choice(ethnicity_map)
    logging.debug(f"Selected ethnicity: {chosen}")
    return chosen

def modify_prompt_for_group(prompt, group_size=3):
    """
    Replace "group of people" in a prompt with a mix of distinct ethnicities.

    The prompt is returned unchanged when it already contains a specified
    ethnicity term or when it has no literal "group of people" phrase.

    Args:
    prompt (str): The input prompt that contains group terms.
    group_size (int): How many "<ethnicity> person" entries to generate.
        Defaults to 3; capped at the number of available ethnicities.

    Returns:
    str: The modified prompt with a mix of ethnicities.

    Raises:
    ValueError: If group_size is less than 1.
    """
    if group_size < 1:
        raise ValueError("group_size must be at least 1")

    if contains_specified_term(prompt):
        return prompt

    group_phrase = "group of people"
    if group_phrase not in prompt:
        # Nothing to substitute, so skip the random draw entirely.
        return prompt

    # Sample without replacement so the same ethnicity never appears twice in
    # one group; independent draws could repeat a label.
    member_count = min(group_size, len(ethnicity_map))
    mixed_ethnicities = [
        f"{ethnicity} person"
        for ethnicity in random.sample(ethnicity_map, member_count)
    ]

    logging.debug(f"Generated mixed ethnicities: {mixed_ethnicities}")

    return prompt.replace(group_phrase, ', '.join(mixed_ethnicities))

def modify_prompt(prompt):
    """
    Prefix each generic person term in the prompt with a random ethnicity.

    The prompt is returned unchanged if it already contains a specified
    ethnicity term. Otherwise it is split on whitespace and every token that
    is a person term (ignoring case and surrounding punctuation, e.g. "Man"
    or "people,") gets an independently selected ethnicity inserted in front
    of it. Tokens are re-joined with single spaces, so runs of whitespace in
    the input are collapsed.

    Args:
    prompt (str): The input prompt to modify.

    Returns:
    str: The modified prompt with ethnicities inserted.
    """
    # Local import keeps this function self-contained; only the punctuation
    # constant is needed.
    import string

    logging.debug(f"Modifying prompt: {prompt}")

    if contains_specified_term(prompt):
        logging.debug(f"Specified terms detected. No modification applied: {prompt}")
        return prompt

    words = prompt.split()
    for i, word in enumerate(words):
        # Compare the bare word so that attached punctuation ("person.") or
        # capitalisation ("Woman") does not hide a person term.
        core = word.strip(string.punctuation)
        if core.lower() in person_terms:
            selected_ethnicity = select_random_ethnicity()
            # Swap only the bare word so leading/trailing punctuation stays
            # where it was in the original token.
            replacement = word.replace(core, f"{selected_ethnicity} {core}", 1)
            logging.debug(f"Replaced '{word}' with '{replacement}'")
            words[i] = replacement

    modified_prompt = ' '.join(words)
    logging.debug(f"Modified prompt: {modified_prompt}")
    return modified_prompt