Spaces:

ashmib
/

green-city-finder

Running

App Files Files Community

Ashmi Banerjee committed on Sep 16, 2024

Commit

adbebe0

1 Parent(s): bfcfb0e

update sustainability prompt and post processing

Browse files

Files changed (8) hide show

README.md +5 -3
src/augmentation/prompt_generation.py +7 -33
src/augmentation/prompts.py +35 -0
src/pipeline.py +14 -2
src/post_processing/post_process.py +19 -0
src/text_generation/mapper.py +2 -2
src/text_generation/model_init.py +1 -0
src/ui/components/inputs.py +1 -1

README.md CHANGED Viewed

@@ -19,7 +19,7 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
 - [x] Sustainability - database paths - move to HF
-- [ ] Fix it for the new models e.g. Llama and others
 - [ ] Add the space secrets to have it running online
   - [ ] Fix the google application json file
@@ -28,6 +28,8 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
 - [x] Add emissions calculation and starting point
 - [x] Add more cities to starting point
-- [ ] Experiment with the sustainability & without sustainability prompt
 - [ ] Adapt the gradio examples to the right format
-- [x] UI refactoring

 - [x] Sustainability - database paths - move to HF
+- [x] Fix it for the new models e.g. Llama and others
 - [ ] Add the space secrets to have it running online
   - [ ] Fix the google application json file
 - [x] Add emissions calculation and starting point
 - [x] Add more cities to starting point
+- [x] Experiment with the sustainability & without sustainability prompt
 - [ ] Adapt the gradio examples to the right format
+- [x] UI refactoring
+- [ ] Log the different queries and the results
+- [ ] Clear does not clear the country

src/augmentation/prompt_generation.py CHANGED Viewed

@@ -3,7 +3,7 @@ import logging
 logger = logging.getLogger(__name__)
 logging.basicConfig(encoding='utf-8', level=logging.DEBUG)
 def generate_prompt(query, context, template=None):
     """
@@ -20,22 +20,7 @@ def generate_prompt(query, context, template=None):
     if template:
         SYS_PROMPT = template
     else:
-        SYS_PROMPT = """You are an AI recommendation system. Your task is to recommend cities in Europe for travel
-        based on the user's question. You should use the provided contexts to suggest a list of the 3 best cities
-        that are best suited to the user's question, as well as the month of travel. If the user has already provided
-        the month of travel in the question, use the same month; otherwise, provide the ideal month of travel. Each
-        recommendation should also contain an explanation of why it is being recommended, based on the context. Your
-        answer must begin with "I recommend " followed by the city name and why you recommended it. Your answers are
-        correct, high-quality, and written by a domain expert. If the provided context does not contain the answer,
-        simply state, "The provided context does not have the answer." """
-    USER_PROMPT = """ Question: {} Which city do you recommend and why?
-    Context: Here are the options: {}
-    Answer:
-    """
     formatted_prompt = f"{USER_PROMPT.format(query, context)}"
     messages = [
@@ -108,7 +93,7 @@ def format_context(context):
     return formatted_context
-def augment_prompt(query, context, **params):
     """
     Function that accepts the user query as input, obtains relevant documents and augments the prompt with the
     retrieved context, which can be passed to the LLM.
@@ -121,27 +106,16 @@ def augment_prompt(query, context, **params):
     """
     # what about the cities without s-fairness scores? i.e. they don't have seasonality data
-    prompt_with_sustainability = """You are an AI recommendation system. Your task is to recommend cities in Europe
-    for travel based on the user's question. You should use the provided contexts to suggest the city that is best
-    suited to the user's question. You recommend a list of the top 3 most sustainable cities to the user, as well as
-    the best month of travel. Each recommendation should also contain an explanation of why it is being recommended,
-    on sustainability grounds based on the context. The context contains a sustainability score for each city,
-    also known as the s-fairness score, along with the ideal month of travel. A lower s-fairness score indicates that
-    the city is a better destination for the month provided. A city without a sustainability score should not be
-    considered. You should only consider the s-fairness score while choosing the best city. However, your answer
-    should not contain the numeric score itself or any mention of the sustainability score. Your answer must begin
-    with "I recommend " followed by the city names and why you recommended it. Your answers are correct, high-quality,
-    and written by a domain expert. If the provided context does not contain the answer, simply state, "The provided
-    context does not have the answer. """
     # format context
     formatted_context = format_context(context)
     if "sustainability" in params["params"] and params["params"]["sustainability"]:
-        prompt = generate_prompt(query, formatted_context, prompt_with_sustainability)
     else:
-        prompt = generate_prompt(query, formatted_context)
     return prompt

 logger = logging.getLogger(__name__)
 logging.basicConfig(encoding='utf-8', level=logging.DEBUG)
+from src.augmentation.prompts import SYSTEM_PROMPT, SUSTAINABILITY_PROMPT, USER_PROMPT
 def generate_prompt(query, context, template=None):
     """
     if template:
         SYS_PROMPT = template
     else:
+        SYS_PROMPT = SYSTEM_PROMPT
     formatted_prompt = f"{USER_PROMPT.format(query, context)}"
     messages = [
     return formatted_context
+def augment_prompt(query: str, starting_point: str, context: dict, **params: dict):
     """
     Function that accepts the user query as input, obtains relevant documents and augments the prompt with the
     retrieved context, which can be passed to the LLM.
     """
     # what about the cities without s-fairness scores? i.e. they don't have seasonality data
+    updated_query = f"With {starting_point} as the starting point, {query}"
+    prompt_with_sustainability = SUSTAINABILITY_PROMPT
     # format context
     formatted_context = format_context(context)
     if "sustainability" in params["params"] and params["params"]["sustainability"]:
+        prompt = generate_prompt(updated_query, formatted_context, prompt_with_sustainability)
     else:
+        prompt = generate_prompt(updated_query, formatted_context)
     return prompt

src/augmentation/prompts.py ADDED Viewed

	@@ -0,0 +1,35 @@

# Prompt templates used when building the messages sent to the LLM.

# User-turn template. It has two positional placeholders, filled in order with
# the user's query and the formatted retrieval context via str.format().
USER_PROMPT = \
    """ Question: {} Which city do you recommend and why?
        Context: Here are the options: {}
        Answer:
        """

# System prompt for recommendations that do not rely on the s-fairness score.
# The quoted fallback sentence at the end is what the model is asked to emit
# verbatim when the context is insufficient, so its quotes must be balanced.
SYSTEM_PROMPT =\
    """You are an AI recommendation system focused on sustainable travel.
    Your task is to recommend European cities for travel based on the user's query and starting point.
    Using the provided context, suggest the top 3 cities that are the most sustainable, along with the best month to visit for the user with respect to their starting point.
    Each recommendation should:
    1. Include the ideal mode of travel from the user's starting location which has the lowest levels of emissions.
    2. Offer an explanation as to why the city is recommended, focusing on sustainability factors such as popularity, emissions and seasonal footfall.
    Your answer must begin with "I recommend " followed by the city names and why you recommended it.
    Your answers are correct, high-quality, and written by a domain expert.
    If the provided context does not contain the answer, simply state,
    "The provided context does not have the answer." """

# System prompt for recommendations driven by the s-fairness score in the
# context. It also instructs the model not to mention the score in its answer.
SUSTAINABILITY_PROMPT =\
    """You are an AI recommendation system focused on sustainable travel.
    Your task is to recommend European cities for travel based on the user's query and starting point.
    Using the provided context, suggest the top 3 cities that are the most sustainable, along with the best month to visit for the user with respect to their starting point.
    Each recommendation should:
    1. Be based on the value of the s-fairness score provided in the context. A lower s-fairness score indicates that the city is a better destination for the month provided. A city without a sustainability score should not be considered.
    2. The system should discourage travel during peak seasons and promote travel during off and shoulder seasons.
    3. It should recommend hidden gems or off-beat destinations compared to the most popular ones.
    4. Include the ideal mode of travel from the user's starting location which has the lowest levels of emissions.
    5. Offer an explanation as to why the city is recommended, focusing on sustainability factors such as popularity, emissions and seasonal footfall.
    You should only consider the s-fairness score while choosing the best city.
    However, your answer should not contain the numeric score itself or any mention of the sustainability score.
    Your answer must begin with "I recommend " followed by the city names and why you recommended it.
    Your answers are correct, high-quality, and written by a domain expert.
    If the provided context does not contain the answer, simply state,
    "The provided context does not have the answer." """

src/pipeline.py CHANGED Viewed

@@ -23,6 +23,7 @@ import logging
 logger = logging.getLogger(__name__)
 logging.basicConfig(encoding='utf-8', level=logging.DEBUG)
 from src.text_generation.mapper import MODEL_MAPPER
 TEST_DIR = "../tests/"
@@ -96,6 +97,7 @@ def pipeline(starting_point: str,
     try:
         prompt = pg.augment_prompt(
             query=query,
             context=context,
             params=context_params
         )
@@ -114,11 +116,21 @@ def pipeline(starting_point: str,
         logger.info(f"Error at line {exc_tb.tb_lineno} while generating response: {e}")
         return None
     if test:
-        return retrieved_cities, prompt[1]['content'], response
     else:
-        return response
 if __name__ == "__main__":

 logger = logging.getLogger(__name__)
 logging.basicConfig(encoding='utf-8', level=logging.DEBUG)
 from src.text_generation.mapper import MODEL_MAPPER
+from src.post_processing.post_process import post_process_output
 TEST_DIR = "../tests/"
     try:
         prompt = pg.augment_prompt(
             query=query,
+            starting_point=starting_point,
             context=context,
             params=context_params
         )
         logger.info(f"Error at line {exc_tb.tb_lineno} while generating response: {e}")
         return None
+    try:
+        model_params = {"max_tokens": params["max_tokens"], "temperature": params["temperature"]}
+        post_processed_response = post_process_output(
+            model_id=model_id, user_query=query,
+            starting_point=starting_point,
+            context=context, response=response, **model_params)
+    except Exception as e:
+        exc_type, exc_obj, exc_tb = sys.exc_info()
+        logger.info(f"Error at line {exc_tb.tb_lineno} while generating response: {e}")
+        return None
     if test:
+        return retrieved_cities, prompt[1]['content'], post_processed_response
     else:
+        return post_processed_response
 if __name__ == "__main__":

src/post_processing/post_process.py ADDED Viewed

	@@ -0,0 +1,19 @@

+from src.text_generation.text_generation import generate_response
+def post_process_output(model_id: str, user_query: str, starting_point: str, response: str, context: dict, **params: dict) -> str | None:
+    if "s-fairness" in response.lower() or "score" in response.lower():
+        formatted_response = \
+            f"You are an AI recommendation system focused on sustainable travel. Rewrite this response without " \
+            f"mentioning the sustainability score or s-fairness.\n {response} "
+    elif "the provided context does not have the answer" \
+            in response.lower():
+        formatted_response = \
+            f'You are an AI recommendation system focused on sustainable travel. Rewrite this response using the information from the context: \n Starting point: {starting_point}\n Query: {user_query}\n Context: {context}'
+    else:
+        formatted_response = response
+    final_prompt = [{"role": "system", "content": formatted_response}]
+    generated_response = generate_response(model_id,
+                                           final_prompt, **params)
+    return generated_response

src/text_generation/mapper.py CHANGED Viewed

@@ -1,9 +1,9 @@
 MODEL_MAPPER = {
-    'Gemma-2-9B-it': "google/gemma-2-9b-it",
-    'Gemma-2-2B-it': "google/gemma-2-2b-it",
     "Gemini-1.0-pro": "gemini-1.0-pro",
     "Gemini-1.5-Flash": "gemini-1.5-flash-001",
     "Gemini-1.5-Pro": "gemini-1.5-pro-001",
     "Claude3.5-sonnet": "claude-3-5-sonnet@20240620",
     'GPT-4': "gpt-4o-mini",
     'Llama3': "meta-llama/Meta-Llama-3-8B",

 MODEL_MAPPER = {
     "Gemini-1.0-pro": "gemini-1.0-pro",
     "Gemini-1.5-Flash": "gemini-1.5-flash-001",
     "Gemini-1.5-Pro": "gemini-1.5-pro-001",
+    'Gemma-2-9B-it': "google/gemma-2-9b-it",
+    'Gemma-2-2B-it': "google/gemma-2-2b-it",
     "Claude3.5-sonnet": "claude-3-5-sonnet@20240620",
     'GPT-4': "gpt-4o-mini",
     'Llama3': "meta-llama/Meta-Llama-3-8B",

src/text_generation/model_init.py CHANGED Viewed

@@ -134,6 +134,7 @@ class LLMBaseClass:
         """
         Generate responses using Hugging Face models.
         """
         response = self.model.chat_completion(
             messages=[{"role": "user", "content": messages[0]["content"] + messages[1]["content"]}],
             max_tokens=self.tokens, temperature=self.temp)

         """
         Generate responses using Hugging Face models.
         """
+        content = " ".join([message["content"] for message in messages])
         response = self.model.chat_completion(
             messages=[{"role": "user", "content": messages[0]["content"] + messages[1]["content"]}],
             max_tokens=self.tokens, temperature=self.temp)

src/ui/components/inputs.py CHANGED Viewed

@@ -57,7 +57,7 @@ def main_component() -> Tuple[gr.Dropdown, gr.Dropdown, gr.Textbox, gr.Checkbox,
                  "your starting location, and month of travel?"
         )
-        models = list(MODEL_MAPPER.keys())[:5]
         # Model selection dropdown
         model = gr.Dropdown(
             choices=models,

                  "your starting location, and month of travel?"
         )
+        models = list(MODEL_MAPPER.keys())[:3]
         # Model selection dropdown
         model = gr.Dropdown(
             choices=models,