Spaces:

ashmib
/

green-city-finder

Running

App Files Files Community

Ashmi Banerjee committed on Sep 12

Commit

ac20456

•

1 Parent(s): f4d1603

updates to the s-fairness calculation and refactoring code duplication

Browse files

Files changed (10) hide show

README.md +15 -5
app.py +85 -58
src/augmentation/prompt_generation.py +0 -1
src/helpers/__init__.py +0 -0
src/helpers/creds_loader.py +0 -0
src/helpers/data_loaders.py +52 -0
src/information_retrieval/info_retrieval.py +20 -12
src/pipeline.py +2 -2
src/sustainability/s_fairness.py +89 -53
src/text_generation/vertexai_setup.py +1 -1

README.md CHANGED Viewed

@@ -15,8 +15,18 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
 ### TODOs
-[x] Refactor the vectordb.py - remove code duplication
-[x] Sustainability - database paths - move to HF
-[x] Fix it for the new models e.g. Llama and others
-[x] Add the space secrets to have it running online
-[x] Make the space public

 ### TODOs
+- [ ] Refactor the vectordb.py - remove code duplication
+- [x] Sustainability - database paths - move to HF
+- [ ] Fix it for the new models e.g. Llama and others
+- [ ] Add the space secrets to have it running online
+  - [ ] Fix the google application json file
+- [ ] Make the space public
+- [x] Add emissions calculation and starting point
+- [x] Add more cities to starting point
+- [ ] Experiment with prompts with and without sustainability

app.py CHANGED Viewed

@@ -1,78 +1,105 @@
 from typing import Optional
 import gradio as gr
-import os, sys
 sys.path.append("./src")
-print(os.getcwd())
 from src.pipeline import pipeline
 def clear():
     return None, None, None
 def generate_text(query_text, model_name: Optional[str], is_sustainable: Optional[bool], tokens: Optional[int] = 1024,
-                  temp: Optional[float] = 0.49):
-    if is_sustainable:
-        sustainability = 1
-    else:
-        sustainability = 0
     pipeline_response = pipeline(
         query=query_text,
         model_name=model_name,
-        sustainability= sustainability
     )
     return pipeline_response
-examples = [["I'm planning a vacation to France. Can you suggest a one-week itinerary including must-visit places and "
-             "local cuisines to try?", "GPT-4"],
-            ["I want to explore off-the-beaten-path destinations in Europe, any suggestions?", "Gemini-1.0-pro"],
-            ["Suggest some cities that can be visited from London and are very rich in history and culture.",
-             "Gemini-1.0-pro"],
-            ]
-with gr.Blocks() as demo:
-    gr.HTML("""<center><h1 style='font-size:xx-large;'>🇪🇺 Euro City Recommender using Gemini & Gemma 🇪🇺</h1><br><h3>Gemini
-    & Gemma Sprints 2024 submissions by Ashmi Banerjee. </h3></center> <br><p>We're testing the compatibility of
-    Retrieval Augmented Generation (RAG) implementations with Google's <b>Gemma-2b-it</b> & <b>Gemini 1.0 Pro</b>
-    models through HuggingFace and VertexAI, respectively, to generate travel recommendations. This early version (read
-    quick and dirty implementation) aims to see if functionalities work smoothly. It relies on Wikipedia abstracts
-    from 160 European cities to provide answers to your questions. Please be kind with it, as it's a work in progress!
-    </p> <br>Google Cloud credits are provided for this project. </p>
-    """)
-    with gr.Group():
-        query = gr.Textbox(label="Query", placeholder="Ask for your city recommendation here!")
-        sustainable = gr.Checkbox(label="Sustainable", info="If you want sustainable recommendations for "
-                                                                        "hidden gems?")
-        model = gr.Dropdown(
-            ["GPT-4", "Gemini-1.0-pro"], label="Model", info="Select your model. Will add more "
-                                                                                  "models "
-                                                                                  "later!",
-        )
-        output = gr.Textbox(label="Generated Results", lines=4)
-        with gr.Accordion("Settings", open=False):
-            max_new_tokens = gr.Slider(label="Max new tokens", value=1024, minimum=0, maximum=8192, step=64,
-                                       interactive=True,
-                                       visible=True, info="The maximum number of output tokens")
-            temperature = gr.Slider(label="Temperature", step=0.01, minimum=0.01, maximum=1.0, value=0.49,
-                                    interactive=True,
-                                    visible=True, info="The value used to module the logits distribution")
-    with gr.Group():
-        with gr.Row():
-            submit_btn = gr.Button("Submit", variant="primary")
-            clear_btn = gr.Button("Clear", variant="secondary")
-            cancel_btn = gr.Button("Cancel", variant="stop")
-    submit_btn.click(generate_text, inputs=[query, model, sustainable], outputs=[output])
-    clear_btn.click(clear, inputs=[], outputs=[query, model, output])
-    cancel_btn.click(clear, inputs=[], outputs=[query, model, output])
-    gr.Markdown("## Examples")
-    gr.Examples(
-        examples, inputs=[query, model], label="Examples", fn=generate_text, outputs=[output],
-        cache_examples=True,
-    )
 if __name__ == "__main__":
-    demo.launch(show_api=False)

 from typing import Optional
 import gradio as gr
+import sys
 sys.path.append("./src")
 from src.pipeline import pipeline
+from src.helpers.data_loaders import load_places
 def clear():
+    """Return three ``None`` values, one for each component the Clear/Cancel buttons reset."""
+    return (None,) * 3
+def update_cities(selected_country, df):
+    """
+    Build the starting-point dropdown for the selected country.
+    Args:
+        - selected_country: value compared against the 'country' column of df
+        - df: dataframe with 'country' and 'city' columns
+    Returns a gr.Dropdown whose choices are the 'city' values of the matching rows.
+    """
+    in_country = df['country'] == selected_country
+    city_names = df.loc[in_country, 'city'].tolist()
+    # Make it interactive as it is not by default
+    return gr.Dropdown(choices=city_names, interactive=True)
 def generate_text(query_text, model_name: Optional[str], is_sustainable: Optional[bool], tokens: Optional[int] = 1024,
+                  temp: Optional[float] = 0.49, starting_point: Optional[str] = "Munich"):
+    """
+    Run the pipeline for a user query and return its response unchanged.
+    Args:
+        - query_text: forwarded to the pipeline as ``query``
+        - model_name: forwarded as ``model_name``
+        - is_sustainable: forwarded as ``sustainability``
+        - tokens, temp: accepted but not forwarded to the pipeline
+        - starting_point: forwarded as ``starting_point`` (default: "Munich")
+    """
+    return pipeline(
+        query=query_text,
+        model_name=model_name,
+        sustainability=is_sustainable,
+        starting_point=starting_point,
+    )
+def create_ui():
+    """
+    Build the Gradio Blocks interface for the Green City Finder.
+
+    Loads the city list, lays out the country / starting-point dropdowns, the query box, the model selector and
+    the settings sliders, and wires the Submit, Clear and Cancel buttons.
+    Returns:
+        - app: the assembled gr.Blocks instance (not launched here)
+    """
+    data_file = "cities/eu_200_cities.csv"
+    df = load_places(data_file)
+    df = df.sort_values(by=['country', 'city'])
+    # Countries repeat across city rows, so de-duplicate them (order is kept) before offering them as choices.
+    country_choices = df['country'].unique().tolist()
+    # Only referenced by the commented-out gr.Examples call at the end of this function.
+    examples = [
+        ["I'm planning a vacation to France. Can you suggest a one-week itinerary including must-visit places and "
+         "local cuisines to try?", "GPT-4"],
+        ["I want to explore off-the-beaten-path destinations in Europe, any suggestions?", "Gemini-1.0-pro"],
+        ["Suggest some cities that can be visited from London and are very rich in history and culture.",
+         "Gemini-1.0-pro"],
+    ]
+    with gr.Blocks() as app:
+        gr.HTML(
+            "<center><h1 style='font-size:xx-large; color: green'>🍀 Green City Finder 🍀</h1><h3>AI Sprint 2024 "
+            "submissions by Ashmi Banerjee. </h3></center> <br><p>We're testing the "
+            "compatibility of "
+            "Retrieval Augmented Generation (RAG) implementations with Google's <b>Gemma-2b-it</b> & <b>Gemini 1.0 "
+            "Pro</b> \n "
+            "models through HuggingFace and VertexAI, respectively, to generate sustainable travel recommendations.\n "
+            "We use the Wikivoyage dataset to provide city recommendations based on user queries. The vector "
+            "embeddings are stored in a VectorDB (LanceDB) hosted in Google Cloud.</p>\n "
+            "<p>Sustainability is calculated based on the work by <a href='https://arxiv.org/abs/2403.18604'>Banerjee "
+            "et al.</a></p>\n "
+            "<p>Google Cloud credits are provided for this project.</p>\n")
+        with gr.Group():
+            countries = gr.Dropdown(choices=country_choices, multiselect=False, label="Countries")
+            starting_point = gr.Dropdown(choices=[], multiselect=False,
+                                         label="Select your starting point for the trip!")
+            # Refill the starting-point dropdown with the cities of the chosen country.
+            countries.select(fn=lambda selected_country:
+                             update_cities(selected_country, df),
+                             inputs=countries, outputs=starting_point)
+            query = gr.Textbox(label="Query", placeholder="Ask for your city recommendation here!")
+            sustainable = gr.Checkbox(label="Sustainable", info="Do you want your recommendations to be sustainable "
+                                                                "with regards to the environment, your starting "
+                                                                "location and month of travel?")
+            # TODO: Add model options, month and starting point
+            model = gr.Dropdown(
+                ["GPT-4", "Gemini-1.0-pro"], label="Model", info="Select your model. Will add more "
+                                                                 "models "
+                                                                 "later!",
+            )
+            output = gr.Textbox(label="Generated Results", lines=4)
+            with gr.Accordion("Settings", open=False):
+                max_new_tokens = gr.Slider(label="Max new tokens", value=1024, minimum=0, maximum=8192, step=64,
+                                           interactive=True,
+                                           visible=True, info="The maximum number of output tokens")
+                temperature = gr.Slider(label="Temperature", step=0.01, minimum=0.01, maximum=1.0, value=0.49,
+                                        interactive=True,
+                                        visible=True, info="The value used to module the logits distribution")
+        with gr.Group():
+            with gr.Row():
+                submit_btn = gr.Button("Submit", variant="primary")
+                clear_btn = gr.Button("Clear", variant="secondary")
+                cancel_btn = gr.Button("Cancel", variant="stop")
+        # Inputs are bound to generate_text positionally, so the two sliders must be listed before
+        # starting_point; otherwise the selected city lands in the ``tokens`` parameter.
+        submit_btn.click(generate_text,
+                         inputs=[query, model, sustainable, max_new_tokens, temperature, starting_point],
+                         outputs=[output])
+        clear_btn.click(clear, inputs=[], outputs=[query, model, output])
+        cancel_btn.click(clear, inputs=[], outputs=[query, model, output])
+        gr.Markdown("## Examples")
+        # gr.Examples(
+        #     examples, inputs=[query, model], label="Examples", fn=generate_text, outputs=[output],
+        #     cache_examples=True,
+        # )
+    return app
 if __name__ == "__main__":
+    # Build the interface and launch it with show_api disabled.
+    app = create_ui()
+    app.launch(show_api=False)

src/augmentation/prompt_generation.py CHANGED Viewed

@@ -158,7 +158,6 @@ def test():
     # without sustainability
     context = ir.get_context(query, **context_params)
-    # formatted_context = format_context(context)
     without_sfairness = augment_prompt(
         query=query,

     # without sustainability
     context = ir.get_context(query, **context_params)
     without_sfairness = augment_prompt(
         query=query,

src/helpers/__init__.py ADDED Viewed

File without changes

src/helpers/creds_loader.py ADDED Viewed

File without changes

src/helpers/data_loaders.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from datasets import load_dataset
+from dotenv import load_dotenv
+from datasets import DatasetDict
+import os
+import pandas as pd
+from typing import Optional
+load_dotenv()
+import logging
+logger = logging.getLogger(__name__)
+logging.basicConfig(encoding='utf-8', level=logging.DEBUG)
+HF_TOKEN = os.environ["HF_TOKEN"]
+def load_data_hf(repo_name: str, data_files: str, is_public: bool) -> DatasetDict:
+    """
+    Load a dataset with ``datasets.load_dataset``.
+    Args:
+        - repo_name: str, repository passed to load_dataset
+        - data_files: str, file(s) to load; only used when is_public is False
+        - is_public: bool, selects which of the two load_dataset calls is made
+    """
+    if not is_public:
+        return load_dataset(repo_name, token=True, data_files=data_files)
+    # NOTE(review): this branch requests split="train", which presumably yields a single split rather than
+    # the annotated DatasetDict - confirm before relying on data["train"] for public repositories.
+    return load_dataset(repo_name, split="train")
+def load_scores(category: str) -> pd.DataFrame | None:
+    """
+    Load the precomputed score table for a category as a dataframe.
+    Args:
+        - category: str, one of "popularity", "seasonality" or "emissions"
+    Returns None (after logging) for any other category.
+    """
+    score_files = {
+        "popularity": "computed/popularity/popularity_scores.csv",
+        "seasonality": "computed/seasonality/seasonality_scores.csv",
+        "emissions": "computed/emissions/emissions_merged.csv",
+    }
+    data_file = score_files.get(category)
+    if data_file is None:
+        logger.info(f"Invalid category: {category}")
+        return None
+    # load_places performs the same DATA_REPO lookup and "train" split conversion for a given file.
+    return load_places(data_file)
+def load_places(data_file: str) -> pd.DataFrame | None:
+    """
+    Load ``data_file`` from the repository named by the DATA_REPO environment variable.
+    Args:
+        - data_file: str, path of the file inside the repository
+    Returns the "train" split as a dataframe, or None when data_file is empty.
+    """
+    repository = os.environ.get("DATA_REPO")
+    if not data_file:
+        return None
+    dataset = load_data_hf(repository, data_file, is_public=False)
+    return pd.DataFrame(dataset["train"][:])

src/information_retrieval/info_retrieval.py CHANGED Viewed

@@ -10,6 +10,7 @@ import logging
 logger = logging.getLogger(__name__)
 logging.basicConfig(encoding='utf-8', level=logging.DEBUG)
 def get_travel_months(query):
     """
@@ -91,7 +92,7 @@ def get_wikivoyage_context(query, limit=10, reranking=0):
     return results
-def get_sustainability_scores(query, destinations):
     """
     Function to get the s-fairness scores for each destination for the given month (or the ideal month of travel if the user hasn't provided a month).
@@ -109,15 +110,20 @@ def get_sustainability_scores(query, destinations):
     months = get_travel_months(query)
     logger.info("Finished parsing query for months.")
     for city in destinations:
         if city not in city_scores:
             city_scores[city] = []
         if not months:  # no month(s) or seasons provided by the user
-            city_scores[city].append(s_fairness.compute_sfairness_score(city))
         else:
             for month in months:
-                city_scores[city].append(s_fairness.compute_sfairness_score(city, month))
     logger.info("Finished getting s-fairness scores.")
@@ -130,7 +136,8 @@ def get_sustainability_scores(query, destinations):
                 result.append({
                     'city': city,
                     'month': 'No data available',
-                    's-fairness': 'No data available'
                 })
                 break
@@ -139,14 +146,15 @@ def get_sustainability_scores(query, destinations):
             result.append({
                 'city': city,
                 'month': min_score['month'],
-                's-fairness': min_score['s-fairness']
             })
     logger.info("Returning s-fairness results.")
     return result
-def get_cities(context):
     """
     Only to be used for testing! Function that returns a list of cities with their s-fairness scores, provided the retrieved context
@@ -184,9 +192,8 @@ def get_cities(context):
         return recommended_cities
-def get_context(query, **params):
     """
     Function that returns all the context: from the database, as well as the respective s-fairness scores for the
     destinations. The default does not consider S-Fairness scores, i.e. to append sustainability scores, a non-zero
     parameter "sustainability" needs to be explicitly passed to params.
@@ -210,12 +217,13 @@ def get_context(query, **params):
     recommended_cities = wikivoyage_context.keys()
     if 'sustainability' in params and params['sustainability']:
-        s_fairness_scores = get_sustainability_scores(query, recommended_cities)
         for score in s_fairness_scores:
             wikivoyage_context[score['city']]['sustainability'] = {
                 'month': score['month'],
-                's-fairness': score['s-fairness']
             }
     return wikivoyage_context
@@ -225,11 +233,11 @@ def test():
     queries = []
     query = "Suggest some places to visit during winter. I like hiking, nature and the mountains and I enjoy skiing " \
             "in winter. "
     context = None
     try:
-        context = get_context(query, sustainability=1)
         # cities = get_cities(context)
         # print(cities)
     except FileNotFoundError as e:

 logger = logging.getLogger(__name__)
 logging.basicConfig(encoding='utf-8', level=logging.DEBUG)
+from src.helpers.data_loaders import load_scores
 def get_travel_months(query):
     """
     return results
+def get_sustainability_scores(starting_point: str , query: str, destinations: list):
     """
     Function to get the s-fairness scores for each destination for the given month (or the ideal month of travel if the user hasn't provided a month).
     months = get_travel_months(query)
     logger.info("Finished parsing query for months.")
+    popularity_data = load_scores("popularity")
+    seasonality_data = load_scores("seasonality")
+    emissions_data = load_scores("emissions")
+    data = [popularity_data, seasonality_data, emissions_data]
     for city in destinations:
         if city not in city_scores:
             city_scores[city] = []
         if not months:  # no month(s) or seasons provided by the user
+            city_scores[city].append(s_fairness.compute_sfairness_score(data, starting_point, city))
         else:
             for month in months:
+                city_scores[city].append(s_fairness.compute_sfairness_score(data, city, month))
     logger.info("Finished getting s-fairness scores.")
                 result.append({
                     'city': city,
                     'month': 'No data available',
+                    's-fairness': 'No data available',
+                    'mode': 'No data available'
                 })
                 break
             result.append({
                 'city': city,
                 'month': min_score['month'],
+                's-fairness': min_score['s-fairness'],
+                'mode': min_score['mode'],
             })
     logger.info("Returning s-fairness results.")
     return result
+def get_cities(context: dict):
     """
     Only to be used for testing! Function that returns a list of cities with their s-fairness scores, provided the retrieved context
         return recommended_cities
+def get_context(starting_point: str, query: str, **params):
     """
     Function that returns all the context: from the database, as well as the respective s-fairness scores for the
     destinations. The default does not consider S-Fairness scores, i.e. to append sustainability scores, a non-zero
     parameter "sustainability" needs to be explicitly passed to params.
     recommended_cities = wikivoyage_context.keys()
     if 'sustainability' in params and params['sustainability']:
+        s_fairness_scores = get_sustainability_scores(starting_point, query, recommended_cities)
         for score in s_fairness_scores:
             wikivoyage_context[score['city']]['sustainability'] = {
                 'month': score['month'],
+                's-fairness': score['s-fairness'],
+                'transport': score['mode']
             }
     return wikivoyage_context
     queries = []
     query = "Suggest some places to visit during winter. I like hiking, nature and the mountains and I enjoy skiing " \
             "in winter. "
+    starting_point = "Munich"
     context = None
     try:
+        context = get_context(starting_point, query, sustainability=1)
         # cities = get_cities(context)
         # print(cities)
     except FileNotFoundError as e:

src/pipeline.py CHANGED Viewed

@@ -37,7 +37,7 @@ MODELS = {
 }
-def pipeline(query: str, model_name: str, test: int = 0, **params):
     """
     Executes the entire RAG pipeline, provided the query and model class name.
@@ -73,7 +73,7 @@ def pipeline(query: str, model_name: str, test: int = 0, **params):
     logger.info("Retrieving context..")
     try:
-        context = ir.get_context(query=query, **context_params)
         if test:
             retrieved_cities = ir.get_cities(context)
         else:

 }
+def pipeline(starting_point: str, query: str, model_name: str, test: int = 0, **params):
     """
     Executes the entire RAG pipeline, provided the query and model class name.
     logger.info("Retrieving context..")
     try:
+        context = ir.get_context(starting_point=starting_point, query=query, **context_params)
         if test:
             retrieved_cities = ir.get_cities(context)
         else:

src/sustainability/s_fairness.py CHANGED Viewed

@@ -1,100 +1,126 @@
 import sys
 import os
 import pandas as pd
-import numpy as np
 import logging
 logger = logging.getLogger(__name__)
 logging.basicConfig(encoding='utf-8', level=logging.DEBUG)
 SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(os.path.dirname(SCRIPT_DIR))
-from data_directories import *
-def get_popularity(destination):
     """
-    Returns the popularity score for a particular destination.
-    Args:
-        - destination: str
     """
-    parent_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
-    if "src" in os.getcwd() and os.path.exists(os.path.join(parent_path, "european-city-data")):
-        popularity_path = popularity_dir.replace("../../", "../")
     else:
-        popularity_path = popularity_dir
-    popularity_df = pd.read_csv(popularity_path + "popularity_scores.csv")
-    if not len(popularity_df[popularity_df['city'] == destination]):
-        print(f"{destination} does not have popularity data")
-        return None
-    return popularity_df[popularity_df['city'] == destination]['weighted_pop_score'].item()
-def get_seasonality(destination, month=None):
     """
-    Returns the seasonality score for a particular destination for a particular month. If no month is provided then
     the best month, i.e. month of lowest seasonality is returned.
     Args:
         - destination: str
         - month: str (default: None)
     """
-    parent_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
-    if "src" in os.getcwd() and os.path.exists(os.path.join(parent_path, "european-city-data")):
-        seasonality_path = seasonality_dir.replace("../../", "../")
-    else:
-        seasonality_path = seasonality_dir
-    seasonality_df = pd.read_csv(seasonality_path + "seasonality_scores.csv")
-    # Check if city is present in dataframe
-    if not len(seasonality_df[seasonality_df['city'] == destination]):
-        logger.info(f"{destination} does not have seasonality data for {month}")
         return None, None
-    if month:
-        m = month.capitalize()[:3]
-    else:
-        seasonality_df['lowest_col'] = seasonality_df.loc[:, seasonality_df.columns != 'city'].idxmin(axis="columns")
-        m = seasonality_df[seasonality_df['city'] == destination]['lowest_col'].item()
-    # print(destination, m, seasonality_df[seasonality_df['city'] == destination][m])
-    return m, seasonality_df[seasonality_df['city'] == destination][m].item()
-def compute_sfairness_score(destination, month=None):
     """
     Returns the s-fairness score for a particular destination city and (optional) month. If the destination doesn't
     have popularity or seasonality scores, then the function returns None.
     Args:
         - destination: str
         - month: str (default: None)
     """
-    seasonality = get_seasonality(destination, month)
-    month = seasonality[0]
-    popularity = get_popularity(destination)
-    emissions = 0
     # RECHECK
-    if seasonality[1] is not None and popularity is not None:
-        s_fairness = round(0.281 * emissions + 0.334 * popularity + 0.385 * seasonality[1], 3)
         return {
             'month': month,
             's-fairness': s_fairness
         }
     # elif popularity is not None: # => seasonality is None
@@ -106,9 +132,19 @@ def compute_sfairness_score(destination, month=None):
     else:
         return {
             'month': None,
             's-fairness': None
         }
 if __name__ == "__main__":
-    print(compute_sfairness_score("Paris", "Oct"))

 import sys
 import os
+from typing import Optional, Dict, Any
 import pandas as pd
 import logging
+from dotenv import load_dotenv
+load_dotenv()
 logger = logging.getLogger(__name__)
 logging.basicConfig(encoding='utf-8', level=logging.DEBUG)
 SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(os.path.dirname(SCRIPT_DIR))
+def get_emission_scores(emissions_df: pd.DataFrame, starting_point: str, destination: str, ):
     """
+    Returns the emission score and transport mode for the connection with least co2e between two cities.
+    The score is a weighted sum of travel time (in hours), co2e and cost of the lowest-co2e mode. If several
+    rows match the city pair, the first one is used. The input dataframe is not modified.
+    :param emissions_df: dataframe with city_1/city_2 columns and per-mode *_co2e_kg, *_cost_EUR and time columns
+    :param starting_point: value matched against the city_1 column
+    :param destination: value matched against the city_2 column
+    :return: (emission_score, mode) with mode in 'fly', 'drive', 'train'; (0, None) if no connection is found
     """
+    connections = emissions_df.loc[(emissions_df["city_1"] == starting_point) & (emissions_df["city_2"] == destination)]
+    if connections.empty:
+        logger.info(f"Connection not found between {starting_point} and {destination}")
+        return 0, None
+    # Read from the first matching row instead of writing helper columns onto a slice of emissions_df.
+    connection = connections.iloc[0]
+    co2e_by_mode = connection[['fly_co2e_kg', 'drive_co2e_kg', 'train_co2e_kg']].astype(float)
+    min_co2e_colname = co2e_by_mode.idxmin()
+    min_co2e = co2e_by_mode[min_co2e_colname]
+    mode_prefix = min_co2e_colname.split("_")[0]
+    min_cost = connection[mode_prefix + "_cost_EUR"]
+    if mode_prefix == "train":
+        # Train durations are stored in minutes, the other modes in hours.
+        min_travel_time = connection[mode_prefix + "_time_mins"] / 60
+    else:
+        min_travel_time = connection[mode_prefix + "_time_hrs"]
+    emission_score = 0.352 * min_travel_time + 0.218 * min_co2e + 0.431 * min_cost
+    return emission_score, mode_prefix
+def _check_city_present(df: pd.DataFrame, starting_point: Optional[str] = None, destination: str = "",
+                        category: str = "popularity"):
+    """
+    Tell whether df holds data for the destination.
+    For "emissions" a row with city_1 == starting_point and city_2 == destination is required; for every
+    other category a row with city == destination is enough.
+    """
+    if category == "emissions":
+        route_mask = (df['city_1'] == starting_point) & (df['city_2'] == destination)
+        return bool(route_mask.any())
+    return bool((df['city'] == destination).any())
+def get_scores(df: pd.DataFrame, starting_point: Optional[str] = None, destination="",
+               month: Optional[str] = None, category: str = "popularity"):
     """
+    Returns the popularity, seasonality or emission score for a particular destination.
+    Seasonality is calculated for a particular month, while popularity is year-round.
+    If no month is provided then
     the best month, i.e. month of lowest seasonality is returned.
     Args:
+        - df: pd.DataFrame holding the scores of the requested category
+        - starting_point: str (default: None), only used for "emissions"
         - destination: str
         - month: str (default: None)
+        - category: str (default: "popularity")
+    Returns:
+        - "popularity": the weighted_pop_score, or None if the city is missing
+        - "seasonality": (month, score), or (None, None) if the city is missing
+        - "emissions": (emission_score, mode), or (None, None) if the connection is missing
     """
+    # Check if city is present in dataframe
+    if not _check_city_present(df, starting_point, destination, category):
+        logger.info(f"{destination} does not have {category} data")
+        # Popularity is consumed as a single value; the other categories are unpacked as a pair.
+        return None if category == "popularity" else (None, None)
+    match category:
+        case "popularity":
+            return df[df['city'] == destination]['weighted_pop_score'].item()
+        case "seasonality":
+            # Drop 'city' up front rather than adding a helper column to a slice of df.
+            month_scores = df.loc[df['city'] == destination].drop(columns='city')
+            if month:
+                m = month.capitalize()[:3]
+            else:
+                m = month_scores.idxmin(axis="columns").item()
+            return m, month_scores[m].item()
+        case "emissions":
+            return get_emission_scores(df, starting_point, destination)
+    # Any other category has no score to report.
+    return None
+def compute_sfairness_score(data: list[pd.DataFrame],
+                            starting_point: str, destination: str,
+                            month: Optional[str] = None) -> dict[str, Any] | dict[str, None]:
     """
     Returns the s-fairness score for a particular destination city and (optional) month. If the destination doesn't
     have popularity or seasonality scores, then the function returns None.
     Args:
+        - data: list[pd.DataFrame]
+        - starting_point: str
         - destination: str
         - month: str (default: None)
     """
+    popularity_score = get_scores(df=data[0],
+                                  starting_point=None,
+                                  destination=destination, month=None, category="popularity")
+    month, seasonality_score = get_scores(df=data[1],
+                                          starting_point=None, destination=destination,
+                                          month=month, category="seasonality")
+    emission_score, mode = get_scores(df=data[2],
+                                      starting_point=starting_point, destination=destination, category="emissions")
+    if emission_score is None:
+        emission_score = 0
     # RECHECK
+    if seasonality_score is not None and popularity_score is not None:
+        s_fairness = round(0.281 * emission_score + 0.334 * popularity_score + 0.385 * seasonality_score, 3)
         return {
             'month': month,
+            'mode': mode,  # 'fly', 'drive', 'train'
             's-fairness': s_fairness
         }
     # elif popularity is not None: # => seasonality is None
     else:
         return {
             'month': None,
+            'mode': None,  # 'fly', 'drive', 'train'
             's-fairness': None
         }
+def test():
+    """Manual smoke check: load the three score tables and print the s-fairness result for two sample trips."""
+    # The parent of this script's directory is appended to sys.path at module level, which makes the
+    # helpers package importable here. The loader is load_scores; no load_data function exists.
+    from helpers.data_loaders import load_scores
+    popularity_data = load_scores("popularity")
+    seasonality_data = load_scores("seasonality")
+    emissions_data = load_scores("emissions")
+    data = [popularity_data, seasonality_data, emissions_data]
+    print(compute_sfairness_score(data=data, starting_point="Munich", destination="Dijon"))
+    print(compute_sfairness_score(data=data, starting_point="Munich", destination="Strasbourg", month="Dec"))
 if __name__ == "__main__":
+    # Run the manual smoke check when this module is executed as a script.
+    test()

src/text_generation/vertexai_setup.py CHANGED Viewed

@@ -21,7 +21,7 @@ def decode_service_key():
 def initialize_vertexai_params(location: Optional[str] = "us-central1"):
-    creds_file_name = os.getcwd() + "/.config/application_default_credentials.json"
     print(creds_file_name)
     if not os.path.exists(os.path.dirname(creds_file_name)):
         credentials = decode_service_key()

 def initialize_vertexai_params(location: Optional[str] = "us-central1"):
+    creds_file_name = os.getcwd() + "/.config/gcp_default_credentials.json"
     print(creds_file_name)
     if not os.path.exists(os.path.dirname(creds_file_name)):
         credentials = decode_service_key()