m-ric HF staff committed on
Commit
ec6bf88
·
verified ·
1 Parent(s): ce66c61

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -13
app.py CHANGED
@@ -25,32 +25,37 @@ from examples import (
25
  repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
26
  llm_client = InferenceClient(model=repo_id, timeout=180)
27
 
 
28
 
29
  def generate_key_points(text):
30
  prompt = f"""
31
- Please generate a set of key geographical points for the following description: {text}, as a json list of less than 10 dictionnaries with the following keys: 'name', 'description'. Precise the full location in the 'name' if there is a possible ambiguity.
32
- Generally try to minimze the distance between locations. Always think of the transportation means that you want to use, and the timing: morning, afternoon, where to sleep.
33
- Only generate a 'Thought:' and a 'Key points:' sections, nothing else.
 
 
34
 
35
  For instance:
36
  Description: {description_sf}
37
  Thought: {output_example_sf}
 
38
 
39
  Description: {description_loire}
40
  Thought: {output_example_loire}
 
41
 
42
  Now begin. You can make the descriptions a bit more verbose than in the examples.
43
 
44
  Description: {text}
45
  Thought:
46
  """
47
- return llm_client.text_generation(prompt, max_new_tokens=2000, stream=True)
48
 
49
 
50
  def parse_llm_output(output):
51
  rationale = "Thought: " + output.split("Key points:")[0]
52
  key_points = output.split("Key points:")[1]
53
- output = key_points.replace(" ", "")
54
  parsed_output = literal_eval(output)
55
  dataframe = pd.DataFrame.from_dict(parsed_output)
56
  return dataframe, rationale
@@ -67,17 +72,41 @@ class AsyncLRUCache:
67
  self.cache.move_to_end(key)
68
  return self.cache[key]
69
 
70
- async def set(self, key, value):
 
 
 
71
  if key in self.cache:
72
  self.cache.move_to_end(key)
73
  self.cache[key] = value
74
  if len(self.cache) > self.maxsize:
75
  self.cache.popitem(last=False)
76
 
77
-
78
  # Instantiate the cache
79
  cache = AsyncLRUCache(maxsize=500)
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  async def geocode_address(address):
82
  # Check if the result is in cache
83
  cached_location = await cache.get(address)
@@ -91,10 +120,12 @@ async def geocode_address(address):
91
  ) as geolocator:
92
  location = await geolocator.geocode(address, timeout=10)
93
  if location:
 
94
  # Save the result in cache for future use
95
- await cache.set(address, location)
96
- return location
97
-
 
98
  async def ageocode_addresses(addresses):
99
  tasks = [geocode_address(address) for address in addresses]
100
  locations = await asyncio.gather(*tasks)
@@ -107,9 +138,9 @@ def geocode_addresses(addresses):
107
 
108
 
109
  def create_map_from_markers(dataframe):
110
- locations = geocode_addresses(dataframe["name"])
111
- dataframe["lat"] = [location.latitude if location else None for location in locations]
112
- dataframe["lon"] = [location.longitude if location else None for location in locations]
113
 
114
  f_map = Map(
115
  location=[dataframe["lat"].mean(), dataframe["lon"].mean()],
 
25
  repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"
26
  llm_client = InferenceClient(model=repo_id, timeout=180)
27
 
28
+ end_sequence = "I hope that helps!"
29
 
30
  def generate_key_points(text):
31
  prompt = f"""
32
+ Please generate a set of key geographical points for the following description: {text}, as a json list of less than 10 dictionnaries with the following keys: 'name', 'description'.
33
+ Precise the full location in the 'name' if there is a possible ambiguity: for instance given that there are Chinatowns in several US cities, give the city name to disambiguate.
34
+ Generally try to minimize the distance between locations. Always think of the transportation means that you want to use, and the timing: morning, afternoon, where to sleep.
35
+ Only generate two sections: 'Thought:' provides your rationale for generating the points, then you list the locations in 'Key points:'.
36
+ Then generate '{end_sequence}' to indicate the end of the response.
37
 
38
  For instance:
39
  Description: {description_sf}
40
  Thought: {output_example_sf}
41
+ {end_sequence}
42
 
43
  Description: {description_loire}
44
  Thought: {output_example_loire}
45
+ {end_sequence}
46
 
47
  Now begin. You can make the descriptions a bit more verbose than in the examples.
48
 
49
  Description: {text}
50
  Thought:
51
  """
52
+ return llm_client.text_generation(prompt, max_new_tokens=2000, stream=True, stop_sequences=[end_sequence])
53
 
54
 
55
  def parse_llm_output(output):
56
  rationale = "Thought: " + output.split("Key points:")[0]
57
  key_points = output.split("Key points:")[1]
58
+ output = key_points.replace(" ", "").replace(end_sequence, "").strip()
59
  parsed_output = literal_eval(output)
60
  dataframe = pd.DataFrame.from_dict(parsed_output)
61
  return dataframe, rationale
 
72
  self.cache.move_to_end(key)
73
  return self.cache[key]
74
 
75
+ async def aset(self, key, value):
76
+ self.set(key, value)
77
+
78
+ def set(self, key, value):
79
  if key in self.cache:
80
  self.cache.move_to_end(key)
81
  self.cache[key] = value
82
  if len(self.cache) > self.maxsize:
83
  self.cache.popitem(last=False)
84
 
 
85
  # Instantiate the cache
86
  cache = AsyncLRUCache(maxsize=500)
87
 
88
+ preset_values = {
89
+ "Fisherman's Wharf, San Francisco": {'lat': 37.808332, 'lon': -122.415715},
90
+ 'Ghirardelli Square, San Francisco': {'lat': 37.80587075, 'lon': -122.42294914207058},
91
+ 'Cable Car Museum, San Francisco': {'lat': 37.79476015, 'lon': -122.41185284314184},
92
+ 'Union Square, San Francisco': {'lat': 37.7875138, 'lon': -122.407159},
93
+ 'Chinatown, San Francisco': {'lat': 37.7943011, 'lon': -122.4063757},
94
+ 'Coit Tower, San Francisco': {'lat': 37.80237905, 'lon': -122.40583435461313},
95
+ 'Chinatown, San Francisco, California': {'lat': 37.7943011, 'lon': -122.4063757},
96
+ 'Chinatown, New York City, New York': {'lat': 40.7164913, 'lon': -73.9962504},
97
+ 'Chinatown, Los Angeles, California': {'lat': 34.0638402, 'lon': -118.2358676},
98
+ 'Chinatown, Philadelphia, Pennsylvania': {'lat': 39.9534461, 'lon': -75.1546218},
99
+ 'Chinatown, Chicago, Illinois': {'lat': 41.8516579, 'lon': -87.6331383},
100
+ 'Chinatown, Boston, Massachusetts': {'lat': 42.3513291, 'lon': -71.0626228},
101
+ 'Chinatown, Honolulu, Hawaii': {'lat': 21.3129031, 'lon': -157.8628003},
102
+ 'Chinatown, Seattle, Washington': {'lat': 47.5980601, 'lon': -122.3245246},
103
+ 'Chinatown, Portland, Oregon': {'lat': 45.5251092, 'lon': -122.6744481},
104
+ 'Chinatown, Las Vegas, Nevada': {'lat': 36.2823279, 'lon': -115.3310655}
105
+ }
106
+ for key, value in preset_values.items():
107
+ cache.set(key, value)
108
+
109
+
110
  async def geocode_address(address):
111
  # Check if the result is in cache
112
  cached_location = await cache.get(address)
 
120
  ) as geolocator:
121
  location = await geolocator.geocode(address, timeout=10)
122
  if location:
123
+ coords = {'lat': location.latitude, "lon": location.longitude}
124
  # Save the result in cache for future use
125
+ await cache.aset(address, coords)
126
+ return coords
127
+ return None
128
+
129
  async def ageocode_addresses(addresses):
130
  tasks = [geocode_address(address) for address in addresses]
131
  locations = await asyncio.gather(*tasks)
 
138
 
139
 
140
  def create_map_from_markers(dataframe):
141
+ coordinates = geocode_addresses(dataframe["name"])
142
+ dataframe["lat"] = [coords['lat'] if coords else None for coords in coordinates]
143
+ dataframe["lon"] = [coords['lon'] if coords else None for coords in coordinates]
144
 
145
  f_map = Map(
146
  location=[dataframe["lat"].mean(), dataframe["lon"].mean()],