Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -25,32 +25,37 @@ from examples import (
|
|
25 |
# Identifier of the hosted model that writes the itinerary key points.
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# One shared client for every generation request; the timeout is generous
# because long itineraries are slow to generate.
llm_client = InferenceClient(
    model=repo_id,
    timeout=180,
)
|
27 |
|
|
|
28 |
|
29 |
def generate_key_points(text):
|
30 |
prompt = f"""
|
31 |
-
Please generate a set of key geographical points for the following description: {text}, as a json list of less than 10 dictionnaries with the following keys: 'name', 'description'.
|
32 |
-
|
33 |
-
|
|
|
|
|
34 |
|
35 |
For instance:
|
36 |
Description: {description_sf}
|
37 |
Thought: {output_example_sf}
|
|
|
38 |
|
39 |
Description: {description_loire}
|
40 |
Thought: {output_example_loire}
|
|
|
41 |
|
42 |
Now begin. You can make the descriptions a bit more verbose than in the examples.
|
43 |
|
44 |
Description: {text}
|
45 |
Thought:
|
46 |
"""
|
47 |
-
return llm_client.text_generation(prompt, max_new_tokens=2000, stream=True)
|
48 |
|
49 |
|
50 |
def parse_llm_output(output):
    """Split raw LLM text into a key-points table and a rationale string.

    ``output`` is expected to contain a free-form rationale, the marker
    ``Key points:``, and then a Python/JSON-style list of dictionaries.

    Args:
        output: Full text produced by the model.

    Returns:
        A ``(dataframe, rationale)`` tuple: ``dataframe`` is built from the
        parsed list of dictionaries, and ``rationale`` is the text before the
        marker prefixed with ``"Thought: "``.

    Raises:
        IndexError: If ``Key points:`` does not appear in ``output``.
        ValueError, SyntaxError: If the text after the marker is not a valid
            Python literal.
    """
    sections = output.split("Key points:")
    rationale = "Thought: " + sections[0]
    # Trim only the surrounding whitespace. Deleting every space would also
    # collapse the spaces inside the 'name' and 'description' values, and
    # literal_eval already tolerates whitespace between tokens.
    key_points = sections[1].strip()
    parsed_output = literal_eval(key_points)
    dataframe = pd.DataFrame.from_dict(parsed_output)
    return dataframe, rationale
|
@@ -67,17 +72,41 @@ class AsyncLRUCache:
|
|
67 |
self.cache.move_to_end(key)
|
68 |
return self.cache[key]
|
69 |
|
70 |
-
async def
|
|
|
|
|
|
|
71 |
if key in self.cache:
|
72 |
self.cache.move_to_end(key)
|
73 |
self.cache[key] = value
|
74 |
if len(self.cache) > self.maxsize:
|
75 |
self.cache.popitem(last=False)
|
76 |
|
77 |
-
|
78 |
# Module-wide cache instance, capped at 500 entries.
cache = AsyncLRUCache(
    maxsize=500,
)
|
80 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
async def geocode_address(address):
|
82 |
# Check if the result is in cache
|
83 |
cached_location = await cache.get(address)
|
@@ -91,10 +120,12 @@ async def geocode_address(address):
|
|
91 |
) as geolocator:
|
92 |
location = await geolocator.geocode(address, timeout=10)
|
93 |
if location:
|
|
|
94 |
# Save the result in cache for future use
|
95 |
-
await cache.
|
96 |
-
|
97 |
-
|
|
|
98 |
async def ageocode_addresses(addresses):
|
99 |
tasks = [geocode_address(address) for address in addresses]
|
100 |
locations = await asyncio.gather(*tasks)
|
@@ -107,9 +138,9 @@ def geocode_addresses(addresses):
|
|
107 |
|
108 |
|
109 |
def create_map_from_markers(dataframe):
|
110 |
-
|
111 |
-
dataframe["lat"] = [
|
112 |
-
dataframe["lon"] = [
|
113 |
|
114 |
f_map = Map(
|
115 |
location=[dataframe["lat"].mean(), dataframe["lon"].mean()],
|
|
|
25 |
# Identifier of the hosted model that writes the itinerary key points.
repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# One shared client for every generation request; the timeout is generous
# because long itineraries are slow to generate.
llm_client = InferenceClient(
    model=repo_id,
    timeout=180,
)

# Closing phrase the model is told to emit once its answer is complete; it is
# also used as the stop sequence for generation.
end_sequence = "I hope that helps!"
|
29 |
|
30 |
def generate_key_points(text):
    """Ask the LLM for key geographical points matching a trip description.

    The request is a few-shot prompt: instructions, two worked examples
    (San Francisco and the Loire), then ``text`` as the description to answer.
    Each example ends with ``end_sequence`` so the model learns to close its
    answer with that phrase.

    Args:
        text: Free-form description of the trip to plan.

    Returns:
        Whatever ``llm_client.text_generation`` returns for a streaming
        request (presumably an iterator over generated text chunks; confirm
        against the client documentation).
    """
    request_text = f"""
Please generate a set of key geographical points for the following description: {text}, as a json list of less than 10 dictionnaries with the following keys: 'name', 'description'.
Precise the full location in the 'name' if there is a possible ambiguity: for instance given that there are Chinatowns in several US cities, give the city name to disambiguate.
Generally try to minimize the distance between locations. Always think of the transportation means that you want to use, and the timing: morning, afternoon, where to sleep.
Only generate two sections: 'Thought:' provides your rationale for generating the points, then you list the locations in 'Key points:'.
Then generate '{end_sequence}' to indicate the end of the response.

For instance:
Description: {description_sf}
Thought: {output_example_sf}
{end_sequence}

Description: {description_loire}
Thought: {output_example_loire}
{end_sequence}

Now begin. You can make the descriptions a bit more verbose than in the examples.

Description: {text}
Thought:
"""
    # Generation stops as soon as the model emits the closing phrase.
    generation_options = {
        "max_new_tokens": 2000,
        "stream": True,
        "stop_sequences": [end_sequence],
    }
    return llm_client.text_generation(request_text, **generation_options)
|
53 |
|
54 |
|
55 |
def parse_llm_output(output):
    """Split raw LLM text into a key-points table and a rationale string.

    ``output`` is expected to contain a free-form rationale, the marker
    ``Key points:``, a Python/JSON-style list of dictionaries, and optionally
    the closing phrase ``end_sequence``.

    Args:
        output: Full text produced by the model.

    Returns:
        A ``(dataframe, rationale)`` tuple: ``dataframe`` is built from the
        parsed list of dictionaries, and ``rationale`` is the text before the
        marker prefixed with ``"Thought: "``.

    Raises:
        IndexError: If ``Key points:`` does not appear in ``output``.
        ValueError, SyntaxError: If the text after the marker is not a valid
            Python literal.
    """
    sections = output.split("Key points:")
    rationale = "Thought: " + sections[0]
    # Cut at the closing phrase BEFORE touching whitespace: the phrase itself
    # contains spaces, so stripping spaces first would make it unmatchable.
    # Only the surrounding whitespace is then trimmed, which keeps the spaces
    # inside the 'name' and 'description' values intact.
    key_points = sections[1].partition(end_sequence)[0].strip()
    parsed_output = literal_eval(key_points)
    dataframe = pd.DataFrame.from_dict(parsed_output)
    return dataframe, rationale
|
|
|
72 |
self.cache.move_to_end(key)
|
73 |
return self.cache[key]
|
74 |
|
75 |
+
async def aset(self, key, value):
    """Awaitable counterpart of ``set``.

    Nothing is awaited inside; the arguments are forwarded unchanged to
    ``self.set`` so coroutine callers can write ``await cache.aset(...)``.
    """
    self.set(key, value)
|
77 |
+
|
78 |
+
def set(self, key, value):
    """Store ``value`` under ``key`` as the most recently used entry.

    If the insertion pushes the cache past ``self.maxsize``, the entry at the
    front of ``self.cache`` (the least recently used one) is evicted.
    """
    entries = self.cache
    if key in entries:
        # Refresh recency so an updated key is not the next one evicted.
        entries.move_to_end(key)
    entries[key] = value

    over_capacity = len(entries) > self.maxsize
    if over_capacity:
        entries.popitem(last=False)
|
84 |
|
|
|
85 |
# Module-wide cache instance, capped at 500 entries.
cache = AsyncLRUCache(
    maxsize=500,
)

# Known coordinates keyed by the exact address string. They are loaded into
# the cache below, presumably so these lookups are answered without a
# geocoder call (verify against geocode_address).
preset_values = {
    "Fisherman's Wharf, San Francisco": {"lat": 37.808332, "lon": -122.415715},
    "Ghirardelli Square, San Francisco": {"lat": 37.80587075, "lon": -122.42294914207058},
    "Cable Car Museum, San Francisco": {"lat": 37.79476015, "lon": -122.41185284314184},
    "Union Square, San Francisco": {"lat": 37.7875138, "lon": -122.407159},
    "Chinatown, San Francisco": {"lat": 37.7943011, "lon": -122.4063757},
    "Coit Tower, San Francisco": {"lat": 37.80237905, "lon": -122.40583435461313},
    "Chinatown, San Francisco, California": {"lat": 37.7943011, "lon": -122.4063757},
    "Chinatown, New York City, New York": {"lat": 40.7164913, "lon": -73.9962504},
    "Chinatown, Los Angeles, California": {"lat": 34.0638402, "lon": -118.2358676},
    "Chinatown, Philadelphia, Pennsylvania": {"lat": 39.9534461, "lon": -75.1546218},
    "Chinatown, Chicago, Illinois": {"lat": 41.8516579, "lon": -87.6331383},
    "Chinatown, Boston, Massachusetts": {"lat": 42.3513291, "lon": -71.0626228},
    "Chinatown, Honolulu, Hawaii": {"lat": 21.3129031, "lon": -157.8628003},
    "Chinatown, Seattle, Washington": {"lat": 47.5980601, "lon": -122.3245246},
    "Chinatown, Portland, Oregon": {"lat": 45.5251092, "lon": -122.6744481},
    "Chinatown, Las Vegas, Nevada": {"lat": 36.2823279, "lon": -115.3310655},
}

# Seed the cache through the synchronous setter; no event loop is needed at
# import time.
for key, value in preset_values.items():
    cache.set(key, value)
|
108 |
+
|
109 |
+
|
110 |
async def geocode_address(address):
|
111 |
# Check if the result is in cache
|
112 |
cached_location = await cache.get(address)
|
|
|
120 |
) as geolocator:
|
121 |
location = await geolocator.geocode(address, timeout=10)
|
122 |
if location:
|
123 |
+
coords = {'lat': location.latitude, "lon": location.longitude}
|
124 |
# Save the result in cache for future use
|
125 |
+
await cache.aset(address, coords)
|
126 |
+
return coords
|
127 |
+
return None
|
128 |
+
|
129 |
async def ageocode_addresses(addresses):
|
130 |
tasks = [geocode_address(address) for address in addresses]
|
131 |
locations = await asyncio.gather(*tasks)
|
|
|
138 |
|
139 |
|
140 |
def create_map_from_markers(dataframe):
|
141 |
+
coordinates = geocode_addresses(dataframe["name"])
|
142 |
+
dataframe["lat"] = [coords['lat'] if coords else None for coords in coordinates]
|
143 |
+
dataframe["lon"] = [coords['lon'] if coords else None for coords in coordinates]
|
144 |
|
145 |
f_map = Map(
|
146 |
location=[dataframe["lat"].mean(), dataframe["lon"].mean()],
|