Spaces:
Running
Running
phyloforfun
committed on
Commit
•
bd72568
1
Parent(s):
28ebe52
add mammal prompt, fix bug
Browse files
api_cost/api_cost.yaml
CHANGED
@@ -102,6 +102,10 @@ MISTRAL_SMALL:
|
|
102 |
################
|
103 |
# Local Models
|
104 |
################
|
|
|
|
|
|
|
|
|
105 |
LOCAL_MIXTRAL_8X7B_INSTRUCT_V01:
|
106 |
in: 0.0
|
107 |
out: 0.0
|
@@ -113,4 +117,4 @@ LOCAL_CPU_MISTRAL_7B_INSTRUCT_V02_GGUF:
|
|
113 |
out: 0.0
|
114 |
phyloforfun/mistral-7b-instruct-v2-bnb-4bit__HLT_MICH_Angiospermae_SLTPvC_v1-0_medium_OCR-C25-L25-E50-R05:
|
115 |
in: 0.0
|
116 |
-
out: 0.0
|
|
|
102 |
################
|
103 |
# Local Models
|
104 |
################
|
105 |
+
# mistralai/Mistral-Nemo-Instruct-2407
|
106 |
+
LOCAL_MISTRAL_NEMO_INSTRUCT_2407:
|
107 |
+
in: 0.0
|
108 |
+
out: 0.0
|
109 |
LOCAL_MIXTRAL_8X7B_INSTRUCT_V01:
|
110 |
in: 0.0
|
111 |
out: 0.0
|
|
|
117 |
out: 0.0
|
118 |
phyloforfun/mistral-7b-instruct-v2-bnb-4bit__HLT_MICH_Angiospermae_SLTPvC_v1-0_medium_OCR-C25-L25-E50-R05:
|
119 |
in: 0.0
|
120 |
+
out: 0.0
|
app.py
CHANGED
@@ -2226,13 +2226,13 @@ def content_collage_overlay():
|
|
2226 |
# Set the options for the radio button with corresponding indices
|
2227 |
# Set the options for the transcription method radio button
|
2228 |
options = {
|
2229 |
-
0: "Use
|
2230 |
-
1: "Use
|
2231 |
2: "Use specimen collage for transcriptions"
|
2232 |
}
|
2233 |
|
2234 |
# Determine the default index based on the current configuration
|
2235 |
-
default_index = st.session_state.config['leafmachine'].get('use_RGB_label_images',
|
2236 |
|
2237 |
# Create the radio button for transcription method selection
|
2238 |
selected_option = st.radio(
|
|
|
2226 |
# Set the options for the radio button with corresponding indices
|
2227 |
# Set the options for the transcription method radio button
|
2228 |
options = {
|
2229 |
+
0: "Use original images for transcriptions",
|
2230 |
+
1: "Use LeafMachine2 label collage for transcriptions",
|
2231 |
2: "Use specimen collage for transcriptions"
|
2232 |
}
|
2233 |
|
2234 |
# Determine the default index based on the current configuration
|
2235 |
+
default_index = st.session_state.config['leafmachine'].get('use_RGB_label_images', 1)
|
2236 |
|
2237 |
# Create the radio button for transcription method selection
|
2238 |
selected_option = st.radio(
|
pages/prompt_builder.py
CHANGED
@@ -19,6 +19,20 @@ def create_download_button_yaml(file_path, selected_yaml_file, key_val):
|
|
19 |
)
|
20 |
|
21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
def upload_local_prompt_to_server(dir_prompt):
|
23 |
uploaded_file = st.file_uploader("Upload a custom prompt file", type=['yaml'])
|
24 |
if uploaded_file is not None:
|
@@ -31,10 +45,14 @@ def upload_local_prompt_to_server(dir_prompt):
|
|
31 |
with open(file_path, 'wb') as f:
|
32 |
f.write(uploaded_file.getbuffer())
|
33 |
st.success(f"Saved file {file_name} in {dir_prompt}")
|
|
|
|
|
|
|
34 |
else:
|
35 |
st.error("Please upload a .yaml file that you previously created using this Prompt Builder tool.")
|
36 |
|
37 |
|
|
|
38 |
def save_prompt_yaml(filename, col):
|
39 |
yaml_content = {
|
40 |
'prompt_author': st.session_state['prompt_author'],
|
@@ -207,6 +225,9 @@ def build_LLM_prompt_config():
|
|
207 |
st.write('##')
|
208 |
create_download_button_yaml(download_file_path, st.session_state['selected_yaml_file'],key_val=345798)
|
209 |
|
|
|
|
|
|
|
210 |
# Prompt Author Information
|
211 |
st.write("---")
|
212 |
st.header("Prompt Author Information")
|
|
|
19 |
)
|
20 |
|
21 |
|
22 |
+
# def upload_local_prompt_to_server(dir_prompt):
|
23 |
+
# uploaded_file = st.file_uploader("Upload a custom prompt file", type=['yaml'])
|
24 |
+
# if uploaded_file is not None:
|
25 |
+
# # Check the file extension
|
26 |
+
# file_name = uploaded_file.name
|
27 |
+
# if file_name.endswith('.yaml'):
|
28 |
+
# file_path = os.path.join(dir_prompt, file_name)
|
29 |
+
|
30 |
+
# # Save the file
|
31 |
+
# with open(file_path, 'wb') as f:
|
32 |
+
# f.write(uploaded_file.getbuffer())
|
33 |
+
# st.success(f"Saved file {file_name} in {dir_prompt}")
|
34 |
+
# else:
|
35 |
+
# st.error("Please upload a .yaml file that you previously created using this Prompt Builder tool.")
|
36 |
def upload_local_prompt_to_server(dir_prompt):
|
37 |
uploaded_file = st.file_uploader("Upload a custom prompt file", type=['yaml'])
|
38 |
if uploaded_file is not None:
|
|
|
45 |
with open(file_path, 'wb') as f:
|
46 |
f.write(uploaded_file.getbuffer())
|
47 |
st.success(f"Saved file {file_name} in {dir_prompt}")
|
48 |
+
|
49 |
+
# Update the prompt list
|
50 |
+
st.session_state['yaml_files'] = [f for f in os.listdir(dir_prompt) if f.endswith('.yaml')]
|
51 |
else:
|
52 |
st.error("Please upload a .yaml file that you previously created using this Prompt Builder tool.")
|
53 |
|
54 |
|
55 |
+
|
56 |
def save_prompt_yaml(filename, col):
|
57 |
yaml_content = {
|
58 |
'prompt_author': st.session_state['prompt_author'],
|
|
|
225 |
st.write('##')
|
226 |
create_download_button_yaml(download_file_path, st.session_state['selected_yaml_file'],key_val=345798)
|
227 |
|
228 |
+
|
229 |
+
upload_local_prompt_to_server(dir_prompt)
|
230 |
+
|
231 |
# Prompt Author Information
|
232 |
st.write("---")
|
233 |
st.header("Prompt Author Information")
|
vouchervision/general_utils.py
CHANGED
@@ -1311,12 +1311,12 @@ def create_specimen_collage(cfg, logger, dir_home, Project, Dirs):
|
|
1311 |
|
1312 |
# After processing, delete the original images, leaving only the _collage images
|
1313 |
# This is used just in case the HF version puts them there
|
1314 |
-
for filename in filenames:
|
1315 |
-
|
1316 |
-
|
1317 |
-
|
1318 |
-
|
1319 |
-
|
1320 |
|
1321 |
def crop_component_from_yolo_coords(anno_type, Dirs, analysis, all_detections, full_image, filename, save_per_image, save_per_class, save_list):
|
1322 |
height = analysis['height']
|
|
|
1311 |
|
1312 |
# After processing, delete the original images, leaving only the _collage images
|
1313 |
# This is used just in case the HF version puts them there
|
1314 |
+
# for filename in filenames:
|
1315 |
+
# if not filename.endswith('_collage.jpg'):
|
1316 |
+
# file_path = os.path.join(Dirs.save_original, filename)
|
1317 |
+
# if os.path.exists(file_path):
|
1318 |
+
# os.remove(file_path)
|
1319 |
+
# logger.info(f"Deleted original image: {file_path}")
|
1320 |
|
1321 |
def crop_component_from_yolo_coords(anno_type, Dirs, analysis, all_detections, full_image, filename, save_per_image, save_per_class, save_list):
|
1322 |
height = analysis['height']
|
vouchervision/model_maps.py
CHANGED
@@ -34,6 +34,8 @@ class ModelMaps:
|
|
34 |
|
35 |
'LOCAL_MIXTRAL_8X7B_INSTRUCT_V01': '#000000', # Black
|
36 |
'LOCAL_MISTRAL_7B_INSTRUCT_V02': '#4a4a4a', # Gray
|
|
|
|
|
37 |
|
38 |
'LOCAL_CPU_MISTRAL_7B_INSTRUCT_V02_GGUF': '#bababa', # Gray
|
39 |
|
@@ -78,7 +80,8 @@ class ModelMaps:
|
|
78 |
'Open Mistral 7B',
|
79 |
]
|
80 |
|
81 |
-
MODELS_LOCAL = ['LOCAL Mixtral 8x7B Instruct v0.1',
|
|
|
82 |
'LOCAL Mistral 7B Instruct v0.2',
|
83 |
'LOCAL CPU Mistral 7B Instruct v0.2 GGUF',
|
84 |
'phyloforfun/mistral-7b-instruct-v2-bnb-4bit__HLT_MICH_Angiospermae_SLTPvC_v1-0_medium_OCR-C25-L25-E50-R05']
|
@@ -124,6 +127,7 @@ class ModelMaps:
|
|
124 |
'Open Mixtral 8x7B': 'OPEN_MIXTRAL_8X7B',
|
125 |
'Open Mistral 7B': 'OPEN_MISTRAL_7B',
|
126 |
|
|
|
127 |
'LOCAL Mixtral 8x7B Instruct v0.1': 'LOCAL_MIXTRAL_8X7B_INSTRUCT_V01',
|
128 |
'LOCAL Mistral 7B Instruct v0.2': 'LOCAL_MISTRAL_7B_INSTRUCT_V02',
|
129 |
|
@@ -166,6 +170,7 @@ class ModelMaps:
|
|
166 |
'Open Mixtral 8x7B': has_key_mistral,
|
167 |
'Open Mistral 7B': has_key_mistral,
|
168 |
|
|
|
169 |
'LOCAL Mixtral 8x7B Instruct v0.1': True,
|
170 |
'LOCAL Mistral 7B Instruct v0.2': True,
|
171 |
|
@@ -208,6 +213,7 @@ class ModelMaps:
|
|
208 |
'Open Mixtral 8x7B': False,
|
209 |
'Open Mistral 7B': False,
|
210 |
|
|
|
211 |
'LOCAL Mixtral 8x7B Instruct v0.1': False,
|
212 |
'LOCAL Mistral 7B Instruct v0.2': False,
|
213 |
|
@@ -304,11 +310,15 @@ class ModelMaps:
|
|
304 |
|
305 |
|
306 |
### Mistral LOCAL
|
|
|
|
|
|
|
|
|
307 |
elif key == 'LOCAL_MIXTRAL_8X7B_INSTRUCT_V01':
|
308 |
return 'Mixtral-8x7B-Instruct-v0.1'
|
309 |
|
310 |
elif key == 'LOCAL_MISTRAL_7B_INSTRUCT_V02':
|
311 |
-
return 'Mistral-7B-Instruct-v0.
|
312 |
|
313 |
### Mistral LOCAL CPU
|
314 |
elif key == 'LOCAL_CPU_MISTRAL_7B_INSTRUCT_V02_GGUF':
|
|
|
34 |
|
35 |
'LOCAL_MIXTRAL_8X7B_INSTRUCT_V01': '#000000', # Black
|
36 |
'LOCAL_MISTRAL_7B_INSTRUCT_V02': '#4a4a4a', # Gray
|
37 |
+
# mistralai/Mistral-Nemo-Instruct-2407
|
38 |
+
'LOCAL_MISTRAL_NEMO_INSTRUCT_2407': '#000000', # Black
|
39 |
|
40 |
'LOCAL_CPU_MISTRAL_7B_INSTRUCT_V02_GGUF': '#bababa', # Gray
|
41 |
|
|
|
80 |
'Open Mistral 7B',
|
81 |
]
|
82 |
|
83 |
+
MODELS_LOCAL = ['LOCAL Mistral Nemo Instruct 2407',
|
84 |
+
'LOCAL Mixtral 8x7B Instruct v0.1',
|
85 |
'LOCAL Mistral 7B Instruct v0.2',
|
86 |
'LOCAL CPU Mistral 7B Instruct v0.2 GGUF',
|
87 |
'phyloforfun/mistral-7b-instruct-v2-bnb-4bit__HLT_MICH_Angiospermae_SLTPvC_v1-0_medium_OCR-C25-L25-E50-R05']
|
|
|
127 |
'Open Mixtral 8x7B': 'OPEN_MIXTRAL_8X7B',
|
128 |
'Open Mistral 7B': 'OPEN_MISTRAL_7B',
|
129 |
|
130 |
+
'LOCAL Mistral Nemo Instruct 2407': 'LOCAL_MISTRAL_NEMO_INSTRUCT_2407',
|
131 |
'LOCAL Mixtral 8x7B Instruct v0.1': 'LOCAL_MIXTRAL_8X7B_INSTRUCT_V01',
|
132 |
'LOCAL Mistral 7B Instruct v0.2': 'LOCAL_MISTRAL_7B_INSTRUCT_V02',
|
133 |
|
|
|
170 |
'Open Mixtral 8x7B': has_key_mistral,
|
171 |
'Open Mistral 7B': has_key_mistral,
|
172 |
|
173 |
+
'LOCAL Mistral Nemo Instruct 2407': True,
|
174 |
'LOCAL Mixtral 8x7B Instruct v0.1': True,
|
175 |
'LOCAL Mistral 7B Instruct v0.2': True,
|
176 |
|
|
|
213 |
'Open Mixtral 8x7B': False,
|
214 |
'Open Mistral 7B': False,
|
215 |
|
216 |
+
'LOCAL Mistral Nemo Instruct 2407': False,
|
217 |
'LOCAL Mixtral 8x7B Instruct v0.1': False,
|
218 |
'LOCAL Mistral 7B Instruct v0.2': False,
|
219 |
|
|
|
310 |
|
311 |
|
312 |
### Mistral LOCAL
|
313 |
+
#LOCAL_MISTRAL_NEMO_INSTRUCT_2407 'LOCAL Mistral Nemo Instruct 2407 mistralai/Mistral-Nemo-Instruct-2407
|
314 |
+
elif key == 'LOCAL_MISTRAL_NEMO_INSTRUCT_2407':
|
315 |
+
return 'Mistral-Nemo-Instruct-2407'
|
316 |
+
|
317 |
elif key == 'LOCAL_MIXTRAL_8X7B_INSTRUCT_V01':
|
318 |
return 'Mixtral-8x7B-Instruct-v0.1'
|
319 |
|
320 |
elif key == 'LOCAL_MISTRAL_7B_INSTRUCT_V02':
|
321 |
+
return 'Mistral-7B-Instruct-v0.3'
|
322 |
|
323 |
### Mistral LOCAL CPU
|
324 |
elif key == 'LOCAL_CPU_MISTRAL_7B_INSTRUCT_V02_GGUF':
|
vouchervision/utils_LLM_JSON_validation.py
CHANGED
@@ -12,8 +12,8 @@ def validate_and_align_JSON_keys_with_template(data, JSON_dict_structure):
|
|
12 |
data[key] = ''
|
13 |
elif isinstance(value, str):
|
14 |
if value.lower() in ['unknown','not provided', 'missing', 'na', 'none', 'n/a', 'null', 'unspecified',
|
15 |
-
'TBD',
|
16 |
-
'not provided in the text', 'not found in the text',
|
17 |
'not in the text', 'not provided', 'not found',
|
18 |
'not provided in the ocr', 'not found in the ocr',
|
19 |
'not in the ocr',
|
@@ -29,7 +29,7 @@ def validate_and_align_JSON_keys_with_template(data, JSON_dict_structure):
|
|
29 |
'not in the ocr text',
|
30 |
'Not provided in ocr text',
|
31 |
'not provided in ocr text',
|
32 |
-
'n/a n/a','n/a, n/a',
|
33 |
'n/a, n/a, n/a','n/a n/a, n/a','n/a, n/a n/a','n/a n/a n/a',
|
34 |
'n/a, n/a, n/a, n/a','n/a n/a n/a n/a','n/a n/a, n/a, n/a','n/a, n/a n/a, n/a','n/a, n/a, n/a n/a',
|
35 |
'n/a n/a n/a, n/a','n/a, n/a n/a n/a',
|
|
|
12 |
data[key] = ''
|
13 |
elif isinstance(value, str):
|
14 |
if value.lower() in ['unknown','not provided', 'missing', 'na', 'none', 'n/a', 'null', 'unspecified',
|
15 |
+
'TBD', 'tbd',
|
16 |
+
'not provided in the text', 'not found in the text', 'Not found in OCR text', 'not found in ocr text',
|
17 |
'not in the text', 'not provided', 'not found',
|
18 |
'not provided in the ocr', 'not found in the ocr',
|
19 |
'not in the ocr',
|
|
|
29 |
'not in the ocr text',
|
30 |
'Not provided in ocr text',
|
31 |
'not provided in ocr text',
|
32 |
+
'n/a n/a','n/a, n/a','Not applicable','not applicable',
|
33 |
'n/a, n/a, n/a','n/a n/a, n/a','n/a, n/a n/a','n/a n/a n/a',
|
34 |
'n/a, n/a, n/a, n/a','n/a n/a n/a n/a','n/a n/a, n/a, n/a','n/a, n/a n/a, n/a','n/a, n/a, n/a n/a',
|
35 |
'n/a n/a n/a, n/a','n/a, n/a n/a n/a',
|
vouchervision/utils_VoucherVision.py
CHANGED
@@ -164,7 +164,7 @@ class VoucherVision():
|
|
164 |
|
165 |
|
166 |
def map_dir_labels(self):
|
167 |
-
if self.cfg['leafmachine']['use_RGB_label_images']:
|
168 |
self.dir_labels = os.path.join(self.Dirs.save_per_annotation_class,'label')
|
169 |
else:
|
170 |
self.dir_labels = self.Dirs.save_original
|
@@ -353,7 +353,7 @@ class VoucherVision():
|
|
353 |
elif header.value == "path_to_crop":
|
354 |
sheet.cell(row=next_row, column=i, value=path_to_crop)
|
355 |
elif header.value == "path_to_original":
|
356 |
-
if self.cfg['leafmachine']['use_RGB_label_images']:
|
357 |
fname = os.path.basename(path_to_crop)
|
358 |
base = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(path_to_crop))))
|
359 |
path_to_original = os.path.join(base, 'Original_Images', fname)
|
|
|
164 |
|
165 |
|
166 |
def map_dir_labels(self):
|
167 |
+
if self.cfg['leafmachine']['use_RGB_label_images'] in [1,2]:
|
168 |
self.dir_labels = os.path.join(self.Dirs.save_per_annotation_class,'label')
|
169 |
else:
|
170 |
self.dir_labels = self.Dirs.save_original
|
|
|
353 |
elif header.value == "path_to_crop":
|
354 |
sheet.cell(row=next_row, column=i, value=path_to_crop)
|
355 |
elif header.value == "path_to_original":
|
356 |
+
if self.cfg['leafmachine']['use_RGB_label_images'] in [1,2]:
|
357 |
fname = os.path.basename(path_to_crop)
|
358 |
base = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(path_to_crop))))
|
359 |
path_to_original = os.path.join(base, 'Original_Images', fname)
|
vouchervision/utils_VoucherVision_parallel.py
CHANGED
@@ -704,8 +704,8 @@ class VoucherVision():
|
|
704 |
json_report.set_text(text_main='Sending batch to OCR and LLM')
|
705 |
|
706 |
num_files = len(self.img_paths)
|
707 |
-
|
708 |
-
num_threads = 128
|
709 |
counter = AtomicCounter()
|
710 |
|
711 |
# Setup for parallel execution
|
|
|
704 |
json_report.set_text(text_main='Sending batch to OCR and LLM')
|
705 |
|
706 |
num_files = len(self.img_paths)
|
707 |
+
num_threads = min(num_files, 128)
|
708 |
+
# num_threads = 128
|
709 |
counter = AtomicCounter()
|
710 |
|
711 |
# Setup for parallel execution
|