Fixed a minor bug with region OCR always returning the first label with an erroneous </s>
Browse files
The raw output of Florence here is:
tensor([[ 2, 0, 8108, 500, 50528, 50486, 50736, 50479, 50739, 50592,
50532, 50600, 2]], device='cuda:0')
Token 2 is </s>. Without this change, Florence does not remove it in the OCR-with-regions case, which results in the first label always having an extra </s>, e.g.:
'labels': ['</s>SSR']}}
- processing_florence2.py +1 -0
processing_florence2.py
CHANGED
@@ -722,6 +722,7 @@ class Florence2PostProcesser(object):
|
|
722 |
bboxes = []
|
723 |
labels = []
|
724 |
text = text.replace('<s>', '')
|
|
|
725 |
# ocr with regions
|
726 |
parsed = re.findall(pattern, text)
|
727 |
instances = []
|
|
|
722 |
bboxes = []
|
723 |
labels = []
|
724 |
text = text.replace('<s>', '')
|
725 |
+
text = text.replace('</s>', '')
|
726 |
# ocr with regions
|
727 |
parsed = re.findall(pattern, text)
|
728 |
instances = []
|