lkhl committed on
Commit
2701e26
·
verified ·
1 Parent(s): 0f3981b

Update processing_videollama3.py

Browse files
Files changed (1) hide show
  1. processing_videollama3.py +9 -8
processing_videollama3.py CHANGED
@@ -682,14 +682,15 @@ class Videollama3Qwen2Processor(ProcessorMixin):
682
  kwargs.pop("padding")
683
  kwargs.pop("padding_side")
684
 
685
- image_idx = 0
686
- while DEFAULT_IMAGE_TOKEN in text:
687
- num_tokens = self._get_visual_seq_len(grid_sizes[image_idx])
688
- text = text.replace(DEFAULT_IMAGE_TOKEN, "<placeholder>" * num_tokens, 1)
689
- image_idx += 1
690
- text = text.replace("<placeholder>", DEFAULT_IMAGE_TOKEN)
691
-
692
- assert len(grid_sizes) == image_idx, "Number of images does not match the number of image tokens in the text."
 
693
 
694
  text_inputs = self.tokenizer(text, **kwargs)
695
  return text_inputs
 
682
  kwargs.pop("padding")
683
  kwargs.pop("padding_side")
684
 
685
+ if len(grid_sizes) > 0:
686
+ image_idx = 0
687
+ while DEFAULT_IMAGE_TOKEN in text:
688
+ num_tokens = self._get_visual_seq_len(grid_sizes[image_idx])
689
+ text = text.replace(DEFAULT_IMAGE_TOKEN, "<placeholder>" * num_tokens, 1)
690
+ image_idx += 1
691
+ text = text.replace("<placeholder>", DEFAULT_IMAGE_TOKEN)
692
+
693
+ assert len(grid_sizes) == image_idx, "Number of images does not match the number of image tokens in the text."
694
 
695
  text_inputs = self.tokenizer(text, **kwargs)
696
  return text_inputs