File size: 1,242 Bytes
c61bdc1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
================================================
@author: Jaron
@time: 2024/08/21 17:51:45
@email: [email protected]
@description:
================================================
"""
from typing import Optional

from transformers import PretrainedConfig


class VideoCCAMConfig(PretrainedConfig):
    """Configuration class for the Video-CCAM multimodal model.

    Bundles the identifiers/paths of the three sub-components (language
    model, projector, vision encoder) together with the special media
    tokens and vision-encoder options used when assembling the model.
    """

    model_type = 'videoccam'
    # Registers this config with transformers' AutoConfig machinery.
    _auto_class = 'AutoConfig'

    def __init__(
        self,
        llm_name_or_path: Optional[str] = None,
        projector_name_or_path: Optional[str] = None,
        vision_encoder_name_or_path: Optional[str] = None,
        image_token: str = '<image>',
        video_token: str = '<video>',
        vision_select_layer: int = -2,
        vision_max_chunk_size: int = 0,
        _attn_implementation: str = 'flash_attention_2',
        **kwargs
    ):
        """Initialize the Video-CCAM configuration.

        Args:
            llm_name_or_path: HF hub id or local path of the language model.
            projector_name_or_path: HF hub id or local path of the projector.
            vision_encoder_name_or_path: HF hub id or local path of the
                vision encoder.
            image_token: Placeholder token marking image positions in text.
            video_token: Placeholder token marking video positions in text.
            vision_select_layer: Index of the vision-encoder hidden layer to
                take features from (-2 = second-to-last layer).
            vision_max_chunk_size: Chunking limit for the vision encoder;
                0 presumably disables chunking — confirm against the model code.
            _attn_implementation: Attention backend passed to sub-models
                (e.g. 'flash_attention_2').
            **kwargs: Forwarded to ``PretrainedConfig.__init__``.
        """
        super().__init__(**kwargs)
        self.llm_name_or_path = llm_name_or_path
        self.projector_name_or_path = projector_name_or_path
        self.vision_encoder_name_or_path = vision_encoder_name_or_path
        self.image_token = image_token
        self.video_token = video_token
        self.vision_select_layer = vision_select_layer
        self.vision_max_chunk_size = vision_max_chunk_size
        # Assigned after super().__init__ so the explicit parameter wins over
        # whatever the base class may have derived from kwargs.
        self._attn_implementation = _attn_implementation