Markgazol committed
Commit 710406c · verified · 1 Parent(s): 5f76bcd

Update modeling_colqwenstella.py

Files changed (1):
  modeling_colqwenstella.py  +2 -55
modeling_colqwenstella.py CHANGED
@@ -1431,12 +1431,6 @@ class Qwen2ForSequenceClassification(Qwen2PreTrainedModel):
     )
 
 
-####################################################################################################################
-####################################################################################################################
-####################################################################################################################
-####################################################################################################################
-### codes for jasper
-####################################################################################################################
 from transformers.models.qwen2_vl import Qwen2VLConfig, Qwen2VLForConditionalGeneration
 from transformers import PretrainedConfig
 
@@ -1446,8 +1440,6 @@ class ColStellaVLConfig(PretrainedConfig):
 
     def __init__(
         self,
-        # vector_dropout_p: float = 0.2,
-        # num_img_tokens: int = 300,
        img_start_token_id: int = 151644,
        img_start_token: str = "<|im_start|>",
        img_token_id: int = 151646,
@@ -1459,12 +1451,6 @@ class ColStellaVLConfig(PretrainedConfig):
        **kwargs
     ):
         super().__init__(**kwargs)
-        # if vector_dim not in (12288, 1024, 512, 256):
-        #     raise ValueError("vector_dim must be 12288, 1024, 512, 256")
-        # self.vector_dim = vector_dim
-        # self.vector_dropout_p = vector_dropout_p
-
-        # self.num_img_tokens = num_img_tokens
 
        self.img_start_token_id = img_start_token_id
        self.img_start_token = img_start_token
@@ -1499,9 +1485,6 @@ class ColStellaVLConfig(PretrainedConfig):
 
 
 
-# ColStellaVLConfig.from_pretrained("/home/nane.saroyan99/colpali/models/stella_transformer")
-
-
 class ColQwenStella(PreTrainedModel):
     config_class = ColStellaVLConfig
     _supports_sdpa = True
@@ -1514,34 +1497,11 @@ class ColQwenStella(PreTrainedModel):
         super().__init__(config)
         self.model = Qwen2Model(config.text_config)
         self.config = config
-        # if not config.is_text_encoder:
-        #     self.vision_model = Qwen2VLForConditionalGeneration.from_pretrained("Qwen/Qwen2-VL-2B-Instruct").visual
-        # qwen =
-        # print(config.vision_config)
         self.vision_model = Qwen2VLForConditionalGeneration(config.vision_config).visual
-        # self.get_rope_index = self.vision_model.get_rope_index
-        # self.vision_model = self.vision_model.visual
-        if torch.cuda.is_available():
-            print("deleting all else")
-            torch.cuda.empty_cache()
-        # self.adaptive_avg_pool2d = torch.nn.AdaptiveAvgPool2d(
-        #     (self.config.num_img_tokens, config.text_config.hidden_size)
-        # )
-
-        # self.vector_linear_12288 = nn.Linear(config.text_config.hidden_size, 12288, bias=True)
         self.vector_linear_1024 = nn.Linear(config.text_config.hidden_size, 1024, bias=True)
-        # self.vector_linear_512 = nn.Linear(config.text_config.hidden_size, 512, bias=True)
-        # self.vector_linear_256 = nn.Linear(config.text_config.hidden_size, 256, bias=True)
-        # Initialize weights and apply final processing
-        # self.model.resize_token_embeddings(151647)
         self.post_init()
-        # self.model.resize_token_embeddings(151647)
 
-    # def get_input_embeddings(self):
-    #     return self.model.embed_tokens
 
-    # def set_input_embeddings(self, value):
-    #     self.model.embed_tokens = value
 
     def inner_forward(
         self,
@@ -1588,12 +1548,7 @@ class ColQwenStella(PreTrainedModel):
 
 
 
-    def forward(self,
-                # input_ids: torch.LongTensor = None,
-                # attention_mask: Optional[torch.Tensor] = None,
-                # pixel_values: Optional[torch.Tensor] = None,
-                *args,
-                **kwargs) -> torch.Tensor:
+    def forward(self, *args, **kwargs) -> torch.Tensor:
        # Delete output_hidden_states from kwargs
        kwargs.pop("output_hidden_states", None)
 
@@ -1625,12 +1580,4 @@ class ColQwenStella(PreTrainedModel):
 
     @property
     def spatial_merge_size(self) -> int:
-        return self.vision_model.config.spatial_merge_size
-
-
-# from models.qwenstella_base.modeling_colqwenstella import ColQwenStella, ColStellaVLConfig
-
-# ColStellaVLConfig.__module__ = "transformers_modules.qwenstella_base.modeling_colqwenstella"
-
-# AutoConfig.register("colstella_vl", ColStellaVLConfig)
-# AutoModel.register(ColStellaVLConfig, ColQwenStella)
+        return self.vision_model.config.spatial_merge_size
 
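For context, the commented-out registration lines removed at the end of the file hinted at how these classes plug into the transformers Auto* machinery. Below is a minimal sketch of that registration, assuming this file is importable as modeling_colqwenstella and that ColStellaVLConfig declares model_type = "colstella_vl" (both assumptions; neither is confirmed by this diff):

# Minimal sketch, mirroring the commented-out registration lines removed above.
# Assumption: ColStellaVLConfig.model_type == "colstella_vl", which
# AutoConfig.register requires to match the registered name.
from transformers import AutoConfig, AutoModel
from modeling_colqwenstella import ColQwenStella, ColStellaVLConfig

AutoConfig.register("colstella_vl", ColStellaVLConfig)
AutoModel.register(ColStellaVLConfig, ColQwenStella)

# After registration, the model loads like any other transformers model:
# model = AutoModel.from_pretrained("Markgazol/colqwen_stella")  # hypothetical repo id

In practice, checkpoints that ship a custom modeling file like this one are usually loaded with trust_remote_code=True instead of manual registration, which achieves the same mapping without importing the module yourself.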