VITA-MLLM
/

VITA-1.5

Video-Text-to-Text

Model card · Files and versions · Community

lxysl committed on Dec 25, 2024

Commit

ea93d1c

·

verified ·

1 Parent(s): 0763970

Upload config.json

Files changed (1) hide show

config.json +3 -3

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/mnt/cfs2/lhj/videomllm_ckpt/outputs/vita_video_audio_1021/bp_llava-s3-finetune_task_neg",
   "architectures": [
     "VITAQwen2ForCausalLM"
   ],
@@ -19,11 +19,11 @@
   "intermediate_size": 18944,
   "max_position_embeddings": 32768,
   "max_window_layers": 28,
-  "mm_audio_encoder": "/mnt/cfs2/lhj/model_weights/audio-encoder-Qwen2-7B-1107-weight-base-11wh-tunning",
   "mm_hidden_size": 4096,
   "mm_projector_lr": null,
   "mm_projector_type": "mlp2x_gelu",
-  "mm_vision_tower": "/mnt/cfs/lhj/model_weights/InternViT-300M-448px",
   "model_type": "vita-Qwen2",
   "num_attention_heads": 28,
   "num_hidden_layers": 28,

 {
+  "_name_or_path": "VITA-MLLM/VITA-1.5",
   "architectures": [
     "VITAQwen2ForCausalLM"
   ],
   "intermediate_size": 18944,
   "max_position_embeddings": 32768,
   "max_window_layers": 28,
+  "mm_audio_encoder": "VITA-MLLM/VITA-1.5-AudioEnc",
   "mm_hidden_size": 4096,
   "mm_projector_lr": null,
   "mm_projector_type": "mlp2x_gelu",
+  "mm_vision_tower": "OpenGVLab/InternViT-300M-448px",
   "model_type": "vita-Qwen2",
   "num_attention_heads": 28,
   "num_hidden_layers": 28,