PATTARA TIPAKSORN committed on
Commit dae6ad4 (1 parent: 1aeb37b)

Upload 9 files

config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "architectures": [
+     "PathummaAudioModel"
+   ],
+   "auto_map": {
+     "AutoConfig": "configuration_pathumma_audio.PathummaAudioConfig",
+     "AutoModel": "modeling_pathumma_audio.PathummaAudioModel"
+   },
+   "beats_path": "",
+   "init_from_scratch": true,
+   "llm_path": "Qwen/Qwen2-7B-Instruct",
+   "lora": true,
+   "lora_alpha": 32,
+   "lora_dropout": 0.1,
+   "lora_infer_mode": true,
+   "lora_rank": 8,
+   "model_type": "pathumma_audio",
+   "qformer_hidden_layers": 2,
+   "qformer_query_token": 1,
+   "second_per_window": 0.333333,
+   "second_stride": 0.333333,
+   "target_modules": null,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.44.2",
+   "whisper_path": "openai/whisper-large-v3"
+ }
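Because `auto_map` points `AutoConfig` and `AutoModel` at the Python files shipped in this commit, the checkpoint has to be loaded with `trust_remote_code=True`. A minimal loading sketch follows; the repo id is a placeholder (the diff does not name the repository), and the dtype mirrors the `"torch_dtype": "bfloat16"` entry above.

```python
# Minimal loading sketch. The repo id is a placeholder, not taken from the diff;
# substitute the actual Hugging Face repository this commit belongs to.
import torch
from transformers import AutoConfig, AutoModel

repo_id = "<org>/<pathumma-audio-repo>"  # placeholder

config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModel.from_pretrained(
    repo_id,
    config=config,
    torch_dtype=torch.bfloat16,  # matches "torch_dtype" in config.json
    trust_remote_code=True,      # required because auto_map references bundled code
)
```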
configuration_pathumma_audio.py ADDED
@@ -0,0 +1,59 @@
+ import torch
+ from typing import List, Optional
+ from transformers import PretrainedConfig
+
+ class PathummaAudioConfig(PretrainedConfig):
+
+     model_type: str = "pathumma_audio"
+
+     def __init__(
+         self,
+         # base checkpoints
+         llm_path: str = "Qwen/Qwen2-7B-Instruct",
+         whisper_path: str = "openai/whisper-large-v3",
+         beats_path: str = "",
+         init_from_scratch: bool = True,
+
+         # LoRA adapter settings
+         lora: bool = True,
+         lora_infer_mode: bool = True,
+         lora_rank: int = 8,
+         lora_alpha: int = 32,
+         lora_dropout: float = 0.1,
+         target_modules: Optional[List[str]] = None,  # e.g. ["q_proj", "v_proj"]; None avoids a mutable default
+
+         # Q-Former and audio-window settings
+         qformer_query_token: int = 1,
+         qformer_hidden_layers: int = 2,
+         second_per_window: float = 0.333333,
+         second_stride: float = 0.333333,
+
+         torch_dtype: torch.dtype = torch.bfloat16,
+         **kwargs,
+     ):
+         super().__init__(**kwargs)
+
+         self.architectures = kwargs.get("architectures", ["PathummaAudioModel"])
+         self.auto_map = kwargs.get("auto_map", {
+             "AutoConfig": "configuration_pathumma_audio.PathummaAudioConfig",
+             "AutoModel": "modeling_pathumma_audio.PathummaAudioModel"
+         })
+
+         self.llm_path = llm_path
+         self.whisper_path = whisper_path
+         self.beats_path = beats_path
+         self.init_from_scratch = init_from_scratch
+
+         self.lora = lora
+         self.lora_infer_mode = lora_infer_mode
+         self.lora_rank = lora_rank
+         self.lora_alpha = lora_alpha
+         self.lora_dropout = lora_dropout
+         self.target_modules = target_modules
+
+         self.qformer_query_token = qformer_query_token
+         self.qformer_hidden_layers = qformer_hidden_layers
+         self.second_per_window = second_per_window
+         self.second_stride = second_stride
+
+         self.torch_dtype = torch_dtype
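Since `PathummaAudioConfig` subclasses `PretrainedConfig`, it inherits the standard save/load round trip, which is what produced the config.json above. A short sketch, with illustrative override values not taken from the commit:

```python
# Round-trip sketch for the config class above; the overrides are illustrative.
from configuration_pathumma_audio import PathummaAudioConfig

config = PathummaAudioConfig(lora_rank=16)
config.save_pretrained("./pathumma-audio-config")   # writes config.json
reloaded = PathummaAudioConfig.from_pretrained("./pathumma-audio-config")
assert reloaded.lora_rank == 16
```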
generation_config.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "_from_model_config": true,
+   "transformers_version": "4.44.2"
+ }
modeling_pathumma_audio.py ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model-00001-of-00004.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:79692df1f14cd91e53518bf688fd8b4e54d72e4f983a96eea30463ad677d9982
+ size 4880949349
pytorch_model-00002-of-00004.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b159089ab56607238ff6d8cfc93efc996ad6d4c338c4a4930bf072c1459a5f90
+ size 4936404688
pytorch_model-00003-of-00004.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3dc497d77f78e19581119f4c15cb88b3f093cc0d0aba378731d84e4a8828bcee
+ size 4334153926
pytorch_model-00004-of-00004.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8170ad28032cb7cfee768f7d22f8a6de4ef323a6af296246478a90b7dd7da3f0
+ size 2596849960
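The four weight shards are stored as Git LFS pointers: in spec v1, `oid` is the SHA-256 of the shard's full contents and `size` is its byte count. After downloading the shards, those two fields can be checked locally; a sketch, assuming the files sit in the current directory:

```python
# Verify downloaded shards against the LFS pointers above (oid/size from the diff).
import hashlib
from pathlib import Path

expected = {
    "pytorch_model-00001-of-00004.bin": ("79692df1f14cd91e53518bf688fd8b4e54d72e4f983a96eea30463ad677d9982", 4880949349),
    "pytorch_model-00002-of-00004.bin": ("b159089ab56607238ff6d8cfc93efc996ad6d4c338c4a4930bf072c1459a5f90", 4936404688),
    "pytorch_model-00003-of-00004.bin": ("3dc497d77f78e19581119f4c15cb88b3f093cc0d0aba378731d84e4a8828bcee", 4334153926),
    "pytorch_model-00004-of-00004.bin": ("8170ad28032cb7cfee768f7d22f8a6de4ef323a6af296246478a90b7dd7da3f0", 2596849960),
}

for name, (oid, size) in expected.items():
    path = Path(name)
    digest = hashlib.sha256()
    with path.open("rb") as f:
        # hash in 1 MiB chunks to avoid loading multi-GB shards into memory
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    ok = digest.hexdigest() == oid and path.stat().st_size == size
    print(f"{name}: {'OK' if ok else 'MISMATCH'}")
```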
pytorch_model.bin.index.json ADDED
The diff for this file is too large to render. See raw diff