ZoniaChatbot committed
Commit f3bf9fe · 1 Parent(s): 92be6db

Uploading the Colombian female VITS model

added_tokens.json ADDED
@@ -0,0 +1,3 @@
+ {
+   "<unk>": 45
+ }
checkpoint-500/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cf3bf2550ba5ba4235408b2a84912839cdf9669c5133c9b649060be2572df05e
+ size 145288288
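The three lines above are a git-lfs pointer rather than the weights themselves: they record the LFS spec version, the SHA-256 of the payload, and its size in bytes (about 145 MB here), while the binary lives in LFS storage. A minimal sketch of resolving such a pointer with huggingface_hub, assuming the repository id "ZoniaChatbot/vits_colombian_1523_female" (taken from hub_model_id in hparams.yml further down):

from huggingface_hub import hf_hub_download

# Downloads and caches the actual safetensors payload referenced by the LFS pointer,
# returning the local path of the cached file.
local_path = hf_hub_download(
    repo_id="ZoniaChatbot/vits_colombian_1523_female",  # assumed repo id
    filename="model.safetensors",
    subfolder="checkpoint-500",
)
print(local_path)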
checkpoint-500/model_1.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:63dad49dc58224a7b209adf39fa7c648fae29f547172a32b3375f6cf2f84462a
+ size 187000136
checkpoint-500/optimizer.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:54d40a422cb83e52b46bb096b3069448077edd72a143b4688426af9086a0720e
+ size 291083132
checkpoint-500/optimizer_1.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:95b9d52e5868d7b1924804b3caced73e83b8aaa5cc68e4b7996485472f6faf50
+ size 374071772
checkpoint-500/random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:07a0764ffa47878a32ae7eccaa886ab7e29c9cf2403a717dc783ff37f83a7f55
+ size 14408
checkpoint-500/scheduler.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d9cd88b2d541a562d87e166a2ba925ba92f0cf1185e3844b468e39712226d598
+ size 1000
checkpoint-500/scheduler_1.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bd187b318e3bf83063aa293f7c55b8b901fe6050068ba20110fe814ac2dccd21
+ size 1008
config.json ADDED
@@ -0,0 +1,108 @@
+ {
+   "_name_or_path": "ylacombe/mms-tts-spa-train",
+   "activation_dropout": 0.1,
+   "architectures": [
+     "VitsModelForPreTraining"
+   ],
+   "attention_dropout": 0.1,
+   "depth_separable_channels": 2,
+   "depth_separable_num_layers": 3,
+   "discriminator_kernel_size": 5,
+   "discriminator_period_channels": [
+     1,
+     32,
+     128,
+     512,
+     1024
+   ],
+   "discriminator_periods": [
+     2,
+     3,
+     5,
+     7,
+     11
+   ],
+   "discriminator_scale_channels": [
+     1,
+     16,
+     64,
+     256,
+     1024
+   ],
+   "discriminator_stride": 3,
+   "duration_predictor_dropout": 0.5,
+   "duration_predictor_filter_channels": 256,
+   "duration_predictor_flow_bins": 10,
+   "duration_predictor_kernel_size": 3,
+   "duration_predictor_num_flows": 4,
+   "duration_predictor_tail_bound": 5.0,
+   "ffn_dim": 768,
+   "ffn_kernel_size": 3,
+   "flow_size": 192,
+   "hidden_act": "relu",
+   "hidden_dropout": 0.1,
+   "hidden_size": 192,
+   "hop_length": 256,
+   "initializer_range": 0.02,
+   "layer_norm_eps": 1e-05,
+   "layerdrop": 0.1,
+   "leaky_relu_slope": 0.1,
+   "model_type": "vits",
+   "noise_scale": 0.667,
+   "noise_scale_duration": 0.8,
+   "num_attention_heads": 2,
+   "num_hidden_layers": 6,
+   "num_speakers": 1,
+   "posterior_encoder_num_wavenet_layers": 16,
+   "prior_encoder_num_flows": 4,
+   "prior_encoder_num_wavenet_layers": 4,
+   "resblock_dilation_sizes": [
+     [
+       1,
+       3,
+       5
+     ],
+     [
+       1,
+       3,
+       5
+     ],
+     [
+       1,
+       3,
+       5
+     ]
+   ],
+   "resblock_kernel_sizes": [
+     3,
+     7,
+     11
+   ],
+   "sampling_rate": 16000,
+   "segment_size": 8192,
+   "speaker_embedding_size": 0,
+   "speaking_rate": 1.0,
+   "spectrogram_bins": 513,
+   "torch_dtype": "float32",
+   "transformers_version": "4.48.1",
+   "upsample_initial_channel": 512,
+   "upsample_kernel_sizes": [
+     16,
+     16,
+     4,
+     4
+   ],
+   "upsample_rates": [
+     8,
+     8,
+     2,
+     2
+   ],
+   "use_bias": true,
+   "use_stochastic_duration_prediction": true,
+   "vocab_size": 45,
+   "wavenet_dilation_rate": 1,
+   "wavenet_dropout": 0.0,
+   "wavenet_kernel_size": 5,
+   "window_size": 4
+ }
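This configuration describes a single-speaker VITS text-to-speech model with a 45-token vocabulary and 16 kHz output, fine-tuned from ylacombe/mms-tts-spa-train. Below is a hedged inference sketch using the transformers VITS classes; the repository id is assumed from hub_model_id in hparams.yml, and note that the checkpoint was saved by the fine-tuning code as VitsModelForPreTraining, so loading it with plain VitsModel is a sketch rather than the exact training setup:

import torch
from transformers import VitsModel, VitsTokenizer

repo = "ZoniaChatbot/vits_colombian_1523_female"  # assumed repo id
model = VitsModel.from_pretrained(repo)
tokenizer = VitsTokenizer.from_pretrained(repo)

inputs = tokenizer("Hola, bienvenidos a Colombia.", return_tensors="pt")
with torch.no_grad():
    # waveform has shape (batch, samples); samples are at sampling_rate = 16000 Hz
    waveform = model(**inputs).waveform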
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fb577dec4591b1236acb80de37a96f1a1958e9b0a3192ddca86bef5436121378
+ size 332166088
preprocessor_config.json ADDED
@@ -0,0 +1,11 @@
+ {
+   "feature_extractor_type": "VitsFeatureExtractor",
+   "feature_size": 80,
+   "hop_length": 256,
+   "max_wav_value": 32768.0,
+   "n_fft": 1024,
+   "padding_side": "right",
+   "padding_value": 0.0,
+   "return_attention_mask": false,
+   "sampling_rate": 16000
+ }
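These preprocessor parameters describe 80-bin mel spectrograms computed with a 1024-point FFT and a hop of 256 samples on 16 kHz audio. The snippet below is not the exact VitsFeatureExtractor used during fine-tuning, just an illustrative torchaudio equivalent of those numbers:

import torch
import torchaudio

mel = torchaudio.transforms.MelSpectrogram(
    sample_rate=16000,  # sampling_rate
    n_fft=1024,         # n_fft
    hop_length=256,     # hop_length
    n_mels=80,          # feature_size
)
dummy_audio = torch.randn(1, 16000)  # one second of placeholder 16 kHz audio
spec = mel(dummy_audio)              # shape: (1, 80, n_frames)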
special_tokens_map.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "pad_token": {
+     "content": "7",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tmp/vits_finetuned/runs/Feb02_18-58-22_farid/vits_colombian_spanish_female/1738540919.5627022/events.out.tfevents.1738540919.farid.20396.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:307be31b84a1487d0de1694bc3135e05a3f612cf4b6ef6fc72d2ca3304d73f28
+ size 7929
tmp/vits_finetuned/runs/Feb02_18-58-22_farid/vits_colombian_spanish_female/1738540919.5721805/hparams.yml ADDED
@@ -0,0 +1,144 @@
+ accelerator_config: '{''split_batches'': False, ''dispatch_batches'': None, ''even_batches'':
+   True, ''use_seedable_sampler'': True, ''non_blocking'': False, ''gradient_accumulation_kwargs'':
+   None}'
+ adafactor: false
+ adam_beta1: 0.8
+ adam_beta2: 0.99
+ adam_epsilon: 1.0e-08
+ auto_find_batch_size: false
+ average_tokens_across_devices: false
+ batch_eval_metrics: false
+ bf16: false
+ bf16_full_eval: false
+ data_seed: None
+ dataloader_drop_last: false
+ dataloader_num_workers: 0
+ dataloader_persistent_workers: false
+ dataloader_pin_memory: true
+ dataloader_prefetch_factor: None
+ ddp_backend: None
+ ddp_broadcast_buffers: None
+ ddp_bucket_cap_mb: None
+ ddp_find_unused_parameters: None
+ ddp_timeout: 1800
+ debug: '[]'
+ deepspeed: None
+ disable_tqdm: false
+ dispatch_batches: None
+ do_eval: true
+ do_predict: false
+ do_step_schedule_per_epoch: true
+ do_train: true
+ eval_accumulation_steps: None
+ eval_batch_size: 16
+ eval_delay: 0
+ eval_do_concat_batches: true
+ eval_on_start: false
+ eval_steps: 25
+ eval_strategy: 'no'
+ eval_use_gather_object: false
+ evaluation_strategy: None
+ fp16: false
+ fp16_backend: auto
+ fp16_full_eval: false
+ fp16_opt_level: O1
+ fsdp: '[]'
+ fsdp_config: '{''min_num_params'': 0, ''xla'': False, ''xla_fsdp_v2'': False, ''xla_fsdp_grad_ckpt'':
+   False}'
+ fsdp_min_num_params: 0
+ fsdp_transformer_layer_cls_to_wrap: None
+ full_determinism: false
+ gradient_accumulation_steps: 1
+ gradient_checkpointing: false
+ gradient_checkpointing_kwargs: None
+ greater_is_better: None
+ group_by_length: false
+ half_precision_backend: auto
+ hub_always_push: false
+ hub_model_id: vits_colombian_1523_female
+ hub_private_repo: None
+ hub_strategy: every_save
+ hub_token: <HUB_TOKEN>
+ ignore_data_skip: false
+ include_for_metrics: '[]'
+ include_inputs_for_metrics: false
+ include_num_input_tokens_seen: false
+ include_tokens_per_second: false
+ jit_mode_eval: false
+ label_names: None
+ label_smoothing_factor: 0.0
+ learning_rate: 2.0e-05
+ length_column_name: length
+ load_best_model_at_end: false
+ local_rank: 0
+ log_level: passive
+ log_level_replica: warning
+ log_on_each_node: true
+ logging_dir: ./tmp/vits_finetuned\runs\Feb02_18-58-22_farid
+ logging_first_step: false
+ logging_nan_inf_filter: true
+ logging_steps: 500
+ logging_strategy: steps
+ lr_decay: 0.999875
+ lr_scheduler_kwargs: '{}'
+ lr_scheduler_type: linear
+ max_grad_norm: 1.0
+ max_steps: 750
+ metric_for_best_model: None
+ mp_parameters: ''
+ neftune_noise_alpha: None
+ no_cuda: false
+ num_train_epochs: 75
+ optim: adamw_torch
+ optim_args: None
+ optim_target_modules: None
+ output_dir: ./tmp/vits_finetuned
+ overwrite_output_dir: true
+ past_index: -1
+ per_device_eval_batch_size: 16
+ per_device_train_batch_size: 16
+ per_gpu_eval_batch_size: None
+ per_gpu_train_batch_size: None
+ prediction_loss_only: false
+ push_to_hub: true
+ push_to_hub_model_id: None
+ push_to_hub_organization: None
+ push_to_hub_token: <PUSH_TO_HUB_TOKEN>
+ ray_scope: last
+ remove_unused_columns: true
+ report_to: '[''tensorboard'', ''wandb'']'
+ restore_callback_states_from_checkpoint: false
+ resume_from_checkpoint: None
+ run_name: ./tmp/vits_finetuned
+ save_on_each_node: false
+ save_only_model: false
+ save_safetensors: true
+ save_steps: 500
+ save_strategy: steps
+ save_total_limit: None
+ seed: 456
+ skip_memory_metrics: true
+ split_batches: None
+ tf32: None
+ torch_compile: false
+ torch_compile_backend: None
+ torch_compile_mode: None
+ torch_empty_cache_steps: None
+ torchdynamo: None
+ tpu_metrics_debug: false
+ tpu_num_cores: None
+ train_batch_size: 16
+ use_cpu: false
+ use_ipex: false
+ use_legacy_prediction_loop: false
+ use_liger_kernel: false
+ use_mps_device: false
+ warmup_ratio: 0.01
+ warmup_steps: 0
+ weight_decay: 0.0
+ weight_disc: 3
+ weight_duration: 1
+ weight_fmaps: 1
+ weight_gen: 1
+ weight_kl: 1.5
+ weight_mel: 35
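Most of these keys mirror transformers.TrainingArguments (learning rate 2e-05, batch size 16, 750 max steps, a checkpoint every 500 steps, which is why only checkpoint-500/ appears above), while the weight_* entries are VITS-specific loss weights consumed by the fine-tuning script itself. A minimal sketch of reading a few logged values back into TrainingArguments, assuming the file is available locally as hparams.yml:

import yaml
from transformers import TrainingArguments

with open("hparams.yml") as f:  # path assumed
    hp = yaml.safe_load(f)

args = TrainingArguments(
    output_dir=hp["output_dir"],                                    # ./tmp/vits_finetuned
    learning_rate=hp["learning_rate"],                              # 2.0e-05
    per_device_train_batch_size=hp["per_device_train_batch_size"],  # 16
    max_steps=hp["max_steps"],                                      # 750
    save_steps=hp["save_steps"],                                    # 500
    seed=hp["seed"],                                                 # 456
)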
tmp/vits_finetuned/runs/Feb02_18-58-22_farid/vits_colombian_spanish_female/events.out.tfevents.1738540917.farid.20396.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fee95830221cd40f86d33f59ee339b51319d2335333b1ddad20ab1e34d5c71cb
+ size 40127322
tokenizer_config.json ADDED
@@ -0,0 +1,32 @@
+ {
+   "add_blank": true,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "7",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "45": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": true,
+   "extra_special_tokens": {},
+   "is_uroman": false,
+   "language": "spa",
+   "model_max_length": 1000000000000000019884624838656,
+   "normalize": true,
+   "pad_token": "7",
+   "phonemize": false,
+   "tokenizer_class": "VitsTokenizer",
+   "unk_token": "<unk>",
+   "verbose": false
+ }
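In practice this tokenizer normalizes the input text, maps characters through vocab.json, and, because add_blank is true, interleaves the pad token "7" (id 0) between characters, the usual VITS blank-token trick. A short usage sketch, again assuming the repository id derived from hparams.yml:

from transformers import VitsTokenizer

tokenizer = VitsTokenizer.from_pretrained("ZoniaChatbot/vits_colombian_1523_female")  # assumed repo id
ids = tokenizer("Hola señora", return_tensors="pt").input_ids
# With add_blank enabled, every other position holds id 0, i.e. the "7" pad/blank token.
print(ids)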
vocab.json ADDED
@@ -0,0 +1,47 @@
+ {
+   " ": 27,
+   "0": 5,
+   "1": 24,
+   "2": 36,
+   "3": 32,
+   "4": 12,
+   "5": 6,
+   "6": 44,
+   "7": 0,
+   "8": 8,
+   "9": 23,
+   "_": 22,
+   "a": 1,
+   "b": 31,
+   "c": 3,
+   "d": 29,
+   "e": 39,
+   "f": 25,
+   "g": 34,
+   "h": 19,
+   "i": 37,
+   "j": 33,
+   "k": 15,
+   "l": 35,
+   "m": 13,
+   "n": 20,
+   "o": 41,
+   "p": 9,
+   "q": 18,
+   "r": 43,
+   "s": 16,
+   "t": 26,
+   "u": 38,
+   "v": 2,
+   "x": 28,
+   "y": 10,
+   "z": 11,
+   "á": 17,
+   "é": 21,
+   "í": 30,
+   "ñ": 42,
+   "ó": 7,
+   "ú": 40,
+   "ü": 14,
+   "—": 4
+ }
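The 45 characters above (ids 0-44) match vocab_size in config.json, and the <unk> token from added_tokens.json takes the next id, 45. A quick sanity-check sketch, assuming the file is available locally as vocab.json:

import json

with open("vocab.json", encoding="utf-8") as f:  # path assumed
    vocab = json.load(f)

print(len(vocab))              # 45, matching config.json's vocab_size
print(vocab["ñ"], vocab[" "])  # 42 27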