Upload 5 files

Browse files

Files changed (5) hide show

README.md +14 -0
sovits4.1-Latest_44k/G_101600.pth +3 -0
sovits4.1-Latest_44k/config.json +107 -0
sovits4.1-Latest_44k/config.yaml +51 -0
sovits4.1-Latest_44k/model_196000.pt +3 -0

README.md CHANGED Viewed

@@ -19,6 +19,20 @@ tags:
 - **Model type:** [so-vits-svc 3.0 48kHz](https://github.com/svc-develop-team/so-vits-svc/tree/3.0-48k)、[so-vits-svc 4.0](https://github.com/svc-develop-team/so-vits-svc)
 - **Demo:** [SPACE: AI岁己（歌声变声器）](https://huggingface.co/spaces/Miuzarte/SUI-svc-3.0)、[SPACE: AI岁己（歌声变声器）第二代](https://huggingface.co/spaces/Miuzarte/SUI-svc-4.0)
 ### pth文件名的训练步数是程序按学习率等超参数得出的步数，onnx文件名的步数为实际训练步数
   |sovits3_v1|Base/G_1000000.pth|Singing/G_1M111000.pth|Singing/G_100000.pth|

 - **Model type:** [so-vits-svc 3.0 48kHz](https://github.com/svc-develop-team/so-vits-svc/tree/3.0-48k)、[so-vits-svc 4.0](https://github.com/svc-develop-team/so-vits-svc)
 - **Demo:** [SPACE: AI岁己（歌声变声器）](https://huggingface.co/spaces/Miuzarte/SUI-svc-3.0)、[SPACE: AI岁己（歌声变声器）第二代](https://huggingface.co/spaces/Miuzarte/SUI-svc-4.0)
+### 新模型：分支4.1-Latest
+跟几个旧的4.0一样的训练集，底模比之前好了不少，炼起来快了很多
+算力提供：[米玖澪](https://space.bilibili.com/365980549)
+没导出onnx，去用 [Google Colab](https://colab.research.google.com/github/svc-develop-team/so-vits-svc/blob/4.1-Stable/sovits4_for_colab.ipynb) 来推理挺方便的
+  |sovits4.1-Latest|G_101600.pth|model_196000.pt|
+  |-:|:-:|:-:|
+  |备注|主模型|浅扩散模型|
+### 旧模型：
 ### pth文件名的训练步数是程序按学习率等超参数得出的步数，onnx文件名的步数为实际训练步数
   |sovits3_v1|Base/G_1000000.pth|Singing/G_1M111000.pth|Singing/G_100000.pth|

sovits4.1-Latest_44k/G_101600.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:79be9b852f8b864d1d399cc4285e9d7ecf1f2aa4cd74b2090b9f1ac4bbbbff30
+size 627915739

sovits4.1-Latest_44k/config.json ADDED Viewed

	@@ -0,0 +1,107 @@

+{
+  "train": {
+    "log_interval": 200,
+    "eval_interval": 800,
+    "seed": 1234,
+    "epochs": 10000,
+    "learning_rate": 0.0001,
+    "betas": [
+      0.8,
+      0.99
+    ],
+    "eps": 1e-09,
+    "batch_size": 25,
+    "fp16_run": true,
+    "half_type": "fp16",
+    "lr_decay": 0.999875,
+    "segment_size": 10240,
+    "init_lr_ratio": 1,
+    "warmup_epochs": 0,
+    "c_mel": 45,
+    "c_kl": 1.0,
+    "use_sr": true,
+    "max_speclen": 512,
+    "port": "8001",
+    "keep_ckpts": 3,
+    "all_in_mem": false,
+    "vol_aug": true
+  },
+  "data": {
+    "training_files": "filelists/train.txt",
+    "validation_files": "filelists/val.txt",
+    "max_wav_value": 32768.0,
+    "sampling_rate": 44100,
+    "filter_length": 2048,
+    "hop_length": 512,
+    "win_length": 2048,
+    "n_mel_channels": 80,
+    "mel_fmin": 0.0,
+    "mel_fmax": 22050,
+    "unit_interpolate_mode": "nearest"
+  },
+  "model": {
+    "inter_channels": 192,
+    "hidden_channels": 192,
+    "filter_channels": 768,
+    "n_heads": 2,
+    "n_layers": 6,
+    "kernel_size": 3,
+    "p_dropout": 0.1,
+    "resblock": "1",
+    "resblock_kernel_sizes": [
+      3,
+      7,
+      11
+    ],
+    "resblock_dilation_sizes": [
+      [
+        1,
+        3,
+        5
+      ],
+      [
+        1,
+        3,
+        5
+      ],
+      [
+        1,
+        3,
+        5
+      ]
+    ],
+    "upsample_rates": [
+      8,
+      8,
+      2,
+      2,
+      2
+    ],
+    "upsample_initial_channel": 512,
+    "upsample_kernel_sizes": [
+      16,
+      16,
+      4,
+      4,
+      4
+    ],
+    "n_layers_q": 3,
+    "n_layers_trans_flow": 3,
+    "n_flow_layer": 4,
+    "use_spectral_norm": false,
+    "gin_channels": 768,
+    "ssl_dim": 768,
+    "n_speakers": 1,
+    "vocoder_name": "nsf-hifigan",
+    "speech_encoder": "vec768l12",
+    "speaker_embedding": false,
+    "vol_embedding": true,
+    "use_depthwise_conv": false,
+    "flow_share_parameter": false,
+    "use_automatic_f0_prediction": true,
+    "use_transformer_flow": false
+  },
+  "spk": {
+    "sui": 0
+  }
+}

sovits4.1-Latest_44k/config.yaml ADDED Viewed

	@@ -0,0 +1,51 @@

+data:
+  block_size: 512
+  cnhubertsoft_gate: 10
+  duration: 2
+  encoder: vec768l12
+  encoder_hop_size: 320
+  encoder_out_channels: 768
+  encoder_sample_rate: 16000
+  extensions:
+  - wav
+  sampling_rate: 44100
+  training_files: filelists/train.txt
+  unit_interpolate_mode: nearest
+  validation_files: filelists/val.txt
+device: cuda
+env:
+  expdir: logs/44k/diffusion
+  gpu_id: 0
+infer:
+  method: dpm-solver++
+  speedup: 10
+model:
+  k_step_max: 200
+  n_chans: 512
+  n_hidden: 256
+  n_layers: 20
+  n_spk: 1
+  timesteps: 1000
+  type: Diffusion
+  use_pitch_aug: true
+spk:
+  sui: 0
+train:
+  amp_dtype: fp32
+  batch_size: 48
+  cache_all_data: true
+  cache_device: cpu
+  cache_fp16: true
+  decay_step: 100000
+  epochs: 100000
+  gamma: 0.5
+  interval_force_save: 5000
+  interval_log: 10
+  interval_val: 2000
+  lr: 0.0001
+  num_workers: 4
+  save_opt: false
+  weight_decay: 0
+vocoder:
+  ckpt: pretrain/nsf_hifigan/model
+  type: nsf-hifigan

sovits4.1-Latest_44k/model_196000.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:612f199e6227e62b56c83c1c9c566c700e1290cfb3cbb5557a5c9a4456b31dab
+size 220893960