Text-to-Speech
English
hexgrad committed on
Commit
9b10985
·
verified ·
1 Parent(s): a757040

Upload 2 files

Browse files
Files changed (2) hide show
  1. config.json +26 -0
  2. models.py +7 -9
config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "decoder": {
3
+ "type": "istftnet",
4
+ "upsample_kernel_sizes": [20, 12],
5
+ "upsample_rates": [10, 6],
6
+ "gen_istft_hop_size": 5,
7
+ "gen_istft_n_fft": 20,
8
+ "resblock_dilation_sizes": [
9
+ [1, 3, 5],
10
+ [1, 3, 5],
11
+ [1, 3, 5]
12
+ ],
13
+ "resblock_kernel_sizes": [3, 7, 11],
14
+ "upsample_initial_channel": 512
15
+ },
16
+ "dim_in": 64,
17
+ "dropout": 0.2,
18
+ "hidden_dim": 512,
19
+ "max_conv_dim": 512,
20
+ "max_dur": 50,
21
+ "multispeaker": true,
22
+ "n_layer": 3,
23
+ "n_mels": 80,
24
+ "n_token": 178,
25
+ "style_dim": 128
26
+ }
models.py CHANGED
@@ -1,8 +1,10 @@
1
  # https://github.com/yl4579/StyleTTS2/blob/main/models.py
2
  from istftnet import Decoder
3
  from munch import Munch
 
4
  from plbert import load_plbert
5
  from torch.nn.utils import weight_norm, spectral_norm
 
6
  import numpy as np
7
  import os
8
  import os.path as osp
@@ -550,15 +552,11 @@ def recursive_munch(d):
550
  return d
551
 
552
  def build_model(path, device):
553
- args = recursive_munch(dict(
554
- decoder=dict(
555
- type='istftnet', upsample_kernel_sizes=[20, 12], upsample_rates=[10, 6], gen_istft_hop_size=5, gen_istft_n_fft=20,
556
- resblock_dilation_sizes=[[1, 3, 5], [1, 3, 5], [1, 3, 5]], resblock_kernel_sizes=[3, 7, 11], upsample_initial_channel=512,
557
- ),
558
- dim_in=64, dropout=0.2, hidden_dim=512, max_conv_dim=512, max_dur=50,
559
- multispeaker=True, n_layer=3, n_mels=80, n_token=178, style_dim=128
560
- ))
561
- assert args.decoder.type == 'istftnet', 'Decoder type unknown'
562
  decoder = Decoder(dim_in=args.hidden_dim, style_dim=args.style_dim, dim_out=args.n_mels,
563
  resblock_kernel_sizes = args.decoder.resblock_kernel_sizes,
564
  upsample_rates = args.decoder.upsample_rates,
 
1
  # https://github.com/yl4579/StyleTTS2/blob/main/models.py
2
  from istftnet import Decoder
3
  from munch import Munch
4
+ from pathlib import Path
5
  from plbert import load_plbert
6
  from torch.nn.utils import weight_norm, spectral_norm
7
+ import json
8
  import numpy as np
9
  import os
10
  import os.path as osp
 
552
  return d
553
 
554
  def build_model(path, device):
555
+ config = Path(__file__).parent / 'config.json'
556
+ assert config.exists(), f'Config path incorrect: config.json not found at {config}'
557
+ with open(config, 'r') as r:
558
+ args = recursive_munch(json.load(r))
559
+ assert args.decoder.type == 'istftnet', f'Unknown decoder type: {args.decoder.type}'
 
 
 
 
560
  decoder = Decoder(dim_in=args.hidden_dim, style_dim=args.style_dim, dim_out=args.n_mels,
561
  resblock_kernel_sizes = args.decoder.resblock_kernel_sizes,
562
  upsample_rates = args.decoder.upsample_rates,