thefcraft commited on
Commit
8a7e64d
1 Parent(s): 56530d0

first commit

Browse files
base_speakers/EN/checkpoint.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1db1ae1a5c8ded049bd1536051489aefbfad4a5077c01c2257e9e88fa1bb8422
3
+ size 160467309
base_speakers/EN/config.json ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data": {
3
+ "text_cleaners": [
4
+ "cjke_cleaners2"
5
+ ],
6
+ "sampling_rate": 22050,
7
+ "filter_length": 1024,
8
+ "hop_length": 256,
9
+ "win_length": 1024,
10
+ "n_mel_channels": 80,
11
+ "add_blank": true,
12
+ "cleaned_text": true,
13
+ "n_speakers": 10
14
+ },
15
+ "model": {
16
+ "inter_channels": 192,
17
+ "hidden_channels": 192,
18
+ "filter_channels": 768,
19
+ "n_heads": 2,
20
+ "n_layers": 6,
21
+ "n_layers_trans_flow": 3,
22
+ "kernel_size": 3,
23
+ "p_dropout": 0.1,
24
+ "resblock": "1",
25
+ "resblock_kernel_sizes": [
26
+ 3,
27
+ 7,
28
+ 11
29
+ ],
30
+ "resblock_dilation_sizes": [
31
+ [
32
+ 1,
33
+ 3,
34
+ 5
35
+ ],
36
+ [
37
+ 1,
38
+ 3,
39
+ 5
40
+ ],
41
+ [
42
+ 1,
43
+ 3,
44
+ 5
45
+ ]
46
+ ],
47
+ "upsample_rates": [
48
+ 8,
49
+ 8,
50
+ 2,
51
+ 2
52
+ ],
53
+ "upsample_initial_channel": 512,
54
+ "upsample_kernel_sizes": [
55
+ 16,
56
+ 16,
57
+ 4,
58
+ 4
59
+ ],
60
+ "n_layers_q": 3,
61
+ "use_spectral_norm": false,
62
+ "gin_channels": 256
63
+ },
64
+ "symbols": [
65
+ "_",
66
+ ",",
67
+ ".",
68
+ "!",
69
+ "?",
70
+ "-",
71
+ "~",
72
+ "\u2026",
73
+ "N",
74
+ "Q",
75
+ "a",
76
+ "b",
77
+ "d",
78
+ "e",
79
+ "f",
80
+ "g",
81
+ "h",
82
+ "i",
83
+ "j",
84
+ "k",
85
+ "l",
86
+ "m",
87
+ "n",
88
+ "o",
89
+ "p",
90
+ "s",
91
+ "t",
92
+ "u",
93
+ "v",
94
+ "w",
95
+ "x",
96
+ "y",
97
+ "z",
98
+ "\u0251",
99
+ "\u00e6",
100
+ "\u0283",
101
+ "\u0291",
102
+ "\u00e7",
103
+ "\u026f",
104
+ "\u026a",
105
+ "\u0254",
106
+ "\u025b",
107
+ "\u0279",
108
+ "\u00f0",
109
+ "\u0259",
110
+ "\u026b",
111
+ "\u0265",
112
+ "\u0278",
113
+ "\u028a",
114
+ "\u027e",
115
+ "\u0292",
116
+ "\u03b8",
117
+ "\u03b2",
118
+ "\u014b",
119
+ "\u0266",
120
+ "\u207c",
121
+ "\u02b0",
122
+ "`",
123
+ "^",
124
+ "#",
125
+ "*",
126
+ "=",
127
+ "\u02c8",
128
+ "\u02cc",
129
+ "\u2192",
130
+ "\u2193",
131
+ "\u2191",
132
+ " "
133
+ ],
134
+ "speakers": {
135
+ "default": 1,
136
+ "whispering": 2,
137
+ "shouting": 3,
138
+ "excited": 4,
139
+ "cheerful": 5,
140
+ "terrified": 6,
141
+ "angry": 7,
142
+ "sad": 8,
143
+ "friendly": 9
144
+ }
145
+ }
base_speakers/EN/en_default_se.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cab24002eec738d0fe72cb73a34e57fbc3999c1bd4a1670a7b56ee4e3590ac9
3
+ size 1789
base_speakers/EN/en_style_se.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f698153be5004b90a8642d1157c89cae7dd296752a3276450ced6a17b8b98a9
3
+ size 1783
base_speakers/ZH/checkpoint.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de9fb0eb749f3254130fe0172fcbb20e75f88a9b16b54dd0b73cac0dc40da7d9
3
+ size 160467309
base_speakers/ZH/config.json ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data": {
3
+ "text_cleaners": [
4
+ "cjke_cleaners2"
5
+ ],
6
+ "sampling_rate": 22050,
7
+ "filter_length": 1024,
8
+ "hop_length": 256,
9
+ "win_length": 1024,
10
+ "n_mel_channels": 80,
11
+ "add_blank": true,
12
+ "cleaned_text": true,
13
+ "n_speakers": 10
14
+ },
15
+ "model": {
16
+ "inter_channels": 192,
17
+ "hidden_channels": 192,
18
+ "filter_channels": 768,
19
+ "n_heads": 2,
20
+ "n_layers": 6,
21
+ "n_layers_trans_flow": 3,
22
+ "kernel_size": 3,
23
+ "p_dropout": 0.1,
24
+ "resblock": "1",
25
+ "resblock_kernel_sizes": [
26
+ 3,
27
+ 7,
28
+ 11
29
+ ],
30
+ "resblock_dilation_sizes": [
31
+ [
32
+ 1,
33
+ 3,
34
+ 5
35
+ ],
36
+ [
37
+ 1,
38
+ 3,
39
+ 5
40
+ ],
41
+ [
42
+ 1,
43
+ 3,
44
+ 5
45
+ ]
46
+ ],
47
+ "upsample_rates": [
48
+ 8,
49
+ 8,
50
+ 2,
51
+ 2
52
+ ],
53
+ "upsample_initial_channel": 512,
54
+ "upsample_kernel_sizes": [
55
+ 16,
56
+ 16,
57
+ 4,
58
+ 4
59
+ ],
60
+ "n_layers_q": 3,
61
+ "use_spectral_norm": false,
62
+ "gin_channels": 256
63
+ },
64
+ "symbols": [
65
+ "_",
66
+ ",",
67
+ ".",
68
+ "!",
69
+ "?",
70
+ "-",
71
+ "~",
72
+ "\u2026",
73
+ "N",
74
+ "Q",
75
+ "a",
76
+ "b",
77
+ "d",
78
+ "e",
79
+ "f",
80
+ "g",
81
+ "h",
82
+ "i",
83
+ "j",
84
+ "k",
85
+ "l",
86
+ "m",
87
+ "n",
88
+ "o",
89
+ "p",
90
+ "s",
91
+ "t",
92
+ "u",
93
+ "v",
94
+ "w",
95
+ "x",
96
+ "y",
97
+ "z",
98
+ "\u0251",
99
+ "\u00e6",
100
+ "\u0283",
101
+ "\u0291",
102
+ "\u00e7",
103
+ "\u026f",
104
+ "\u026a",
105
+ "\u0254",
106
+ "\u025b",
107
+ "\u0279",
108
+ "\u00f0",
109
+ "\u0259",
110
+ "\u026b",
111
+ "\u0265",
112
+ "\u0278",
113
+ "\u028a",
114
+ "\u027e",
115
+ "\u0292",
116
+ "\u03b8",
117
+ "\u03b2",
118
+ "\u014b",
119
+ "\u0266",
120
+ "\u207c",
121
+ "\u02b0",
122
+ "`",
123
+ "^",
124
+ "#",
125
+ "*",
126
+ "=",
127
+ "\u02c8",
128
+ "\u02cc",
129
+ "\u2192",
130
+ "\u2193",
131
+ "\u2191",
132
+ " "
133
+ ],
134
+ "speakers": {
135
+ "default": 0
136
+ }
137
+ }
base_speakers/ZH/zh_default_se.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b62e8264962059b8a84dd00b29e2fcccc92f5d3be90eec67dfa082c0cf58ccf
3
+ size 1789
converter/checkpoint.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89ae83aa4e3668fef64b388b789ff7b0ce0def9f801069edfc18a00ea420748d
3
+ size 131327338
converter/config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data": {
3
+ "sampling_rate": 22050,
4
+ "filter_length": 1024,
5
+ "hop_length": 256,
6
+ "win_length": 1024,
7
+ "n_speakers": 0
8
+ },
9
+ "model": {
10
+ "inter_channels": 192,
11
+ "hidden_channels": 192,
12
+ "filter_channels": 768,
13
+ "n_heads": 2,
14
+ "n_layers": 6,
15
+ "kernel_size": 3,
16
+ "p_dropout": 0.1,
17
+ "resblock": "1",
18
+ "resblock_kernel_sizes": [
19
+ 3,
20
+ 7,
21
+ 11
22
+ ],
23
+ "resblock_dilation_sizes": [
24
+ [
25
+ 1,
26
+ 3,
27
+ 5
28
+ ],
29
+ [
30
+ 1,
31
+ 3,
32
+ 5
33
+ ],
34
+ [
35
+ 1,
36
+ 3,
37
+ 5
38
+ ]
39
+ ],
40
+ "upsample_rates": [
41
+ 8,
42
+ 8,
43
+ 2,
44
+ 2
45
+ ],
46
+ "upsample_initial_channel": 512,
47
+ "upsample_kernel_sizes": [
48
+ 16,
49
+ 16,
50
+ 4,
51
+ 4
52
+ ],
53
+ "n_layers_q": 3,
54
+ "use_spectral_norm": false,
55
+ "gin_channels": 256
56
+ }
57
+ }