Upload 3 files
Browse files- config.json +64 -0
- model.h5 +3 -0
- processor.json +1 -0
config.json
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"allow_cache": true,
|
3 |
+
"batch_size": 16,
|
4 |
+
"config": "/content/TensorFlowTTS/examples/fastspeech2/conf/fastspeech2.v1.yaml",
|
5 |
+
"dev_dir": "/content/dump_synpaflex/valid/",
|
6 |
+
"energy_stat": "/content/dump_synpaflex/stats_energy.npy",
|
7 |
+
"eval_interval_steps": 500,
|
8 |
+
"f0_stat": "/content/dump_synpaflex/stats_f0.npy",
|
9 |
+
"fastspeech2_params": {
|
10 |
+
"attention_probs_dropout_prob": 0.1,
|
11 |
+
"decoder_attention_head_size": 192,
|
12 |
+
"decoder_hidden_act": "mish",
|
13 |
+
"decoder_hidden_size": 384,
|
14 |
+
"decoder_intermediate_kernel_size": 3,
|
15 |
+
"decoder_intermediate_size": 1024,
|
16 |
+
"decoder_num_attention_heads": 2,
|
17 |
+
"decoder_num_hidden_layers": 4,
|
18 |
+
"encoder_attention_head_size": 192,
|
19 |
+
"encoder_hidden_act": "mish",
|
20 |
+
"encoder_hidden_size": 384,
|
21 |
+
"encoder_intermediate_kernel_size": 3,
|
22 |
+
"encoder_intermediate_size": 1024,
|
23 |
+
"encoder_num_attention_heads": 2,
|
24 |
+
"encoder_num_hidden_layers": 4,
|
25 |
+
"hidden_dropout_prob": 0.2,
|
26 |
+
"initializer_range": 0.02,
|
27 |
+
"max_position_embeddings": 2048,
|
28 |
+
"n_speakers": 1,
|
29 |
+
"num_mels": 80,
|
30 |
+
"output_attentions": false,
|
31 |
+
"output_hidden_states": false,
|
32 |
+
"variant_prediction_num_conv_layers": 2,
|
33 |
+
"variant_predictor_dropout_rate": 0.5,
|
34 |
+
"variant_predictor_filter": 256,
|
35 |
+
"variant_predictor_kernel_size": 3
|
36 |
+
},
|
37 |
+
"format": "npy",
|
38 |
+
"gradient_accumulation_steps": 1,
|
39 |
+
"hop_size": 256,
|
40 |
+
"is_shuffle": true,
|
41 |
+
"log_interval_steps": 200,
|
42 |
+
"mel_length_threshold": 32,
|
43 |
+
"mixed_precision": true,
|
44 |
+
"model_type": "fastspeech2",
|
45 |
+
"num_save_intermediate_results": 1,
|
46 |
+
"optimizer_params": {
|
47 |
+
"decay_steps": 150000,
|
48 |
+
"end_learning_rate": 5e-05,
|
49 |
+
"initial_learning_rate": 0.001,
|
50 |
+
"warmup_proportion": 0.02,
|
51 |
+
"weight_decay": 0.001
|
52 |
+
},
|
53 |
+
"outdir": "/content/drive/MyDrive/exp/train.fastspeech2.v1/",
|
54 |
+
"pretrained": "",
|
55 |
+
"remove_short_samples": true,
|
56 |
+
"resume": "/content/drive/MyDrive/exp/train.fastspeech2.v1/checkpoints/ckpt-1.index",
|
57 |
+
"save_interval_steps": 5000,
|
58 |
+
"train_dir": "/content/dump_synpaflex/train/",
|
59 |
+
"train_max_steps": 200000,
|
60 |
+
"use_norm": true,
|
61 |
+
"var_train_expr": null,
|
62 |
+
"verbose": 1,
|
63 |
+
"version": "0.0"
|
64 |
+
}
|
model.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d68714458e51a3a4f91f602570c25218d45bb078f5a18ee2e525343350b2acd6
|
3 |
+
size 124796368
|
processor.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"symbol_to_id": {"pad": 0, "!": 1, "/": 2, "'": 3, "(": 4, ")": 5, ",": 6, "-": 7, ".": 8, ":": 9, ";": 10, "?": 11, " ": 12, "a": 13, "\u00e1": 14, "\u1ea3": 15, "\u00e0": 16, "\u00e3": 17, "\u1ea1": 18, "\u00e2": 19, "\u1ea5": 20, "\u1ea9": 21, "\u1ea7": 22, "\u1eab": 23, "\u1ead": 24, "\u0103": 25, "\u1eaf": 26, "\u1eb3": 27, "\u1eb1": 28, "\u1eb5": 29, "\u1eb7": 30, "b": 31, "c": 32, "d": 33, "\u0111": 34, "e": 35, "\u00e9": 36, "\u1ebb": 37, "\u00e8": 38, "\u1ebd": 39, "\u1eb9": 40, "\u00ea": 41, "\u1ebf": 42, "\u1ec3": 43, "\u1ec1": 44, "\u1ec5": 45, "\u1ec7": 46, "f": 47, "g": 48, "h": 49, "i": 50, "\u00ed": 51, "\u1ec9": 52, "\u00ec": 53, "\u0129": 54, "\u1ecb": 55, "j": 56, "k": 57, "l": 58, "m": 59, "n": 60, "o": 61, "\u00f3": 62, "\u1ecf": 63, "\u00f2": 64, "\u00f5": 65, "\u1ecd": 66, "\u00f4": 67, "\u1ed1": 68, "\u1ed5": 69, "\u1ed3": 70, "\u1ed7": 71, "\u1ed9": 72, "\u01a1": 73, "\u1edb": 74, "\u1edf": 75, "\u1edd": 76, "\u1ee1": 77, "\u1ee3": 78, "p": 79, "q": 80, "r": 81, "s": 82, "t": 83, "u": 84, "\u00fa": 85, "\u1ee7": 86, "\u00f9": 87, "\u0169": 88, "\u1ee5": 89, "\u01b0": 90, "\u1ee9": 91, "\u1eed": 92, "\u1eeb": 93, "\u1eef": 94, "\u1ef1": 95, "v": 96, "w": 97, "x": 98, "y": 99, "\u00fd": 100, "\u1ef7": 101, "\u1ef3": 102, "\u1ef9": 103, "\u1ef5": 104, "eos": 105}, "id_to_symbol": {"0": "pad", "1": "!", "2": "/", "3": "'", "4": "(", "5": ")", "6": ",", "7": "-", "8": ".", "9": ":", "10": ";", "11": "?", "12": " ", "13": "a", "14": "\u00e1", "15": "\u1ea3", "16": "\u00e0", "17": "\u00e3", "18": "\u1ea1", "19": "\u00e2", "20": "\u1ea5", "21": "\u1ea9", "22": "\u1ea7", "23": "\u1eab", "24": "\u1ead", "25": "\u0103", "26": "\u1eaf", "27": "\u1eb3", "28": "\u1eb1", "29": "\u1eb5", "30": "\u1eb7", "31": "b", "32": "c", "33": "d", "34": "\u0111", "35": "e", "36": "\u00e9", "37": "\u1ebb", "38": "\u00e8", "39": "\u1ebd", "40": "\u1eb9", "41": "\u00ea", "42": "\u1ebf", "43": "\u1ec3", "44": "\u1ec1", "45": "\u1ec5", "46": "\u1ec7", "47": "f", "48": "g", "49": "h", "50": "i", "51": "\u00ed", "52": "\u1ec9", "53": "\u00ec", "54": "\u0129", "55": "\u1ecb", "56": "j", "57": "k", "58": "l", "59": "m", "60": "n", "61": "o", "62": "\u00f3", "63": "\u1ecf", "64": "\u00f2", "65": "\u00f5", "66": "\u1ecd", "67": "\u00f4", "68": "\u1ed1", "69": "\u1ed5", "70": "\u1ed3", "71": "\u1ed7", "72": "\u1ed9", "73": "\u01a1", "74": "\u1edb", "75": "\u1edf", "76": "\u1edd", "77": "\u1ee1", "78": "\u1ee3", "79": "p", "80": "q", "81": "r", "82": "s", "83": "t", "84": "u", "85": "\u00fa", "86": "\u1ee7", "87": "\u00f9", "88": "\u0169", "89": "\u1ee5", "90": "\u01b0", "91": "\u1ee9", "92": "\u1eed", "93": "\u1eeb", "94": "\u1eef", "95": "\u1ef1", "96": "v", "97": "w", "98": "x", "99": "y", "100": "\u00fd", "101": "\u1ef7", "102": "\u1ef3", "103": "\u1ef9", "104": "\u1ef5", "105": "eos"}, "speakers_map": {"synpaflex": 0}, "processor_name": "SynpaflexProcessor"}
|