Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 149 additions & 0 deletions configs/config-ljspeech.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
{
"train_config": {
"output_directory": "/home/zach/code/uberduck-ml-dev/outputs-2023-07-12-ljspeech/",
"epochs": 10000000,
"optim_algo": "RAdam",
"learning_rate": 0.0001,
"weight_decay": 1e-6,
"sigma": 1.0,
"iters_per_checkpoint": 2500,
"batch_size": 16,
"seed": null,
"checkpoint_path": "",
"ignore_layers": [],
"ignore_layers_warmstart": [],
"steps_per_sample": 500,
"finetune_layers": [],
"include_layers": [],
"vocoder_config_path": "/home/zach/code/uberduck-ml-dev/models/hifi_gan_config.json",
"vocoder_checkpoint_path": "/home/zach/code/uberduck-ml-dev/models/g_hifi_crust",
"log_attribute_samples": false,
"log_decoder_samples": true,
"warmstart_checkpoint_path": "/home/zach/code/uberduck-ml-dev/outputs-2023-07-12-ljspeech/model_2500.pt",
"_unused_warmstart_checkpoint_path": "/home/zach/code/uberduck-ml-dev/outputs/model_115000.pt",
"use_amp": true,
"grad_clip_val": 1.0,
"loss_weights": {
"blank_logprob": -1,
"ctc_loss_weight": 0.1,
"binarization_loss_weight": 1.0,
"dur_loss_weight": 1.0,
"f0_loss_weight": 1.0,
"energy_loss_weight": 1.0,
"vpred_loss_weight": 1.0
},
"binarization_start_iter": 6000,
"kl_loss_start_iter": 8000,
"unfreeze_modules": "all"
},
"data_config": {
"training_files": {
"lj": {
"basedir": "/home/zach/code/uberduck-ml-dev/data",
"audiodir": "",
"filelist": "/home/zach/code/uberduck-ml-dev/filelists/lj-filelist.txt",
"lmdbpath": ""
}
},
"validation_files": {},
"dur_min": 0.1,
"dur_max": 10.2,
"sampling_rate": 22050,
"filter_length": 1024,
"hop_length": 256,
"win_length": 1024,
"n_mel_channels": 80,
"mel_fmin": 0.0,
"mel_fmax": 8000.0,
"f0_min": 80.0,
"f0_max": 640.0,
"max_wav_value": 32768.0,
"use_f0": true,
"use_log_f0": false,
"use_energy_avg": true,
"use_scaled_energy": true,
"symbol_set": "radtts",
"cleaner_names": ["radtts_cleaners"],
"heteronyms_path": "/home/zach/code/uberduck-ml-dev/uberduck_ml_dev/text/heteronyms",
"phoneme_dict_path": "/home/zach/code/uberduck-ml-dev/uberduck_ml_dev/text/cmudict-0.7b",
"p_phoneme": 1.0,
"handle_phoneme": "word",
"handle_phoneme_ambiguous": "ignore",
"include_speakers": null,
"n_frames": -1,
"betabinom_cache_path": "/home/zach/code/uberduck-ml-dev/data_cache/",
"lmdb_cache_path": "",
"use_attn_prior_masking": true,
"prepend_space_to_text": true,
"append_space_to_text": true,
"add_bos_eos_to_text": false,
"betabinom_scaling_factor": 1.0,
"distance_tx_unvoiced": false,
"is_zero_shot": false,
"mel_noise_scale": 0.0
},
"dist_config": {
"dist_backend": "nccl",
"dist_url": "tcp://localhost:54321"
},
"model_config": {
"n_speakers": 1,
"n_speaker_dim": 16,
"n_text": 185,
"n_text_dim": 512,
"n_flows": 8,
"n_conv_layers_per_step": 4,
"n_mel_channels": 80,
"n_hidden": 1024,
"mel_encoder_n_hidden": 512,
"dummy_speaker_embedding": false,
"n_early_size": 2,
"n_early_every": 2,
"n_group_size": 2,
"affine_model": "wavenet",
"include_modules": "decatn",
"scaling_fn": "tanh",
"matrix_decomposition": "LUS",
"learn_alignments": true,
"use_speaker_emb_for_alignment": false,
"attn_straight_through_estimator": true,
"use_context_lstm": true,
"context_lstm_norm": "spectral",
"context_lstm_w_f0_and_energy": true,
"text_encoder_lstm_norm": "spectral",
"n_f0_dims": 1,
"n_energy_avg_dims": 1,
"use_first_order_features": false,
"unvoiced_bias_activation": "relu",
"decoder_use_partial_padding": true,
"decoder_use_unvoiced_bias": true,
"ap_pred_log_f0": true,
"ap_use_unvoiced_bias": true,
"ap_use_voiced_embeddings": true,
"dur_model_config": null,
"f0_model_config": null,
"energy_model_config": null,
"v_model_config": {
"name": "dap",
"hparams": {
"n_speaker_dim": 16,
"take_log_of_input": false,
"bottleneck_hparams": {
"in_dim": 512,
"reduction_factor": 16,
"norm": "weightnorm",
"non_linearity": "relu"
},
"arch_hparams": {
"out_dim": 1,
"n_layers": 2,
"n_channels": 256,
"kernel_size": 3,
"p_dropout": 0.5,
"lstm_type": "",
"use_linear": true
}
}
}
}
}
149 changes: 149 additions & 0 deletions configs/config-zeroshot-warmstart.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
{
"train_config": {
"output_directory": "/home/zach/code/uberduck-ml-dev/outputs-2023-07-13-zeroshot-warmstart/",
"epochs": 10000000,
"optim_algo": "RAdam",
"learning_rate": 0.0001,
"weight_decay": 1e-6,
"sigma": 1.0,
"iters_per_checkpoint": 2500,
"batch_size": 16,
"seed": null,
"checkpoint_path": "",
"ignore_layers": [],
"ignore_layers_warmstart": [],
"steps_per_sample": 500,
"finetune_layers": [],
"include_layers": [],
"vocoder_config_path": "/home/zach/code/uberduck-ml-dev/models/hifi_gan_config.json",
"vocoder_checkpoint_path": "/home/zach/code/uberduck-ml-dev/models/g_hifi_crust",
"log_attribute_samples": false,
"log_decoder_samples": true,
"warmstart_checkpoint_path": "/home/zach/code/uberduck-ml-dev/outputs-2023-07-12-zeroshot/model_40000.pt",
"_unused_warmstart_checkpoint_path": "/home/zach/code/uberduck-ml-dev/outputs/model_115000.pt",
"use_amp": true,
"grad_clip_val": 1.0,
"loss_weights": {
"blank_logprob": -1,
"ctc_loss_weight": 0.1,
"binarization_loss_weight": 1.0,
"dur_loss_weight": 1.0,
"f0_loss_weight": 1.0,
"energy_loss_weight": 1.0,
"vpred_loss_weight": 1.0
},
"binarization_start_iter": 6000,
"kl_loss_start_iter": 8000,
"unfreeze_modules": "all"
},
"data_config": {
"training_files": {
"vctk": {
"basedir": "/home/zach/code/uberduck-ml-dev/data",
"audiodir": "",
"filelist": "/home/zach/code/uberduck-ml-dev/filelists/vctk-radtts.txt",
"lmdbpath": ""
}
},
"validation_files": {},
"dur_min": 0.1,
"dur_max": 10.2,
"sampling_rate": 22050,
"filter_length": 1024,
"hop_length": 256,
"win_length": 1024,
"n_mel_channels": 80,
"mel_fmin": 0.0,
"mel_fmax": 8000.0,
"f0_min": 80.0,
"f0_max": 640.0,
"max_wav_value": 32768.0,
"use_f0": true,
"use_log_f0": false,
"use_energy_avg": true,
"use_scaled_energy": true,
"symbol_set": "radtts",
"cleaner_names": ["radtts_cleaners"],
"heteronyms_path": "/home/zach/code/uberduck-ml-dev/uberduck_ml_dev/text/heteronyms",
"phoneme_dict_path": "/home/zach/code/uberduck-ml-dev/uberduck_ml_dev/text/cmudict-0.7b",
"p_phoneme": 1.0,
"handle_phoneme": "word",
"handle_phoneme_ambiguous": "ignore",
"include_speakers": null,
"n_frames": -1,
"betabinom_cache_path": "/home/zach/code/uberduck-ml-dev/data_cache/",
"lmdb_cache_path": "",
"use_attn_prior_masking": true,
"prepend_space_to_text": true,
"append_space_to_text": true,
"add_bos_eos_to_text": false,
"betabinom_scaling_factor": 1.0,
"distance_tx_unvoiced": false,
"is_zero_shot": true,
"mel_noise_scale": 0.0
},
"dist_config": {
"dist_backend": "nccl",
"dist_url": "tcp://localhost:54321"
},
"model_config": {
"n_speakers": 0,
"n_speaker_dim": 512,
"n_text": 185,
"n_text_dim": 512,
"n_flows": 8,
"n_conv_layers_per_step": 4,
"n_mel_channels": 80,
"n_hidden": 1024,
"mel_encoder_n_hidden": 512,
"dummy_speaker_embedding": false,
"n_early_size": 2,
"n_early_every": 2,
"n_group_size": 2,
"affine_model": "wavenet",
"include_modules": "decatn",
"scaling_fn": "tanh",
"matrix_decomposition": "LUS",
"learn_alignments": true,
"use_speaker_emb_for_alignment": false,
"attn_straight_through_estimator": true,
"use_context_lstm": true,
"context_lstm_norm": "spectral",
"context_lstm_w_f0_and_energy": true,
"text_encoder_lstm_norm": "spectral",
"n_f0_dims": 1,
"n_energy_avg_dims": 1,
"use_first_order_features": false,
"unvoiced_bias_activation": "relu",
"decoder_use_partial_padding": true,
"decoder_use_unvoiced_bias": true,
"ap_pred_log_f0": true,
"ap_use_unvoiced_bias": true,
"ap_use_voiced_embeddings": true,
"dur_model_config": null,
"f0_model_config": null,
"energy_model_config": null,
"v_model_config": {
"name": "dap",
"hparams": {
"n_speaker_dim": 16,
"take_log_of_input": false,
"bottleneck_hparams": {
"in_dim": 512,
"reduction_factor": 16,
"norm": "weightnorm",
"non_linearity": "relu"
},
"arch_hparams": {
"out_dim": 1,
"n_layers": 2,
"n_channels": 256,
"kernel_size": 3,
"p_dropout": 0.5,
"lstm_type": "",
"use_linear": true
}
}
}
}
}
Loading