4 changes: 2 additions & 2 deletions configs/default.yaml
@@ -110,7 +110,7 @@ train:
 # optimization related
 eos: False #True
 opt: 'noam'
-accum_grad: 4
+accum_grad: 1
 grad_clip: 1.0
 weight_decay: 0.001
 patience: 0
@@ -126,7 +126,7 @@ train:
 seed: 1 # random seed number
 resume: "" # the snapshot path to resume (if set empty, no effect)
 use_phonemes: True
-batch_size : 16
+batch_size : 48
 # other
 melgan_vocoder : True
 save_interval : 1000
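Note on this config change: with accum_grad dropped from 4 to 1 and batch_size raised from 16 to 48, the effective batch per optimizer step moves from 64 (16 × 4) to 48. The sketch below is a generic PyTorch gradient-accumulation loop, not this repository's trainer, just to show how the two settings interact; the function and argument names are illustrative.

# Generic sketch (not this repo's trainer): effective batch = batch_size * accum_grad.
import torch

def run_epoch(model, loader, optimizer, criterion, accum_grad=1, grad_clip=1.0):
    optimizer.zero_grad()
    for step, (x, y) in enumerate(loader):
        loss = criterion(model(x), y) / accum_grad  # average gradients over accumulated mini-batches
        loss.backward()
        if (step + 1) % accum_grad == 0:            # with accum_grad=1 this fires every mini-batch
            torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
            optimizer.step()
            optimizer.zero_grad()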
8 changes: 4 additions & 4 deletions fastspeech.py
@@ -211,12 +211,12 @@ def _forward(
# print("d_outs:", d_outs.shape) # torch.Size([32, 121])
hs = self.length_regulator(hs, ds, ilens) # (B, Lmax, adim)
# print("After Hs:",hs.shape) #torch.Size([32, 868, 256])
e_outs = self.energy_predictor(hs, mel_masks)
e_outs = self.energy_predictor(hs.detach(), mel_masks)
# print("e_outs:", e_outs.shape) #torch.Size([32, 868])
p_outs = self.pitch_predictor(hs, mel_masks)
p_outs = self.pitch_predictor(hs.detach(), mel_masks)
# print("p_outs:", p_outs.shape) #torch.Size([32, 868])
hs = hs + self.pitch_embed(one_hot_pitch) # (B, Lmax, adim)
hs = hs + self.energy_embed(one_hot_energy) # (B, Lmax, adim)
hs = hs + one_hot_pitch + one_hot_energy # self.pitch_embed(one_hot_pitch) (B, Lmax, adim)
#hs = hs + self.energy_embed(one_hot_energy) # (B, Lmax, adim)
# forward decoder
if olens is not None:
h_masks = self._source_mask(olens)
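Two things change in this hunk: the pitch/energy predictors now receive hs.detach(), so their losses no longer backpropagate through the length-regulated hidden states into the encoder, and the ground-truth one-hot pitch/energy vectors are added to hs directly instead of passing through the pitch_embed/energy_embed projections (the energy-embedding line is commented out), which only broadcasts if the one-hot depth equals adim. The snippet below is an isolated sketch of that stop-gradient pattern with stand-in linear predictors, not the module's actual code; masks are omitted for brevity.

# Isolated sketch of the stop-gradient pattern (stand-in modules, not the
# actual FastSpeech predictors).
import torch
import torch.nn as nn

class VarianceAdaptorSketch(nn.Module):
    def __init__(self, adim=256):
        super().__init__()
        self.pitch_predictor = nn.Linear(adim, 1)   # stand-in predictor
        self.energy_predictor = nn.Linear(adim, 1)  # stand-in predictor

    def forward(self, hs, one_hot_pitch, one_hot_energy):
        # .detach() cuts the autograd graph: the predictor losses update only
        # the predictors, not the encoder/length regulator that produced hs.
        p_outs = self.pitch_predictor(hs.detach()).squeeze(-1)
        e_outs = self.energy_predictor(hs.detach()).squeeze(-1)
        # Ground-truth variance information enters the decoder path as raw
        # one-hot vectors added to hs (assumes their last dim equals adim).
        hs = hs + one_hot_pitch + one_hot_energy
        return hs, p_outs, e_outs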