4 changes: 2 additions & 2 deletions configs/default.yaml
@@ -110,7 +110,7 @@ train:
 # optimization related
 eos: False #True
 opt: 'noam'
-accum_grad: 4
+accum_grad: 1
 grad_clip: 1.0
 weight_decay: 0.001
 patience: 0
@@ -126,7 +126,7 @@ train:
 seed: 1 # random seed number
 resume: "" # the snapshot path to resume (if set empty, no effect)
 use_phonemes: True
-batch_size : 16
+batch_size : 48
 # other
 melgan_vocoder : True
 save_interval : 1000
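Note on this config change: with accum_grad dropped from 4 to 1 and batch_size raised from 16 to 48, the effective batch per optimizer step moves from 64 (16 × 4) to 48. The sketch below is a generic PyTorch gradient-accumulation loop, not this repository's trainer, just to show how the two settings interact; the function and argument names are illustrative.

# Generic sketch (not this repo's trainer): effective batch = batch_size * accum_grad.
import torch

def run_epoch(model, loader, optimizer, criterion, accum_grad=1, grad_clip=1.0):
    optimizer.zero_grad()
    for step, (x, y) in enumerate(loader):
        loss = criterion(model(x), y) / accum_grad  # average gradients over accumulated mini-batches
        loss.backward()
        if (step + 1) % accum_grad == 0:            # with accum_grad=1 this fires every mini-batch
            torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip)
            optimizer.step()
            optimizer.zero_grad()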
8 changes: 4 additions & 4 deletions fastspeech.py
@@ -211,12 +211,12 @@ def _forward(
# print("d_outs:", d_outs.shape) # torch.Size([32, 121])
hs = self.length_regulator(hs, ds, ilens) # (B, Lmax, adim)
# print("After Hs:",hs.shape) #torch.Size([32, 868, 256])
e_outs = self.energy_predictor(hs, mel_masks)
e_outs = self.energy_predictor(hs.detach(), mel_masks)
# print("e_outs:", e_outs.shape) #torch.Size([32, 868])
p_outs = self.pitch_predictor(hs, mel_masks)
p_outs = self.pitch_predictor(hs.detach(), mel_masks)
# print("p_outs:", p_outs.shape) #torch.Size([32, 868])
hs = hs + self.pitch_embed(one_hot_pitch) # (B, Lmax, adim)
hs = hs + self.energy_embed(one_hot_energy) # (B, Lmax, adim)
hs = hs + one_hot_pitch + one_hot_energy # self.pitch_embed(one_hot_pitch) (B, Lmax, adim)
#hs = hs + self.energy_embed(one_hot_energy) # (B, Lmax, adim)
# forward decoder
if olens is not None:
h_masks = self._source_mask(olens)
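Two things change in this hunk: the pitch/energy predictors now receive hs.detach(), so their losses no longer backpropagate through the length-regulated hidden states into the encoder, and the ground-truth one-hot pitch/energy vectors are added to hs directly instead of passing through the pitch_embed/energy_embed projections (the energy-embedding line is commented out), which only broadcasts if the one-hot depth equals adim. The snippet below is an isolated sketch of that stop-gradient pattern with stand-in linear predictors, not the module's actual code; masks are omitted for brevity.

# Isolated sketch of the stop-gradient pattern (stand-in modules, not the
# actual FastSpeech predictors).
import torch
import torch.nn as nn

class VarianceAdaptorSketch(nn.Module):
    def __init__(self, adim=256):
        super().__init__()
        self.pitch_predictor = nn.Linear(adim, 1)   # stand-in predictor
        self.energy_predictor = nn.Linear(adim, 1)  # stand-in predictor

    def forward(self, hs, one_hot_pitch, one_hot_energy):
        # .detach() cuts the autograd graph: the predictor losses update only
        # the predictors, not the encoder/length regulator that produced hs.
        p_outs = self.pitch_predictor(hs.detach()).squeeze(-1)
        e_outs = self.energy_predictor(hs.detach()).squeeze(-1)
        # Ground-truth variance information enters the decoder path as raw
        # one-hot vectors added to hs (assumes their last dim equals adim).
        hs = hs + one_hot_pitch + one_hot_energy
        return hs, p_outs, e_outs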