forked from Soft-wa-re/BlogToPodcast
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
executable file
·122 lines (104 loc) · 4.8 KB
/
main.py
File metadata and controls
executable file
·122 lines (104 loc) · 4.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/usr/bin/env python3
"""Main Module: This module has basically everything even though it shouldn't """
from pathlib import Path
import re
import sys
from os.path import exists
import os
import soundfile as sf
import tensorflow as tf
import frontmatter
from tensorflow_tts.inference import TFAutoModel
from tensorflow_tts.inference import AutoProcessor
from pydub import AudioSegment
# TensorFlow runtime settings: C++ log level and GPU memory growth.
# NOTE(review): these variables are assigned after `import tensorflow` has
# already executed; TF_CPP_MIN_LOG_LEVEL may need to be set before that import
# to have any effect - confirm.
os.environ.update({
    'TF_CPP_MIN_LOG_LEVEL': '2',  # or any {'0', '1', '2'}
    'TF_FORCE_GPU_ALLOW_GROWTH': 'true',
})

# Enable memory growth on every visible GPU (no-op when none is present).
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device, True)

# Pretrained LJSpeech (English) checkpoints, loaded once for the whole run:
# fastspeech2 yields the mel outputs, mb_melgan turns a mel into audio, and
# processor converts raw text into the id sequence fastspeech2 consumes.
fastspeech2 = TFAutoModel.from_pretrained(
    "tensorspeech/tts-fastspeech2-ljspeech-en")
mb_melgan = TFAutoModel.from_pretrained(
    "tensorspeech/tts-mb_melgan-ljspeech-en")
processor = AutoProcessor.from_pretrained(
    "tensorspeech/tts-fastspeech2-ljspeech-en")
MY_PATH = "."

# Path fragments that exclude a file from conversion.  They are matched as
# plain substrings of the whole path, so a file *name* containing one of them
# is excluded as well.
UNSUPPORTED_DIRS = ("_drafts", "_postsBacklog", "vendor")

WIKI_ECMA_PATH = (
    "https://en.wikipedia.org/"
    "wiki/ECMAScript#7th_Edition_%E2%80%93_ECMAScript_2016"
)
MOZILLA_PATH = (
    "https://developer.mozilla.org/"
    "en-US/docs/Web/JavaScript/Reference/Global_Objects/String/includes#polyfill"
)

# (needle, replacement) pairs applied in order.  The long ``prototype`` forms
# must come first: they contain the short ``.includes(...)`` / ``.indexOf(...)``
# needles and would no longer match once those were rewritten.
SPEECH_REPLACEMENTS = (
    ("Array.prototype.indexOf(...) >= 0", "Array Dot indexOf"),
    ("String.prototype.indexOf(...) >= 0", "String Dot indexOf"),
    ("Array.prototype.includes(...) >= 0", "Array Dot includes"),
    ("String.prototype.includes(...) >= 0", "String Dot includes"),
    (".includes(...)", "Dot includes"),
    (".indexOf(...)", "Dot indexOf"),
    (WIKI_ECMA_PATH, ""),
    (MOZILLA_PATH, ""),
)

# Fenced JavaScript blocks.  DOTALL lets ``.`` cross newlines so multi-line
# fences match; the non-greedy ``*?`` stops at the first closing fence so the
# prose between two code blocks is kept.
JS_CODE_BLOCK = re.compile(r"```javascript.*?```", re.DOTALL)

# Sample rate handed to soundfile when writing the intermediate WAV file.
SAMPLE_RATE = 22050


def is_unsupported(md_file):
    """Return True when *md_file* (a path string) contains an excluded fragment."""
    return any(fragment in md_file for fragment in UNSUPPORTED_DIRS)


def clean_content(content):
    """Return *content* with unspeakable snippets rewritten or removed.

    Applies SPEECH_REPLACEMENTS in order, then strips every fenced
    ```javascript block.
    """
    for needle, replacement in SPEECH_REPLACEMENTS:
        content = content.replace(needle, replacement)
    # The pattern is pre-compiled with its flags.  The previous call passed
    # re.DOTALL as the positional ``count`` argument of re.sub, so the flag
    # was never applied and multi-line code blocks were left in the text.
    return JS_CODE_BLOCK.sub("", content)


def synthesize_sentence(sentence):
    """Return the audio produced for one sentence of text.

    The sentence is converted to ids by ``processor``, run through
    ``fastspeech2`` and the resulting ``mel_after`` output is vocoded by
    ``mb_melgan``.  Only the first batch entry / first channel is returned.
    """
    ids = processor.text_to_sequence(sentence)
    _, mel_after, _, _, _ = fastspeech2.inference(
        input_ids=tf.expand_dims(tf.convert_to_tensor(ids, dtype=tf.int32), 0),
        speaker_ids=tf.convert_to_tensor([0], dtype=tf.int32),
        speed_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
        f0_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
        energy_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
    )
    # Only the ``mel_after`` audio is ever written out, so the ``mel_before``
    # output is not vocoded at all.
    return mb_melgan.inference(mel_after)[0, :, 0]


def convert_post(md_file):
    """Create ``<md_file>.mp3`` from the Markdown post at *md_file*.

    Posts without a ``blogcast`` front-matter key are skipped.  Sentences that
    fail to synthesize are reported and left out of the audio.

    Returns True when an MP3 was written, False when the post was skipped.
    Errors from loading the post or writing the audio propagate to the caller.
    """
    post = frontmatter.load(md_file)
    if 'blogcast' not in post.metadata:
        print(md_file+" does not have blogToPodcast Key ('blogcast')")
        return False

    chunks = []
    for i, sentence in enumerate(clean_content(post.content).split('.')):
        # Skip empty and whitespace-only fragments (e.g. after "..." or a
        # trailing period); there is nothing to read aloud.
        if not sentence.strip():
            continue
        try:
            chunks.append(synthesize_sentence(sentence+"."))
        except Exception as err:  # pylint: disable=broad-except
            # Best effort: one bad sentence must not lose the whole post.
            print("your sentence was probably too long")
            print("Unexpected error:", sys.exc_info()[0], err)
            print(i)
            print(len(sentence))
            print(sentence)

    if not chunks:
        print(md_file+" produced no audio")
        return False

    wav_path = md_file+'.wav'
    try:
        # Concatenate once instead of growing a tensor sentence by sentence.
        sf.write(wav_path, tf.concat(chunks, 0), SAMPLE_RATE, 'PCM_24')
        AudioSegment.from_wav(wav_path).export(md_file+'.mp3', format="mp3")
    finally:
        # The WAV is only an intermediate; never leave it behind.
        if exists(wav_path):
            os.remove(wav_path)
    return True


markdown_files = list(Path(MY_PATH).rglob("*.markdown"))
for MD_FILE in markdown_files:
    MD_FILE = str(MD_FILE)
    if is_unsupported(MD_FILE):
        print("Directory not supported:" +MD_FILE)
        continue
    if exists(MD_FILE+".mp3"):
        print(MD_FILE+".mp3" + " exists")
        continue
    print("generating"+MD_FILE)
    try:
        convert_post(MD_FILE)
    except Exception as err:  # pylint: disable=broad-except
        # Report and move on to the next post.  ``Exception`` (not a bare
        # except) keeps Ctrl-C / SystemExit able to stop the run.
        print("Error in file:"+MD_FILE)
        print("Unexpected error:", sys.exc_info()[0], err)