Skip to content

第10章 日本語Tacotronに基づく音声合成システムの実装のノートブックで、エラーになります。 #43

@koudah

Description

@koudah

幸田と申します。

第10章のノートブックの32セル目で、以下のようなエラーが出ます。
※実行環境は Jetson Orin です。

/tmp/ipykernel_11866/3661236322.py:13: FutureWarning: Pass orig_sr=48000, target_sr=16000 as keyword args. From version 0.10 passing these as positional arguments will result in an error
x = librosa.resample(x, _sr, sr)


ValueError Traceback (most recent call last)
File ~/ttslearn/venv/lib/python3.8/site-packages/scipy/signal/windows/_windows.py:2214, in get_window(window, Nx, fftbins)
2213 try:
-> 2214 beta = float(window)
2215 except (TypeError, ValueError) as e:

ValueError: could not convert string to float: 'hanning'

During handling of the above exception, another exception occurred:

KeyError Traceback (most recent call last)
File ~/ttslearn/venv/lib/python3.8/site-packages/scipy/signal/windows/_windows.py:2232, in get_window(window, Nx, fftbins)
2231 try:
-> 2232 winfunc = _win_equiv[winstr]
2233 except KeyError as e:

KeyError: 'hanning'

The above exception was the direct cause of the following exception:

ValueError Traceback (most recent call last)
Cell In [32], line 15
12 x = (x / 32768).astype(np.float64)
13 x = librosa.resample(x, _sr, sr)
---> 15 out_feats = logmelspectrogram(x, sr)
17 # 冒頭と末尾の非音声区間の長さを調整
18 assert "sil" in labels.contexts[0] and "sil" in labels.contexts[-1]

File ~/ttslearn/venv/lib/python3.8/site-packages/ttslearn/dsp.py:310, in logmelspectrogram(y, sr, n_fft, hop_length, win_length, n_mels, fmin, fmax, clip)
307 if n_fft is None:
308 n_fft = next_power_of_2(win_length)
--> 310 S = librosa.stft(
311 y, n_fft=n_fft, hop_length=hop_length, win_length=win_length, window="hanning"
312 )
314 fmin = 0 if fmin is None else fmin
315 fmax = sr // 2 if fmax is None else fmax

File ~/ttslearn/venv/lib/python3.8/site-packages/librosa/util/decorators.py:88, in deprecate_positional_args.<locals>._inner_deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
86 extra_args = len(args) - len(all_args)
87 if extra_args <= 0:
---> 88 return f(*args, **kwargs)
90 # extra_args > 0
91 args_msg = [
92 "{}={}".format(name, arg)
93 for name, arg in zip(kwonly_args[:extra_args], args[-extra_args:])
94 ]

File ~/ttslearn/venv/lib/python3.8/site-packages/librosa/core/spectrum.py:204, in stft(y, n_fft, hop_length, win_length, window, center, dtype, pad_mode)
201 # Check audio is valid
202 util.valid_audio(y, mono=False)
--> 204 fft_window = get_window(window, win_length, fftbins=True)
206 # Pad the window out to n_fft size
207 fft_window = util.pad_center(fft_window, size=n_fft)

File ~/ttslearn/venv/lib/python3.8/site-packages/librosa/util/decorators.py:88, in deprecate_positional_args.<locals>._inner_deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
86 extra_args = len(args) - len(all_args)
87 if extra_args <= 0:
---> 88 return f(*args, **kwargs)
90 # extra_args > 0
91 args_msg = [
92 "{}={}".format(name, arg)
93 for name, arg in zip(kwonly_args[:extra_args], args[-extra_args:])
94 ]

File ~/ttslearn/venv/lib/python3.8/site-packages/librosa/filters.py:1185, in get_window(window, Nx, fftbins)
1180 return window(Nx)
1182 elif isinstance(window, (str, tuple)) or np.isscalar(window):
1183 # TODO: if we add custom window functions in librosa, call them here
-> 1185 return scipy.signal.get_window(window, Nx, fftbins=fftbins)
1187 elif isinstance(window, (np.ndarray, list)):
1188 if len(window) == Nx:

File ~/ttslearn/venv/lib/python3.8/site-packages/scipy/signal/windows/_windows.py:2234, in get_window(window, Nx, fftbins)
2232 winfunc = _win_equiv[winstr]
2233 except KeyError as e:
-> 2234 raise ValueError("Unknown window type.") from e
2236 if winfunc is dpss:
2237 params = (Nx,) + args + (None,)

ValueError: Unknown window type.

なにか、環境やインストールの際のバージョン等に誤りがあるのでしょうか?

恐れ入りますが、よろしくおねがいいたします。

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions