diff --git a/aeneas/diagnostics.py b/aeneas/diagnostics.py index 7d9e98a1..6f4e291f 100644 --- a/aeneas/diagnostics.py +++ b/aeneas/diagnostics.py @@ -56,9 +56,9 @@ def check_shell_encoding(cls): """ is_in_utf8 = True is_out_utf8 = True - if sys.stdin.encoding not in ["UTF-8", "UTF8"]: + if sys.stdin.encoding not in ["UTF-8", "UTF8", "utf-8", "utf8"]: is_in_utf8 = False - if sys.stdout.encoding not in ["UTF-8", "UTF8"]: + if sys.stdout.encoding not in ["UTF-8", "UTF8", "utf-8", "utf8"]: is_out_utf8 = False if (is_in_utf8) and (is_out_utf8): gf.print_success(u"shell encoding OK") diff --git a/aeneas/tools/abstract_cli_program.py b/aeneas/tools/abstract_cli_program.py index be5bb2fa..b5b9fadd 100644 --- a/aeneas/tools/abstract_cli_program.py +++ b/aeneas/tools/abstract_cli_program.py @@ -295,10 +295,10 @@ def run(self, arguments, show_help=True): if self.use_sys: # check that sys.stdin.encoding and sys.stdout.encoding are set to utf-8 if not gf.FROZEN: - if sys.stdin.encoding not in ["UTF-8", "UTF8"]: + if sys.stdin.encoding not in ["UTF-8", "UTF8", "utf-8", "utf8"]: self.print_warning(u"The default input encoding is not UTF-8.") self.print_warning(u"You might want to set 'PYTHONIOENCODING=UTF-8' in your shell.") - if sys.stdout.encoding not in ["UTF-8", "UTF8"]: + if sys.stdout.encoding not in ["UTF-8", "UTF8", "utf-8", "utf8"]: self.print_warning(u"The default output encoding is not UTF-8.") self.print_warning(u"You might want to set 'PYTHONIOENCODING=UTF-8' in your shell.") # decode using sys.stdin.encoding diff --git a/debian/changelog b/debian/changelog index dce85a6e..a3ac4ed1 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +aeneas (1.7.3.1) stable; urgency=medium + + * Fix spurious warnings about "encoding is not UTF-8" when encoding is "utf-8" + * Add debian packaging for python3-aeneas + + -- Stephen McConnel Thu, 02 Apr 2020 16:45:00 -0600 + aeneas (1.7.1) stable; urgency=medium * Fix bug #151 diff --git a/debian/compat b/debian/compat 
index 7f8f011e..ec635144 100644 --- a/debian/compat +++ b/debian/compat @@ -1 +1 @@ -7 +9 diff --git a/debian/control b/debian/control index 6564c39c..53773e8a 100644 --- a/debian/control +++ b/debian/control @@ -1,6 +1,5 @@ Source: aeneas Section: python -X-Python-Version: >= 2.7 Priority: optional Maintainer: Alberto Pettarin Build-Depends: debhelper (>= 9.0.0), @@ -8,14 +7,29 @@ Build-Depends: debhelper (>= 9.0.0), python-all-dev, python-setuptools, python-numpy, python-lxml, python-bs4, + python3-all-dev, + python3-setuptools, + python3-numpy, python3-lxml, python3-bs4, libasound2-dev, libsndfile1-dev, libespeak-dev -Standards-Version: 3.9.5 +Standards-Version: 4.1.4 Homepage: https://github.com/readbeyond/aeneas Package: python-aeneas Architecture: any -Depends: ${misc:Depends}, ${python:Depends}, +Depends: ${misc:Depends}, ${python:Depends}, ${shlibs:Depends}, espeak, espeak-data, libespeak1, vorbis-tools, ffmpeg, flac +Replaces: python3-aeneas +Description: Python library to automagically synchronize audio and text + aeneas automatically generates a synchronization map between a list of + text fragments and an audio file containing the narration of the text. + In computer science this task is known as (automatically computing a) + forced alignment. + +Package: python3-aeneas +Architecture: any +Depends: ${misc:Depends}, ${python3:Depends}, ${shlibs:Depends}, + espeak, espeak-data, libespeak1, vorbis-tools, ffmpeg, flac +Replaces: python-aeneas Description: Python library to automagically synchronize audio and text aeneas automatically generates a synchronization map between a list of text fragments and an audio file containing the narration of the text. 
diff --git a/debian/install b/debian/python-aeneas.install similarity index 100% rename from debian/install rename to debian/python-aeneas.install diff --git a/debian/python-aeneas.lintian-overrides b/debian/python-aeneas.lintian-overrides new file mode 100644 index 00000000..d181bd15 --- /dev/null +++ b/debian/python-aeneas.lintian-overrides @@ -0,0 +1,2 @@ +python-aeneas: binary-without-manpage +python-aeneas: debian-changelog-file-contains-invalid-email-address diff --git a/debian/python3-aeneas.install b/debian/python3-aeneas.install new file mode 100644 index 00000000..2acdc06c --- /dev/null +++ b/debian/python3-aeneas.install @@ -0,0 +1,3 @@ +aeneas_check_setup.py /usr/share/python3-aeneas +VERSION /usr/share/python3-aeneas +aeneas/tools/res/audio.mp3 /usr/share/python3-aeneas/aeneas/tools/res diff --git a/debian/python3-aeneas.lintian-overrides b/debian/python3-aeneas.lintian-overrides new file mode 100644 index 00000000..6fda14af --- /dev/null +++ b/debian/python3-aeneas.lintian-overrides @@ -0,0 +1,2 @@ +python3-aeneas: binary-without-manpage +python3-aeneas: debian-changelog-file-contains-invalid-email-address diff --git a/debian/rules b/debian/rules index d0c10cef..0ab4b2f6 100755 --- a/debian/rules +++ b/debian/rules @@ -3,5 +3,7 @@ export PYBUILD_NAME=aeneas %: - dh $@ --with python2 --buildsystem=pybuild + dh $@ --with python2,python3 --buildsystem=pybuild +override_dh_shlibdeps: + dh_shlibdeps && dh_numpy && dh_numpy3 diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 39f669c8..f4d9c8c9 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -1,5 +1,9 @@ Changelog ========= +v1.7.3.1 (2020-05-06) +--------------------- +#. Fixed spurious warnings about not using UTF-8 when "utf-8" is seen +#. Added debian packaging for python3-aeneas v1.7.4 (2018-05-??) 
------------------- diff --git a/setup.py b/setup.py index e9dd533d..6e5269bd 100644 --- a/setup.py +++ b/setup.py @@ -80,16 +80,25 @@ def prepare_cew_for_windows(): :rtype: bool """ + try: # copy espeak_sapi.dll to C:\Windows\System32\espeak.dll - espeak_dll_win_path = "C:\\Windows\\System32\\espeak.dll" - espeak_dll_dst_path = "aeneas\\cew\\espeak.dll" - espeak_dll_src_paths = [ - "C:\\aeneas\\eSpeak\\espeak_sapi.dll", - "C:\\sync\\eSpeak\\espeak_sapi.dll", - "C:\\Program Files\\eSpeak\\espeak_sapi.dll", - "C:\\Program Files (x86)\\eSpeak\\espeak_sapi.dll", - ] + if USE_ESPEAKNG: + espeak_dll_win_path = "C:\\Windows\\System32\\libespeak-ng.dll" + espeak_dll_dst_path = "aeneas\\cew\\libespeak-ng.dll" + espeak_dll_src_paths = [ + "C:\\Program Files\\eSpeak NG\\libespeak-ng.dll", + "C:\\Program Files (x86)\\eSpeak NG\\libespeak-ng.dll", + ] + else: + espeak_dll_win_path = "C:\\Windows\\System32\\espeak.dll" + espeak_dll_dst_path = "aeneas\\cew\\espeak.dll" + espeak_dll_src_paths = [ + "C:\\aeneas\\eSpeak\\espeak_sapi.dll", + "C:\\sync\\eSpeak\\espeak_sapi.dll", + "C:\\Program Files\\eSpeak\\espeak_sapi.dll", + "C:\\Program Files (x86)\\eSpeak\\espeak_sapi.dll", + ] if os.path.exists(espeak_dll_dst_path): print("[INFO] Found eSpeak DLL in %s" % espeak_dll_dst_path) else: @@ -123,13 +132,20 @@ def prepare_cew_for_windows(): # so, we copy it in the current working directory from the included thirdparty\ directory # NOTE: PREV: copy thirdparty\espeak.lib to $PYTHON\libs\espeak.lib # NOTE: PREV: espeak_lib_dst_path = os.path.join(sys.prefix, "libs", "espeak.lib") - espeak_lib_src_path = os.path.join(os.path.dirname(__file__), "thirdparty", "espeak.lib") - espeak_lib_dst_path = os.path.join(os.path.dirname(__file__), "espeak.lib") + if USE_ESPEAKNG: + if IS_64BITS: + espeak_lib_src_path = os.path.join(os.path.dirname(__file__), "thirdparty", "libespeak-ng-x64.lib") + else: + espeak_lib_src_path = os.path.join(os.path.dirname(__file__), "thirdparty", 
"libespeak-ng-x86.lib") + espeak_lib_dst_path = os.path.join(os.path.dirname(__file__), "espeak-ng.lib") + else: + espeak_lib_src_path = os.path.join(os.path.dirname(__file__), "thirdparty", "espeak.lib") + espeak_lib_dst_path = os.path.join(os.path.dirname(__file__), "espeak.lib") if os.path.exists(espeak_lib_dst_path): print("[INFO] Found eSpeak LIB in %s" % espeak_lib_dst_path) else: try: - print("[INFO] Copying eSpeak LIB into %s" % espeak_lib_dst_path) + print("[INFO] Copying eSpeak LIB from %s into %s" % (espeak_lib_src_path, espeak_lib_dst_path)) shutil.copyfile(espeak_lib_src_path, espeak_lib_dst_path) print("[INFO] Copied eSpeak LIB") except: @@ -145,6 +161,55 @@ def prepare_cew_for_windows(): print("[WARN] Unexpected exception while preparing cew: %s" % e) return False +def prepare_cew_speak_lib(): + """ + Copy files needed to compile the ``cew`` Python C extension on Windows. + + Return ``True`` if successful, ``False`` otherwise. + + :rtype: bool + """ + + try: + # NOTE: speak_lib.h is needed only while compiling the C extension, not when using it + # so, we copy it in the current working directory from the included thirdparty\ directory + if USE_ESPEAKNG: + espeak_lib_src_path = os.path.join(os.path.dirname(__file__), "thirdparty", "speak-ng_lib.h") + else: + espeak_lib_src_path = os.path.join(os.path.dirname(__file__), "thirdparty", "speak_lib.h") + espeak_lib_dst_path = os.path.join(os.path.dirname(__file__), "aeneas", "cew", "speak_lib.h") + if os.path.exists(espeak_lib_dst_path): + print("[INFO] Found eSpeak LIB in %s" % espeak_lib_dst_path) + else: + try: + print("[INFO] Copying eSpeak LIB from %s into %s" % (espeak_lib_src_path, espeak_lib_dst_path)) + shutil.copyfile(espeak_lib_src_path, espeak_lib_dst_path) + print("[INFO] Copied eSpeak LIB") + except: + print("[WARN] Unable to copy the eSpeak LIB, probably because you are not running with admin privileges.") + print("[WARN] If you want to compile the C extension cew,") + print("[WARN] please 
copy espeak.lib from the thirdparty directory into %s" % espeak_lib_dst_path) + print("[WARN] and run the aeneas setup again.") + return False + + # if here, we have completed the setup, return True + return True + except Exception as e: + print("[WARN] Unexpected exception while preparing cew: %s" % e) + return False + +def get_espeak_lib(): + """ + Return ``espeak-ng`` if true, ``espeak`` otherwise. + + :rtype: string + """ + + if USE_ESPEAKNG: + return "espeak-ng" + else: + return "espeak" + ############################################################################## # @@ -156,6 +221,7 @@ def prepare_cew_for_windows(): IS_LINUX = (os.name == "posix") and (os.uname()[0] == "Linux") IS_OSX = (os.name == "posix") and (os.uname()[0] == "Darwin") IS_WINDOWS = (os.name == "nt") +IS_64BITS = (sys.maxsize > 2**32) # define what values of environment variables are considered equal to True TRUE_VALUES = [ @@ -177,6 +243,7 @@ def prepare_cew_for_windows(): WITHOUT_CEW = os.getenv("AENEAS_WITH_CEW", "True") not in TRUE_VALUES FORCE_CEW = os.getenv("AENEAS_FORCE_CEW", "False") in TRUE_VALUES FORCE_CFW = os.getenv("AENEAS_FORCE_CFW", "False") in TRUE_VALUES +USE_ESPEAKNG = os.getenv("AENEAS_USE_ESPEAKNG", "False") in TRUE_VALUES ############################################################################## @@ -224,7 +291,7 @@ def finalize_options(self): "aeneas/cew/cew_func.c" ], libraries=[ - "espeak" + get_espeak_lib() ] ) EXTENSION_CFW = Extension( @@ -292,8 +359,15 @@ def finalize_options(self): print("[INFO] ") EXTENSIONS.append(EXTENSION_CEW) else: + print("[INFO] ********************************************************************************") + print("[INFO] Specify AENEAS_USE_ESPEAKNG=True to build aeneas with espeak-ng libraries") + print("[INFO] ********************************************************************************") + print("[INFO] ") if IS_LINUX: - EXTENSIONS.append(EXTENSION_CEW) + if prepare_cew_speak_lib(): + EXTENSIONS.append(EXTENSION_CEW) + 
else: + print("[WARN] Unable to complete the setup for C extension cew, not building it.") elif IS_OSX: print("[INFO] *********************************************************************************") print("[INFO] Compiling the C extension cew on Mac OS X is experimental.") @@ -307,7 +381,10 @@ def finalize_options(self): print("[INFO] Please see the aeneas installation documentation for details.") print("[INFO] ********************************************************************************") print("[INFO] ") - EXTENSIONS.append(EXTENSION_CEW) + if prepare_cew_speak_lib(): + EXTENSIONS.append(EXTENSION_CEW) + else: + print("[WARN] Unable to complete the setup for C extension cew, not building it.") elif IS_WINDOWS: print("[INFO] *****************************************************************") print("[INFO] Compiling the C extension cew on Windows is experimental.") @@ -317,8 +394,11 @@ def finalize_options(self): print("[INFO] Please see the aeneas installation documentation for details.") print("[INFO] *****************************************************************") print("[INFO] ") - if prepare_cew_for_windows(): - EXTENSIONS.append(EXTENSION_CEW) + if prepare_cew_speak_lib(): + if prepare_cew_for_windows(): + EXTENSIONS.append(EXTENSION_CEW) + else: + print("[WARN] Unable to complete the setup for C extension cew, not building it.") else: print("[WARN] Unable to complete the setup for C extension cew, not building it.") else: diff --git a/thirdparty/libespeak-ng-x64.lib b/thirdparty/libespeak-ng-x64.lib new file mode 100644 index 00000000..c10eb887 Binary files /dev/null and b/thirdparty/libespeak-ng-x64.lib differ diff --git a/thirdparty/libespeak-ng-x86.lib b/thirdparty/libespeak-ng-x86.lib new file mode 100644 index 00000000..fc6c7545 Binary files /dev/null and b/thirdparty/libespeak-ng-x86.lib differ diff --git a/thirdparty/speak-ng_lib.h b/thirdparty/speak-ng_lib.h new file mode 100644 index 00000000..cc1e35e7 --- /dev/null +++ 
b/thirdparty/speak-ng_lib.h @@ -0,0 +1,709 @@ +#ifndef SPEAK_LIB_H +#define SPEAK_LIB_H +/*************************************************************************** + * Copyright (C) 2005 to 2012 by Jonathan Duddington * + * email: jonsd@users.sourceforge.net * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU General Public License as published by * + * the Free Software Foundation; either version 3 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU General Public License for more details. * + * * + * You should have received a copy of the GNU General Public License * + * along with this program; if not, see: * + * <http://www.gnu.org/licenses/>. * + ***************************************************************************/ + + +/*************************************************************/ +/* This is the header file for the library version of espeak */ +/* */ +/*************************************************************/ + +#include <stdio.h> +#include <stddef.h> + +#if defined(_WIN32) || defined(_WIN64) +#ifdef LIBESPEAK_NG_EXPORT +#define ESPEAK_API __declspec(dllexport) +#else +#define ESPEAK_API __declspec(dllimport) +#endif +#else +#define ESPEAK_API +#endif + +#define ESPEAK_API_REVISION 12 +/* +Revision 2 + Added parameter "options" to eSpeakInitialize() + +Revision 3 + Added espeakWORDGAP to espeak_PARAMETER + +Revision 4 + Added flags parameter to espeak_CompileDictionary() + +Revision 5 + Added espeakCHARS_16BIT + +Revision 6 + Added macros: espeakRATE_MINIMUM, espeakRATE_MAXIMUM, espeakRATE_NORMAL + +Revision 7 24.Dec.2011 + Changed espeak_EVENT structure to add id.string[] for phoneme mnemonics. + Added espeakINITIALIZE_PHONEME_IPA option for espeak_Initialize() to report phonemes as IPA names. 
+ +Revision 8 26.Apr.2013 + Added function espeak_TextToPhonemes(). + +Revision 9 30.May.2013 + Changed function espeak_TextToPhonemes(). + +Revision 10 29.Aug.2014 + Changed phonememode parameter to espeak_TextToPhonemes() and espeak_SetPhonemeTrace + +Revision 11 (espeak-ng) + Made ESPEAK_API import/export symbols correctly on Windows. + +Revision 12 (espeak-ng) + Exposed espeak_SetPhonemeCallback. This is available in eSpeak, but was not exposed in this header. + +*/ + /********************/ + /* Initialization */ + /********************/ + +// values for 'value' in espeak_SetParameter(espeakRATE, value, 0), nominally in words-per-minute +#define espeakRATE_MINIMUM 80 +#define espeakRATE_MAXIMUM 450 +#define espeakRATE_NORMAL 175 + + +typedef enum { + espeakEVENT_LIST_TERMINATED = 0, // Retrieval mode: terminates the event list. + espeakEVENT_WORD = 1, // Start of word + espeakEVENT_SENTENCE = 2, // Start of sentence + espeakEVENT_MARK = 3, // Mark + espeakEVENT_PLAY = 4, // Audio element + espeakEVENT_END = 5, // End of sentence or clause + espeakEVENT_MSG_TERMINATED = 6, // End of message + espeakEVENT_PHONEME = 7, // Phoneme, if enabled in espeak_Initialize() + espeakEVENT_SAMPLERATE = 8 // internal use, set sample rate +} espeak_EVENT_TYPE; + + + +typedef struct { + espeak_EVENT_TYPE type; + unsigned int unique_identifier; // message identifier (or 0 for key or character) + int text_position; // the number of characters from the start of the text + int length; // word length, in characters (for espeakEVENT_WORD) + int audio_position; // the time in mS within the generated speech output data + int sample; // sample id (internal use) + void* user_data; // pointer supplied by the calling program + union { + int number; // used for WORD and SENTENCE events. + const char *name; // used for MARK and PLAY events. UTF8 string + char string[8]; // used for phoneme names (UTF8). Terminated by a zero byte unless the name needs the full 8 bytes. 
+ } id; +} espeak_EVENT; +/* + When a message is supplied to espeak_Synth, the request is buffered and espeak_Synth returns. When the message is really processed, the callback function will be repeatedly called. + + + In RETRIEVAL mode, the callback function supplies to the calling program the audio data and an event list terminated by 0 (LIST_TERMINATED). + + In PLAYBACK mode, the callback function is called as soon as an event happens. + + For example suppose that the following message is supplied to espeak_Synth: + "hello, hello." + + + * Once processed in RETRIEVAL mode, it could lead to 3 calls of the callback function : + + ** Block 1: + 