# Patch summary (leading comment lines; not part of any hunk):
#
# Adds an optional `ignore` argument (default u'') to the transliteration
# entry points in unidecode/__init__.py and a regression test for it.
#
#   * unidecode_expect_ascii(string, ignore=u''): forwards `ignore` to
#     _unidecode() only on the UnicodeEncodeError fallback path; a string
#     that encodes cleanly as ASCII is still returned without consulting it.
#   * unidecode_expect_nonascii(string, ignore=u''): always forwards `ignore`
#     to _unidecode().
#   * _unidecode(string, ignore=u''): for each character, `char in ignore` is
#     tested before the `codepoint < 0x80` branch; a matching character is
#     appended to the result unchanged and the loop continues.
#   * tests/test_unidecode.py gains test_ignore, which checks that
#     unidecode(u'æøåÆØÅ', ignore=u'æøå') returns u'æøåAEOA', that the result
#     type is str on Python 3 (unicode otherwise), and that the WarningLogger
#     recorded no warnings.
#
# NOTE(review): the patch below is stored on a single physical line, i.e. its
# newlines and indentation appear to have been collapsed into single spaces.
# The hunk headers (e.g. "-68,6 +68,24", "-61,16 +61,20") describe multi-line
# hunks, so the original line breaks and leading whitespace must be restored
# from the source commit before `git apply` / `patch` can consume it. The
# exact whitespace of the context lines cannot be recovered from this text
# alone -- TODO regenerate the diff rather than hand-reformatting it.
diff --git a/tests/test_unidecode.py b/tests/test_unidecode.py index 100cfca..b09b987 100644 --- a/tests/test_unidecode.py +++ b/tests/test_unidecode.py @@ -68,6 +68,24 @@ def test_ascii(self): wlog.stop() + + def test_ignore(self): + wlog = WarningLogger() + wlog.start("should be ignored") + + r = self.unidecode(u'æøåÆØÅ', ignore=u'æøå') + self.assertEqual(r, u'æøåAEOA') + + if sys.version_info[0] >= 3: + self.assertEqual(type(r), str) + else: + self.assertEqual(type(r), unicode) + + # unicode objects shouldn't raise warnings + self.assertEqual(0, len(wlog.log)) + + wlog.stop() + def test_bmp(self): for n in range(0,0x10000): # skip over surrogate pairs, which throw a warning diff --git a/unidecode/__init__.py b/unidecode/__init__.py index 5d968fd..0278a30 100644 --- a/unidecode/__init__.py +++ b/unidecode/__init__.py @@ -28,7 +28,7 @@ def _warn_if_not_unicode(string): RuntimeWarning, 2) -def unidecode_expect_ascii(string): +def unidecode_expect_ascii(string, ignore=u''): """Transliterate an Unicode object into an ASCII string >>> unidecode(u"\u5317\u4EB0") @@ -47,13 +47,13 @@ def unidecode_expect_ascii(string): try: bytestring = string.encode('ASCII') except UnicodeEncodeError: - return _unidecode(string) + return _unidecode(string, ignore) if version_info[0] >= 3: return string else: return bytestring -def unidecode_expect_nonascii(string): +def unidecode_expect_nonascii(string, ignore=u''): """Transliterate an Unicode object into an ASCII string >>> unidecode(u"\u5317\u4EB0") @@ -61,16 +61,20 @@ def unidecode_expect_nonascii(string): """ _warn_if_not_unicode(string) - return _unidecode(string) + return _unidecode(string, ignore) unidecode = unidecode_expect_ascii -def _unidecode(string): +def _unidecode(string, ignore=u''): retval = [] for char in string: codepoint = ord(char) + if char in ignore: + retval.append(char) + continue + if codepoint < 0x80: # Basic ASCII retval.append(str(char)) continue