diff --git a/doc/whatsnew/fragments/8736.bugfix b/doc/whatsnew/fragments/8736.bugfix new file mode 100644 index 0000000000..d06d04d5ac --- /dev/null +++ b/doc/whatsnew/fragments/8736.bugfix @@ -0,0 +1,5 @@ +When displaying Unicode with surrogates (or other text that raises ``UnicodeEncodeError``), +pylint will now display a '?' character (using ``encode(encoding="utf-8", errors="replace")``) +instead of crashing. The functional test classes are also updated to handle this case. + +Closes #8736. diff --git a/pylint/reporters/base_reporter.py b/pylint/reporters/base_reporter.py index d370b1910e..aa83b4a089 100644 --- a/pylint/reporters/base_reporter.py +++ b/pylint/reporters/base_reporter.py @@ -42,7 +42,14 @@ def handle_message(self, msg: Message) -> None: def writeln(self, string: str = "") -> None: """Write a line in the output buffer.""" - print(string, file=self.out) + try: + print(string, file=self.out) + except UnicodeEncodeError: + print(self.reencode_output_after_unicode_error(string), file=self.out) + + @staticmethod + def reencode_output_after_unicode_error(string: str) -> str: + return string.encode(encoding="utf-8", errors="replace").decode("utf8") def display_reports(self, layout: Section) -> None: """Display results encapsulated in the layout tree.""" diff --git a/pylint/testutils/functional/lint_module_output_update.py b/pylint/testutils/functional/lint_module_output_update.py index 38ed465aad..a9af6bb658 100644 --- a/pylint/testutils/functional/lint_module_output_update.py +++ b/pylint/testutils/functional/lint_module_output_update.py @@ -40,4 +40,4 @@ def _check_output_text( with open(self._test_file.expected_output, "w", encoding="utf-8") as f: writer = csv.writer(f, dialect="test") for line in actual_output: - writer.writerow(line.to_csv()) + self.safe_write_output_line(writer, line) diff --git a/pylint/testutils/lint_module_test.py b/pylint/testutils/lint_module_test.py index 37839c8908..4da29beb77 100644 --- a/pylint/testutils/lint_module_test.py +++ 
b/pylint/testutils/lint_module_test.py @@ -11,7 +11,7 @@ from collections import Counter from io import StringIO from pathlib import Path -from typing import TextIO +from typing import TYPE_CHECKING, TextIO import pytest from _pytest.config import Config @@ -20,6 +20,7 @@ from pylint.config.config_initialization import _config_initialization from pylint.lint import PyLinter from pylint.message.message import Message +from pylint.reporters import BaseReporter from pylint.testutils.constants import _EXPECTED_RE, _OPERATORS, UPDATE_OPTION # need to import from functional.test_file to avoid cyclic import @@ -31,6 +32,8 @@ from pylint.testutils.output_line import OutputLine from pylint.testutils.reporter_for_tests import FunctionalTestReporter +if TYPE_CHECKING: + import _csv MessageCounter = Counter[tuple[int, str]] PYLINTRC = Path(__file__).parent / "testing_pylintrc" @@ -303,10 +306,22 @@ def error_msg_for_unequal_output( expected_csv = StringIO() writer = csv.writer(expected_csv, dialect="test") for line in sorted(received_lines, key=sort_by_line_number): - writer.writerow(line.to_csv()) + self.safe_write_output_line(writer, line) error_msg += expected_csv.getvalue() return error_msg + def safe_write_output_line(self, writer: _csv._writer, line: OutputLine) -> None: + """Write an OutputLine to the CSV writer, handling UnicodeEncodeError.""" + try: + writer.writerow(line.to_csv()) + except UnicodeEncodeError: + writer.writerow( + [ + BaseReporter.reencode_output_after_unicode_error(s) + for s in line.to_csv() + ] + ) + def _check_output_text( self, _: MessageCounter, diff --git a/tests/functional/r/regression_02/regression_8736.py b/tests/functional/r/regression_02/regression_8736.py new file mode 100644 index 0000000000..097d17c679 --- /dev/null +++ b/tests/functional/r/regression_02/regression_8736.py @@ -0,0 +1,3 @@ +"""This does not crash in the functional tests, but it did when called directly.""" + +assert "\U00010000" == "\ud800\udc00" # 
[comparison-of-constants] diff --git a/tests/functional/r/regression_02/regression_8736.txt b/tests/functional/r/regression_02/regression_8736.txt new file mode 100644 index 0000000000..3c22751005 --- /dev/null +++ b/tests/functional/r/regression_02/regression_8736.txt @@ -0,0 +1 @@ +comparison-of-constants:3:7:3:37::"Comparison between constants: '𐀀 == \\ud800\\udc00' has a constant value":HIGH