Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions doc/whatsnew/fragments/8736.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
When displaying unicode with surrogates (or other text that raises a ``UnicodeEncodeError``),
pylint will now display a '?' character (using ``encode(encoding="utf-8", errors="replace")``)
instead of crashing. The functional test classes are also updated to handle this case.

Closes #8736.
9 changes: 8 additions & 1 deletion pylint/reporters/base_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,14 @@ def handle_message(self, msg: Message) -> None:

def writeln(self, string: str = "") -> None:
    """Write a line in the output buffer.

    The line is first written as-is.  If the output stream cannot encode it
    (``UnicodeEncodeError``, e.g. lone surrogates or a character outside the
    stream's codec), the offending characters are replaced by ``?`` and the
    line is written again instead of letting the error propagate.

    :param string: The text to write; a newline is appended by ``print``.
    """
    try:
        print(string, file=self.out)
    except UnicodeEncodeError:
        printable = self.reencode_output_after_unicode_error(string)
        # The helper only removes what UTF-8 itself cannot encode (surrogates).
        # When the stream advertises its own codec, round-trip through it as
        # well so characters that codec lacks are also replaced; otherwise the
        # retry below would raise the very same error.
        stream_encoding = getattr(self.out, "encoding", None)
        if stream_encoding:
            printable = printable.encode(stream_encoding, errors="replace").decode(
                stream_encoding
            )
        print(printable, file=self.out)

@staticmethod
def reencode_output_after_unicode_error(string: str) -> str:
    """Return ``string`` with everything UTF-8 cannot encode replaced by ``?``.

    The text is encoded to UTF-8 with the ``replace`` error handler (which
    substitutes ``?`` for unencodable code points such as lone surrogates)
    and then decoded back to ``str``.

    :param string: The text that failed to be written.
    :returns: A copy of ``string`` that is safe to encode as UTF-8.
    """
    utf8_payload = string.encode(encoding="utf-8", errors="replace")
    return utf8_payload.decode("utf8")

def display_reports(self, layout: Section) -> None:
"""Display results encapsulated in the layout tree."""
Expand Down
2 changes: 1 addition & 1 deletion pylint/testutils/functional/lint_module_output_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,4 @@ def _check_output_text(
with open(self._test_file.expected_output, "w", encoding="utf-8") as f:
writer = csv.writer(f, dialect="test")
for line in actual_output:
writer.writerow(line.to_csv())
self.safe_write_output_line(writer, line)
19 changes: 17 additions & 2 deletions pylint/testutils/lint_module_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from collections import Counter
from io import StringIO
from pathlib import Path
from typing import TextIO
from typing import TYPE_CHECKING, TextIO

import pytest
from _pytest.config import Config
Expand All @@ -20,6 +20,7 @@
from pylint.config.config_initialization import _config_initialization
from pylint.lint import PyLinter
from pylint.message.message import Message
from pylint.reporters import BaseReporter
from pylint.testutils.constants import _EXPECTED_RE, _OPERATORS, UPDATE_OPTION

# need to import from functional.test_file to avoid cyclic import
Expand All @@ -31,6 +32,8 @@
from pylint.testutils.output_line import OutputLine
from pylint.testutils.reporter_for_tests import FunctionalTestReporter

if TYPE_CHECKING:
import _csv
MessageCounter = Counter[tuple[int, str]]

PYLINTRC = Path(__file__).parent / "testing_pylintrc"
Expand Down Expand Up @@ -303,10 +306,22 @@ def error_msg_for_unequal_output(
expected_csv = StringIO()
writer = csv.writer(expected_csv, dialect="test")
for line in sorted(received_lines, key=sort_by_line_number):
writer.writerow(line.to_csv())
self.safe_write_output_line(writer, line)
error_msg += expected_csv.getvalue()
return error_msg

def safe_write_output_line(self, writer: _csv._writer, line: OutputLine) -> None:
    """Write ``line`` as one CSV row, surviving a ``UnicodeEncodeError``.

    The row returned by ``line.to_csv()`` is written unchanged when possible.
    If the writer raises ``UnicodeEncodeError``, every field is passed through
    ``BaseReporter.reencode_output_after_unicode_error`` and the row is
    written again.

    :param writer: The CSV writer receiving the row.
    :param line: The output line to serialise.
    """
    try:
        writer.writerow(line.to_csv())
    except UnicodeEncodeError:
        sanitize = BaseReporter.reencode_output_after_unicode_error
        cleaned_row = []
        for field in line.to_csv():
            cleaned_row.append(sanitize(field))
        writer.writerow(cleaned_row)

def _check_output_text(
self,
_: MessageCounter,
Expand Down
3 changes: 3 additions & 0 deletions tests/functional/r/regression_02/regression_8736.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"""This does not crash in the functional tests, but it did when called directly."""

assert "\U00010000" == "\ud800\udc00" # [comparison-of-constants]
1 change: 1 addition & 0 deletions tests/functional/r/regression_02/regression_8736.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
comparison-of-constants:3:7:3:37::"Comparison between constants: '𐀀 == \\ud800\\udc00' has a constant value":HIGH
Loading