From 2ad0392c93dd7e51f01b57938a2e1594e6d823d4 Mon Sep 17 00:00:00 2001 From: KD Bauer Date: Sun, 24 Aug 2025 13:27:41 +0200 Subject: [PATCH] Fix parse_header failing for unicode OFX-2.x files without linebreaks. --- ofxtools/header.py | 3 ++- tests/test_header.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/ofxtools/header.py b/ofxtools/header.py index 166713f..38a7127 100644 --- a/ofxtools/header.py +++ b/ofxtools/header.py @@ -273,7 +273,8 @@ def parse_header(source: BinaryIO) -> Tuple[OFXHeaderType, str]: # OFX header is read by nice clean machines, not meatbags - # should not contain 💩, 漢字, or what have you. - line = source.readline().decode("ascii") + # However, the first line may contain the XML body, which CAN contain non-ascii. + line = source.readline().decode("ascii", "replace") if line.strip(): found_header = True break diff --git a/tests/test_header.py b/tests/test_header.py index a1dfce6..3de6f1a 100644 --- a/tests/test_header.py +++ b/tests/test_header.py @@ -474,6 +474,16 @@ def testParseHeaderV2NoNewlineBetweenHeaderAndBody(self): self.assertIsNone(root.text) self.assertEqual(len(root), 1) + def testParseHeaderV2NoNewlineBetweenHeaderAndBodyWithUnicode(self): + """OFXv2 may contain non-ascii characters in the first line, + if the header is not separated from the body by newlines.""" + ofxtools.header.parse_header(BytesIO( + b'' + b'' + b'Dummy unicode data: A:\xc3\x83\xe2\x80\x9e\xc3\x83\xc2\xa4, ' + b'O:\xc3\x83\xe2\x80\x93\xc3\x83\xc2\xb6, U:\xc3\x83\xc5\x93\xc3\x83\xc2\xbc, and SZ:\xc3\x83\xc5\xb8' + )) + def testParseInvalid(self): header = str(self.headerClass(self.defaultVersion)) with self.assertRaises(ofxtools.header.OFXHeaderError):