Skip to content

Commit dfd8b64

Browse files
committed
Test 4503
Adding test script and test file.
1 parent 0293d50 commit dfd8b64

File tree

2 files changed

+38
-0
lines changed

2 files changed

+38
-0
lines changed

tests/resources/test-4503.pdf

49.5 KB
Binary file not shown.

tests/test_4503.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
"""
2+
Test for issue #4503 in pymupdf:
3+
Correct recognition of strikeout and underline styles in text spans.
4+
"""
5+
6+
import os
7+
import pymupdf
8+
from pymupdf import mupdf
9+
10+
# Flag value from the mupdf bindings; test_4503 ANDs it with a span's
# "char_flags" to check whether the span is marked as struck out.
STRIKEOUT = mupdf.FZ_STEXT_STRIKEOUT
11+
# Flag value from the mupdf bindings; test_4503 ANDs it with a span's
# "char_flags" to check that the span is NOT marked as underlined.
UNDERLINE = mupdf.FZ_STEXT_UNDERLINE
12+
13+
14+
def test_4503():
    """
    Check that the span with the expected text is struck out, not underlined.

    The span is looked up by exact text equality, so the test also fails if
    the sentence fragment is split across several spans.

    Previously, the text was broken into multiple spans with span breaks at
    every space, and some parts were not detected as strikeout at all.
    """
    scriptdir = os.path.dirname(os.path.abspath(__file__))
    text = "the right to request the state to review and, if appropriate,"
    filename = os.path.join(scriptdir, "resources", "test-4503.pdf")
    flags = pymupdf.TEXT_ACCURATE_BBOXES | pymupdf.TEXT_COLLECT_STYLES

    # Use a context manager so the document is closed even when extraction
    # raises; the extracted "dict" result is plain Python data and remains
    # usable after the document has been closed.
    with pymupdf.open(filename) as doc:
        page = doc[0]
        blocks = page.get_text("dict", flags=flags)["blocks"]

    matching_spans = [
        span
        for block in blocks
        for line in block["lines"]
        for span in line["spans"]
        if span["text"] == text
    ]
    assert matching_spans, "No spans found with the specified text"
    first_match = matching_spans[0]

    # Explicit parentheses: the bitwise test is evaluated first, then negated.
    assert first_match["char_flags"] & STRIKEOUT
    assert not (first_match["char_flags"] & UNDERLINE)

0 commit comments

Comments
 (0)