File tree Expand file tree Collapse file tree 2 files changed +38
-0
lines changed Expand file tree Collapse file tree 2 files changed +38
-0
lines changed Original file line number Diff line number Diff line change
1
+ """
2
+ Test for issue #4503 in pymupdf:
3
+ Correct recognition of strikeout and underline styles in text spans.
4
+ """
5
+
6
+ import os
7
+ import pymupdf
8
+ from pymupdf import mupdf
9
+
10
+ STRIKEOUT = mupdf .FZ_STEXT_STRIKEOUT
11
+ UNDERLINE = mupdf .FZ_STEXT_UNDERLINE
12
+
13
+
14
def test_4503():
    """
    Verify the styling reported for one specific span of test-4503.pdf.

    The span whose text equals the expected sentence fragment must carry the
    strikeout flag and must not carry the underline flag.
    Previously, the text was broken into multiple spans, with a span break at
    every space, and some parts were not detected as strikeout at all.
    """
    expected = "the right to request the state to review and, if appropriate,"
    here = os.path.dirname(os.path.abspath(__file__))
    pdf_path = os.path.join(here, "resources", "test-4503.pdf")
    document = pymupdf.open(pdf_path)
    first_page = document[0]

    # Both flags are passed to the "dict" extraction; the assertions below
    # read the per-span "char_flags" value from its result.
    extraction_flags = pymupdf.TEXT_ACCURATE_BBOXES | pymupdf.TEXT_COLLECT_STYLES
    page_dict = first_page.get_text("dict", flags=extraction_flags)

    # Collect every span whose text is exactly the expected fragment.
    matches = []
    for block in page_dict["blocks"]:
        for line in block["lines"]:
            for candidate in line["spans"]:
                if candidate["text"] == expected:
                    matches.append(candidate)
    assert matches, "No spans found with the specified text"

    # Only the first matching span is checked.
    char_flags = matches[0]["char_flags"]
    assert char_flags & STRIKEOUT
    assert not char_flags & UNDERLINE
You can’t perform that action at this time.
0 commit comments