From 0ea890178f375f8c477200e157d5c1d3c131039e Mon Sep 17 00:00:00 2001 From: Parker Jones Date: Wed, 2 Jul 2025 15:57:28 -0400 Subject: [PATCH] Replace most %-strings with f-strings. Fix syntax error, move dict because copilot nitpick. Use sql parameterization. Some loops inserted where repeated logic was used. --- docs/app4.rst | 2 +- docs/document.rst | 6 +- docs/font.rst | 2 +- docs/functions.rst | 2 +- ...ipes-common-issues-and-their-solutions.rst | 4 +- docs/recipes-images.rst | 6 +- docs/recipes-journalling.rst | 4 +- docs/recipes-low-level-interfaces.rst | 4 +- docs/recipes-text.rst | 9 +- docs/samples/filmfestival-sql.py | 5 +- docs/samples/multiprocess-render.py | 6 +- docs/samples/new-annots.py | 5 +- docs/samples/text-lister.py | 14 +- docs/the-basics.rst | 2 +- docs/tutorial.rst | 2 +- src/__init__.py | 242 ++++++++---------- src/__main__.py | 78 +++--- src/utils.py | 155 +++++------ 18 files changed, 246 insertions(+), 302 deletions(-) diff --git a/docs/app4.rst b/docs/app4.rst index 26037fc98..15a89064c 100644 --- a/docs/app4.rst +++ b/docs/app4.rst @@ -374,7 +374,7 @@ These are the :title:`Python` commands for how each tool is used: doc=pymupdf.open(datei) for p in pymupdf.Pages(doc): pix = p.get_pixmap(dpi=150) - pix.save("t-%s.png" % p.number) + pix.save(f"t-{p.number}.png") pix = None doc.close() return diff --git a/docs/document.rst b/docs/document.rst index b8791a61c..76f566ffa 100644 --- a/docs/document.rst +++ b/docs/document.rst @@ -228,7 +228,7 @@ For details on **embedded files** refer to Appendix 3. >>> import pymupdf >>> with pymupdf.open(...) as doc: - for page in doc: print("page %i" % page.number) + for page in doc: print(f"page {page.number}") page 0 page 1 page 2 @@ -2254,10 +2254,10 @@ Other Examples xref = img[0] # xref number pix = pymupdf.Pixmap(doc, xref) # make pixmap from image if pix.n - pix.alpha < 4: # can be saved as PNG - pix.save("p%s-%s.png" % (i, xref)) + pix.save(f"p{i}-{xref}.png") else: # CMYK: must convert first pix0 = pymupdf.Pixmap(pymupdf.csRGB, pix) - pix0.save("p%s-%s.png" % (i, xref)) + pix0.save(f"p{i}-{xref}.png") pix0 = None # free Pixmap resources pix = None # free Pixmap resources diff --git a/docs/font.rst b/docs/font.rst index 875213c17..53580aea2 100644 --- a/docs/font.rst +++ b/docs/font.rst @@ -158,7 +158,7 @@ A ``Font`` object also contains useful general information, like the font bbox, >>> font = pymupdf.Font("math") >>> vuc = font.valid_codepoints() >>> for i in vuc: - print("%04X %s (%s)" % (i, chr(i), font.unicode_to_glyph_name(i))) + >>> print(f"{i:04X} {chr(i)} ({font.unicode_to_glyph_name(i)})") 0000 000D (CR) 0020 (space) diff --git a/docs/functions.rst b/docs/functions.rst index 6f467ba11..03f2d0b47 100644 --- a/docs/functions.rst +++ b/docs/functions.rst @@ -763,7 +763,7 @@ Yet others are handy, general-purpose utilities. try: return sum([widthlist[ord(c)] for c in text]) * fontsize except IndexError: - raise ValueError:("max. code point found: %i, increase limit" % ord(max(text))) + raise ValueError(f"max. code point found: {ord(max(text))}, increase limit") ----- diff --git a/docs/recipes-common-issues-and-their-solutions.rst b/docs/recipes-common-issues-and-their-solutions.rst index 8f8c1a590..5114f9655 100644 --- a/docs/recipes-common-issues-and-their-solutions.rst +++ b/docs/recipes-common-issues-and-their-solutions.rst @@ -86,7 +86,7 @@ It features maintaining any metadata, table of contents and links contained in t raise SystemExit("need PyMuPDF v1.14.0+") fn = sys.argv[1] - print("Converting '%s' to '%s.pdf'" % (fn, fn)) + print(f"Converting '{fn}' to '{fn}.pdf'") doc = pymupdf.open(fn) @@ -123,7 +123,7 @@ It features maintaining any metadata, table of contents and links contained in t pdf.save(fn + ".pdf", garbage=4, deflate=True) # say how many named links we skipped if link_cnti > 0: - print("Skipped %i named links of a total of %i in input." % (link_skip, link_cnti)) + print(f"Skipped {link_skip} named links of a total of {link_cnti} in input.") diff --git a/docs/recipes-images.rst b/docs/recipes-images.rst index 7eaa77a55..f5569bd69 100644 --- a/docs/recipes-images.rst +++ b/docs/recipes-images.rst @@ -24,7 +24,7 @@ The script works as a command line tool which expects the filename being supplie doc = pymupdf.open(fname) # open document for page in doc: # iterate through the pages pix = page.get_pixmap() # render page to an image - pix.save("page-%i.png" % page.number) # store image as a PNG + pix.save(f"page-{page.number}.png") # store image as a PNG The script directory will now contain PNG image files named *page-0.png*, *page-1.png*, etc. Pictures have the dimension of their pages with width and height rounded to integers, e.g. 595 x 842 pixels for an A4 portrait sized page. They will have a resolution of 96 dpi in x and y dimension and have no transparency. You can change all that -- for how to do this, read the next sections. @@ -525,8 +525,8 @@ This script creates an approximate image of it as a PNG, by going down to one-pi t1 = time.perf_counter() pm.save("sierpinski-punch.png") t2 = time.perf_counter() - print ("%g sec to create / fill the pixmap" % round(t1-t0,3)) - print ("%g sec to save the image" % round(t2-t1,3)) + print (f"{round(t1-t0,3)} sec to create / fill the pixmap") + print (f"{round(t2-t1,3)} sec to save the image") The result should look something like this: diff --git a/docs/recipes-journalling.rst b/docs/recipes-journalling.rst index 279df638d..c75b855f1 100644 --- a/docs/recipes-journalling.rst +++ b/docs/recipes-journalling.rst @@ -109,8 +109,8 @@ Description: >>> page=doc.new_page() >>> doc.journal_stop_op() >>> for i in range(5): - doc.journal_start_op("insert-%i" % i) - page.insert_text((100, 100 + 20*i), "text line %i" %i) + doc.journal_start_op(f"insert-{i}") + page.insert_text((100, 100 + 20*i), f"text line {i}") doc.journal_stop_op() >>> # combined status info: diff --git a/docs/recipes-low-level-interfaces.rst b/docs/recipes-low-level-interfaces.rst index 60d63df99..a33fbdd7e 100644 --- a/docs/recipes-low-level-interfaces.rst +++ b/docs/recipes-low-level-interfaces.rst @@ -23,7 +23,7 @@ The following script loops through the :data:`xref` table and prints each object >>> xreflen = doc.xref_length() # length of objects table >>> for xref in range(1, xreflen): # skip item 0! print("") - print("object %i (stream: %s)" % (xref, doc.xref_is_stream(xref))) + print(f"object {xref} (stream: {doc.xref_is_stream(xref)})") print(doc.xref_object(xref, compressed=False)) @@ -324,7 +324,7 @@ There also exist granular, elegant ways to access and manipulate selected PDF :d * Here is a full listing of the above page keys:: In [9]: for key in doc.xref_get_keys(page.xref): - ...: print("%s = %s" % (key, doc.xref_get_key(page.xref, key))) + ...: print(f"{key} = {doc.xref_get_key(page.xref, key)}") ...: Type = ('name', '/Page') Contents = ('xref', '1297 0 R') diff --git a/docs/recipes-text.rst b/docs/recipes-text.rst index 5bc1bcbc4..b11ccdc1b 100644 --- a/docs/recipes-text.rst +++ b/docs/recipes-text.rst @@ -93,12 +93,11 @@ This is the responsibility of the PDF creator (software or a human). For example doc = pymupdf.open("some.pdf") header = "Header" # text in header - footer = "Page %i of %i" # text in footer for page in doc: page.insert_text((50, 50), header) # insert header page.insert_text( # insert footer 50 points above page bottom (50, page.rect.height - 50), - footer % (page.number + 1, doc.page_count), + f"Page {page.number + 1} of {doc.page_count}", # text in footer ) The text sequence extracted from a page modified in this way will look like this: @@ -166,7 +165,7 @@ But you also have other options:: text = sys.argv[2] # search string doc = pymupdf.open(fname) - print("underlining words containing '%s' in document '%s'" % (word, doc.name)) + print(f"underlining words containing '{word}' in document '{doc.name}'") new_doc = False # indicator if anything found at all @@ -174,7 +173,7 @@ But you also have other options:: found = mark_word(page, text) # mark the page's words if found: # if anything found ... new_doc = True - print("found '%s' %i times on page %i" % (text, found, page.number + 1)) + print(f"found '{text}' {found} times on page {page.number + 1}") if new_doc: doc.save("marked-" + doc.name) @@ -329,7 +328,7 @@ Output some text lines on a page:: fontsize = 11, # the default font size rotate = 0, # also available: 90, 180, 270 ) - print("%i lines printed on page %i." % (rc, page.number)) + print(f"{rc} lines printed on page {page.number}.") doc.save("text.pdf") diff --git a/docs/samples/filmfestival-sql.py b/docs/samples/filmfestival-sql.py index ec949471b..3a7dbffac 100644 --- a/docs/samples/filmfestival-sql.py +++ b/docs/samples/filmfestival-sql.py @@ -68,8 +68,7 @@ select_films = """SELECT title, director, year FROM films ORDER BY title""" # select stament for actors, a skeleton: sub-select by film title -select_casts = """SELECT name FROM actors WHERE film = "%s" ORDER BY name""" - +select_casts = """SELECT name FROM actors WHERE film = ? ORDER BY name""" # ------------------------------------------------------------------- # define the HTML Story and fill it with database data # ------------------------------------------------------------------- @@ -89,7 +88,7 @@ film.find(None, "id", "filmyear").add_text(str(year)) # put year # the actors reside in their own table - find the ones for this film title - cursor_casts.execute(select_casts % title) # execute cursor + cursor_casts.execute(select_casts, (title,)) # execute cursor casts = cursor_casts.fetchall() # read actors for the film # each actor name appears in its own tuple, so extract it from there film.find(None, "id", "cast").add_text("\n".join([c[0] for c in casts])) diff --git a/docs/samples/multiprocess-render.py b/docs/samples/multiprocess-render.py index 761ea351c..02a3c890b 100644 --- a/docs/samples/multiprocess-render.py +++ b/docs/samples/multiprocess-render.py @@ -58,7 +58,7 @@ def render_page(vector): pix = page.get_pixmap(alpha=False, matrix=mat) # store away the result somewhere ... # pix.save("p-%i.png" % i) - print("Processed page numbers %i through %i" % (seg_from, seg_to - 1)) + print(f"Processed page numbers {seg_from} through {seg_to - 1}") if __name__ == "__main__": @@ -69,10 +69,10 @@ def render_page(vector): # make vectors of arguments for the processes vectors = [(i, cpu, filename, mat) for i in range(cpu)] - print("Starting %i processes for '%s'." % (cpu, filename)) + print(f"Starting {cpu} processes for '{filename}'.") pool = Pool() # make pool of 'cpu_count()' processes pool.map(render_page, vectors, 1) # start processes passing each a vector t1 = mytime() # stop the timer - print("Total time %g seconds" % round(t1 - t0, 2)) + print(f"Total time {round(t1 - t0, 2):g} seconds") diff --git a/docs/samples/new-annots.py b/docs/samples/new-annots.py index e879a4bfa..bb661e3c7 100644 --- a/docs/samples/new-annots.py +++ b/docs/samples/new-annots.py @@ -45,8 +45,7 @@ def print_descr(annot): """Print a short description to the right of each annot rect.""" annot.parent.insert_text( - annot.rect.br + (10, -5), "%s annotation" % annot.type[1], color=red - ) + annot.rect.br + (10, -5), f"{annot.type[1]} annotation", color=red) doc = pymupdf.open() @@ -166,4 +165,4 @@ def print_descr(annot): annot = page.add_redact_annot(r) print_descr(annot) -doc.save(__file__.replace(".py", "-%i.pdf" % page.rotation), deflate=True) +doc.save(__file__.replace(".py", f"-{page.rotation:d}.pdf"), deflate=True) diff --git a/docs/samples/text-lister.py b/docs/samples/text-lister.py index cc5b651b3..774698595 100644 --- a/docs/samples/text-lister.py +++ b/docs/samples/text-lister.py @@ -32,11 +32,9 @@ def flags_decomposer(flags): for l in b["lines"]: # iterate through the text lines for s in l["spans"]: # iterate through the text spans print("") - font_properties = "Font: '%s' (%s), size %g, color #%06x" % ( - s["font"], # font name - flags_decomposer(s["flags"]), # readable font flags - s["size"], # font size - s["color"], # font color - ) - print("Text: '%s'" % s["text"]) # simple print of text - print(font_properties) + s_font = s['font'] + s_flags = flags_decomposer(s['flags']) + s_size = s['size'] + s_color = s['color'] + print(f"Text: '{s['text']}'") # simple print of text + print(f"Font: '{s_font}' ({s_flags}), size {s_size}, color #{s_color:06x}") diff --git a/docs/the-basics.rst b/docs/the-basics.rst index 9bab2728b..61a386995 100644 --- a/docs/the-basics.rst +++ b/docs/the-basics.rst @@ -109,7 +109,7 @@ To extract all the images from a |PDF| file, do the following: if pix.n - pix.alpha > 3: # CMYK: convert to RGB first pix = pymupdf.Pixmap(pymupdf.csRGB, pix) - pix.save("page_%s-image_%s.png" % (page_index, image_index)) # save the image as png + pix.save(f"page_{page_index}-image_{image_index}.png") # save the image as png pix = None diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 19ddb1968..9b2fa2d05 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -174,7 +174,7 @@ Saving the Page Image in a File ----------------------------------- We can simply store the image in a PNG file:: - pix.save("page-%i.png" % page.number) + pix.save(f"page-{page.number}.png") Displaying the Image in GUIs ------------------------------------------- diff --git a/src/__init__.py b/src/__init__.py index c749e6b8f..7a9736086 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -547,7 +547,7 @@ def __init__(self, annot): def __repr__(self): parent = getattr(self, 'parent', '<>') - return "'%s' annotation on %s" % (self.type[1], str(parent)) + return f"'{self.type[1]}' annotation on {parent}" def __str__(self): return self.__repr__() @@ -1349,7 +1349,7 @@ def set_colors(self, colors=None, stroke=None, fill=None): doc.xref_set_key(self.xref, "C", s) if fill and self.type[0] not in fill_annots: - message("Warning: fill color ignored for annot type '%s'." % self.type[1]) + message(f"Warning: fill color ignored for annot type '{self.type[1]}'.") return if fill in ([], ()): doc.xref_set_key(self.xref, "IC", "[]") @@ -2425,7 +2425,6 @@ def root( self): def set_align(self, align): """Set text alignment via CSS style""" - text = "text-align: %s" if isinstance( align, str): t = align elif align == TEXT_ALIGN_LEFT: @@ -2438,8 +2437,7 @@ def set_align(self, align): t = "justify" else: raise ValueError(f"Unrecognised {align=}") - text = text % t - self.add_style(text) + self.add_style(f"text-align: {t}") return self def set_attribute( self, key, value): @@ -2448,8 +2446,7 @@ def set_attribute( self, key, value): def set_bgcolor(self, color): """Set background color via CSS style""" - text = f"background-color: %s" % self.color_text(color) - self.add_style(text) # does not work on span level + self.add_style(f'background-color: {self.color_text(color)}') # does not work on span level return self def set_bold(self, val=True): @@ -2458,26 +2455,22 @@ def set_bold(self, val=True): val="bold" else: val="normal" - text = "font-weight: %s" % val - self.append_styled_span(text) + self.append_styled_span(f"font-weight: {val}") return self def set_color(self, color): """Set text color via CSS style""" - text = f"color: %s" % self.color_text(color) - self.append_styled_span(text) + self.append_styled_span(f"color: {self.color_text(color)}") return self def set_columns(self, cols): """Set number of text columns via CSS style""" - text = f"columns: {cols}" - self.append_styled_span(text) + self.append_styled_span(f"columns: {cols}") return self def set_font(self, font): """Set font-family name via CSS style""" - text = "font-family: %s" % font - self.append_styled_span(text) + self.append_styled_span(f"font-family: {font}") return self def set_fontsize(self, fontsize): @@ -2505,50 +2498,42 @@ def set_italic(self, val=True): val="italic" else: val="normal" - text = "font-style: %s" % val - self.append_styled_span(text) + self.append_styled_span(f"font-style: {val}") return self def set_leading(self, leading): """Set inter-line spacing value via CSS style - block-level only.""" - text = f"-mupdf-leading: {leading}" - self.add_style(text) + self.add_style(f"-mupdf-leading: {leading}") return self def set_letter_spacing(self, spacing): """Set inter-letter spacing value via CSS style""" - text = f"letter-spacing: {spacing}" - self.append_styled_span(text) + self.append_styled_span(f"letter-spacing: {spacing}") return self def set_lineheight(self, lineheight): """Set line height name via CSS style - block-level only.""" - text = f"line-height: {lineheight}" - self.add_style(text) + self.add_style(f"line-height: {lineheight}") return self def set_margins(self, val): """Set margin values via CSS style""" - text = "margins: %s" % val - self.append_styled_span(text) + self.append_styled_span(f"margins: {val}") return self def set_opacity(self, opacity): """Set opacity via CSS style""" - text = f"opacity: {opacity}" - self.append_styled_span(text) + self.append_styled_span(f"opacity: {opacity}") return self def set_pagebreak_after(self): """Insert a page break after this node.""" - text = "page-break-after: always" - self.add_style(text) + self.add_style("page-break-after: always") return self def set_pagebreak_before(self): """Insert a page break before this node.""" - text = "page-break-before: always" - self.add_style(text) + self.add_style("page-break-before: always") return self def set_properties( @@ -2629,19 +2614,16 @@ def set_properties( def set_text_indent(self, indent): """Set text indentation name via CSS style - block-level only.""" - text = f"text-indent: {indent}" - self.add_style(text) + self.add_style(f"text-indent: {indent}") return self def set_underline(self, val="underline"): - text = "text-decoration: %s" % val - self.append_styled_span(text) + self.append_styled_span(f"text-decoration: {val}") return self def set_word_spacing(self, spacing): """Set inter-word spacing value via CSS style""" - text = f"word-spacing: {spacing}" - self.append_styled_span(text) + self.append_styled_span(f"word-spacing: {spacing}") return self def span_bottom(self): @@ -2699,7 +2681,7 @@ def __init__(self, type_): def __repr__(self): x = ("", "GRAY", "", "RGB", "CMYK")[self.n] - return "Colorspace(CS_%s) - %s" % (x, self.name) + return f"Colorspace(CS_{x}) - {self.name}" def _name(self): return mupdf.fz_colorspace_name(self.this) @@ -3019,12 +3001,12 @@ def __len__(self) -> int: return self.page_count def __repr__(self) -> str: - m = "closed " if self.is_closed else "" + is_closed = "closed " if self.is_closed else "" if self.stream is None: if self.name == "": - return m + "Document()" % self._graft_id - return m + "Document('%s')" % (self.name,) - return m + "Document('%s', )" % (self.name, self._graft_id) + return f"{is_closed}Document()" + return f"{is_closed}Document('{self.name}')" + return f"{is_closed}Document('{self.name}', )" def _addFormFont(self, name, font): """Add new form font.""" @@ -3108,13 +3090,12 @@ def _embeddedFileGet(self, idx): def _embeddedFileIndex(self, item: typing.Union[int, str]) -> int: filenames = self.embfile_names() - msg = "'%s' not in EmbeddedFiles array." % str(item) if item in filenames: idx = filenames.index(item) elif item in range(len(filenames)): idx = item else: - raise ValueError(msg) + raise ValueError(f"'{item}' not in EmbeddedFiles array.") return idx def _embfile_add(self, name, buffer_, filename=None, ufilename=None, desc=None): @@ -3355,7 +3336,7 @@ def _get_char_widths(self, xref: int, bfname: str, ext: str, ordering: int, limi else: buf = JM_get_fontbuffer(pdf, xref) if not buf.m_internal: - raise Exception("font at xref %d is not supported" % xref) + raise Exception(f"font at xref {xref:d} is not supported") font = mupdf.fz_new_font_from_buffer(None, buf, idx, 0) wlist = [] @@ -3605,7 +3586,7 @@ def _set_page_labels(self, labels): xref = self.pdf_catalog() text = self.xref_object(xref, compressed=True) - text = text.replace("/Nums[]", "/Nums[%s]" % labels) + text = text.replace("/Nums[]", f"/Nums[{labels}]") self.update_object(xref, text) def _update_toc_item(self, xref, action=None, title=None, flags=0, collapse=None, color=None): @@ -3953,10 +3934,8 @@ def embfile_add(self, desc: (str) the description. """ filenames = self.embfile_names() - msg = "Name '%s' already exists." % str(name) if name in filenames: - raise ValueError(msg) - + raise ValueError(f"Name '{name}' already exists.") if filename is None: filename = name if ufilename is None: @@ -4582,7 +4561,7 @@ def insert_pdf( outname = os.path.basename(self.name) if not outname: outname = "memory PDF" - message("Inserting '%s' at '%s'" % (inname, outname)) + message(f"Inserting '{inname}' at '{outname}'") # retrieve / make a Graftmap to avoid duplicate objects #log( 'insert_pdf(): Graftmaps') @@ -5708,31 +5687,31 @@ def set_layer(self, config, basestate=None, on=None, off=None, rbgroups=None, lo raise ValueError("bad type: 'on'") s = set(on).difference(ocgs) if s != set(): - raise ValueError("bad OCGs in 'on': %s" % s) + raise ValueError(f"bad OCGs in 'on': {s}") if off: if type(off) not in (list, tuple): raise ValueError("bad type: 'off'") s = set(off).difference(ocgs) if s != set(): - raise ValueError("bad OCGs in 'off': %s" % s) + raise ValueError(f"bad OCGs in 'off': {s}") if locked: if type(locked) not in (list, tuple): raise ValueError("bad type: 'locked'") s = set(locked).difference(ocgs) if s != set(): - raise ValueError("bad OCGs in 'locked': %s" % s) + raise ValueError(f"bad OCGs in 'locked': {s}") if rbgroups: if type(rbgroups) not in (list, tuple): raise ValueError("bad type: 'rbgroups'") for x in rbgroups: if not type(x) in (list, tuple): - raise ValueError("bad RBGroup '%s'" % x) + raise ValueError(f"bad RBGroup '{x}'") s = set(x).difference(ocgs) if s != set(): - raise ValueError("bad OCGs in RBGroup: %s" % s) + raise ValueError(f"bad OCGs in RBGroup: {s}") if basestate: basestate = str(basestate).upper() @@ -5985,14 +5964,14 @@ def xref_get_key(self, xref, key): text = None if mupdf.pdf_is_indirect(subobj): type = "xref" - text = "%i 0 R" % mupdf.pdf_to_num(subobj) + text = f"{mupdf.pdf_to_num(subobj):d} 0 R" elif mupdf.pdf_is_array(subobj): type = "array" elif mupdf.pdf_is_dict(subobj): type = "dict" elif mupdf.pdf_is_int(subobj): type = "int" - text = "%i" % mupdf.pdf_to_int(subobj) + text = f"{mupdf.pdf_to_int(subobj):d}" elif mupdf.pdf_is_real(subobj): type = "float" elif mupdf.pdf_is_null(subobj): @@ -6006,7 +5985,7 @@ def xref_get_key(self, xref, key): text = "false" elif mupdf.pdf_is_name(subobj): type = "name" - text = "/%s" % mupdf.pdf_to_name(subobj) + text = f"/{mupdf.pdf_to_name(subobj)}" elif mupdf.pdf_is_string(subobj): type = "string" text = JM_UnicodeFromStr(mupdf.pdf_to_text_string(subobj)) @@ -6291,7 +6270,7 @@ def __init__( self.this = font def __repr__(self): - return "Font('%s')" % self.name + return f"Font('{self.name}')" @property def ascender(self): @@ -7099,10 +7078,7 @@ def __init__(self): self.xref = 0 # annot value def __repr__(self): - #return "'%s' widget on %s" % (self.field_type_string, str(self.parent)) - # No self.parent. return f'Widget:(field_type={self.field_type_string} script={self.script})' - return "'%s' widget" % (self.field_type_string) def _adjust_font(self): """Ensure text_font is from our list and correctly spelled. @@ -7563,13 +7539,6 @@ def __init__(self, page, document): def __repr__(self): return self.__str__() - CheckParent(self) - x = self.parent.name - if self.parent.stream is not None: - x = "" % (self.parent._graft_id,) - if x == "": - x = "" % self.parent._graft_id - return "page %s of %s" % (self.number, x) def __str__(self): #CheckParent(self) @@ -7578,14 +7547,16 @@ def __str__(self): number = self.this.m_internal.super.number else: number = self.this.m_internal.number - ret = f'page {number}' + if parent: x = self.parent.name if self.parent.stream is not None: - x = "" % (self.parent._graft_id,) + x = "memory" if x == "": - x = "" % self.parent._graft_id - ret += f' of {x}' + x = "new PDF" + ret = f'page {number} of <{x}, doc# {self.parent._graft_id:d}>' + else: + ret = f'page {number}' return ret def _add_caret_annot(self, point): @@ -7930,7 +7901,7 @@ def _addAnnot_FromString(self, linklist): txtpy = linklist[i] text = JM_StrAsChar(txtpy) if not text: - message("skipping bad link / annot item %i.", i) + message(f"skipping bad link / annot item {i:d}.") continue try: annot = mupdf.pdf_add_object( page.doc(), JM_pdf_obj_from_str( page.doc(), text)) @@ -7938,7 +7909,7 @@ def _addAnnot_FromString(self, linklist): mupdf.pdf_array_push( annots, ind_obj) except Exception: if g_exceptions_verbose: exception_info() - message("skipping bad link / annot item %i.\n" % i) + message(f"skipping bad link / annot item {i:d}.\n") def _addWidget(self, field_type, field_name): page = self._pdf_page() @@ -8010,10 +7981,10 @@ def _get_optional_content(self, oc: OptInt) -> OptStr: if oc in props.keys(): return props[oc] i = 0 - mc = "MC%i" % i + mc = f"MC{i:d}" while mc in props.values(): i += 1 - mc = "MC%i" % i + mc = f"MC{i:d}" self._set_resource_property(mc, oc) #log( 'returning {mc=}') return mc @@ -8285,7 +8256,7 @@ def _set_opacity(self, gstate=None, CA=1, ca=1, blendmode=None): tca = int(round(max(ca, 0) * 100)) if tca >= 100: tca = 99 - gstate = "fitzca%02i%02i" % (tCA, tca) + gstate = f"fitzca{tCA:02d}{tca:02d}" if not gstate: return @@ -9394,7 +9365,7 @@ def get_image_bbox(self, name, transform=0): elif imglist == []: raise ValueError('bad image name') else: - raise ValueError("found multiple images named '%s'." % name) + raise ValueError(f"found multiple images named '{name}'.") xref = item[-1] if xref != 0 or transform: try: @@ -9788,7 +9759,7 @@ def set_contents(self, xref): raise ValueError("bad xref") if not doc.xref_is_stream(xref): raise ValueError("xref is no stream") - doc.xref_set_key(self.xref, "Contents", "%i 0 R" % xref) + doc.xref_set_key(self.xref, "Contents", f"{xref:d} 0 R") def set_cropbox(self, rect): """Set the CropBox. Will also change Page.rect.""" @@ -10185,11 +10156,13 @@ def __len__(self): return self.size def __repr__(self): - if not type(self) is Pixmap: return + if not type(self) is Pixmap: + return + colorspace = "none" if self.colorspace: - return "Pixmap(%s, %s, %s)" % (self.colorspace.this.m_internal.name, self.irect, self.alpha) - else: - return "Pixmap(%s, %s, %s)" % ('None', self.irect, self.alpha) + colorspace = self.colorspace.this.m_internal.name + + return f"Pixmap({colorspace}, {self.irect}, {self.alpha})" def _tobytes(self, format_, jpg_quality): ''' @@ -10530,7 +10503,7 @@ def save(self, filename, output=None, jpg_quality=95): if idx is None: raise ValueError(f"Image format {output} not in {tuple(valid_formats.keys())}") if self.alpha and idx in (2, 6, 7): - raise ValueError("'%s' cannot have alpha" % output) + raise ValueError(f"'{output}' cannot have alpha") if self.colorspace and self.colorspace.n > 3 and idx in (1, 2, 4): raise ValueError(f"unsupported colorspace for '{output}'") if idx == 7: @@ -12425,7 +12398,7 @@ def append(self, pos, text, font=None, fontsize=11, language=None, right_to_left if mupdf_cppyy: import cppyy log( f'Unsupported font {cppyy.gbl.mupdf_font_name(font.this.m_internal)=}') - raise ValueError("Unsupported font '%s'." % font.name) + raise ValueError(f"Unsupported font '{font.name}'.") if right_to_left: text = self.clean_rtl(text) text = "".join(reversed(text)) @@ -12575,7 +12548,7 @@ def write_text(self, page, color=None, opacity=-1, overlay=1, morph=None, matrix optcont = page._get_optional_content(oc) if optcont is not None: - bdc = "/OC /%s BDC" % optcont + bdc = f"/OC /{optcont} BDC" emc = "EMC" else: bdc = emc = "" @@ -12605,11 +12578,11 @@ def write_text(self, page, color=None, opacity=-1, overlay=1, morph=None, matrix continue if line == "BT": new_cont_lines.append(line) - new_cont_lines.append("%i Tr" % render_mode) + new_cont_lines.append(f"{render_mode:d} Tr") continue if line.endswith(" gs"): alp = int(line.split()[0][4:]) + max_alp - line = "/Alp%i gs" % alp + line = f"/Alp{alp:d} gs" elif line.endswith(" Tf"): temp = line.split() fsize = float(temp[1]) @@ -12619,7 +12592,7 @@ def write_text(self, page, color=None, opacity=-1, overlay=1, morph=None, matrix w = 1 new_cont_lines.append(_format_g(w) + " w") font = int(temp[0][2:]) + max_font - line = " ".join(["/F%i" % font] + temp[1:]) + line = " ".join([f"/F{font:d}"] + temp[1:]) elif line.endswith(" rg"): new_cont_lines.append(line.replace("rg", "RG")) elif line.endswith(" g"): @@ -14140,9 +14113,9 @@ def make_escape(ch): elif 0xd800 <= ch <= 0xdfff: # orphaned surrogate return "\\ufffd" elif ch <= 0xffff: - return "\\u%04x" % ch + return f"\\u{ch:04x}" else: - return "\\U%08x" % ch + return f"\\U{ch:08x}" def JM_append_rune(buff, ch): @@ -15030,7 +15003,7 @@ def JM_get_annot_by_xref(page, xref): break annot = mupdf.pdf_next_annot( annot) if not found: - raise Exception("xref %d is not an annot of this page" % xref) + raise Exception(f"xref {xref:d} is not an annot of this page") return annot @@ -15054,7 +15027,7 @@ def JM_get_annot_by_name(page, name): break annot = mupdf.pdf_next_annot(annot) if not found: - raise Exception("'%s' is not an annot of this page" % name) + raise Exception(f"'{name}' is not an annot of this page") return annot @@ -17021,8 +16994,8 @@ def JM_set_object_value(obj, key, value): objstr = JM_EscapeStrFromBuffer(res) # replace 'eyecatcher' by desired 'value' - nullval = "/%s(%s)" % ( skey, eyecatcher) - newval = "/%s %s" % (skey, value) + nullval = f"/{skey}({eyecatcher})" + newval = f"/{skey} {value}" newstr = objstr.replace(nullval, newval, 1) # make PDF object from resulting string @@ -17708,18 +17681,18 @@ def getTJstr(text: str, glyphs: typing.Union[list, tuple, None], simple: bool, o if simple: # each char or its glyph is coded as a 2-byte hex if glyphs is None: # not Symbol, not ZapfDingbats: use char code - otxt = "".join(["%02x" % ord(c) if ord(c) < 256 else "b7" for c in text]) + otxt = "".join([f"{ord(c):02x}" if ord(c) < 256 else "b7" for c in text]) else: # Symbol or ZapfDingbats: use glyphs otxt = "".join( - ["%02x" % glyphs[ord(c)][0] if ord(c) < 256 else "b7" for c in text] + [f"{glyphs[ord(c)][0]:02x}" if ord(c) < 256 else "b7" for c in text] ) return "[<" + otxt + ">]" # non-simple fonts: each char or its glyph is coded as 4-byte hex if ordering < 0: # not a CJK font: use the glyphs - otxt = "".join(["%04x" % glyphs[ord(c)][0] for c in text]) + otxt = "".join([f"{glyphs[ord(c)][0]:04x}" for c in text]) else: # CJK: use the char codes - otxt = "".join(["%04x" % ord(c) for c in text]) + otxt = "".join([f"{ord(c):04x}" for c in text]) return "[<" + otxt + ">]" @@ -17757,7 +17730,7 @@ def make_utf16be(s): continue if oc > 127: # beyond ASCII - r += "\\%03o" % oc + r += f"\\{oc:03o}" continue # now the white spaces @@ -17955,7 +17928,7 @@ def get_text_length(text: str, fontname: str ="helv", fontsize: float =11, encod ): return len(text) * fontsize - raise ValueError("Font '%s' is unsupported" % fontname) + raise ValueError(f"Font '{fontname}' is unsupported") def image_profile(img: ByteString) -> dict: @@ -19047,12 +19020,10 @@ def ConversionHeader(i: str, filename: OptStr ="unknown"): """) - xml = textwrap.dedent(""" + xml = textwrap.dedent(f""" - - """ - % filename - ) + + """) xhtml = textwrap.dedent(""" @@ -19069,7 +19040,7 @@ def ConversionHeader(i: str, filename: OptStr ="unknown"): """) text = "" - json = '{"document": "%s", "pages": [\n' % filename + json = f'{{"document": "{filename}", "pages": [\n' if t == "html": r = html elif t == "json": @@ -19941,7 +19912,7 @@ def repair_mono_font(page: "Page", font: "Font") -> None: width = int(round((font.glyph_advance(32) * 1000))) for xref in xrefs: if not TOOLS.set_font_width(doc, xref, width): - log("Cannot set width for '%s' in xref %i" % (font.name, xref)) + log(f"Cannot set width for '{font.name}' in xref {xref:d}") def sRGB_to_pdf(srgb: int) -> tuple: @@ -20368,8 +20339,8 @@ def _le_butt(annot, p1, p2, lr, fill_color): M = R if lr else L top = (M + (0, -d/2.)) * im bot = (M + (0, d/2.)) * im - ap = "\nq\n%s%f %f m\n" % (opacity, top.x, top.y) - ap += "%f %f l\n" % (bot.x, bot.y) + ap = f"\nq\n{opacity}{top.x:f} {top.y:f} m\n" + ap += f"{bot.x:f} {bot.y:f} l\n" ap += _format_g(w) + " w\n" ap += scol + "s\nQ\n" return ap @@ -20401,9 +20372,9 @@ def _le_closedarrow(annot, p1, p2, lr, fill_color): p1 *= im p2 *= im p3 *= im - ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y) - ap += "%f %f l\n" % (p2.x, p2.y) - ap += "%f %f l\n" % (p3.x, p3.y) + ap = f"\nq\n{opacity}{p1.x:f} {p1.y:f} m\n" + ap += f"{p2.x:f} {p2.y:f} l\n" + ap += f"{p3.x:f} {p3.y:f} l\n" ap += _format_g(w) + " w\n" ap += scol + fcol + "b\nQ\n" return ap @@ -20419,13 +20390,13 @@ def _le_diamond(annot, p1, p2, lr, fill_color): r = Rect(M, M) + (-d, -d, d, d) # the square # the square makes line longer by (2*shift - 1)*width p = (r.tl + (r.bl - r.tl) * 0.5) * im - ap = "q\n%s%f %f m\n" % (opacity, p.x, p.y) + ap = f"q\n{opacity}{p.x:f} {p.y:f} m\n" p = (r.tl + (r.tr - r.tl) * 0.5) * im - ap += "%f %f l\n" % (p.x, p.y) + ap += f"{p.x:f} {p.y:f} l\n" p = (r.tr + (r.br - r.tr) * 0.5) * im - ap += "%f %f l\n" % (p.x, p.y) + ap += f"{p.x:f} {p.y:f} l\n" p = (r.br + (r.bl - r.br) * 0.5) * im - ap += "%f %f l\n" % (p.x, p.y) + ap += f"{p.x:f} {p.y:f} l\n" ap += _format_g(w) + " w\n" ap += scol + fcol + "b\nQ\n" return ap @@ -20443,9 +20414,9 @@ def _le_openarrow(annot, p1, p2, lr, fill_color): p1 *= im p2 *= im p3 *= im - ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y) - ap += "%f %f l\n" % (p2.x, p2.y) - ap += "%f %f l\n" % (p3.x, p3.y) + ap = f"\nq\n{opacity}{p1.x:f} {p1.y:f} m\n" + ap += f"{p2.x:f} {p2.y:f} l\n" + ap += f"{p3.x:f} {p3.y:f} l\n" ap += _format_g(w) + " w\n" ap += scol + "S\nQ\n" return ap @@ -20463,9 +20434,9 @@ def _le_rclosedarrow(annot, p1, p2, lr, fill_color): p1 *= im p2 *= im p3 *= im - ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y) - ap += "%f %f l\n" % (p2.x, p2.y) - ap += "%f %f l\n" % (p3.x, p3.y) + ap = f"\nq\n{opacity}{p1.x:f} {p1.y:f} m\n" + ap += f"{p2.x:f} {p2.y:f} l\n" + ap += f"{p3.x:f} {p3.y:f} l\n" ap += _format_g(w) + " w\n" ap += scol + fcol + "b\nQ\n" return ap @@ -20483,9 +20454,9 @@ def _le_ropenarrow(annot, p1, p2, lr, fill_color): p1 *= im p2 *= im p3 *= im - ap = "\nq\n%s%f %f m\n" % (opacity, p1.x, p1.y) - ap += "%f %f l\n" % (p2.x, p2.y) - ap += "%f %f l\n" % (p3.x, p3.y) + ap = f"\nq\n{opacity}{p1.x:f} {p1.y:f} m\n" + ap += f"{p2.x:f} {p2.y:f} l\n" + ap += f"{p3.x:f} {p3.y:f} l\n" ap += _format_g(w) + " w\n" ap += scol + fcol + "S\nQ\n" return ap @@ -20500,8 +20471,8 @@ def _le_slash(annot, p1, p2, lr, fill_color): r = Rect(M.x - rw, M.y - 2 * w, M.x + rw, M.y + 2 * w) top = r.tl * im bot = r.br * im - ap = "\nq\n%s%f %f m\n" % (opacity, top.x, top.y) - ap += "%f %f l\n" % (bot.x, bot.y) + ap = f"\nq\n{opacity}{top.x:f} {top.y:f} m\n" + ap += f"{bot.x:f} {bot.y:f} l\n" ap += _format_g(w) + " w\n" ap += scol + "s\nQ\n" return ap @@ -20517,13 +20488,13 @@ def _le_square(annot, p1, p2, lr, fill_color): r = Rect(M, M) + (-d, -d, d, d) # the square # the square makes line longer by (2*shift - 1)*width p = r.tl * im - ap = "q\n%s%f %f m\n" % (opacity, p.x, p.y) + ap = f"q\n{opacity}{p.x:f} {p.y:f} m\n" p = r.tr * im - ap += "%f %f l\n" % (p.x, p.y) + ap += f"{p.x:f} {p.y:f} l\n" p = r.br * im - ap += "%f %f l\n" % (p.x, p.y) + ap += f"{p.x:f} {p.y:f} l\n" p = r.bl * im - ap += "%f %f l\n" % (p.x, p.y) + ap += f"{p.x:f} {p.y:f} l\n" ap += _format_g(w) + " w\n" ap += scol + fcol + "b\nQ\n" return ap @@ -20533,8 +20504,7 @@ def _oval_string(p1, p2, p3, p4): """Return /AP string defining an oval within a 4-polygon provided as points """ def bezier(p, q, r): - f = "%f %f %f %f %f %f c\n" - return f % (p.x, p.y, q.x, q.y, r.x, r.y) + return f"{p.x:f} {p.y:f} {q.x:f} {q.y:f} {r.x:f} {r.y:f} c\n" kappa = 0.55228474983 # magic number ml = p1 + (p4 - p1) * 0.5 # middle points ... @@ -20550,7 +20520,7 @@ def bezier(p, q, r): ul1 = mu + (p4 - mu) * kappa ul2 = ml + (p4 - ml) * kappa # now draw, starting from middle point of left side - ap = "%f %f m\n" % (ml.x, ml.y) + ap = f"{ml.x:f} {ml.y:f} m\n" ap += bezier(ol1, ol2, mo) ap += bezier(or1, or2, mr) ap += bezier(ur1, ur2, mu) diff --git a/src/__main__.py b/src/__main__.py index 35914d6c7..a35148992 100644 --- a/src/__main__.py +++ b/src/__main__.py @@ -15,7 +15,7 @@ from . import pymupdf def mycenter(x): - return (" %s " % x).center(75, "-") + return (f" {x} ").center(75, "-") def recoverpix(doc, item): @@ -41,7 +41,7 @@ def getimage(pix): - pix2 must consist of 1 byte per pixel """ if not (pix1.irect == pix2.irect and pix1.alpha == pix2.alpha == 0 and pix2.n == 1): - pymupdf.message("Warning: unsupported /SMask %i for %i:" % (s, x)) + pymupdf.message(f"Warning: unsupported /SMask {s} for {x}:") pymupdf.message(pix2) pix2 = None return getimage(pix1) # return the pixmap as is @@ -67,9 +67,12 @@ def open_file(filename, password, show=False, pdf=True): if not rc: sys.exit("authentication unsuccessful") if show is True: - pymupdf.message("authenticated as %s" % "owner" if rc > 2 else "user") + auth_level = "user" + if rc > 2: + auth_level = "owner" + pymupdf.message(f"authenticated as {auth_level}") else: - sys.exit("'%s' requires a password" % doc.name) + sys.exit(f"'{doc.name}' requires a password") return doc @@ -77,7 +80,7 @@ def print_dict(item): """Print a Python dictionary.""" l = max([len(k) for k in item.keys()]) + 1 for k, v in item.items(): - msg = "%s: %s" % (k.rjust(l), v) + msg = f"{k.rjust(l)}: {v}" pymupdf.message(msg) @@ -87,7 +90,7 @@ def print_xref(doc, xref): Simulate the PDF source in "pretty" format. For a stream also print its size. """ - pymupdf.message("%i 0 obj" % xref) + pymupdf.message(f"{xref:d} 0 obj") xref_str = doc.xref_object(xref) pymupdf.message(xref_str) if doc.xref_is_stream(xref): @@ -99,7 +102,7 @@ def print_xref(doc, xref): size = "unknown" except Exception: size = "unknown" - pymupdf.message("stream\n...%s bytes" % size) + pymupdf.message(f"stream\n...{size} bytes") pymupdf.message("endstream") pymupdf.message("endobj") @@ -127,17 +130,17 @@ def get_list(rlist, limit, what="page"): if 1 <= i < limit: out_list.append(int(item)) else: - sys.exit("bad %s specification at item %i" % (what, n)) + sys.exit(f"bad {what} specification at item {n:d}") continue try: # this must be a range now, and all of the following must work: i1, i2 = item.split("-") # will fail if not 2 items produced i1 = int(i1) # will fail on non-integers i2 = int(i2) except Exception: - sys.exit("bad %s range specification at item %i" % (what, n)) + sys.exit(f"bad {what} range specification at item {n:d}") if not (1 <= i1 < limit and 1 <= i2 < limit): - sys.exit("bad %s range specification at item %i" % (what, n)) + sys.exit(f"bad {what} range specification at item {n:d}") if i1 == i2: # just in case: a range of equal numbers out_list.append(i1) @@ -175,13 +178,13 @@ def show(args): n = doc.is_form_pdf if n > 0: s = doc.get_sigflags() - pymupdf.message( - "document contains %i root form fields and is %ssigned" - % (n, "not " if s != 3 else "") - ) + sign_str = "" + if s != 3: + sign_str = "not " + pymupdf.message(f"document contains {n} root form fields and is {sign_str}signed") n = doc.embfile_count() if n > 0: - pymupdf.message("document contains %i embedded files" % n) + pymupdf.message(f"document contains {n:d} embedded files") pymupdf.message() if args.catalog: pymupdf.message(mycenter("PDF catalog")) @@ -204,7 +207,7 @@ def show(args): for pno in pagel: n = pno - 1 xref = doc.page_xref(n) - pymupdf.message("Page %i:" % pno) + pymupdf.message(f"Page {pno:d}:") print_xref(doc, xref) pymupdf.message() if args.trailer: @@ -301,7 +304,7 @@ def embedded_copy(args): sys.exit("nothing to copy") intersect = names & set(doc.embfile_names()) # any equal name already in target? if intersect: - sys.exit("following names already exist in receiving PDF: %s" % str(intersect)) + sys.exit(f"following names already exist in receiving PDF: {str(intersect)}") for item in names: info = src.embfile_info(item) @@ -313,7 +316,7 @@ def embedded_copy(args): ufilename=info["ufilename"], desc=info["desc"], ) - pymupdf.message("copied entry '%s' from '%s'" % (item, src.name)) + pymupdf.message(f"copied entry '{item}' from '{src.name}'") src.close() if args.output and args.output != args.input: doc.save(args.output, garbage=3) @@ -352,7 +355,7 @@ def embedded_get(args): filename = args.output if args.output else d["filename"] with open(filename, "wb") as output: output.write(stream) - pymupdf.message("saved entry '%s' as '%s'" % (args.name, filename)) + pymupdf.message(f"saved entry '{args.name}' as '{filename}'") doc.close() @@ -366,12 +369,12 @@ def embedded_add(args): try: doc.embfile_del(args.name) - sys.exit("entry '%s' already exists" % args.name) + sys.exit(f"entry '{args.name}' already exists") except Exception: pass if not os.path.exists(args.path) or not os.path.isfile(args.path): - sys.exit("no such file '%s'" % args.path) + sys.exit(f"no such file '{args.path}'") with open(args.path, "rb") as f: stream = f.read() filename = args.path @@ -401,7 +404,7 @@ def embedded_upd(args): try: doc.embfile_info(args.name) except Exception: - sys.exit("no such embedded file '%s'" % args.name) + sys.exit(f"no such embedded file '{args.name}'") if ( args.path is not None @@ -446,24 +449,25 @@ def embedded_list(args): names = doc.embfile_names() if args.name is not None: if args.name not in names: - sys.exit("no such embedded file '%s'" % args.name) + sys.exit(f"no such embedded file '{args.name}'") else: pymupdf.message() - pymupdf.message( - "printing 1 of %i embedded file%s:" - % (len(names), "s" if len(names) > 1 else "") - ) + plural = "" + if len(names) > 1: + plural = "s" + pymupdf.message(f"printing 1 of {len(names):d} embedded file{plural}:") pymupdf.message() print_dict(doc.embfile_info(args.name)) pymupdf.message() return if not names: - pymupdf.message("'%s' contains no embedded files" % doc.name) + pymupdf.message(f"'{doc.name}' contains no embedded files") return + if len(names) > 1: - msg = "'%s' contains the following %i embedded files" % (doc.name, len(names)) + msg = f"'{doc.name}' contains the following {len(names):d} embedded files" else: - msg = "'%s' contains the following embedded file" % doc.name + msg = f"'{doc.name}' contains the following embedded file" pymupdf.message(msg) pymupdf.message() for name in names: @@ -492,7 +496,7 @@ def extract_objects(args): else: out_dir = args.output if not (os.path.exists(out_dir) and os.path.isdir(out_dir)): - sys.exit("output directory %s does not exist" % out_dir) + sys.exit(f"output directory {out_dir} does not exist") font_xrefs = set() # already saved fonts image_xrefs = set() # already saved images @@ -523,11 +527,11 @@ def extract_objects(args): if type(pix) is dict: ext = pix["ext"] imgdata = pix["image"] - outname = os.path.join(out_dir, "img-%i.%s" % (xref, ext)) + outname = os.path.join(out_dir, f"img-{xref:d}.{ext}") with open(outname, "wb") as outfile: outfile.write(imgdata) else: - outname = os.path.join(out_dir, "img-%i.png" % xref) + outname = os.path.join(out_dir, f"img-{xref:d}.png") pix2 = ( pix if pix.colorspace.n < 4 @@ -536,9 +540,9 @@ def extract_objects(args): pix2.save(outname) if args.fonts: - pymupdf.message("saved %i fonts to '%s'" % (len(font_xrefs), out_dir)) + pymupdf.message(f"saved {len(font_xrefs):d} fonts to '{out_dir}'") if args.images: - pymupdf.message("saved %i images to '%s'" % (len(image_xrefs), out_dir)) + pymupdf.message(f"saved {len(image_xrefs):d} images to '{out_dir}'") doc.close() @@ -584,7 +588,7 @@ def find_line_index(values: List[int], value: int) -> int: i = bisect.bisect_right(values, value) if i: return values[i - 1] - raise RuntimeError("Line for %g not found in %s" % (value, values)) + raise RuntimeError(f"Line for {value:g} not found in {values}") # -------------------------------------------------------------------- def curate_rows(rows: Set[int], GRID) -> List: @@ -690,7 +694,7 @@ def make_textline(left, slot, minslot, lchars): old_x1 = 0 # end coordinate of last char old_ox = 0 # x-origin of last char if minslot <= pymupdf.EPSILON: - raise RuntimeError("program error: minslot too small = %g" % minslot) + raise RuntimeError(f"program error: minslot too small = {minslot:g}") for c in lchars: # loop over characters char, ox, _, cwidth = c diff --git a/src/utils.py b/src/utils.py index e764a5cd1..a093ec649 100644 --- a/src/utils.py +++ b/src/utils.py @@ -341,7 +341,7 @@ def insert_image( raise ValueError("bad filename") if filename and not os.path.exists(filename): - raise FileNotFoundError("No such file: '%s'" % filename) + raise FileNotFoundError(f"No such file: '{filename}'") elif stream and type(stream) not in (bytes, bytearray, io.BytesIO): raise ValueError("stream must be bytes-like / BytesIO") elif pixmap and type(pixmap) is not pymupdf.Pixmap: @@ -1402,8 +1402,7 @@ def set_metadata(doc: pymupdf.Document, m: dict = None) -> None: valid_keys = set(keymap.keys()) diff_set = set(m.keys()).difference(valid_keys) if diff_set != set(): - msg = "bad dict key(s): %s" % diff_set - raise ValueError(msg) + raise ValueError(f"bad dict key(s): {diff_set}") t, temp = doc.xref_get_key(-1, "Info") if t != "xref": @@ -1417,7 +1416,7 @@ def set_metadata(doc: pymupdf.Document, m: dict = None) -> None: if info_xref == 0: # no prev metadata: get new xref info_xref = doc.get_new_xref() doc.update_object(info_xref, "<<>>") # fill it with empty object - doc.xref_set_key(-1, "Info", "%i 0 R" % info_xref) + doc.xref_set_key(-1, "Info", f"{info_xref:d} 0 R") elif m == {}: # remove existing metadata doc.xref_set_key(-1, "Info", "null") doc.init_doc() @@ -1528,13 +1527,13 @@ def set_toc( t1 = toc[i] t2 = toc[i + 1] if not -1 <= t1[2] <= page_count: - raise ValueError("row %i: page number out of range" % i) + raise ValueError(f"row {i:d}: page number out of range") if (type(t2) not in (list, tuple)) or len(t2) not in (3, 4): - raise ValueError("bad row %i" % (i + 1)) + raise ValueError(f"bad row {i + 1:d}") if (type(t2[0]) is not int) or t2[0] < 1: - raise ValueError("bad hierarchy level in row %i" % (i + 1)) + raise ValueError(f"bad hierarchy level in row {i + 1:d}") if t2[0] > t1[0] + 1: - raise ValueError("bad hierarchy level in row %i" % (i + 1)) + raise ValueError(f"bad hierarchy level in row {i + 1:d}") # no formal errors in toc -------------------------------------------------- # -------------------------------------------------------------------------- @@ -1620,49 +1619,25 @@ def set_toc( # ------------------------------------------------------------------------------ # now create each outline item as a string and insert it in the PDF # ------------------------------------------------------------------------------ + name_key_pairs = {"first": "First", "last": "Last", "next": "Next", "parent": "Parent", "prev": "Prev"} for i, ol in enumerate(olitems): txt = "<<" if ol["count"] != 0: - txt += "/Count %i" % ol["count"] + txt += f"/Count {ol['count']:d}" try: txt += ol["dest"] except Exception: # Verbose in PyMuPDF/tests. if g_exceptions_verbose >= 2: pymupdf.exception_info() pass - try: - if ol["first"] > -1: - txt += "/First %i 0 R" % xref[ol["first"]] - except Exception: - if g_exceptions_verbose >= 2: pymupdf.exception_info() - pass - try: - if ol["last"] > -1: - txt += "/Last %i 0 R" % xref[ol["last"]] - except Exception: - if g_exceptions_verbose >= 2: pymupdf.exception_info() - pass - try: - if ol["next"] > -1: - txt += "/Next %i 0 R" % xref[ol["next"]] - except Exception: - # Verbose in PyMuPDF/tests. - if g_exceptions_verbose >= 2: pymupdf.exception_info() - pass - try: - if ol["parent"] > -1: - txt += "/Parent %i 0 R" % xref[ol["parent"]] - except Exception: - # Verbose in PyMuPDF/tests. - if g_exceptions_verbose >= 2: pymupdf.exception_info() - pass - try: - if ol["prev"] > -1: - txt += "/Prev %i 0 R" % xref[ol["prev"]] - except Exception: - # Verbose in PyMuPDF/tests. - if g_exceptions_verbose >= 2: pymupdf.exception_info() - pass + for name, pdf_key in name_key_pairs.items(): + try: + if ol[name] > -1: + txt += f"/{pdf_key} {xref[ol[name]]:d} 0 R" + except Exception: + if g_exceptions_verbose >= 2: + pymupdf.exception_info() + pass try: txt += "/Title" + ol["title"] except Exception: @@ -1673,7 +1648,7 @@ def set_toc( if ol.get("color") and len(ol["color"]) == 3: txt += f"/C[ {_format_g(tuple(ol['color']))}]" if ol.get("flags", 0) > 0: - txt += "/F %i" % ol["flags"] + txt += f"/F {ol['flags']:d}" if i == 0: # special: this is the outline root txt += "/Type/Outlines" # so add the /Type entry @@ -2212,7 +2187,7 @@ def getLinkText(page: pymupdf.Page, lnk: dict) -> str: break i += 1 # add /NM key to object definition - annot = annot.replace("/Link", "/Link/NM(%s)" % name) + annot = annot.replace("/Link", f"/Link/NM({name})") return annot @@ -3677,7 +3652,7 @@ def insert_text( optcont = self.page._get_optional_content(oc) if optcont is not None: - bdc = "/OC /%s BDC\n" % optcont + bdc = f"/OC /{optcont} BDC\n" emc = "EMC\n" else: bdc = emc = "" @@ -3686,11 +3661,11 @@ def insert_text( if alpha is None: alpha = "" else: - alpha = "/%s gs\n" % alpha + alpha = f"/{alpha} gs\n" nres = templ1(bdc, alpha, cm, left, top, fname, fontsize) if render_mode > 0: - nres += "%i Tr " % render_mode + nres += f"{render_mode:d} Tr " nres += _format_g(border_width * fontsize) + " w " if miter_limit is not None: nres += _format_g(miter_limit) + " M " @@ -3717,7 +3692,7 @@ def insert_text( space -= lheight nlines += 1 - nres += "\nET\n%sQ\n" % emc + nres += f"\nET\n{emc}Q\n" # ========================================================================= # end of text insertion @@ -3785,7 +3760,7 @@ def insert_textbox( optcont = self.page._get_optional_content(oc) if optcont is not None: - bdc = "/OC /%s BDC\n" % optcont + bdc = f"/OC /{optcont} BDC\n" emc = "EMC\n" else: bdc = emc = "" @@ -3795,7 +3770,7 @@ def insert_textbox( if alpha is None: alpha = "" else: - alpha = "/%s gs\n" % alpha + alpha = f"/{alpha} gs\n" if rotate % 90 != 0: raise ValueError("rotate must be multiple of 90") @@ -3993,7 +3968,7 @@ def pixlen(x): more = abs(more) if more < pymupdf.EPSILON: more = 0 # don't bother with epsilons - nres = "\nq\n%s%sBT\n" % (bdc, alpha) + cm # initialize output buffer + nres = f"\nq\n{bdc}{alpha}BT\n{cm}" # initialize output buffer templ = lambda a, b, c, d: f"1 0 0 1 {_format_g((a, b))} Tm /{c} {_format_g(d)} Tf " # center, right, justify: output each line with its own specifics text_t = text.splitlines() # split text in lines again @@ -4033,7 +4008,7 @@ def pixlen(x): nres += templ(left, top, fname, fontsize) if render_mode > 0: - nres += "%i Tr " % render_mode + nres += f"{render_mode:d} Tr " nres += _format_g(border_width * fontsize) + " w " if miter_limit is not None: nres += _format_g(miter_limit) + " M " @@ -4045,9 +4020,9 @@ def pixlen(x): nres += color_str if fill is not None: nres += fill_str - nres += "%sTJ\n" % pymupdf.getTJstr(t, tj_glyphs, simple, ordering) + nres += f"{pymupdf.getTJstr(t, tj_glyphs, simple, ordering)}TJ\n" - nres += "ET\n%sQ\n" % emc + nres += f"ET\n{emc}Q\n" self.text_cont += nres self.updateRect(rect) @@ -4089,25 +4064,25 @@ def finish( optcont = self.page._get_optional_content(oc) if optcont is not None: - self.draw_cont = "/OC /%s BDC\n" % optcont + self.draw_cont + self.draw_cont = f"/OC /{optcont} BDC\n" + self.draw_cont emc = "EMC\n" else: emc = "" alpha = self.page._set_opacity(CA=stroke_opacity, ca=fill_opacity) if alpha is not None: - self.draw_cont = "/%s gs\n" % alpha + self.draw_cont + self.draw_cont = f"/{alpha} gs\n" + self.draw_cont if width != 1 and width != 0: self.draw_cont += _format_g(width) + " w\n" if lineCap != 0: - self.draw_cont = "%i J\n" % lineCap + self.draw_cont + self.draw_cont = f"{lineCap:d} J\n" + self.draw_cont if lineJoin != 0: - self.draw_cont = "%i j\n" % lineJoin + self.draw_cont + self.draw_cont = f"{lineJoin:d} j\n" + self.draw_cont if dashes not in (None, "", "[] 0"): - self.draw_cont = "%s d\n" % dashes + self.draw_cont + self.draw_cont = f"{dashes} d\n" + self.draw_cont if closePath: self.draw_cont += "h\n" @@ -4414,8 +4389,7 @@ def remove_hidden(cont_lines): xref_limit = doc.xref_length() for xref in range(1, xref_limit): if not doc.xref_object(xref): - msg = "bad xref %i - clean PDF before scrubbing" % xref - raise ValueError(msg) + raise ValueError(f"bad xref {xref:d} - clean PDF before scrubbing") if javascript and doc.xref_get_key(xref, "S")[1] == "/JavaScript": # a /JavaScript action object obj = "<>" # replace with a null JavaScript @@ -4639,11 +4613,11 @@ def output_justify(start, line): # ------------------------------------------------------------------------- nlines = len(new_lines) if nlines > max_lines: - msg = "Only fitting %i of %i lines." % (max_lines, nlines) + msg = f"Only fitting {max_lines:d} of {nlines:d} lines." if warn is None: pass elif warn: - pymupdf.message("Warning: " + msg) + pymupdf.message(f"Warning: {msg}") else: raise ValueError(msg) @@ -4691,7 +4665,7 @@ def get_oc(doc: pymupdf.Document, xref: int) -> int: raise ValueError("document close or encrypted") t, name = doc.xref_get_key(xref, "Subtype") if t != "name" or name not in ("/Image", "/Form"): - raise ValueError("bad object type at xref %i" % xref) + raise ValueError(f"bad object type at xref {xref}") t, oc = doc.xref_get_key(xref, "OC") if t != "xref": return 0 @@ -4710,15 +4684,15 @@ def set_oc(doc: pymupdf.Document, xref: int, oc: int) -> None: raise ValueError("document close or encrypted") t, name = doc.xref_get_key(xref, "Subtype") if t != "name" or name not in ("/Image", "/Form"): - raise ValueError("bad object type at xref %i" % xref) + raise ValueError(f"bad object type at xref {xref}") if oc > 0: t, name = doc.xref_get_key(oc, "Type") if t != "name" or name not in ("/OCG", "/OCMD"): - raise ValueError("bad object type at xref %i" % oc) + raise ValueError(f"bad object type at xref {oc:d}") if oc == 0 and "OC" in doc.xref_get_keys(xref): doc.xref_set_key(xref, "OC", "null") return None - doc.xref_set_key(xref, "OC", "%i 0 R" % oc) + doc.xref_set_key(xref, "OC", f"{oc:d} 0 R") return None @@ -4745,19 +4719,19 @@ def set_ocmd( def ve_maker(ve): if type(ve) not in (list, tuple) or len(ve) < 2: - raise ValueError("bad 've' format: %s" % ve) + raise ValueError(f"bad 've' format: {ve}") if ve[0].lower() not in ("and", "or", "not"): - raise ValueError("bad operand: %s" % ve[0]) + raise ValueError(f"bad operand: {ve[0]}") if ve[0].lower() == "not" and len(ve) != 2: - raise ValueError("bad 've' format: %s" % ve) - item = "[/%s" % ve[0].title() + raise ValueError(f"bad 've' format: {ve}") + item = f"[/{ve[0].title()}" for x in ve[1:]: if type(x) is int: if x not in all_ocgs: - raise ValueError("bad OCG %i" % x) - item += " %i 0 R" % x + raise ValueError(f"bad OCG {x}") + item += f" {x} 0 R" else: - item += " %s" % ve_maker(x) + item += f" {ve_maker(x)}" item += "]" return item @@ -4766,9 +4740,9 @@ def ve_maker(ve): if ocgs and type(ocgs) in (list, tuple): # some OCGs are provided s = set(ocgs).difference(all_ocgs) # contains illegal xrefs if s != set(): - msg = "bad OCGs: %s" % s + msg = f"bad OCGs: {s}" raise ValueError(msg) - text += "/OCGs[" + " ".join(map(lambda x: "%i 0 R" % x, ocgs)) + "]" + text += f"/OCGs[{' '.join([f'{x:d} 0 R' for x in ocgs])}]" if policy: policy = str(policy).lower() @@ -4779,11 +4753,11 @@ def ve_maker(ve): "alloff": "AllOff", } if policy not in ("anyon", "allon", "anyoff", "alloff"): - raise ValueError("bad policy: %s" % policy) - text += "/P/%s" % pols[policy] + raise ValueError(f"bad policy: {policy}") + text += f"/P/{pols[policy]}" if ve: - text += "/VE%s" % ve_maker(ve) + text += f"/VE{ve_maker(ve)}" text += ">>" @@ -5081,15 +5055,16 @@ def create_label_str(label): Returns: PDF label rule string wrapped in "<<", ">>". """ - s = "%i<<" % label["startpage"] - if label.get("prefix", "") != "": - s += "/P(%s)" % label["prefix"] - if label.get("style", "") != "": - s += "/S/%s" % label["style"] - if label.get("firstpagenum", 1) > 1: - s += "/St %i" % label["firstpagenum"] - s += ">>" - return s + pref_part = "" + if (val := label.get("prefix", "")) != "": + pref_part = f"/P({val})" + styl_part = "" + if (val := label.get("style", "")) != "": + styl_part = f"/S/{val}" + page_part = "" + if (val := label.get("firstpagenum", 1)) > 1: + page_part = f"/St {val}" + return f"{label['startpage']}<<{pref_part}{styl_part}{page_part}>>" def create_nums(labels): """Return concatenated string of all labels rules. @@ -5466,13 +5441,13 @@ def build_subset(buffer, unc_set, gid_set): gid_set.add(189) unc_list = list(gid_set) for unc in unc_list: - unc_file.write("%i\n" % unc) + unc_file.write(f"{unc:d}\n") else: args.append(f"--unicodes-file={uncfile_path}") unc_set.add(255) unc_list = list(unc_set) for unc in unc_list: - unc_file.write("%04x\n" % unc) + unc_file.write(f"{unc:04x}\n") # store fontbuffer as a file with open(oldfont_path, "wb") as fontfile: