[英]How to sort bookmarks in PyPDF2
我的问题类似于使用 PyPdf2 更改 pdf 书签的顺序,除了我需要对目标 PDF 中的书签进行排序。
以下代码“有效”,因为它创建了一个带有排序书签的新 PDF,但它们的目的地不可点击,因为当我在 Adobe Acrobat Reader 中查看它们的属性时,它们的操作为空。
import PyPDF2
from PyPDF2 import PdfReader, PdfWriter
# Open the source PDF and create an empty writer for the copy.
reader = PdfReader("/Users/ME/Documents/in.pdf")
writer = PdfWriter()
outlines = reader.outlines
# Index the outline entries that are Destination objects by their title;
# entries of any other type are skipped by the isinstance() check.
result = {}
for o in outlines:
    if isinstance(o, PyPDF2.generic.Destination): # no sub-bookmarks
        result.update({o['/Title']: o})
# NOTE(review): sorted() returns a new list and that return value is not
# stored anywhere, so `result` keeps its original insertion order.
sorted(result.items(), key=lambda item: item[0])
# Copy every page of the input into the writer.
for pageNum in range(reader.numPages):
    writer.addPage(reader.getPage(pageNum))
newPath = '/Users/ME/Documents/out.pdf'
resultPdf = open(newPath, 'wb')
# Re-add each collected bookmark to the writer, then write the new file.
for k,v in result.items():
    writer.add_bookmark_dict(v)
writer.write(resultPdf)
resultPdf.close()
如何调整上面的代码以使书签可点击?
我自己通过两种方式解决了这个问题:
两者都具有生成的 PDF 保留原始的缩放设置的优点,以及生成的 PDF 删除了任何重复命名的书签的缺点。 要安装它们,只需将代码复制并粘贴到计算机上的 freesort.py 中,然后打开命令行(即 shell)并运行python3 freesort.py
或执行chmod +x freesort.py
(使该文件成为可执行文件),然后运行 ./freesort.py ...
pikepdf:
#!/usr/bin/env python3
"""
freesort.py 2022-07-08 Sean W
Purpose: sort top-level bookmarks only (i.e., leave children alone)
Usage: freesort.py /input/path/foo.pdf /output/path/foo.pdf
Prereqs: pip3 install pikepdf
"""
from pikepdf import Pdf, OutlineItem
from re import compile, split
import sys
# Both paths are mandatory; print the usage text and stop if either is missing.
try:
    input_file, output_file = sys.argv[1], sys.argv[2]
except Exception as e:
    print(f"Error: {e}. Please check your paths.\nUsage: freesort.py /input/path/foo.pdf /output/path/foo.pdf")
    sys.exit(1)

# Splits a title into alternating text / digit-run pieces.
# Natural-sort approach from https://stackoverflow.com/a/37036428/1231693
dre = compile(r'(\d+)')


def natural_key(entry):
    """Build the sort key for one (title, outline item) pair.

    Digit runs in the title compare as integers and everything else
    compares case-insensitively, so "Chapter 2" sorts before "chapter 10".
    """
    return [int(piece) if piece.isdigit() else piece.lower()
            for piece in dre.split(entry[0])]


pdf = Pdf.open(input_file, allow_overwriting_input=True)
bookmarks_unsorted = {}
bookmarks = {}
with pdf.open_outline() as outline:
    # extract: key the top-level items by title (a repeated title keeps
    # only the last item carrying it)
    for item in outline.root:
        bookmarks_unsorted[item.title] = item
    outline.root.clear()

    # sort (top level only; each item is moved as a whole)
    bookmarks = dict(sorted(bookmarks_unsorted.items(), key=natural_key))

    # create: put the items back in their new order
    outline.root.extend(bookmarks.values())

pdf.save(output_file)
PyPDF2
#!/usr/bin/env python3
"""
freesort.py 2022-06-21 Sean W
Purpose: sort top-level bookmarks only (i.e., leave children alone)
Usage: freesort.py /input/path/foo.pdf /output/path/foo.pdf
Prereqs: pip3 install PyPDF2
"""
import PyPDF2
from PyPDF2 import PdfReader, PdfWriter
import sys
# Both paths are mandatory; a missing one surfaces as IndexError on sys.argv.
try:
    input_file = sys.argv[1]
    output_file = sys.argv[2]
except IndexError as e:
    print(f"Error: {e}. Please check your paths.\nUsage: freesort.py /input/path/foo.pdf /output/path/foo.pdf")
    sys.exit(1)

reader = PdfReader(input_file)
writer = PdfWriter()

parents_before = {}  # before sorting: bookmark title -> position in `outlines`
parents_after = {}   # after sorting: position in `outlines` -> new bookmark reference

# Record where each top-level Destination sits in the outline list.
# enumerate() gives the position directly; list.index() would rescan the list
# for every entry and return the first *equal* entry rather than this one.
# Keying by title means a repeated title keeps only its last position.
outlines = reader.outlines
for position, entry in enumerate(outlines):
    if isinstance(entry, PyPDF2.generic.Destination):
        parents_before[entry['/Title']] = position
parents_before = dict(sorted(parents_before.items()))

# copy content (this includes annotations)
for page in reader.pages:
    writer.add_page(page)
def add_item(outline_item, parent = None):
    """Re-create one bookmark of the input PDF in the module-level ``writer``.

    The new bookmark keeps the title, target page and fit type of
    ``outline_item`` and is written bold and black.

    Args:
        outline_item: Bookmark read from ``reader``; must provide ``/Title``
            and ``/Type`` plus the coordinate keys of its fit type.
        parent: Reference of the bookmark to nest under, or None for a
            top-level bookmark.

    Returns:
        Whatever ``writer.add_bookmark`` returns for the new bookmark, for
        use as ``parent`` in later calls.
    """
    fit = outline_item['/Type']
    # Each fit type stores a different set of coordinates. '/Fit' and '/FitB'
    # store none, so reading '/Top' unconditionally fails for them.
    coordinate_keys = {
        '/XYZ': ('/Left', '/Top', '/Zoom'),
        '/FitH': ('/Top',),
        '/FitBH': ('/Top',),
        '/FitV': ('/Left',),
        '/FitBV': ('/Left',),
        '/FitR': ('/Left', '/Bottom', '/Right', '/Top'),
    }
    zoom = [outline_item[key] for key in coordinate_keys.get(fit, ())]
    return writer.add_bookmark(str(outline_item["/Title"]),
                               reader.get_destination_page_number(outline_item),  # page num
                               parent,     # parent
                               (0, 0, 0),  # color
                               True,       # bold
                               False,      # italic
                               fit,
                               *zoom)
def _add_children(children, parent_ref):
    """Add every bookmark in ``children`` under ``parent_ref``.

    A list found among the children holds the sub-bookmarks of the bookmark
    just before it, so it is added recursively under that bookmark.
    """
    latest_ref = parent_ref
    for child in children:
        if isinstance(child, list):
            _add_children(child, latest_ref)
        else:
            latest_ref = add_item(child, parent_ref)


# create parents first, in sorted-title order
for position in parents_before.values():
    parents_after[position] = add_item(outlines[position])

# now children: a list entry belongs to the entry directly before it
for position, entry in enumerate(outlines):
    if not isinstance(entry, list):
        continue
    parent_ref = parents_after.get(position - 1)
    if parent_ref is None:
        # No parent was created for this list (for example, the parent's
        # title was a duplicate), so report it instead of crashing.
        print(f"Warning: skipping child bookmarks at outline position {position}: parent bookmark was not created.",
              file=sys.stderr)
        continue
    _add_children(entry, parent_ref)

# save; the with-block closes the file even if writing fails
with open(output_file, 'wb') as result_pdf:
    writer.write(result_pdf)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.