![](/img/trans.png)
[英]How to extract images from a pdf using the poppler library in Python?
[英]How to extract images from a PDF in pure Python?
import minecart
import os
from NumberOfPages import getPageNumber
def extractImages(filename):
    """Extract the images embedded in a PDF and save each one as a JPEG.

    The output directory is ``filename`` with its last four characters
    removed (this assumes a four-character suffix such as ``.pdf``).
    Images are written to ``<dir>/images/image_<n>.jpg``, numbered from 0
    in the order they were successfully saved, and a summary line with the
    total is appended to ``<dir>/images/info.txt``. A ``<dir>/text``
    directory is created as well; this function writes nothing into it.

    An image that cannot be converted or saved is reported on stdout and
    skipped; it neither consumes a number nor counts towards the total.

    Args:
        filename: Path to the PDF file to read.

    Returns:
        None.

    Raises:
        OSError: If the PDF cannot be opened or the output directories or
            summary file cannot be created.
    """
    new_dir_name = filename[:-4]
    images_dir = new_dir_name + '/images'

    # exist_ok=True completes a partially created tree (e.g. the top-level
    # directory exists but 'images' does not) instead of failing later.
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(new_dir_name + '/text', exist_ok=True)

    count = 0
    # The context manager guarantees the PDF handle is closed even if
    # parsing raises part-way through the document.
    with open(filename, 'rb') as pdf_file:
        doc = minecart.Document(pdf_file)
        for page in doc.iter_pages():
            for image in page.images:
                name = images_dir + '/image_' + str(count) + '.jpg'
                try:
                    image.as_pil().save(name)  # requires pillow
                except Exception:
                    # Best effort: report and move on to the next image.
                    print('Error encountered at %s' % filename)
                    continue
                # Only count images that actually reached the disk, so the
                # numbering has no gaps and the summary is accurate.
                count += 1

    doc_name = images_dir + '/info.txt'
    # Append mode keeps earlier runs; the trailing newline keeps each run's
    # summary on its own line.
    with open(doc_name, 'a') as x:
        x.write('Number of images in document: {}\n'.format(count))
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.