Good day. Is there any way to extract images from a PDF file using the jsPDF library or other npm libraries? Are there any solutions that work with React?
I will be grateful for help.
I extracted all the images from a PDF file using the 'pdf-lib' and 'stream-mime-type' libraries. Here is the function itself; I hope it will be useful.
const {PDFDocument} = require('pdf-lib');
const fs = require('fs')
const {getMimeType} = require('stream-mime-type')
const createJpgFile = require('./utils/createJpgFile.js')
const path = require("path");
/**
 * Reads `test.pdf` from `rootPath`, collects every indirect PDF object that
 * carries raw stream `contents`, detects the MIME type of each stream and
 * hands the JPEG ones to `createJpgFile`.
 *
 * Output names are `image-<i>`, where `i` is the position of the stream among
 * all collected streams (not among the JPEGs only), with extension `jpg`.
 *
 * NOTE(review): `rootPath` is not defined in the visible part of the file —
 * it must be provided by the surrounding module.
 * NOTE(review): the original body called `writeJpgFile`, which is neither
 * imported nor defined here; the imported helper is `createJpgFile`, so that
 * one is used. Its `(data, name, extension)` signature is assumed from the
 * original call site — confirm against ./utils/createJpgFile.js.
 *
 * @returns {Promise<void>} Settles once every JPEG stream has been handed to
 *   `createJpgFile` and the returned promises have fulfilled.
 * @throws Rejects if the file cannot be read, the PDF cannot be parsed, MIME
 *   detection fails, or writing an image fails.
 */
async function getImageFromPdf() {
  const existingPdfBytes = await fs.promises.readFile(path.join(rootPath, 'test.pdf'));
  const pdfDoc = await PDFDocument.load(existingPdfBytes);

  // The indirect-object table belongs to the document, so read it from the
  // document directly instead of going through pages[0] (which would throw
  // on a PDF without pages).
  const streams = [];
  pdfDoc.context.indirectObjects.forEach((el) => {
    if (Object.prototype.hasOwnProperty.call(el, 'contents')) {
      streams.push(el.contents);
    }
  });

  // Await getMimeType directly: wrapping it in `new Promise(async ...)` would
  // lose rejections and would never settle when the result is falsy.
  const mimeTypes = await Promise.all(streams.map(async (contents) => {
    const detected = await getMimeType(contents);
    return detected ? detected.mime : undefined;
  }));

  await Promise.all(mimeTypes.map(async (mime, i) => {
    if (mime !== 'image/jpeg') {
      return undefined;
    }
    return createJpgFile(streams[i], `image-${i}`, 'jpg');
  }));
}
Addendum: I also wrote a function that extracts JPEG images from a PDF file without using third-party libraries. The function finds the beginning of each JPEG image by its signature. For other image formats, you need to search for their own signatures.
/**
 * Scans the PDF bytes for JPEG start signatures and passes the data that
 * follows each one to `createFile`.
 *
 * A start is recognised as the byte sequence FF D8 FF followed by one of
 * E0, E1, E2, E3 or E8. Every match yields a copy of the buffer from that
 * offset through the END of the buffer (no end-of-image marker is searched
 * for), which is passed to `createFile` as `test_image_<n>` with extension
 * `jpeg`, where `n` counts matches from 0.
 *
 * `getArrayBufferFromPdf` and `createFile` are expected to be provided by the
 * surrounding module.
 *
 * @returns {Promise<void>} Settles after all `createFile` calls have settled
 *   successfully; rejects if any of them fails.
 */
const parserJpegFromPdf = async () => {
  const pdfBytes = new Uint8Array(await getArrayBufferFromPdf());

  // Fourth signature byte accepted after the FF D8 FF prefix.
  const acceptedFourthBytes = new Set([0xe0, 0xe1, 0xe2, 0xe3, 0xe8]);

  const imageTails = [];
  for (let offset = 0; offset < pdfBytes.length; offset += 1) {
    const startsJpeg =
      pdfBytes[offset] === 0xff &&
      pdfBytes[offset + 1] === 0xd8 &&
      pdfBytes[offset + 2] === 0xff &&
      acceptedFourthBytes.has(pdfBytes[offset + 3]);
    if (startsJpeg) {
      imageTails.push(pdfBytes.slice(offset));
    }
  }

  const pendingWrites = imageTails.map(async (tail, index) => {
    await createFile(tail, `test_image_${index}`, 'jpeg');
  });
  await Promise.all(pendingWrites);
};
The technical post webpages of this site are licensed under CC BY-SA 4.0. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.