I have a PDF that contains a table with data.
I want to convert it to XLSX/XLS. I have tried several approaches, e.g. PDF to JSON and then JSON to XLSX/XLS, but I am not getting the result I want: I want the JSON data as key/value pairs.
Code:
// Parse a PDF with pdf2json and dump the parsed data to ./pdf2.json.
const fs = require('fs');
const PDFParser = require("pdf2json");

const pdfParser = new PDFParser();

// Print the parser's error payload when parsing fails.
function reportParseError(errData) {
  console.error(errData.parserError);
}

// Serialize the parsed data as JSON and write it to disk,
// logging any write failure.
function savePdfData(pdfData) {
  const serialized = JSON.stringify(pdfData);
  fs.writeFile("./pdf2.json", serialized, (writeError) => {
    if (!writeError) {
      return;
    }
    console.log(writeError);
  });
}

pdfParser.on("pdfParser_dataError", reportParseError);
pdfParser.on("pdfParser_dataReady", savePdfData);

pdfParser.loadPDF("./Sample Data.pdf");
You can use pdf2table or pdfreader to read the data from the PDF file and then shape that data into the JSON format you need.
Sample code for reference:
// pdf2table: read the PDF into a buffer, parse it, and print the rows.
const pdf2table = require('pdf2table');
const fs = require('fs');

// Receives the parse result; logs either the error or the parsed rows.
const printRows = (parseErr, rows, rowsdebug) => {
  if (parseErr) {
    return console.log(parseErr);
  }
  console.log(rows);
};

fs.readFile('./tests.pdf', (readErr, buffer) => {
  if (readErr) {
    return console.log(readErr);
  }
  pdf2table.parse(buffer, printRows);
});
// pdfreader: walk the items of a PDF and print the text of each text item.
const pdfreader = require('pdfreader');
// Created for the JSON-building step sketched below; the logging code
// itself does not use it.
const table = new pdfreader.TableParser();

new pdfreader.PdfReader().parseFileItems('tests.pdf', (err, item) => {
  if (err) {
    console.log(err);
    return;
  }
  // The callback is invoked one last time without an item once the whole
  // file has been read; reading `item.text` there would throw a TypeError.
  if (!item) {
    return;
  }
  // Items without a `text` property (presumably page/file markers — see
  // the pdfreader docs) would otherwise be printed as "undefined".
  if (item.text === undefined) {
    return;
  }
  console.log(item.text);
  // create json as you want
});
The technical posts on this site are published under the CC BY-SA 4.0 license. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.