简体   繁体   中英

Turn a PDF into an array of PNGs using JavaScript (with pdf.js)

I'm trying to develop front-end code that asks the user to provide a PDF and then, internally (in the user's browser), produces an array of PNGs (via toDataURL) where each entry in the array corresponds to a page in the PDF:

dat[0] = png of page 1
dat[1] = png of page 2
...

When I test the code below, the pages are somehow rendered on top of each other and rotated.

<script src="http://cdnjs.cloudflare.com/ajax/libs/processing.js/1.4.1/processing-api.min.js"></script><html>
<!--
  Created using jsbin.com
  Source can be edited via http://jsbin.com/pdfjs-helloworld-v2/8598/edit
-->
<body>
  <canvas id="the-canvas" style="border:1px solid black"></canvas>
  <input id='pdf' type='file'/>

  <!-- Use latest PDF.js build from Github -->

  <script src="https://ajax.googleapis.com/ajax/libs/jquery/2.2.4/jquery.min.js"></script>
  <script src="pdf.js"></script>
  <script src="pdf.worker.js"></script>
  <script type="text/javascript">
    //
    // Read a user-selected PDF file and collect one PNG data URL per page.
    //
    // `dat` receives a PNG data URL each time a page render completes, so its
    // order follows render completion, not necessarily page order.
    //
    dat = [];
    

    var pdf = document.getElementById('pdf');
    // Runs whenever the file input's selection changes.
    pdf.onchange = function(ev) {
      // Note: `file` and `fileReader` are assigned without a declaration, so
      // they become implicit globals. The assignment inside the `if` is
      // intentional: the branch is taken only when a file was selected.
      if (file = document.getElementById('pdf').files[0]) {
        fileReader = new FileReader();
        // Once the file has been read, hand its contents to pdf.js.
        fileReader.onload = function(ev) {
          //console.log(ev);
          PDFJS.getDocument(fileReader.result).then(function getPdfHelloWorld(pdf) {
            //
            // Request every page of the document (pages are numbered from 1).
            // `number_of_pages` and `i` are also implicit globals.
            //
            number_of_pages = pdf.numPages;

            // All getPage requests are issued immediately; the callbacks run
            // later, as each page becomes available.
            for(i = 1; i < number_of_pages+1; ++i) {
              pdf.getPage(i).then(function getPageHelloWorld(page) {

              var scale = 1;
              var viewport = page.getViewport(scale);

              //
              // Prepare canvas using PDF page dimensions
              //
              // NOTE(review): every page callback looks up the same
              // '#the-canvas' element, so all pages resize and draw into one
              // shared canvas. This is presumably why the captured images
              // contain several pages on top of each other.
              var canvas = document.getElementById('the-canvas');
              var context = canvas.getContext('2d');
              canvas.height = viewport.height;
              canvas.width = viewport.width;

              //
              // Render PDF page into canvas context
              //
              var renderContext = {
                canvasContext: context,
                viewport: viewport};
              // Capture the canvas contents as a PNG data URL once this
              // page's render has finished.
              page.render(renderContext).then(function() {
                dat.push(canvas.toDataURL('image/png'));
              });
              });
            }
            //console.log(pdf.numPages);
            //console.log(pdf)

          }, function(error){
            // Loading/parsing the document failed.
            console.log(error);
          });
        };
        // Start reading; the result is delivered to the onload handler above.
        fileReader.readAsArrayBuffer(file);
      }
    }

  </script>


<style id="jsbin-css">

</style>
<script>

</script>
</body>
</html>

I'm only interested in the array dat. When I render the images in the array, I see that dat[0] = PNG of page 1 (correct)
dat[1] = PNG of page 1 and PNG of page 2, rotated 180°, on top of each other
...

How do I ensure a correct rendering of single pages in each entry of the array?

Try rendering the pages on a different canvas. You can create a canvas and append it to the container using

// Create a fresh canvas and attach it to the container element.
// appendChild returns the node it appended, so `canvas` is the new element.
var canvasdiv = document.getElementById('canvas');
var canvas = canvasdiv.appendChild(document.createElement('canvas'));

 // Load a PDF with PDF.js, render each page onto its own canvas, and collect
 // one PNG data URL per page.
 var url = 'https://file-examples-com.github.io/uploads/2017/10/file-sample_150kB.pdf';
 var PDFJS = window['pdfjs-dist/build/pdf'];
 PDFJS.GlobalWorkerOptions.workerSrc = '//mozilla.github.io/pdf.js/build/pdf.worker.js';

 var loadingTask = PDFJS.getDocument(url);
 loadingTask.promise.then(function(pdf) {
   var canvasdiv = document.getElementById('canvas');
   var totalPages = pdf.numPages;
   // Pre-sized so that data[0] is page 1, data[1] is page 2, and so on,
   // regardless of the order in which the renders finish.
   var data = new Array(totalPages);
   var loadedPages = 0;

   for (let pageNumber = 1; pageNumber <= totalPages; pageNumber++) {
     pdf.getPage(pageNumber).then(function(page) {
       var scale = 1.5;
       var viewport = page.getViewport({ scale: scale });

       // A separate canvas per page, so concurrent renders cannot draw
       // over one another.
       var canvas = document.createElement('canvas');
       canvasdiv.appendChild(canvas);

       // Prepare canvas using PDF page dimensions
       var context = canvas.getContext('2d');
       canvas.height = viewport.height;
       canvas.width = viewport.width;

       // Render PDF page into canvas context
       var renderContext = {
         canvasContext: context,
         viewport: viewport
       };
       var renderTask = page.render(renderContext);
       // Returned so a render failure reaches the catch handler below.
       return renderTask.promise.then(function() {
         data[pageNumber - 1] = canvas.toDataURL('image/png');
         loadedPages += 1;
         console.log(loadedPages + ' page(s) loaded in data');
       });
     }).catch(function(reason) {
       // Page loading or rendering error
       console.error(reason);
     });
   }
 }, function(reason) {
   // PDF loading error
   console.error(reason);
 });
 /* Show each rendered page canvas as a bordered thumbnail. */
 canvas {
   border: 1px solid black;
   margin: 5px;
   width: 25%;
 }
 <!-- Loads the PDF.js build, followed by the container element (id "canvas") that the script appends one canvas per page to. -->
 <script src="//mozilla.github.io/pdf.js/build/pdf.js"></script> <div id="canvas"></div>

For those who came here from Google looking for an Angular solution, here is an implementation that renders each page on a different canvas.

pdf-viewer.component.html

<!-- One hidden canvas per page is used as the render target; the resulting data URL is shown in the img. -->
<div *ngFor="let page of pages">
 <canvas #canvas hidden ></canvas>
 <img [src]="page">
</div>

pdf-viewer.component.ts

import * as pdfjsLib from 'pdfjs-dist';
pdfjsLib.GlobalWorkerOptions.workerSrc = 'pdf.worker.js';

/**
 * Renders every page of a PDF to a PNG data URL.
 *
 * Each page is drawn onto its own canvas (the `#canvas` elements of the
 * template) and the resulting data URL is stored in `pages` at the page's
 * zero-based index.
 */
@Component({
    selector: 'app-pdf-viewer',
    templateUrl: './pdf-viewer.component.html',
    styleUrls: ['./pdf-viewer.component.scss'],
})
export class PdfViewerComponent implements OnInit {
    constructor() { }

    /** Canvas elements tagged `#canvas` in the template; used as render targets. */
    @ViewChildren('canvas') canvas: QueryList<ElementRef<HTMLCanvasElement>>;

    /** Value passed to `pdfjsLib.getDocument` as its `url` option. */
    @Input() pdfBase64: string;

    /** PNG data URLs of the rendered pages, indexed by zero-based page index. */
    pages: string[] = [];

    ngOnInit(): void {
        // Surface loading/rendering failures instead of leaving the promise floating.
        this.setPages().catch((error) => console.error(error));
    }

    /**
     * Loads the document and renders all of its pages.
     *
     * @returns a promise that resolves once every page has been rendered and
     *          stored in `pages`, and rejects if loading or any render fails.
     */
    async setPages(): Promise<void> {
        const pdfDoc = await pdfjsLib.getDocument({ url: this.pdfBase64 }).promise;
        const totalPages = pdfDoc.numPages;
        // Sized up front so the template can create one canvas per page.
        this.pages = new Array(totalPages);

        const renders: Promise<void>[] = [];
        for (let i = 0; i < totalPages; i++) {
            renders.push((async () => {
                // pdf.js page numbers are 1-based.
                const page = await pdfDoc.getPage(i + 1);
                const canvasRef = this.canvas.toArray()[page.pageIndex];
                if (!canvasRef) {
                    throw new Error(`No canvas available for page index ${page.pageIndex}`);
                }
                const canvas = canvasRef.nativeElement;

                await this.renderPdfPageToCanvas(page, canvas);
                this.pages[page.pageIndex] = canvas.toDataURL('image/png');
            })());
        }
        // Pages render concurrently; wait for all of them before resolving.
        await Promise.all(renders);
    }

    /**
     * Sizes `canvas` to the page's viewport at scale 1.0 and renders the page into it.
     *
     * @param page   the pdf.js page to render
     * @param canvas the canvas to draw on; its width and height are overwritten
     * @returns the render task's promise
     * @throws Error if the canvas cannot provide a 2D rendering context
     */
    renderPdfPageToCanvas(page: pdfjsLib.PDFPageProxy, canvas: HTMLCanvasElement): pdfjsLib.PDFPromise<pdfjsLib.PDFPageProxy> {
        const viewport = page.getViewport({ scale: 1.0 });
        canvas.height = viewport.height;
        canvas.width = viewport.width;

        const canvasContext = canvas.getContext('2d');
        if (!canvasContext) {
            throw new Error('Could not obtain a 2D rendering context for the canvas');
        }

        const renderContext = {
            canvasContext: canvasContext,
            viewport: viewport
        };
        return page.render(renderContext).promise;
    }
}

package.json

{
    ...
    "dependencies": {
        ...
        "@angular/core": "^9.1.11",
        "pdfjs-dist": "2.3.200"
    },
    "devDependencies": {
        ...
        "@types/pdfjs-dist": "2.1.3"
    }
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM