簡體   English   中英

PDFBox 2.0.8-從一個文檔中提取圖像並在另一個文檔中使用

[英]PDFBox 2.0.8 - Extracting an image from one document and using it in another

我正在編寫一個Java應用程序以用作模板讀取器和寫入器。 我在處理文字方面取得了成功,但在圖片處理上卻有些困難...

獲取圖像是簡單的部分-使用擴展PDFStreamEngine的類

package readingPdf;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.contentstream.operator.DrawObject;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.state.Concatenate;
import org.apache.pdfbox.contentstream.operator.state.Restore;
import org.apache.pdfbox.contentstream.operator.state.Save;
import org.apache.pdfbox.contentstream.operator.state.SetGraphicsStateParameters;
import org.apache.pdfbox.contentstream.operator.state.SetMatrix;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.util.Matrix;

/**
 * Stream engine that walks a page's content stream and collects every image
 * XObject it draws, together with the translation components of the current
 * transformation matrix at the moment the image is drawn.
 *
 * <p>Each collected entry is an {@code Object[3]}:
 * <ul>
 *   <li>{@code [0]} - x translation of the CTM (boxed {@code Float})</li>
 *   <li>{@code [1]} - y translation of the CTM (boxed {@code Float})</li>
 *   <li>{@code [2]} - the {@link PDImageXObject} itself</li>
 * </ul>
 */
public class ImageStripper extends PDFStreamEngine {

    ArrayList<Object[]> imagesData = null;

    /**
     * Registers the graphics-state operators this engine needs and creates
     * the empty result list.
     *
     * @throws IOException declared for callers; nothing in this constructor
     *                     body performs I/O directly
     */
    public ImageStripper() throws IOException {
        // Operators that must be interpreted so the CTM is correct at draw time.
        addOperator(new Concatenate());
        addOperator(new DrawObject());
        addOperator(new SetGraphicsStateParameters());
        addOperator(new Save());
        addOperator(new Restore());
        addOperator(new SetMatrix());
        imagesData = new ArrayList<Object[]>();
    }

    /**
     * Intercepts the "Do" operator: images are recorded, form XObjects are
     * descended into, and any other XObject type is ignored. Every other
     * operator is handed to the superclass unchanged.
     *
     * @param operator the operator being processed
     * @param operands the operands of that operator
     * @throws IOException if processing the operator or a nested form fails
     */
    @Override
    protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
        if (!"Do".equals(operator.getName())) {
            super.processOperator(operator, operands);
            return;
        }

        COSName xobjectName = (COSName) operands.get(0);
        PDXObject drawn = getResources().getXObject(xobjectName);

        if (drawn instanceof PDImageXObject) {
            recordImage((PDImageXObject) drawn);
        } else if (drawn instanceof PDFormXObject) {
            // Forms may contain images of their own, so process their streams too.
            showForm((PDFormXObject) drawn);
        }
    }

    /**
     * Logs the image position and appends an {x, y, image} entry to the
     * result list.
     */
    private void recordImage(PDImageXObject image) {
        Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();

        // position of the image in the PDF, in user space units
        System.out.println("position in PDF = " + ctm.getTranslateX() + ", " + ctm.getTranslateY()
                + " in user space units");

        imagesData.add(new Object[] { ctm.getTranslateX(), ctm.getTranslateY(), image });
    }

    /**
     * Returns the list of collected image entries. This is the engine's own
     * list, not a copy.
     *
     * @return the collected {x, y, image} entries, in the order encountered
     */
    public ArrayList<Object[]> getImagesList() {
        return imagesData;
    }
}

接下來是其實現

/**
 * Reads the images from the first page of a source PDF and draws them, at
 * the same translation, onto a new landscape A4 document.
 *
 * <p>The image XObjects placed in the new document still reference streams
 * owned by the source document. The source therefore has to stay open until
 * the new document has been saved; call {@link #closeFiles()} afterwards.
 */
public class PDFManager{

    private PDFParser parser;
    private PDDocument pdDoc;
    private PDDocument retDoc;
    private COSDocument cosDoc;
    private PDPage page;
    private String filePath;
    private File file;

    /**
     * Builds a new single-page document containing the images found on the
     * first page of the file at {@code filePath}.
     *
     * <p>The source document is intentionally left open when this method
     * returns: closing it before the returned document is saved makes
     * {@code save()} fail with "COSStream has been closed and cannot be
     * read". Call {@link #closeFiles()} once the returned document has been
     * saved.
     *
     * @return the newly created document; the caller must save and close it
     * @throws IOException           if the source cannot be parsed or the
     *                               new page cannot be written
     * @throws IllegalStateException if no source path has been set
     */
    public PDDocument transferImage() throws IOException {
        if (filePath == null) {
            throw new IllegalStateException("filePath must be set before calling transferImage()");
        }

        this.pdDoc = null;
        this.cosDoc = null;

        file = new File(filePath);
        parser = new PDFParser(new RandomAccessFile(file, "r"));
        parser.parse();
        cosDoc = parser.getDocument();
        pdDoc = new PDDocument(cosDoc);

        // Collect {x, y, image} entries from the first page of the source.
        ImageStripper imageStripper = new ImageStripper();
        imageStripper.processPage(pdDoc.getPage(0));
        ArrayList<Object []> imageList = imageStripper.getImagesList();

        // Do NOT close pdDoc / cosDoc here: the images below are still backed
        // by the source document's streams until retDoc has been saved.

        // Create the target document with one landscape A4 page.
        retDoc = new PDDocument();
        page = new PDPage(new PDRectangle(PDRectangle.A4.getHeight(), PDRectangle.A4.getWidth()));
        retDoc.addPage(page);

        // try-with-resources so the content stream is closed even if drawing fails.
        try (PDPageContentStream cs = new PDPageContentStream(retDoc, page, AppendMode.OVERWRITE, true)) {
            for (Object[] imageData : imageList) {
                float xPos = (float) imageData[0];
                float yPos = (float) imageData[1];
                PDImageXObject image = (PDImageXObject) imageData[2];
                // Only the translation was recorded, so the image is drawn at
                // its default size rather than at the source page's scaling.
                cs.drawImage(image, xPos, yPos);
            }
        }

        return retDoc;
    }

    /**
     * Closes the source document opened by {@link #transferImage()}. Call
     * this only after the document returned by {@code transferImage()} has
     * been saved. Safe to call when nothing is open.
     *
     * @throws IOException if closing the source document fails
     */
    public void closeFiles() throws IOException {
        try {
            if (pdDoc != null) {
                pdDoc.close();
            }
        } finally {
            pdDoc = null;
            if (cosDoc != null) {
                try {
                    cosDoc.close();
                } finally {
                    cosDoc = null;
                }
            }
        }
    }

    /**
     * Demo entry point: copies the images of {@code c:\test\test.pdf} into
     * {@code c:\test\test2.pdf}.
     */
    public static void main(String[] args) throws IOException {

        PDFManager pdfManager = new PDFManager();
        pdfManager.filePath = "c:\\test\\test.pdf";

        try {
            PDDocument doc = pdfManager.transferImage();
            try {
                doc.save("c:\\test\\test2.pdf");
            } finally {
                doc.close();
            }
        } finally {
            // The source may only be released after the new file is written.
            pdfManager.closeFiles();
        }
    }
}

現在問題出在我調用 cs.drawImage 寫入圖像的地方。所有代碼執行都沒有任何問題,直到嘗試保存新文件時,我得到了異常: COSStream has been closed and cannot be read. Perhaps its enclosing PDDocument has been closed?

我懷疑圖像中仍然存在元數據,將它鏈接到從中提取它的原始文檔,因為調用 PDImageXObject.createFromFile("c:\\test\\testImage.png", doc) 創建的圖像可以完美寫入。createFromFile 會接收正在寫入的 PDDocument 作為參數,所以我懷疑 PDImageXObject 會以某種方式與文檔鏈接。

我無法將圖像保存到臨時位置,因為這只是測試POC。

任何援助將不勝感激

@Tilman Hausherr

感謝您的解決方案

我將關閉原始文檔的代碼移到了一個單獨的方法中,該方法在寫入(保存)新文件之后調用

/**
 * Closes the source document and its underlying COS document. Call this
 * only after the new document has been saved, because the images copied
 * into it are still read from the source while saving. Does nothing for a
 * document that was never opened.
 *
 * @throws IOException if closing either document fails
 */
public void closeFiles() throws IOException {
    try {
        if (pdDoc != null) {
            pdDoc.close();
        }
    } finally {
        // Still attempt to close the COS document if the first close threw.
        if (cosDoc != null) {
            cosDoc.close();
        }
    }
}

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM