[英]PDFBox 2.0.8 - Extracting an image from one document and using it in another
我正在编写一个Java应用程序以用作模板读取器和写入器。 我在处理文字方面取得了成功,但在图片处理上却有些困难...
获取图像是简单的部分-使用扩展PDFStreamEngine的类
package readingPdf;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.contentstream.PDFStreamEngine;
import org.apache.pdfbox.contentstream.operator.DrawObject;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.contentstream.operator.state.Concatenate;
import org.apache.pdfbox.contentstream.operator.state.Restore;
import org.apache.pdfbox.contentstream.operator.state.Save;
import org.apache.pdfbox.contentstream.operator.state.SetGraphicsStateParameters;
import org.apache.pdfbox.contentstream.operator.state.SetMatrix;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.util.Matrix;
public class ImageStripper extends PDFStreamEngine {
ArrayList<Object []> imagesData = null;
public ImageStripper() throws IOException {
// preparing PDFStreamEngine
addOperator(new Concatenate());
addOperator(new DrawObject());
addOperator(new SetGraphicsStateParameters());
addOperator(new Save());
addOperator(new Restore());
addOperator(new SetMatrix());
imagesData = new ArrayList<Object[]>();
}
@Override
protected void processOperator(Operator operator, List<COSBase> operands) throws IOException {
String operation = operator.getName();
if ("Do".equals(operation)) {
COSName objectName = (COSName) operands.get(0);
// get the PDF object
PDXObject xobject = getResources().getXObject(objectName);
// check if the object is an image object
if (xobject instanceof PDImageXObject) {
Object[] imageData = new Object[3];
PDImageXObject image = (PDImageXObject) xobject;
Matrix ctmNew = getGraphicsState().getCurrentTransformationMatrix();
// position of image in the pdf in terms of user space units
System.out.println("position in PDF = " + ctmNew.getTranslateX() + ", " + ctmNew.getTranslateY()
+ " in user space units");
imageData[0] = ctmNew.getTranslateX();// xPos
imageData[1] = ctmNew.getTranslateY();// yPos
imageData[2] = image;//Image
imagesData.add(imageData);
} else if (xobject instanceof PDFormXObject) {
PDFormXObject form = (PDFormXObject) xobject;
showForm(form);
}
} else {
super.processOperator(operator, operands);
}
}
public ArrayList<Object[]> getImagesList(){
return imagesData;
}
}
接下来是其实现
public class PDFManager{
private PDFParser parser;
private PDDocument pdDoc;
private PDDocument retDoc;
private COSDocument cosDoc;
private PDPage page;
private String filePath;
private File file;
public PDDocument transferImage() throws IOException {
this.pdDoc = null;
this.cosDoc = null;
file = new File(filePath);
parser = new PDFParser(new RandomAccessFile(file, "r"));
parser.parse();
cosDoc = parser.getDocument();
pdDoc = new PDDocument(cosDoc);
//Get Image Data
ImageStripper imageStripper = new ImageStripper();
imageStripper.processPage(pdDoc.getPage(0));
ArrayList<Object []> imageList = imageStripper.getImagesList();
//Close Doc
pdDoc.close();
cosDoc.close();
//Create new PDF Doc
retDoc = new PDDocument();
page = new PDPage(new PDRectangle(PDRectangle.A4.getHeight(), PDRectangle.A4.getWidth()));
retDoc.addPage(page);
PDPageContentStream cs = new PDPageContentStream(retDoc, page, AppendMode.OVERWRITE, true);
for(int pos = 0; pos < imageList.size() ; pos++) {
Object [] imageData = imageList.get(pos);
float xPos = (float)imageData[0];
float yPos = (float)imageData[1];
PDImageXObject image = (PDImageXObject)imageData[2];
cs.drawImage(image, xPos, yPos);
}
cs.close();
return retDoc;
}
public static void main(String[] args) throws IOException {
PDFManager pdfManager = new PDFManager();
PDDocument doc =pdfManager.ToText("c:\\test\\test.pdf");
doc.save("c:\\test\\test2.pdf");
doc.close();
}
}
现在问题出在我正在写的地方,叫cs.drawImage
。 除了尝试保存新文件时,所有代码执行都没有任何问题。我得到了异常, COSStream has been closed and cannot be read. Perhaps its enclosing PDDocument has been closed?
COSStream has been closed and cannot be read. Perhaps its enclosing PDDocument has been closed?
我怀疑仍然存在元数据,将图像链接到调用PDImageXobject.createFromFile("c:\\\\test\\\\testImage.png", doc)
从中提取的原始文档,该文档将完美写入PDImageXObject
。 当我将PDDocument
写入的PDImageXObject
传递到PDImageXObject
我怀疑它会以某种方式链接。
我无法将图像保存到临时位置,因为这只是测试POC。
任何援助将不胜感激
@蒂尔曼·豪舍尔
感谢您的解决方案
我将原始文档的结尾移到了一个单独的方法中,该方法在写入文件后调用
public void closeFiles(){
pdDoc.close();
cosDoc.close();
}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.