簡體   English   中英

使用 PDFBox 將 PDF 轉換為 PDF/A

[英]Converting PDF to PDF/A with PDFBox

目前我正在嘗試將 PDF 轉換為 PDF/A。

但我不確定是否能夠轉換顏色空間。有什麼辦法可以做到嗎?

這是我的代碼,但是:

PDDocumentInformation info = doc.getDocumentInformation();
System.out.println("Page Count=" + doc.getNumberOfPages());
System.out.println("Title=" + info.getTitle());
System.out.println("Author=" + info.getAuthor());
System.out.println("Subject=" + info.getSubject());
System.out.println("Keywords=" + info.getKeywords());
System.out.println("Creator=" + info.getCreator());
System.out.println("Producer=" + info.getProducer());
System.out.println("Creation Date=" + info.getCreationDate());
System.out.println("Modification Date=" + info.getModificationDate());
System.out.println("Trapped=" + info.getTrapped());


PDDocumentCatalog cat = doc.getDocumentCatalog();
XMPMetadata xmp = XMPMetadata.createXMPMetadata();

PDFAIdentificationSchema pdfaid = xmp.createAndAddPFAIdentificationSchema();
pdfaid.setConformance("A");
pdfaid.setPart(3);
pdfaid.setAboutAsSimple(null);


DublinCoreSchema dublinCoreSchema = xmp.createAndAddDublinCoreSchema();
dublinCoreSchema.setTitle(info.getTitle());

dublinCoreSchema.addCreator(info.getAuthor());


AdobePDFSchema adobePDFSchema = xmp.createAndAddAdobePDFSchema();
adobePDFSchema.setProducer(info.getProducer());


XMPBasicSchema xmpBasicSchema = xmp.createAndAddXMPBasicSchema();
xmpBasicSchema.setCreatorTool(info.getCreator());
xmpBasicSchema.setCreateDate(info.getCreationDate());
xmpBasicSchema.setModifyDate(info.getModificationDate());

xmp.addSchema(pdfaid);
XmpSerializer serializer = new XmpSerializer();
ByteArrayOutputStream baos = new ByteArrayOutputStream();
serializer.serialize(xmp, baos, true);


InputStream colorProfile = PdfConverter.class.getResourceAsStream("/sRGBColorSpaceProfile.icm");
PDOutputIntent oi = new PDOutputIntent(doc, colorProfile);

oi.setInfo("sRGB IEC61966-2.1");
oi.setOutputCondition("sRGB IEC61966-2.1");
oi.setOutputConditionIdentifier("sRGB IEC61966-2.1");
oi.setRegistryName("http://www.color.org");


cat.addOutputIntent(oi);
PDMetadata metadata = new PDMetadata(doc);
metadata.importXMPMetadata(baos.toByteArray());
cat.setMetadata(metadata);

顏色空間被添加但是在驗證時我得到:

2.3.2 : Unexpected key in Graphic object definition, The ColorSpace is unknown

每個頁面/元素都會出現這個錯誤,而且出現得相當頻繁。

我能做些什麼來解決這個問題嗎?例如轉換 ColorSpace?或者改用另一個庫?

我找到了將pdf轉換為pdfA的技巧。

  1. 填寫 PDF 表單
  2. 將其轉換為圖像
  3. 按照 PDFBox 網站中的說明創建有效的 PDF/A 文件
  4. 將第 2 步生成的圖像填入所創建的 PDF/A 文件

在這個例子中,我使用了: OoPdfFormExample.pdf ,可以在互聯網上輕松找到。

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.apache.pdfbox.pdmodel.graphics.color.PDOutputIntent;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.pdmodel.interactive.form.PDAcroForm;
import org.apache.pdfbox.preflight.Format;
import org.apache.pdfbox.preflight.PreflightDocument;
import org.apache.pdfbox.preflight.ValidationResult;
import org.apache.pdfbox.preflight.exception.SyntaxValidationException;
import org.apache.pdfbox.preflight.parser.PreflightParser;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.tools.imageio.ImageIOUtil;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.DublinCoreSchema;
import org.apache.xmpbox.schema.PDFAIdentificationSchema;
import org.apache.xmpbox.type.BadFieldValueException;
import org.apache.xmpbox.xml.XmpSerializer;

import javax.xml.transform.TransformerException;
import java.awt.image.BufferedImage;
import java.io.*;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Random;

/**
 * Demonstrates a workaround for producing a PDF/A-1b file from a fillable PDF form:
 * fill and flatten the form, render it to a PNG image, embed that image in a freshly
 * created PDF/A-1b document, and finally validate the result with PDFBox Preflight.
 *
 * <p>Limitation: {@link #PDF2Image(String, Path)} returns only the path of the last
 * rendered page, so only that page ends up in the final document.
 */
public class CreatePDFAFile {

    private static final String OUTPUT_DIR = "tmp";
    private static final String RANDOM_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890";
    private static final int RANDOM_NAME_LENGTH = 17;
    static String separator = FileSystems.getDefault().getSeparator();


    /**
     * Runs the whole pipeline on {@code template/OoPdfFormExample.pdf}.
     *
     * @param args ignored
     * @throws IOException if any step fails to read or write a file
     */
    public static void main(String[] args) throws IOException {
        Path tmpDir = getRandomPath();
        String fileInput = fillForm("template/OoPdfFormExample.pdf", tmpDir);
        String image = PDF2Image(fileInput, tmpDir);
        if (image.isEmpty()) {
            // PDF2Image reports failures by returning an empty name; stop here with a clear
            // message instead of failing later while trying to load a non-existent image.
            throw new IOException("No page image could be rendered from " + fileInput);
        }
        String pdfa = createPDFA(image, tmpDir);
        checkPDFAValidation(pdfa);
    }

    /**
     * Sets the first form field to {@code "TEST"}, flattens the form and saves the result
     * as {@code FillForm.pdf} inside {@code path}. Documents without an AcroForm (or with
     * an empty one) are saved unchanged.
     *
     * @param formTemplate path of the PDF form to load
     * @param path         directory that receives the output file
     * @return the path of the written file
     * @throws IOException if the template cannot be loaded or the result cannot be saved
     */
    private static String fillForm(String formTemplate, Path path) throws IOException {
        String fileOut = path + separator + "FillForm.pdf";
        try (PDDocument pdfDocument = PDDocument.load(new File(formTemplate))) {
            PDAcroForm acroForm = pdfDocument.getDocumentCatalog().getAcroForm();
            // All form operations stay inside the null check: a PDF without an AcroForm
            // previously caused a NullPointerException on refreshAppearances().
            if (acroForm != null) {
                if (!acroForm.getFields().isEmpty()) {
                    acroForm.getField(acroForm.getFields().get(0).getFullyQualifiedName()).setValue("TEST");
                }
                acroForm.refreshAppearances();
                acroForm.flatten();
            }
            pdfDocument.save(fileOut);
        }
        return fileOut;
    }

    /**
     * Renders every page of the given PDF to a 300 DPI RGB PNG named
     * {@code image-<page>.png} inside {@code path}.
     *
     * <p>I/O errors are reported on {@code System.err} and not rethrown.
     *
     * @param fileInput path of the PDF to render
     * @param path      directory that receives the images
     * @return the path of the last image that was attempted, or an empty string if no
     *         page was reached (for example when the document could not be loaded)
     */
    public static String PDF2Image(String fileInput, Path path) {
        String fileName = "";
        try (final PDDocument document = PDDocument.load(new File(fileInput))) {
            PDFRenderer pdfRenderer = new PDFRenderer(document);
            for (int page = 0; page < document.getNumberOfPages(); ++page) {
                BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
                fileName = path + separator + "image-" + page + ".png";
                ImageIOUtil.writeImage(bim, fileName, 300);
            }
        } catch (IOException e) {
            System.err.println("Exception while trying to create pdf document - " + e);
        }
        return fileName;
    }

    /**
     * Creates a single-page document declared as PDF/A-1b (XMP identification schema,
     * embedded font, sRGB output intent), draws the given image on the page and saves it
     * as {@code FinalPDFAFile.pdf} inside {@code path}.
     *
     * @param imagePath path of the image to place on the page
     * @param path      directory that receives the output file
     * @return the path of the written file
     * @throws IOException              if a font, profile, image or output file cannot be read/written
     * @throws IllegalStateException    if the loaded font is not embedded
     * @throws IllegalArgumentException if the XMP metadata cannot be built or serialized
     */
    public static String createPDFA(String imagePath, Path path) throws IOException {
        try (PDDocument doc = new PDDocument()) {
            PDPage page = new PDPage();
            doc.addPage(page);
            PDFont font = PDType0Font.load(doc, new File("template" + separator + "LiberationSans-Regular.ttf"));
            if (!font.isEmbedded()) {
                throw new IllegalStateException("PDF/A compliance requires that all fonts used for"
                        + " text rendering in rendering modes other than rendering mode 3 are embedded.");
            }
            // Writes an empty text run using the embedded font.
            try (PDPageContentStream contents = new PDPageContentStream(doc, page)) {
                contents.beginText();
                contents.setFont(font, 12);
                contents.newLineAtOffset(100, 700);
                contents.showText("");
                contents.endText();
            }

            // add XMP metadata
            XMPMetadata xmp = XMPMetadata.createXMPMetadata();

            String fileName = path + separator + "FinalPDFAFile.pdf";
            try {
                DublinCoreSchema dc = xmp.createAndAddDublinCoreSchema();
                dc.setTitle(fileName);

                PDFAIdentificationSchema id = xmp.createAndAddPFAIdentificationSchema();
                id.setPart(1);
                id.setConformance("B");

                XmpSerializer serializer = new XmpSerializer();
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                serializer.serialize(xmp, baos, true);

                PDMetadata metadata = new PDMetadata(doc);
                metadata.importXMPMetadata(baos.toByteArray());
                doc.getDocumentCatalog().setMetadata(metadata);
            } catch (BadFieldValueException | TransformerException e) {
                throw new IllegalArgumentException(e);
            }

            // sRGB output intent; the profile stream is closed as soon as the intent is built
            // so the file handle is not leaked.
            try (InputStream colorProfile = new FileInputStream(new File("template/sRGB.icc"))) {
                PDOutputIntent intent = new PDOutputIntent(doc, colorProfile);
                intent.setInfo("");
                intent.setOutputCondition("");
                intent.setOutputConditionIdentifier("");
                intent.setRegistryName("");
                doc.getDocumentCatalog().addOutputIntent(intent);
            }

            PDImageXObject pdImage = PDImageXObject.createFromFile(imagePath, doc);

            try (PDPageContentStream contentStream = new PDPageContentStream(doc, page, PDPageContentStream.AppendMode.APPEND, true, true)) {
                // The image was rendered at 300 DPI; scale it down to fit on the page.
                float scale = 1 / 5f;
                contentStream.drawImage(pdImage, 20, 20, pdImage.getWidth() * scale, pdImage.getHeight() * scale);
            }
            doc.save(fileName);
            return fileName;
        }
    }

    /**
     * Validates the given file against PDF/A-1b with PDFBox Preflight and prints either a
     * success message or the list of validation errors to {@code System.out}.
     *
     * @param fileName path of the PDF to validate
     * @throws IOException if the file cannot be read or parsed for reasons other than a
     *                     syntax validation failure
     */
    private static void checkPDFAValidation(String fileName) throws IOException {
        ValidationResult result;
        PreflightParser parser = new PreflightParser(fileName);
        try {
            parser.parse(Format.PDF_A1B);
            // try-with-resources guarantees the document is closed even if validate() throws.
            try (PreflightDocument document = parser.getPreflightDocument()) {
                document.validate();
                result = document.getResult();
            }
        } catch (SyntaxValidationException e) {
            // A syntax failure still carries a result describing the errors found.
            result = e.getResult();
        }

        if (result.isValid()) {
            System.out.println("The file " + fileName + " is a valid PDF/A-1b file");
        } else {
            System.out.println("The file " + fileName + " is not valid, error(s) :");
            for (ValidationResult.ValidationError error : result.getErrorsList()) {
                System.out.println(error.getErrorCode() + " : " + error.getDetails());
            }
        }
    }

    /**
     * Creates a randomly named working directory below {@code tmp}.
     *
     * @return the path of the created directory
     * @throws IOException if the directory cannot be created
     */
    private static Path getRandomPath() throws IOException {
        String path = generateRandom();
        Path tmpDir = Paths.get(OUTPUT_DIR + separator + path + separator);
        // createDirectories also creates the parent "tmp" directory when it does not exist yet;
        // createDirectory would fail in that case.
        Files.createDirectories(tmpDir);
        return tmpDir;
    }

    /**
     * Builds a 17-character name from upper-case letters and digits.
     *
     * @return the generated name
     */
    private static String generateRandom() {
        Random rand = new Random();
        StringBuilder res = new StringBuilder(RANDOM_NAME_LENGTH);
        for (int i = 0; i < RANDOM_NAME_LENGTH; i++) {
            int randIndex = rand.nextInt(RANDOM_ALPHABET.length());
            res.append(RANDOM_ALPHABET.charAt(randIndex));
        }
        return res.toString();
    }

}

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM