[英]Make some characters as invisible from an existing PDF document by using PDFBox
我嘗試使用 Apache PDFBox 從現有 PDF 中讀取“Tj”運算符。完成這一步之后,我嘗試替換其中的一些字符。例如,假設一個 PDF 文檔包含 (hello world)Tj,這段代碼會將“hello”替換為“hi123”,因此修改后的文檔包含 (hi123 world)Tj 而不是 (hello world)Tj。
我現在面臨的一個大問題是:如何修改這段代碼,使“hello”以文本呈現模式 3(即“不可見”模式)顯示。也就是說,我不想用“hi123”替換“hello”,而是想讓“hello”消失(不可見模式)。因此,修改后的文檔看起來只包含“world”,而“hello”變為不可見。
我的代碼到目前為止:
/**
 * Small PDFBox 1.8.x experiment: writes a one-page PDF that shows "hello world",
 * then re-opens it and rewrites every page content stream so that the first
 * occurrence of {@code "hello"} in each string shown by a {@code Tj} operator
 * is replaced by {@code "hi123"}. The result is saved as {@code a.pdf}.
 *
 * <p>Only the {@code Tj} operator is handled; strings shown through {@code TJ}
 * arrays are left as they are.
 */
public class Test1 {
    private static final String src="...";

    /** Text that is searched for in every string shown by a Tj operator. */
    private static final String TARGET = "hello";

    /** Text that replaces the first occurrence of {@link #TARGET}. */
    private static final String REPLACEMENT = "hi123";

    /**
     * Creates {@code doc.pdf} in the working directory: a single 400x400 page
     * with the text "hello world" in 12pt Helvetica.
     *
     * @param src currently unused; kept so existing callers keep compiling
     * @throws IOException if the content stream or the file cannot be written
     * @throws COSVisitorException if PDFBox fails while saving the document
     */
    public static void CreatePdf(String src) throws IOException, COSVisitorException{
        PDDocument document = new PDDocument();
        try {
            PDPage page = new PDPage(new PDRectangle(400,400));
            document.addPage(page);

            PDDocumentInformation info=document.getDocumentInformation();
            info.setAuthor("PdfBox");
            info.setCreator("Pdf");
            info.setSubject("Stéganographie");
            info.setTitle("Stéganographie dans les documents PDF");
            info.setKeywords("Stéganographie, pdf");

            PDPageContentStream content = new PDPageContentStream(document, page);
            try {
                content.beginText();
                content.setFont(PDType1Font.HELVETICA, 12);
                content.moveTextPositionByAmount(15, 385);
                // content.appendRawCommands("3 Tr") at this point would make the
                // whole string invisible at creation time.
                content.drawString("hello world");
                content.endText();
            } finally {
                // Close even when drawing fails so the stream is not leaked.
                content.close();
            }
            document.save("doc.pdf");
        } finally {
            document.close();
        }
    }

    /**
     * Generates {@code doc.pdf}, replaces the target word in every page and
     * saves the modified document as {@code a.pdf}.
     *
     * @param args the command line arguments (ignored)
     * @throws IOException if a file or content stream cannot be read or written
     * @throws COSVisitorException if PDFBox fails while saving a document
     */
    public static void main(String[] args) throws IOException, COSVisitorException {
        CreatePdf(src);
        PDDocument doc = PDDocument.load("doc.pdf");
        try {
            List pages = doc.getDocumentCatalog().getAllPages();
            for (int i = 0; i < pages.size(); i++) {
                PDPage page = (PDPage) pages.get(i);
                PDStream contents = page.getContents();
                if (contents == null) {
                    // Page without a content stream: nothing to rewrite.
                    continue;
                }
                PDFStreamParser parser = new PDFStreamParser(contents.getStream());
                parser.parse();
                List tokens = parser.getTokens();
                replaceInShownStrings(tokens);

                // Now that the tokens are updated, replace the page content stream.
                PDStream updatedStream = new PDStream(doc);
                OutputStream out = updatedStream.createOutputStream();
                try {
                    ContentStreamWriter tokenWriter = new ContentStreamWriter(out);
                    tokenWriter.writeTokens(tokens);
                } finally {
                    out.close();
                }
                page.setContents(updatedStream);
            }
            doc.save("a.pdf");
        } finally {
            doc.close();
        }
    }

    /**
     * Replaces the first occurrence of {@link #TARGET} with {@link #REPLACEMENT}
     * in the string operand of every Tj operator, editing the operands in place.
     * Each operand is also printed to standard output before it is changed.
     */
    private static void replaceInShownStrings(List tokens) throws IOException {
        // Start at 1: an operator at index 0 has no operand in front of it.
        for (int j = 1; j < tokens.size(); j++) {
            Object next = tokens.get(j);
            if (!(next instanceof PDFOperator)) {
                continue;
            }
            // Tj and TJ are the two operators that display strings in a PDF;
            // only Tj is handled here.
            if (!((PDFOperator) next).getOperation().equals("Tj")) {
                continue;
            }
            // Tj takes one operand, the string to display. Guard the cast so a
            // malformed stream does not abort the whole run.
            Object operand = tokens.get(j - 1);
            if (!(operand instanceof COSString)) {
                continue;
            }
            COSString previous = (COSString) operand;
            String string = previous.getString();
            System.out.println(string);

            int start = string.indexOf(TARGET);
            if (start >= 0) {
                string = string.substring(0, start) + REPLACEMENT
                        + string.substring(start + TARGET.length());
            }
            // NOTE(review): the ISO-8859-1 round trip presumably only suits
            // simple single-byte font encodings such as the Helvetica used
            // above - confirm before running this on other documents.
            previous.reset();
            previous.append(string.getBytes("ISO-8859-1"));
        }
    }
}
基本上,你必須修改 tokens 列表:把兩個標記 (hello world) Tj 替換為八個標記 3 Tr (hello) Tj 0 Tr ( world) Tj。
因此,替換你的循環
List tokens = parser.getTokens();
for (int j = 0; j < tokens.size(); j++)
{
[...]
}
通過類似的東西
List tokens = parser.getTokens();
// Start at 1: an operator at index 0 has no operand in front of it.
for (int j = 1; j < tokens.size(); j++)
{
    Object next = tokens.get(j);
    if (!(next instanceof PDFOperator))
    {
        continue;
    }
    PDFOperator op = (PDFOperator) next;
    // Tj and TJ are the two operators that display strings in a PDF;
    // only Tj is handled here.
    if (!op.getOperation().equals("Tj"))
    {
        continue;
    }
    Object operand = tokens.get(j - 1);
    if (!(operand instanceof COSString))
    {
        continue;
    }
    // Split the string that is really in the stream instead of assuming it
    // is "hello world"; strings without the word are left untouched.
    // NOTE(review): getString()/new COSString(String) presumably only round-trips
    // for simple single-byte font encodings - confirm for other documents.
    String text = ((COSString) operand).getString();
    int start = text.indexOf("hello");
    if (start < 0)
    {
        continue;
    }
    String before = text.substring(0, start);
    String after = text.substring(start + "hello".length());

    // Drop the original "(text) Tj" pair and insert its replacement at the same spot.
    int pos = j - 1;
    tokens.remove(pos);
    tokens.remove(pos);
    if (before.length() > 0)
    {
        tokens.add(pos++, new COSString(before));
        tokens.add(pos++, PDFOperator.getOperator("Tj"));
    }
    // 3 Tr switches to the invisible text rendering mode for the hidden word.
    tokens.add(pos++, COSInteger.get(3));
    tokens.add(pos++, PDFOperator.getOperator("Tr"));
    tokens.add(pos++, new COSString("hello"));
    tokens.add(pos++, PDFOperator.getOperator("Tj"));
    // 0 Tr switches back to fill mode; this assumes the text was drawn in
    // mode 0 before, as in the sample document.
    tokens.add(pos++, COSInteger.get(0));
    tokens.add(pos++, PDFOperator.getOperator("Tr"));
    if (after.length() > 0)
    {
        tokens.add(pos++, new COSString(after));
        tokens.add(pos++, PDFOperator.getOperator("Tj"));
    }
    // Continue after the inserted tokens (the loop increment adds one).
    j = pos - 1;
}
最終代碼:
/**
 * Small PDFBox 1.8.x experiment: writes a one-page PDF that shows "hello world",
 * then re-opens it and rewrites every page content stream so that the first
 * occurrence of {@code "hello"} in each string shown by a {@code Tj} operator
 * is drawn in text rendering mode 3 (invisible). The rest of the string stays
 * visible. The result is saved as {@code a.pdf}.
 *
 * <p>Only the {@code Tj} operator is handled; strings shown through {@code TJ}
 * arrays are left as they are.
 */
public class Test1 {
    private static final String src="...";

    /** Word that is switched to the invisible text rendering mode. */
    private static final String HIDDEN_WORD = "hello";

    /**
     * Creates {@code doc.pdf} in the working directory: a single 400x400 page
     * with the text "hello world" in 12pt Helvetica.
     *
     * @param src currently unused; kept so existing callers keep compiling
     * @throws IOException if the content stream or the file cannot be written
     * @throws COSVisitorException if PDFBox fails while saving the document
     */
    public static void CreatePdf(String src) throws IOException, COSVisitorException{
        PDDocument document = new PDDocument();
        try {
            PDPage page = new PDPage(new PDRectangle(400,400));
            document.addPage(page);

            PDDocumentInformation info=document.getDocumentInformation();
            info.setAuthor("PdfBox");
            info.setCreator("Pdf");
            info.setSubject("Stéganographie");
            info.setTitle("Stéganographie dans les documents PDF");
            info.setKeywords("Stéganographie, pdf");

            PDPageContentStream content = new PDPageContentStream(document, page);
            try {
                content.beginText();
                content.setFont(PDType1Font.HELVETICA, 12);
                content.moveTextPositionByAmount(15, 385);
                // content.appendRawCommands("3 Tr") at this point would make the
                // whole string invisible at creation time.
                content.drawString("hello world");
                content.endText();
            } finally {
                // Close even when drawing fails so the stream is not leaked.
                content.close();
            }
            document.save("doc.pdf");
        } finally {
            document.close();
        }
    }

    /**
     * Generates {@code doc.pdf}, hides the target word on every page and saves
     * the modified document as {@code a.pdf}.
     *
     * @param args the command line arguments (ignored)
     * @throws IOException if a file or content stream cannot be read or written
     * @throws COSVisitorException if PDFBox fails while saving a document
     */
    public static void main(String[] args) throws IOException, COSVisitorException {
        CreatePdf(src);
        PDDocument doc = PDDocument.load("doc.pdf");
        try {
            List pages = doc.getDocumentCatalog().getAllPages();
            for (int i = 0; i < pages.size(); i++) {
                PDPage page = (PDPage) pages.get(i);
                PDStream contents = page.getContents();
                if (contents == null) {
                    // Page without a content stream: nothing to rewrite.
                    continue;
                }
                PDFStreamParser parser = new PDFStreamParser(contents.getStream());
                parser.parse();
                List tokens = parser.getTokens();
                hideWord(tokens, HIDDEN_WORD);

                // Now that the tokens are updated, replace the page content stream.
                PDStream updatedStream = new PDStream(doc);
                OutputStream out = updatedStream.createOutputStream();
                try {
                    ContentStreamWriter tokenWriter = new ContentStreamWriter(out);
                    tokenWriter.writeTokens(tokens);
                } finally {
                    out.close();
                }
                page.setContents(updatedStream);
            }
            doc.save("a.pdf");
        } finally {
            doc.close();
        }
    }

    /**
     * Rewrites {@code tokens} in place so that the first occurrence of
     * {@code word} inside each Tj string operand is shown with rendering mode 3.
     * A string {@code (before word after) Tj} becomes
     * {@code (before) Tj 3 Tr (word) Tj 0 Tr (after) Tj}; empty {@code before}
     * or {@code after} parts are omitted. Strings that do not contain
     * {@code word} are not modified.
     */
    private static void hideWord(List tokens, String word) {
        // Start at 1: an operator at index 0 has no operand in front of it.
        for (int j = 1; j < tokens.size(); j++) {
            Object next = tokens.get(j);
            if (!(next instanceof PDFOperator)) {
                continue;
            }
            // Tj and TJ are the two operators that display strings in a PDF;
            // only Tj is handled here.
            if (!((PDFOperator) next).getOperation().equals("Tj")) {
                continue;
            }
            Object operand = tokens.get(j - 1);
            if (!(operand instanceof COSString)) {
                continue;
            }
            // NOTE(review): getString()/new COSString(String) presumably only
            // round-trips for simple single-byte font encodings such as the
            // Helvetica used above - confirm for other documents.
            String text = ((COSString) operand).getString();
            int start = text.indexOf(word);
            if (start < 0) {
                continue;
            }
            String before = text.substring(0, start);
            String after = text.substring(start + word.length());

            // Drop the original "(text) Tj" pair and insert its replacement
            // at the same position.
            int pos = j - 1;
            tokens.remove(pos);
            tokens.remove(pos);
            if (before.length() > 0) {
                tokens.add(pos++, new COSString(before));
                tokens.add(pos++, PDFOperator.getOperator("Tj"));
            }
            // 3 Tr switches to the invisible text rendering mode.
            tokens.add(pos++, COSInteger.get(3));
            tokens.add(pos++, PDFOperator.getOperator("Tr"));
            tokens.add(pos++, new COSString(word));
            tokens.add(pos++, PDFOperator.getOperator("Tj"));
            // 0 Tr switches back to fill mode; this assumes the text was drawn
            // in mode 0 before, as in the document created by CreatePdf.
            tokens.add(pos++, COSInteger.get(0));
            tokens.add(pos++, PDFOperator.getOperator("Tr"));
            if (after.length() > 0) {
                tokens.add(pos++, new COSString(after));
                tokens.add(pos++, PDFOperator.getOperator("Tj"));
            }
            // Continue after the inserted tokens (the loop increment adds one).
            j = pos - 1;
        }
    }
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.