简体   繁体   中英

How to read alt text of image in word document apache.poi

How to read/edit alt text on image of word document with apache.poi?

ms-word

I tried to find a suitable method, but found nothing for this in the XWPFPictureData object.

java代码

XWPFDocument.getAllPictures only provides access to the XWPFPictureData which is the representation of the binary file data of the embedded pictures only. That does not contain the meta data of the shapes of the pictures in the document.

There is also XWPFRun.getEmbeddedPictures which gets XWPFPicture . This is the representation of the CTPicture element, which contains some of the meta data of the picture shapes but not all.

A picture in a Word document is stored in a graphic drawing which contains either an inline or an anchor. Pictures which are positioned in line with the text are stored in an inline. Pictures which are anchored somewhere in the document and have text flowing around them are stored in an anchor. Either the inline or the anchor contains all meta data of the shape.

So the need is to get either the inline or the anchor of the picture. Having this, one can get the CTNonVisualDrawingProps, which is the representation of the alternate text data for the picture.

The closest element which one can get using apache poi directly is the XWPFPicture. Having that, one can try to get the inline or the anchor which contains that picture.

The following complete example shows this. The method org.apache.xmlbeans.XmlObject getInlineOrAnchor(XWPFRun run, XWPFPicture picture) gets the inline or the anchor for the XWPFPicture picture contained in the XWPFRun run. The method org.openxmlformats.schemas.drawingml.x2006.main.CTNonVisualDrawingProps getNonVisualDrawingProps(org.apache.xmlbeans.XmlObject inlineOrAnchor) then gets the CTNonVisualDrawingProps from it. CTNonVisualDrawingProps provides getId, getName, getTitle and getDescr. The getDescr method returns the text which Word shows as the Alt Text.

import java.io.FileInputStream;

import org.apache.poi.xwpf.usermodel.*;

import java.util.List;

/**
 * Walks the body of a Word (.docx) document and prints, for every embedded
 * picture found in a run, the non-visual drawing properties (id, name, title
 * and description) of the inline or anchor shape that holds the picture.
 * The description is the value Word shows as the picture's Alt Text.
 *
 * Content controls (SDT) are recognized but their content is not traversed.
 */
public class WordReadAllContent {

 /**
  * Tests whether the given inline or anchor contains the given picture.
  *
  * @param ctPictureToFind the low-level picture element to look for
  * @param inlineOrAnchor the candidate inline or anchor element to search in
  * @return {@code inlineOrAnchor} if one of its descendant {@code pic:pic}
  *         elements equals {@code ctPictureToFind}, otherwise {@code null}
  */
 static org.apache.xmlbeans.XmlObject getInlineOrAnchor(org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture ctPictureToFind, org.apache.xmlbeans.XmlObject inlineOrAnchor) {
  String declareNameSpaces = "declare namespace pic='http://schemas.openxmlformats.org/drawingml/2006/picture'; ";
  // select every picture element below the candidate, at any depth
  org.apache.xmlbeans.XmlObject[] selectedObjects = inlineOrAnchor.selectPath(
   declareNameSpaces
   + "$this//pic:pic");
  for (org.apache.xmlbeans.XmlObject selectedObject : selectedObjects) {
   if (selectedObject instanceof org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture
     && ctPictureToFind.equals(selectedObject)) {
    // this is the inlineOrAnchor for that picture
    return inlineOrAnchor;
   }
  }
  return null;
 }

 /**
  * Finds the inline or anchor element of the run that holds the given picture.
  * For each drawing of the run, its inlines are checked first, then its anchors.
  *
  * @param run the run whose drawings are searched
  * @param picture the picture to locate
  * @return the first matching inline or anchor, or {@code null} if none matches
  */
 static org.apache.xmlbeans.XmlObject getInlineOrAnchor(XWPFRun run, XWPFPicture picture) {
  org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture ctPictureToFind = picture.getCTPicture();
  for (org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDrawing drawing : run.getCTR().getDrawingList()) {
   for (org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTInline inline : drawing.getInlineList()) {
    // a non-null result means this inline holds the picture
    org.apache.xmlbeans.XmlObject found = getInlineOrAnchor(ctPictureToFind, inline);
    if (found != null) {
     return found;
    }
   }
   for (org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTAnchor anchor : drawing.getAnchorList()) {
    // a non-null result means this anchor holds the picture
    org.apache.xmlbeans.XmlObject found = getInlineOrAnchor(ctPictureToFind, anchor);
    if (found != null) {
     return found;
    }
   }
  }
  return null;
 }

 /**
  * Returns the {@code docPr} element of an inline or anchor.
  *
  * @param inlineOrAnchor a {@code CTInline}, a {@code CTAnchor}, or {@code null}
  * @return the non-visual drawing properties, or {@code null} if the argument
  *         is {@code null} or is neither an inline nor an anchor
  */
 static org.openxmlformats.schemas.drawingml.x2006.main.CTNonVisualDrawingProps getNonVisualDrawingProps(org.apache.xmlbeans.XmlObject inlineOrAnchor) {
  // instanceof is false for null, so a null argument falls through to the final return
  if (inlineOrAnchor instanceof org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTInline) {
   return ((org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTInline)inlineOrAnchor).getDocPr();
  }
  if (inlineOrAnchor instanceof org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTAnchor) {
   return ((org.openxmlformats.schemas.drawingml.x2006.wordprocessingDrawing.CTAnchor)inlineOrAnchor).getDocPr();
  }
  return null;
 }

 /**
  * Builds a one-line summary of the given drawing properties.
  *
  * @param nonVisualDrawingProps the properties to summarize, may be {@code null}
  * @return the empty string for {@code null}, otherwise a string of the form
  *         {@code "Id:=<id> Name:=<name> Title:=<title> Descr:=<descr>"}
  */
 static String getSummary(org.openxmlformats.schemas.drawingml.x2006.main.CTNonVisualDrawingProps nonVisualDrawingProps) {
  if (nonVisualDrawingProps == null) {
   return "";
  }
  return "Id:=" + nonVisualDrawingProps.getId()
   + " Name:=" + nonVisualDrawingProps.getName()
   + " Title:=" + nonVisualDrawingProps.getTitle()
   + " Descr:=" + nonVisualDrawingProps.getDescr();
 }

 /**
  * Prints the summary of the non-visual drawing properties of each picture.
  * An empty line is printed for a picture whose inline or anchor is not found.
  *
  * @param run the run that contains the pictures
  * @param pictures the pictures embedded in {@code run}
  */
 static void traversePictures(XWPFRun run, List<XWPFPicture> pictures) throws Exception {
  for (XWPFPicture picture : pictures) {
   // the binary image data, if needed, is available via picture.getPictureData()
   System.out.println(getSummary(getNonVisualDrawingProps(getInlineOrAnchor(run, picture))));
  }
 }

 /**
  * Traverses the pictures of every run in the given run elements.
  *
  * @param runElements the run-level elements of a paragraph
  */
 static void traverseRunElements(List<IRunElement> runElements) throws Exception {
  for (IRunElement runElement : runElements) {
   // XWPFFieldRun and XWPFHyperlinkRun are XWPFRuns too and are handled the same way
   if (runElement instanceof XWPFRun) {
    XWPFRun run = (XWPFRun)runElement;
    traversePictures(run, run.getEmbeddedPictures());
   }
   //ToDo: An XWPFSDT run element may have traversable content too.
  }
 }

 /**
  * Traverses the body elements of every regular table cell.
  *
  * @param tableICells the cells of one table row
  */
 static void traverseTableCells(List<ICell> tableICells) throws Exception {
  for (ICell tableICell : tableICells) {
   if (tableICell instanceof XWPFSDTCell) {
    //ToDo: The SDTCell may have traversable content too.
   } else if (tableICell instanceof XWPFTableCell) {
    traverseBodyElements(((XWPFTableCell)tableICell).getBodyElements());
   }
  }
 }

 /**
  * Traverses the cells of every given table row.
  *
  * @param tableRows the rows of one table
  */
 static void traverseTableRows(List<XWPFTableRow> tableRows) throws Exception {
  for (XWPFTableRow tableRow : tableRows) {
   traverseTableCells(tableRow.getTableICells());
  }
 }

 /**
  * Traverses paragraphs and tables of the given body elements; tables are
  * descended into via their rows and cells.
  *
  * @param bodyElements the body elements of a document or of a table cell
  */
 static void traverseBodyElements(List<IBodyElement> bodyElements) throws Exception {
  for (IBodyElement bodyElement : bodyElements) {
   if (bodyElement instanceof XWPFParagraph) {
    traverseRunElements(((XWPFParagraph)bodyElement).getIRuns());
   } else if (bodyElement instanceof XWPFSDT) {
    //ToDo: The SDT may have traversable content too.
   } else if (bodyElement instanceof XWPFTable) {
    traverseTableRows(((XWPFTable)bodyElement).getRows());
   }
  }
 }

 /**
  * Opens {@code ./WordDocument.docx} and prints the drawing properties of its pictures.
  *
  * @param args unused
  */
 public static void main(String[] args) throws Exception {
  String inFilePath = "./WordDocument.docx";
  // try-with-resources closes the document and the stream even if traversal throws
  try (FileInputStream in = new FileInputStream(inFilePath);
       XWPFDocument document = new XWPFDocument(in)) {
   traverseBodyElements(document.getBodyElements());
  }
 }

}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. If you have any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM