[英]C# Extract Text from .XPS Document
我一直在使用另一個StackOverflow這個問題的答案作為解決這個問題的參考,但是我遇到了一個問題。 我在FixedDocumentSequence
收到一個錯誤,說無法找到它。 我已經添加了對PresentationCore
, PresentationFramework
, WindowsBase
和ReachFramework
,我不太確定是否需要為FixedDocumentSequence
添加另一個引用。
這是我的代碼:
public string convertXPS(string fileName)
{
XpsDocument _xpsDocument = new XpsDocument(fileName, System.IO.FileAccess.Read);
IXpsFixedDocumentSequenceReader fixedDocSeqReader = _xpsDocument.FixedDocumentSequenceReader;
IXpsFixedDocumentReader _document = fixedDocSeqReader.FixedDocuments[0];
FixedDocumentSequence sequence = _xpsDocument.GetFixedDocumentSequence();
string _fullPageText = "";
for (int pageCount = 0; pageCount < sequence.DocumentPaginator.PageCount; ++pageCount)
{
IXpsFixedPageReader _page = _document.FixedPages[pageCount];
StringBuilder _currentText = new StringBuilder();
System.Xml.XmlReader _pageContentReader = _page.XmlReader;
if (_pageContentReader != null)
{
while (_pageContentReader.Read())
{
if (_pageContentReader.Name == "Glyphs")
{
if (_pageContentReader.HasAttributes)
{
if (_pageContentReader.GetAttribute("UnicodeString") != null)
{
_currentText.
Append(_pageContentReader.
GetAttribute("UnicodeString"));
}
}
}
}
}
_fullPageText += _currentText.ToString();
}
return _fullPageText;
}
[STAThread]
static void Main(string[] args)
{
try
{
XpsDocument _xpsDocument = new XpsDocument(@"C:\Users\admin-\Desktop\testing.xps", System.IO.FileAccess.Read);
IXpsFixedDocumentSequenceReader fixedDocSeqReader = _xpsDocument.FixedDocumentSequenceReader;
IXpsFixedDocumentReader _document = fixedDocSeqReader.FixedDocuments[0];
FixedDocumentSequence sequence = _xpsDocument.GetFixedDocumentSequence();
string _fullPageText = "";
for (int pageCount = 0; pageCount < sequence.DocumentPaginator.PageCount; ++pageCount)
{
IXpsFixedPageReader _page = _document.FixedPages[pageCount];
StringBuilder _currentText = new StringBuilder();
System.Xml.XmlReader _pageContentReader = _page.XmlReader;
if (_pageContentReader != null)
{
while (_pageContentReader.Read())
{
if (_pageContentReader.Name == "Glyphs")
{
if (_pageContentReader.HasAttributes)
{
if (_pageContentReader.GetAttribute("UnicodeString") != null)
{
_currentText.
Append(_pageContentReader.
GetAttribute("UnicodeString"));
}
}
}
}
}
_fullPageText += _currentText.ToString();
}
}
catch(Exception e)
{
}
}
我不認為代碼有太大的變化,嘗試添加[STAThread]幫助我讀取xps,我也只使用上面提到的參考來讀取文件,我也得到了相同的錯誤,但不知何故解決了它,你得到90%的結果
另請參閱添加System.Windows.Documents;
需要哪個引用System.Windows.Documents;
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.