[英]How to convert an MS Word file with images and equations into HTML with the help of MemoryStream in C#
我正在使用下面的代碼,它工作正常。 這段代碼會將Word文件轉換為帶有圖像的HTML文件。
但方程式有問題:我無法將MS Word文件中的方程式轉換為HTML。
有人可以幫忙嗎?
// Converts a Word document to HTML: embedded images are written to a
// "<uploaded file name>_files" directory and the markup to an .html file.
// NOTE(review): sourceDocumentFileName and fileInfo are declared outside this
// snippet; presumably they refer to the file saved on the next line - confirm.
FileUpload1.SaveAs(Server.MapPath(FileUpload1.FileName));

string imageDirectoryName = FileUpload1.FileName + "_files";
string imageDirectoryPath = Server.MapPath(imageDirectoryName);

// Start from an empty image directory. The recursive delete also copes with
// nested subdirectories, which a plain Delete() would reject.
if (Directory.Exists(imageDirectoryPath))
{
    Directory.Delete(imageDirectoryPath, true);
}

int imageCounter = 0;
byte[] byteArray = File.ReadAllBytes(sourceDocumentFileName);

// The bytes are written into a parameterless (resizable) MemoryStream because
// the document is opened in editable mode below.
using (MemoryStream memoryStream = new MemoryStream())
{
    memoryStream.Write(byteArray, 0, byteArray.Length);

    using (WordprocessingDocument doc = WordprocessingDocument.Open(memoryStream, true))
    {
        HtmlConverterSettings settings = new HtmlConverterSettings();

        // The callback is invoked for each image; it saves the image to disk and
        // returns the <img> element to emit, or null to emit nothing.
        XElement html = HtmlConverter.ConvertToHtml(doc, settings,
            imageInfo =>
            {
                // The directory is only created once an image is actually seen;
                // CreateDirectory does nothing if it already exists.
                Directory.CreateDirectory(imageDirectoryPath);
                ++imageCounter;

                // Expecting a "type/subtype" content type such as "image/png".
                string[] contentTypeParts = imageInfo.ContentType.Split('/');
                if (contentTypeParts.Length < 2)
                {
                    return null;
                }

                // Invariant lowering keeps the match culture-independent.
                string extension = contentTypeParts[1].ToLowerInvariant();

                ImageFormat imageFormat;
                switch (extension)
                {
                    case "png":
                        // PNG images are re-encoded and saved as JPEG.
                        extension = "jpeg";
                        imageFormat = ImageFormat.Jpeg;
                        break;
                    case "bmp":
                        imageFormat = ImageFormat.Bmp;
                        break;
                    case "jpeg":
                        imageFormat = ImageFormat.Jpeg;
                        break;
                    case "tiff":
                        imageFormat = ImageFormat.Tiff;
                        break;
                    case "wmf":
                        // NOTE(review): the data is encoded as JPEG but the file keeps
                        // the ".wmf" extension - confirm this is intended.
                        imageFormat = ImageFormat.Jpeg;
                        break;
                    default:
                        // Unexpected image format: skip it and emit no markup.
                        return null;
                }

                string imageFileName = imageDirectoryName + "/image" +
                    imageCounter.ToString() + "." + extension;
                try
                {
                    imageInfo.Bitmap.Save(Server.MapPath(imageFileName), imageFormat);
                }
                catch (System.Runtime.InteropServices.ExternalException)
                {
                    // Best effort: an image that cannot be encoded is left out of
                    // the HTML rather than failing the whole conversion.
                    return null;
                }

                return new XElement(Xhtml.img,
                    new XAttribute(NoNamespace.src, imageFileName),
                    imageInfo.ImgStyleAttribute,
                    imageInfo.AltText != null ?
                        new XAttribute(NoNamespace.alt, imageInfo.AltText) : null);
            });

        // Write the markup next to fileInfo, replacing its extension with ".html".
        string htmlPath = Path.Combine(
            fileInfo.Directory.FullName,
            Path.GetFileNameWithoutExtension(fileInfo.Name) + ".html");
        File.WriteAllText(htmlPath, html.ToStringNewLineOnAttributes());
    }
}
第1步 - 首先了解如何從Word文件中取得數學對象(請參見此處)。
第2步 - 遍歷Word文件的段落,在其中選出OfficeMath對象,將其轉換為MathML(請參閱第1步),並可根據需要再轉換為LaTeX(我認為在HTML中使用LaTeX會比較方便)。
注意:轉換為LaTeX的做法與步驟1中使用OMML2MML進行的轉換類似,所需文件請參見此處。
第3步 - 在第2步找到的數學對象之前/之后插入一個文本對象,其內容為第2步得到的MathML / LaTeX。 之所以需要這一步,是因為HtmlConverter.ConvertToHtml
會丟失Word內容中的數學對象;在數學對象之前/之后插入文本后,該文本就會出現在生成的HTML中。
這是我的代碼:
// For every OfficeMath element found inside a paragraph of the main document
// part: transform its XML with the stylesheet at officeMathMLSchemaFilePath,
// pass the output through MathML2Latex, record the result in officeMLFormulas,
// and insert a text run holding that result immediately before the element.
using (WordprocessingDocument doc = WordprocessingDocument.Open(docFilePath, true))
{
    // Compiled on the first equation and reused afterwards; loading the
    // stylesheet once per equation repeats the same work for every formula.
    XslCompiledTransform xslTransform = null;

    // Both enumerations are copied into lists first, so inserting runs below
    // never modifies a tree that is still being lazily enumerated.
    var paragraphs = new System.Collections.Generic.List<Paragraph>(
        doc.MainDocumentPart.RootElement.Descendants<Paragraph>());

    foreach (var paragraph in paragraphs)
    {
        var equations = new System.Collections.Generic.List<DocumentFormat.OpenXml.Math.OfficeMath>(
            paragraph.Descendants<DocumentFormat.OpenXml.Math.OfficeMath>());

        foreach (var ele in equations)
        {
            if (xslTransform == null)
            {
                xslTransform = new XslCompiledTransform();
                xslTransform.Load(officeMathMLSchemaFilePath);
            }

            string result;
            using (TextReader tr = new StringReader(ele.OuterXml))
            using (XmlReader reader = XmlReader.Create(tr))
            using (MemoryStream ms = new MemoryStream())
            {
                XmlWriterSettings settings = xslTransform.OutputSettings.Clone();
                // Emit a bare fragment without an XML declaration.
                settings.ConformanceLevel = ConformanceLevel.Fragment;
                settings.OmitXmlDeclaration = true;
                // Keep the stream open after the writer is disposed; it is read below.
                settings.CloseOutput = false;

                // Disposing the writer flushes all buffered output into the stream.
                using (XmlWriter xw = XmlWriter.Create(ms, settings))
                {
                    // Transform the OfficeMath XML with the loaded stylesheet.
                    xslTransform.Transform(reader, xw);
                }

                ms.Seek(0, SeekOrigin.Begin);
                using (StreamReader sr = new StreamReader(ms, Encoding.UTF8))
                {
                    result = MathML2Latex(sr.ReadToEnd());
                    officeMLFormulas.Add(result);
                }
            }

            // Place the converted text directly before the math element.
            Run run = new Run();
            run.Append(new Text(result));
            ele.InsertBeforeSelf(run);
        }
    }
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.