簡體   English   中英

在C#中將MS Word表格轉換為html

[英]Convert MS word tables to html in C#

我正在用C#寫一個Word插件,它將所有格式替換為xml標簽。現在我想用標准標簽將Word中的表格轉換為html。表格各行的列數可能不同,也就是說,表格中包含合並的單元格(跨行或跨列)。

例如:

-------------------------
|  1  |  2  |  3  |  4  |
|     -------------------
|     |  5  |  6  |  7  |
|     -------------------
|     |        8        |
|     -------------------
|     |  9  | 10  | 11  |
|------------------------
| 12  | 13  | 14  | 15  |
-------------------------

單元格1是在一列中四行的合並,而單元格8是在一行中三列的合並

如何轉換?

不久前我們遇到了類似的項目,希望以下代碼可以為您提供一個起點。 HTML部分

<span style="font-size:18px;"><div>  
    <input id="File1" type="file" runat="server"/>  
    <asp:Button ID="btnConvert" runat="server" Text="Convert" OnClick="btnConvert_Click" />  
</div></span> 

C#部分:

using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.IO;

/// <summary>
/// Page load handler. The body is empty: no work is done when the page loads.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Page_Load(object sender, EventArgs e)  
        {  

        }  

        /// <summary>
        /// Handles the Convert button click: converts the posted Word file to HTML
        /// and writes the outcome to the response.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void btnConvert_Click(object sender, EventArgs e)
        {
            // wordToHtml performs the upload itself and returns "0" (upload failed)
            // or "1" (not a Word document) instead of a path when it cannot proceed.
            // Exceptions are allowed to propagate untouched so the stack trace is kept;
            // the success message is written only when the conversion really succeeded.
            string result = wordToHtml(File1);

            if (result == "0")
            {
                Response.Write("Upload failed.");
            }
            else if (result == "1")
            {
                Response.Write("Please select a .doc or .docx file.");
            }
            else
            {
                Response.Write("Convert successfully!");
            }
        }

        /// <summary>
        /// Uploads the posted Word document to the server and saves it as filtered HTML
        /// through Word automation.
        /// </summary>
        /// <param name="wordFilePath">File input control holding the posted Word document.</param>
        /// <returns>
        /// "0" when the upload failed, "1" when the posted file is not a Word document,
        /// otherwise "/" followed by the generated file name and ".html".
        /// </returns>
        public string wordToHtml(System.Web.UI.HtmlControls.HtmlInputFile wordFilePath)
        {
            // Upload before starting Word so that the early return below cannot
            // leave a Word process running on the server.
            string filePath = uploadWord(wordFilePath);

            // "0" = upload failed, "1" = not a Word document; hand the code back unchanged.
            if (filePath == "0" || filePath == "1")
            {
                return filePath;
            }

            // Zero-padded timestamp: without padding, e.g. 1 November and 11 January
            // would produce the same digit sequence.
            string filename = System.DateTime.Now.ToString("yyyyMMddHHmmss", System.Globalization.CultureInfo.InvariantCulture);

            // Create and use the same application-rooted directory for the output file.
            // CreateDirectory is a no-op when the directory already exists.
            string htmlDirectory = Server.MapPath("~/html");
            Directory.CreateDirectory(htmlDirectory);

            object fileName = filePath;
            object saveFileName = Path.Combine(htmlDirectory, filename + ".html");

            Microsoft.Office.Interop.Word.ApplicationClass word = new Microsoft.Office.Interop.Word.ApplicationClass();
            Type wordType = word.GetType();
            try
            {
                Microsoft.Office.Interop.Word.Documents docs = word.Documents;
                Type docsType = docs.GetType();

                // open doc file
                Microsoft.Office.Interop.Word.Document doc = (Microsoft.Office.Interop.Word.Document)docsType.InvokeMember("Open",
                    System.Reflection.BindingFlags.InvokeMethod, null, docs, new Object[] { fileName, true, true });
                Type docType = doc.GetType();
                try
                {
                    // convert and save; another WdSaveFormat member (wdFormatHTML, wdFormatRTF,
                    // wdFormatText, ...) can be passed here to produce a different output format.
                    docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,
                        null, doc, new object[] { saveFileName, Microsoft.Office.Interop.Word.WdSaveFormat.wdFormatFilteredHTML });
                }
                finally
                {
                    // close document even when saving failed
                    docType.InvokeMember("Close", System.Reflection.BindingFlags.InvokeMethod,
                        null, doc, new object[] { null, null, null });
                }
            }
            finally
            {
                // exit Word on every path so no automation process is left behind
                wordType.InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, word, null);
            }

            // NOTE(review): the file is saved under ~/html but the returned path has no
            // "html" segment; kept as-is for existing callers — confirm the intended URL.
            return ("/" + filename + ".html");
        }


        /// <summary>
        /// Saves the posted file into the application's wordTmp directory when it has
        /// a Word extension (.doc or .docx).
        /// </summary>
        /// <param name="uploadFiles">File input control holding the posted file.</param>
        /// <returns>
        /// "0" when nothing was posted or saving failed, "1" when the file is not
        /// .doc/.docx, otherwise the physical path of the saved copy.
        /// </returns>
        public string uploadWord(System.Web.UI.HtmlControls.HtmlInputFile uploadFiles)
        {
            if (uploadFiles.PostedFile == null)
            {
                return "0";
            }

            try
            {
                // Path.GetExtension yields "" for a name without a dot, so such a file is
                // rejected with "1" instead of failing on Substring(-1).
                string extendName = Path.GetExtension(uploadFiles.PostedFile.FileName);

                // check if is word format; ignore case so ".DOC" / ".DOCX" are accepted too
                bool isWordFile = string.Equals(extendName, ".doc", StringComparison.OrdinalIgnoreCase)
                    || string.Equals(extendName, ".docx", StringComparison.OrdinalIgnoreCase);
                if (!isWordFile)
                {
                    return "1";
                }

                string newName = DateTime.Now.DayOfYear.ToString() + uploadFiles.PostedFile.ContentLength.ToString();

                // Create and use the same application-rooted directory for the saved file.
                // CreateDirectory is a no-op when the directory already exists.
                string uploadDirectory = Server.MapPath("~/wordTmp");
                Directory.CreateDirectory(uploadDirectory);

                string savedPath = Path.Combine(uploadDirectory, newName + extendName);
                uploadFiles.PostedFile.SaveAs(savedPath);
                return savedPath;
            }
            catch
            {
                // Best effort by contract: any failure while saving is reported as "0".
                return "0";
            }
        }

這是簡單的技巧,只需將word table復制並粘貼到dreamweaver >設計模式中即可。 當您在代碼模式下查看時,您將從那里獲得所有html標記,您只需復制該代碼並粘貼到c#文件中即可。

請享用。

感謝回復,我找到了一種將Word表格轉換為html的方法。
我寫了這段代碼:

/// <summary>
/// Wraps the content of every table in the active document in P, TD, TR and Table
/// text tags and then converts the table to plain text.
/// Any failure is shown to the user in a message box.
/// </summary>
private static void ConvertTableToHTML()
    {
        try
        {
            // ConvertToText removes the table from the Tables collection, so walk the
            // collection by index from the end instead of enumerating it while it shrinks.
            for (int t = Common.WordApplication.ActiveDocument.Tables.Count; t >= 1; t--)
            {
                Table tb = Common.WordApplication.ActiveDocument.Tables[t];
                int rowCount = tb.Rows.Count;
                int columnCount = tb.Columns.Count;

                for (int r = 1; r <= rowCount; r++)
                {
                    for (int c = 1; c <= columnCount; c++)
                    {
                        try
                        {
                            Cell cell = tb.Cell(r, c);
                            foreach (Paragraph paragraph in cell.Range.Paragraphs)
                            {
                                Tagging(paragraph.Range, "P");
                            }
                            Tagging(cell.Range, "TD");
                        }
                        catch (Exception e)
                        {
                            if (!IsMissingCollectionMember(e))
                            {
                                throw;
                            }
                            //Most likely a part of a merged cell, so skip over.
                        }
                    }

                    try
                    {
                        Row row = tb.Rows[r];
                        Tagging(row.Range, "TR");
                    }
                    catch (Exception)
                    {
                        // The row could not be tagged as a whole, so put the opening tag
                        // before the first cell that exists in this row...
                        bool initialTrTagInserted = false;
                        for (int c = 1; !initialTrTagInserted && c <= columnCount; c++)
                        {
                            try
                            {
                                Cell cell = tb.Cell(r, c);
                                cell.Range.InsertBefore("<TR>");
                                initialTrTagInserted = true;
                            }
                            catch (Exception)
                            {
                                // No usable cell at this position; try the next column.
                            }
                        }

                        // ...and the closing tag after the last cell that exists in it.
                        bool endTrTagInserted = false;
                        for (int c = columnCount; !endTrTagInserted && c >= 1; c--)
                        {
                            try
                            {
                                Cell cell = tb.Cell(r, c);
                                cell.Range.InsertAfter("</TR>");
                                endTrTagInserted = true;
                            }
                            catch (Exception)
                            {
                                // No usable cell at this position; try the previous column.
                            }
                        }
                    }
                }
                Common.Tagging2(tb.Range, "Table");

                object separator = "";
                object nestedTable = true;
                tb.ConvertToText(separator, nestedTable);
            }
        }
        catch (Exception ex) { MessageBox.Show(ex.Message); }
    }

/// <summary>
/// Tells whether an exception means that the requested table cell does not exist.
/// </summary>
/// <param name="e">Exception raised while accessing a cell.</param>
/// <returns>True when the exception matches the "member does not exist" error.</returns>
private static bool IsMissingCollectionMember(Exception e)
    {
        // The message text depends on the Office UI language, so also accept the error code.
        // NOTE(review): 0x800A1735 is assumed to be Word run-time error 5941 — confirm.
        System.Runtime.InteropServices.COMException comError = e as System.Runtime.InteropServices.COMException;
        if (comError != null && comError.ErrorCode == unchecked((int)0x800A1735))
        {
            return true;
        }
        return e.Message.Contains("The requested member of the collection does not exist.");
    }


/// <summary>
/// Surrounds a range with an opening and a closing text tag.
/// </summary>
/// <param name="range">Range that receives the tag text before and after it.</param>
/// <param name="TagName">Tag name without angle brackets, e.g. "TD".</param>
public static void Tagging(Range range, string TagName)
    {
        // Exceptions propagate unchanged: re-wrapping them in a new Exception would
        // discard the original exception type and stack trace.
        range.InsertBefore("<" + TagName + ">");
        range.InsertAfter("</" + TagName + ">");
    }

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM