[英]remove HTML tags from cell strings : excel Formula
我在excel表中有一個帶HTML標簽的數據,如下所示:
<b>This is test data<br>Nice
<div> Go on this is next Cell
Very goood <b>.....</b>
所以,基本上我想在Excel工作表中刪除所有HTML標簽,或者把它們全部替換為空格。
在「查找內容」中使用 <*> 通配符模式,「替換為」留空(或填入一個空格),然后執行「全部替換」(Replace All):
要打開此功能,請轉到功能區Home > Find & Select > Replace...
或只需按CTRL + H.
使用TRIM函數可以進一步移除多餘的空格。 祝好運!
在Excel中打開VBA(Alt + F11),單擊右側項目瀏覽器中的項目名稱(電子表格名稱)。 插入 - >新模塊。 將用戶定義的函數粘貼到模塊窗口中。 保存為允許宏的.XLSM。
假設您的數據在單元格A2中,鍵入函數'= StripHTML(A2)'。 你也可以在這里下載一個工作示例:
http://jfrancisconsulting.com/how-to-strip-html-tags-in-excel/
Function StripHTML(cell As Range) As String
    ' Returns the text of `cell` with HTML markup removed and common HTML
    ' entities decoded.
    '
    ' Parameters:
    '   cell - the cell whose displayed text (Range.Text) is cleaned.
    ' Returns:
    '   The cleaned string. </p> becomes two line feeds, <br> becomes one line
    '   feed, <li> becomes "-", every other tag is deleted.
    '
    ' Processing order matters:
    '   1. structural tags are turned into line breaks / dashes,
    '   2. all remaining tags are stripped,
    '   3. entities are decoded, with &amp; decoded LAST.
    ' Decoding after stripping keeps escaped text such as "&lt;b&gt;" as the
    ' literal text "<b>" instead of deleting it as if it were a tag, and decoding
    ' &amp; last prevents "&amp;lt;" from being decoded twice.
    '
    ' NOTE(review): Range.Text is the text as displayed in the sheet; confirm it
    ' is preferable to Range.Value for very long or narrow-column cells.
    Dim RegEx As Object
    Dim sInput As String
    Dim sOut As String
    Dim sMap As String
    Dim vPairs As Variant
    Dim vPair As Variant
    Dim i As Long

    sInput = cell.Text

    ' VBA string literals have no escape sequences, so these two calls match the
    ' literal backslash text "\x0D\x0A" / "\x00" (kept for data that carries
    ' such escaped text), not real control characters.
    sInput = Replace(sInput, "\x0D\x0A", Chr(10))
    sInput = Replace(sInput, "\x00", Chr(10))
    ' Normalize real CR+LF pairs to a single LF.
    sInput = Replace(sInput, vbCrLf, vbLf)

    Set RegEx = CreateObject("vbscript.regexp")
    With RegEx
        .Global = True
        .IgnoreCase = True      ' tags are matched regardless of case (<BR>, <br>)
        .MultiLine = True

        ' replace HTML breaks and end of paragraphs with line breaks
        .Pattern = "</p\s*>"
        sOut = .Replace(sInput, vbLf & vbLf)
        .Pattern = "<br\s*/?>"
        sOut = .Replace(sOut, vbLf)

        ' replace bullets with dashes (also when <li> carries attributes)
        .Pattern = "<li(\s[^>]*)?>"
        sOut = .Replace(sOut, "-")

        ' replace all the remaining HTML tags
        .Pattern = "<[^>]+>"
        sOut = .Replace(sOut, "")
    End With
    Set RegEx = Nothing

    ' Decode entities. Each item is "name=codepoint"; ChrW builds the character
    ' so the source stays pure ASCII and does not depend on the editor code page.
    ' The map is built in several statements to stay clear of VBA's
    ' line-continuation limit. "amp" must remain the final entry.
    If InStr(sOut, "&") > 0 Then
        sMap = "ndash=8211 mdash=8212 iexcl=161 iquest=191 quot=34 ldquo=8220 rdquo=8221"
        sMap = sMap & " #39=39 lsquo=39 rsquo=8217 laquo=171 raquo=187 nbsp=32"
        sMap = sMap & " cent=162 copy=169 divide=247 micro=181 middot=183 para=182"
        sMap = sMap & " plusmn=177 euro=8364 pound=163 reg=174 sect=167 trade=8482 yen=165"
        sMap = sMap & " aacute=225 Aacute=193 agrave=224 Agrave=192 acirc=226 Acirc=194"
        sMap = sMap & " aring=229 Aring=197 atilde=227 Atilde=195 auml=228 Auml=196"
        sMap = sMap & " aelig=230 AElig=198 ccedil=231 Ccedil=199"
        sMap = sMap & " eacute=233 Eacute=201 egrave=232 Egrave=200 ecirc=234 Ecirc=202"
        sMap = sMap & " euml=235 Euml=203 iacute=237 Iacute=205 igrave=236 Igrave=204"
        sMap = sMap & " icirc=238 Icirc=206 iuml=239 Iuml=207 ntilde=241 Ntilde=209"
        sMap = sMap & " oacute=243 Oacute=211 ograve=242 Ograve=210 ocirc=244 Ocirc=212"
        sMap = sMap & " oslash=248 Oslash=216 otilde=245 Otilde=213 ouml=246 Ouml=214"
        sMap = sMap & " szlig=223 uacute=250 Uacute=218 ugrave=249 Ugrave=217"
        sMap = sMap & " ucirc=251 Ucirc=219 uuml=252 Uuml=220 yuml=255"
        sMap = sMap & " acute=180 #180=180 #96=96"
        sMap = sMap & " gt=62 lt=60 amp=38"

        vPairs = Split(sMap, " ")
        For i = LBound(vPairs) To UBound(vPairs)
            vPair = Split(vPairs(i), "=")
            ' Entity names are case-sensitive (&Aacute; vs &aacute;), so force a
            ' binary compare even if the module uses Option Compare Text.
            sOut = Replace(sOut, "&" & vPair(0) & ";", ChrW(CLng(vPair(1))), 1, -1, vbBinaryCompare)
        Next i
    End If

    StripHTML = sOut
End Function
由於上面的宏對我不起作用,我自己修復了。 這是我的第一個腳本,如果你們可以改進它,讓它更快、功能更多,那么非常歡迎!
好吧,我以前沒有編程經驗(除了6年前學過一些非常基礎的Java),但是在一些幫助和大量猜測之下(實際上花了好幾個小時),我設法寫出了這個腳本。它效果很好,可以去除大多數 <標簽> 和 &#8xxx; 這類實體文本,但它不會用換行符替換 <BR>(你可以按 CTRL + H 手動完成:在「查找內容」中輸入 <br>,在「替換為」中按住 ALT 並用數字小鍵盤輸入 0010,替換框中會出現一個閃爍的小點,然后點擊“全部替換”)。
將下面的代碼粘貼到用戶模塊中(alt + f11,右鍵單擊Sheet1-> insert-> Module-> paste code)
然后通過File-> Options-> Customize Ribbon->選中Developer復選框來啟用開發人員選項卡。 然后轉到開發人員選項卡 - >插入 - >按鈕 - >放置按鈕並右鍵單擊 - >指定宏 - >選擇 RemoveTags。
Sub RemoveTags()
    ' Strips HTML tags from every cell of the current selection and replaces a
    ' small set of numeric character entities with a single space.
    '
    ' Side effects: the whole selection is switched to the Text number format
    ' ("@") and each non-empty, non-error cell is overwritten with its cleaned
    ' string (formulas in those cells are therefore replaced by text).
    '
    ' Does nothing when the selection is not a cell range (e.g. a shape).
    Dim r As Range
    Dim target As Range
    Dim entities As Variant
    Dim ent As Variant
    Dim v As Variant
    Dim s As String

    If TypeName(Selection) <> "Range" Then Exit Sub

    Selection.NumberFormat = "@" 'set cells to text numberformat

    ' Only visit cells that can hold data; selecting whole columns would
    ' otherwise loop over a million empty cells.
    Set target = Intersect(Selection, Selection.Worksheet.UsedRange)
    If target Is Nothing Then Exit Sub

    ' Entities replaced by a space: right/left single quote, en dash, and the
    ' line-feed / carriage-return references.
    ' NOTE(review): the last two are reconstructed from a listing in which the
    ' entity text had been lost - confirm against the data being cleaned.
    entities = Array("&#8217;", "&#8211;", "&#8216;", "&#10;", "&#13;")

    With CreateObject("vbscript.regexp")
        ' [^>]* (rather than .*?) also matches tags that span a line break,
        ' because "." does not match newline characters in this engine.
        .Pattern = "<[^>]*>"
        .Global = True
        For Each r In target.Cells
            v = r.Value
            ' Error values (#N/A, #VALUE!, ...) cannot be coerced to a string.
            If Not IsError(v) Then
                If Not IsEmpty(v) Then
                    s = .Replace(CStr(v), "")
                    For Each ent In entities
                        s = Replace(s, ent, " ")
                    Next ent
                    r.Value = s
                End If
            End If
        Next r
    End With
End Sub
' Click event handler stub for CommandButton1; the body is empty in this listing,
' so clicking the button currently does nothing.
' NOTE(review): presumably intended to invoke RemoveTags - confirm before wiring it up.
Private Sub CommandButton1_Click()
End Sub
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.