繁体   English   中英

在 VBA 中优化 HTML 解析

[英]Optimizing HTML Parsing in VBA

我有一些 VBA 代码向https://nt3-s.zacks.com/fundamreports/Default.aspx发出请求并将所有数据提取到工作表中。 我的问题是我的解析技术需要很长时间。 有没有人对我如何改进下面的代码有什么建议? 也许我应该利用 QuerySelectorAll() 或其他一些方法......

Sub Financials(ticker, SheetName As String)
Dim XMLPage As New MSXML2.XMLHTTP60
Dim RP1, RP2, QP1, QP2, QP3, QP4, QP5, QP6 As String
Dim HTMLDoc As New MSHTML.HTMLDocument
Dim HTMLRow, HTMLCell As MSHTML.IHTMLElement
Dim i As Integer
Dim r, c, offset, shift As Integer

'Application.ScreenUpdating = False

shift = 50

' STANDARDIZED FINANCIAL STATEMENTS

' Generate income statements
'============================

XMLPage.Open "Post", "https://nt3-s.zacks.com/fundamreports/Default.aspx", False

' Header
XMLPage.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"

' Body Parameters
' Required parameters so every table value can be editted
RP1 = "__VIEWSTATE=%2FwEPDwUJNDg5NjgzODkyD2QWAgIDD2QWAgIBD2QWBAIPDxAPFgIeB0VuYWJsZWRnZGRkZAITDxAPFgIfAGdkZGRkGAEFHl9fQ29udHJvbHNSZXF1aXJlUG9zdEJhY2tLZXlfXxYBBQhidG5FeGNlbHC%2FJVg5ST2AoHtph1XAsQL0bObX"

' String is too long to be typed in one line...
a1 = "%2FwEWdALpx6COCwKm07CqBwKSh%2B%2FQCgL96MW%2BBgLN6MW%2BBgLav7PDDQLCv%2FPADQLNv%2FPADQLcv%2FPADQLfv%2FPADQLev%2FPADQLZv%2FPADQLYv%2FPADQLbv%2FPADQLav%2FPADQLFv%2FPADQLduMOkAwLq0v%2FjCQLq0uO8AQLq0teZCgLq0rvyAwLq0q%2FPDALq0pOoBALq0oeFDQLq0uvhBgLq0p%2BJAwLq0oPiDALX%2B53NDwLX%2B4GmBwLX%2B%2FUCAtf72d8JAtf7zbgBAtf7sZUKAtf7pe4DAtf7icsMAtf7vfIJAtf7oc8CApOYyLkHApOYvBICk5ig7wkCk5iUyAICk5j4pAoCk5jsgQMCk5jQ2gwCk5jEtwQCk5jo3gICk5jcuwoC%2BKHuog0C%2BKHS%2FwYC%2BKHG2A8C%2BKGqtQcC%2BKGeDgL4oYLrCQL4ofbHAgL4odqgCgL4oY7ICAL4ofIkAuW2jIwDAuW28OgMAuW25MUFAsTR38AHAvO744cNAvO7%2F9gFAvO7y%2F0OAvO7p5YHAvO7s6sIAvO7j0wC87ub4QkC87v3hQIC87uD7QcC87ufhggCzpKBqQsCzpKdwgMCzpLp5gQCzpLFuw0CzpLR3AUCzpKt8Q4CzpK5igcCzpKVrwgCzpKhlg0CzpK9qwYCivHU3Q"
a2 = "MCivGg9gQCivG8iw0CivGIrAYCivHkwA4CivHw5QcCivHMvggCivHYUwKK8fS6BgKK8cDfDgLhyPLGCQLhyM6bAgLhyNq8CwLhyLbRAwLhyILqBALhyJ6PDQLhyOqjBgLhyMbEDgLhyJKsDALhyO7ABAL835DoBwL83%2ByMCAL83%2FihAQLnzLDSBAL4zLDSBAL5zLDSBAL6zLDSBAL8zLDSBAL9zLDSBAK3wpPeDgKFwpPeDgKuh8zbAwKhh8zbAwLR2eHwDAKFt7SHCcQGh8eHqGGqe%2F6U9Wz%2FpSk9FF8d"

RP2 = "&__EVENTVALIDATION=" & a1 & a2

' Query Parameters
QP1 = "&tbTicker=" & ticker
QP2 = "&ddBasis=Q"
QP3 = "&ddPeriod=0"
QP4 = "&ddFrom=" & CStr(Year(DateAdd("yyyy", -10, Date)))
QP5 = "&ddTo=" & CStr(Year(Date))
QP6 = "&ddFormType=0"

XMLPage.send RP1 & RP2 & QP1 & QP2 & QP3 & QP4 & QP5 & QP6

HTMLDoc.body.innerHTML = XMLPage.responseText

r = 1
c = 1
offset = 0 * shift

Sheets(SheetName).Select

Cells.Select
Selection.ClearContents

'Extract data from table
For Each HTMLRow In HTMLDoc.getElementsByTagName("tr")
    For Each HTMLCell In HTMLRow.getElementsByTagName("th")
        Cells(r, c + offset).value = HTMLCell.innerText
        c = c + 1
    Next HTMLCell
    For Each HTMLCell In HTMLRow.getElementsByTagName("td")
        Cells(r, c + offset).value = HTMLCell.innerText
        c = c + 1
    Next HTMLCell
c = 1
r = r + 1
Next HTMLRow

' Generate balance statements
'======================================
'Request
XMLPage.Open "Post", "https://nt3-s.zacks.com/fundamreports/Default.aspx", False

' Header
XMLPage.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"

QP6 = "&ddFormType=1"

XMLPage.send RP1 & RP2 & QP1 & QP2 & QP3 & QP4 & QP5 & QP6

HTMLDoc.body.innerHTML = XMLPage.responseText

r = 1
c = 1
offset = 1 * shift

'Extract data from table
For Each HTMLRow In HTMLDoc.getElementsByTagName("tr")
    For Each HTMLCell In HTMLRow.getElementsByTagName("th")
        Cells(r, c + offset).value = HTMLCell.innerText
        c = c + 1
    Next HTMLCell
    For Each HTMLCell In HTMLRow.getElementsByTagName("td")
        Cells(r, c + offset).value = HTMLCell.innerText
        c = c + 1
    Next HTMLCell
c = 1
r = r + 1
Next HTMLRow

编辑:我更改了代码以包含有关我的程序的更多详细信息。 该子将一次收集多个公司的损益表、资产负债表、现金流量和其他指标。 我包含了几个我如何使用 POST 方法解析数据的实例。

通过每次迭代计算,将Application.Calculation = xlManual添加到宏的开头,将Application.Calculation = xlAutomatic到末尾,这将防止在每次传递循环后更新计算。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM