簡體   English   中英

Excel - VBA Web 抓取 - getElementsByTagName

[英]Excel - VBA Web Scraping - getElementsByTagName

我正在運行這段代碼,它完美地顯示了我想要獲得的這些田徑表演:

Sub WebScraping()

Dim XMLPage As New MSXML2.XMLHTTP60
Dim HTMLDoc As New MSHTML.HTMLDocument

Dim Records As MSHTML.IHTMLElementCollection
Dim Record As MSHTML.IHTMLElement
Dim HTMLIms As MSHTML.IHTMLElementCollection
Dim HTMLIm As MSHTML.IHTMLElement

Dim URL As String
Dim RowNum As Integer: RowNum = 1

Dim NumPage As Integer

Sheets("Sheet1").Range("a1:z10000").ClearContents

For NumPage = 1 To 4

    URL = "https://www.worldathletics.org/records/toplists/sprints/100-metres/outdoor/men/senior/2020?page=" & NumPage & ""
    
    XMLPage.Open "Get", URL, False
    XMLPage.setRequestHeader "Content-Type", "text/xml"
    
    XMLPage.send
    
    HTMLDoc.body.innerHTML = XMLPage.responseText
    
    Set Records = HTMLDoc.getElementById("toplists").getElementsByTagName("table")(0).getElementsByTagName("tbody")(0).getElementsByTagName("tr")
    
    For Each Record In Records

        Set HTMLIms = Record.getElementsByTagName("td")
    
            For Each HTMLIm In HTMLIms
        
                Sheets("Sheet1").Cells(RowNum, 1).Value = HTMLIms.Item(0).innerText
                Sheets("Sheet1").Cells(RowNum, 2).Value = HTMLIms.Item(1).innerText
                Sheets("Sheet1").Cells(RowNum, 3).Value = HTMLIms.Item(2).innerText
                Sheets("Sheet1").Cells(RowNum, 4).Value = HTMLIms.Item(3).innerText
                Sheets("Sheet1").Cells(RowNum, 5).Value = HTMLIms.Item(4).innerText
                Sheets("Sheet1").Cells(RowNum, 6).Value = HTMLIms.Item(5).innerText
                Sheets("Sheet1").Cells(RowNum, 7).Value = HTMLIms.Item(6).innerText
                Sheets("Sheet1").Cells(RowNum, 9).Value = HTMLIms.Item(8).innerText
                Sheets("Sheet1").Cells(RowNum, 10).Value = HTMLIms.Item(9).innerText
                Sheets("Sheet1").Cells(RowNum, 11).Value = HTMLIms.Item(10).innerText
    
            Next HTMLIm
            
            RowNum = RowNum + 1
    Next Record

Next NumPage

End Sub

但是當我想插入一個代碼來獲取運動員的 id 時我遇到了問題(這也可以單獨使用):

'Athletes' codes
RowNum = 1
Set HTMLIms = HTMLDoc.getElementsByTagName("a")
    
For Each HTMLIm In HTMLIms
    If Left(HTMLIm.getAttribute("href"), 24) = "about:/athletes/athlete=" Then
    Sheets("Sheet1").Cells(RowNum, 12).Value = Right(HTMLIm.getAttribute("href"), Len(HTMLIm.getAttribute("href")) - _
    (InStr(HTMLIm.getAttribute("href"), "=")))
    RowNum = RowNum + 1
    End If
Next HTMLIm

有人可以幫我先了解如何在其中插入第二個代碼嗎?

提前致謝。

嘗試將 ID 號包含在一個 go 中。 為了讓您在第 3 列中的不同數字旁邊獲得+-號,我使用了一個小技巧。您可以合並 rest,因為我在此處粘貼了相關部分。

部分合並代碼:

Sub WebScraping()
    Const Url As String = "https://www.worldathletics.org/records/toplists/sprints/100-metres/outdoor/men/senior/2020?page="
    Dim XMLPage As New XMLHTTP60, HTMLDoc As New HTMLDocument
    Dim Record As Object, NumPage As Integer
    Dim RowNum As Integer, Ws As Worksheet, I&
    
    Set Ws = ThisWorkbook.Worksheets("Sheet1")

    For NumPage = 1 To 2
        XMLPage.Open "Get", Url & NumPage, False
        XMLPage.setRequestHeader "Content-Type", "text/xml"
        XMLPage.send
        HTMLDoc.body.innerHTML = XMLPage.responseText
    
        For Each Record In HTMLDoc.getElementsByTagName("table")(0).getElementsByTagName("tbody")(0).getElementsByTagName("tr")
            RowNum = RowNum + 1: Ws.Cells(RowNum, 1).Value = Record.getElementsByTagName("td").Item(0).innerText
            Ws.Cells(RowNum, 2).Value = Record.getElementsByTagName("td").Item(1).innerText
            Ws.Cells(RowNum, 3).Value = "'" & Record.getElementsByTagName("td").Item(2).innerText
            Ws.Cells(RowNum, 4).Value = Record.getElementsByTagName("td").Item(3).innerText
            On Error Resume Next
            Ws.Cells(RowNum, 5).Value = Split(Record.getElementsByTagName("td").Item(3).getElementsByTagName("a")(0).getAttribute("href"), "=")(1)
            On Error GoTo 0
        Next Record
    Next NumPage
End Sub

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM