簡體   English   中英

使用宏將數據從網站抓取到 Excel...丟失

[英]Scraping data from website to Excel using a macro...lost

我對此完全陌生,但這是我的范圍。 我正在運行一個宏來從業務系統中提取數據。 提取此信息后,我想要一個宏來獲取某些字段,將它們放入網站表單中,單擊提交,然后將某些數據結果刮回並粘貼回 excel。 一切正常,減去刮取並粘貼回 excel。

請幫忙!

我搜索了整個堆棧溢出並觀看了視頻以試圖弄清楚我需要做什么,但我一定是誤解了一些東西。

Sub Track()
Range("B2").Select

'This should call to PT and deliver tracking info

Dim IE As Object
Dim tbl As Object, td As Object



 Set IE = CreateObject("InternetExplorer.Application") 'Set IEapp = 
 InternetExplorer
 IE.Visible = True

      IE.Navigate "https://www.partstown.com/track-my-order"
      With IEapp
          Do
          DoEvents
          Loop Until IE.readyState = 4



'Input PO and zip
 Call IE.Document.getElementById("orderNo").SetAttribute("value", 
 "4500969111")
'ActiveCell.Offset(0, 2).Select
 Call IE.Document.getElementById("postalCode").SetAttribute("value", 
 "37040")
 IE.Document.forms(7).Submit

 Application.Wait Now + TimeValue("00:00:09")

'this is where i am stuck. I know this isnt right but tried to piece it 
 together
 Set elemCollection = IE.Document.getelElementsByTagname("table.account- 
 table details _tc_table_highlighted")

 For t = 0 To (elemCollection.Length - 1)
 For r = 0 To (elemCollection(t).Rows.Length - 1)
    For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
 ThisWorkbook.Worksheets(1).Cells(r + 1, c + 1) = 
 elemCollection(t).Rows.Cells(c).innertext
 Next c
 Next r
 Next t

 End With


 End Sub

這是我想要它拉什么:航運列 QTY 已訂購 QTY 已發貨 產品 並以線性方式顯示:Shipping、QTY 已訂購、QTY 已發貨、產品

IE瀏覽器:

我把它做得比平時更冗長,所以你可以看到每一步。

關鍵事項:

1) 正確的頁面加載等待While .Busy Or .readyState < 4: DoEvents: Wend

2) 盡可能按 id 選擇元素。 #是一個 css id 選擇器 css 選擇器通過 .document 的querySelector方法應用並檢索頁面中與指定模式匹配的第一個元素

3)需要一個定時循環來等待結果出現

4)訂單數量等信息是一個換行符分隔的字符串。 似乎最容易在這些換行符上拆分,然后通過索引訪問結果數組中的各個項目

5)我根據您的規范將結果排列在一個數組中,然后將該數組一次性寫入工作表

6)“。” .order-history__item-descript--min類選擇器,即返回具有order-history__item-descript--min class的第一個元素

7) [x=y] 是[data-label=Shipping]屬性 = 值選擇器,即返回具有值為Shipping data-label屬性的第一個元素

8) .details-table a 組合使用后代組合.details-table a “”來指定我想要a標簽元素,它的父元素為.details-table

VBA:

Option Explicit

'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub RetrieveInfo()
    Dim ie As InternetExplorer, ele As Object, t As Date
    Const MAX_WAIT_SEC As Long = 5

    Set ie = New InternetExplorer

    With ie
        .Visible = True
        .Navigate2 "https://www.partstown.com/track-my-order"

        While .Busy Or .readyState < 4: DoEvents: Wend

        With .document
            .querySelector("#orderNo").Value = "4500969111"
            .querySelector("#postalCode").Value = "37040"
            .querySelector("#orderLookUpForm").submit  
        End With

        While .Busy Or .readyState < 4: DoEvents: Wend

        Dim shipping As String, order As String, items() As String
        With .document
            t = Timer
            Do
                On Error Resume Next
                Set ele = .querySelector("[data-label=Shipping]")
                On Error GoTo 0
                If Timer - t > MAX_WAIT_SEC Then Exit Do
            Loop While ele Is Nothing

            If ele Is Nothing Then Exit Sub

            shipping = ele.innerText
            order = .querySelector(".order-history__item-descript--min").innerText
            items = Split(order, vbNewLine)

            Dim qtyOrdered As Long, qtyShipped As String, product As String

            qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
            qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))
            product = .querySelector(".details-table a").Title

            Dim results()
            results = Array(shipping, qtyOrdered, qtyShipped, product)
            ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).Resize(1, UBound(results) + 1) = results

        End With
        .Quit
    End With
End Sub

如果不熟悉 HTML,請查看:

https://developer.mozilla.org/en-US/docs/Web/HTML

如果不熟悉 css 選擇器,請查看:

https://flukeout.github.io/


XMLHTTP:

整個事情也可以用XHR來完成。 這比打開瀏覽器要快得多。

XHR:

使用 XMLHttpRequest (XHR) 對象與服務器交互。 您可以從 URL 檢索數據,而無需進行整頁 [render]

在這種情況下,我對登錄頁面執行初始GET請求以檢索CSRFToken ,以便在您手動輸入數據並按提交時重新制定頁面向服務器發出的POST請求。 您可以在服務器響應中獲得所需的數據。 我在 POST 發送行的正文中傳遞一個查詢字符串.send "orderNo=4500969111&postalCode=37040&CSRFToken=" & csrft ; 你可以在那里看到你的參數。

Option Explicit
Public Sub GetInfo()
    Dim html As HTMLDocument, csrft As String  '<  VBE > Tools > References > Microsoft HTML Object Library
    Set html = New HTMLDocument

    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.partstown.com", False
        .send

        html.body.innerHTML = .responseText

        csrft = html.querySelector("[name=CSRFToken]").Value

        .Open "POST", "https://www.partstown.com/track-my-order", False
        .setRequestHeader "Referer", "https://www.partstown.com/track-my-order"
        .setRequestHeader "User-Agent", "Mozilla/5.0"
        .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
        .setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
        .setRequestHeader "Accept-Encoding", "gzip, deflate"
        .setRequestHeader "Accept-Language", "en-US,en;q=0.9"
        .send "orderNo=4500969111&postalCode=37040&CSRFToken=" & csrft

        html.body.innerHTML = .responseText
    End With

    Dim shipping As String, order As String, items() As String

    shipping = html.querySelector("[data-label=Shipping]").innerText
    order = html.querySelector(".order-history__item-descript--min").innerText
    items = Split(order, vbNewLine)

    Dim qtyOrdered As Long, qtyShipped As String, product As String

    qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
    qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))
    product = html.querySelector(".details-table a").Title

    Dim results()
    results = Array(shipping, qtyOrdered, qtyShipped, product)
    ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).Resize(1, UBound(results) + 1) = results
End Sub

循環示例:

Option Explicit

Public Sub GetInfo()
    Dim html As HTMLDocument, csrft As String, lastRow As Long, sourceValues() '<  VBE > Tools > References > Microsoft HTML Object Library
    Set html = New HTMLDocument
    Dim ws As Worksheet, i As Long
    Set ws = ThisWorkbook.Worksheets("Sheet4")
    lastRow = ws.Cells(ws.rows.Count, "B").End(xlUp).Row
    sourceValues = ws.Range("B2:D" & lastRow).Value
    Dim results()
    ReDim results(1 To UBound(sourceValues, 1), 1 To 4)
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.partstown.com", False
        .send
        html.body.innerHTML = .responseText

        csrft = html.querySelector("[name=CSRFToken]").Value
        Stop
        For i = LBound(sourceValues, 1) To UBound(sourceValues, 1)
            If sourceValues(i, 1) <> vbNullString And sourceValues(i, 3) <> vbNullString Then
                DoEvents
                .Open "POST", "https://www.partstown.com/track-my-order", False
                .setRequestHeader "Referer", "https://www.partstown.com/track-my-order"
                .setRequestHeader "User-Agent", "Mozilla/5.0"
                .setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
                .setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
                .setRequestHeader "Accept-Encoding", "gzip, deflate"
                .setRequestHeader "Accept-Language", "en-US,en;q=0.9"
                .send "orderNo=" & sourceValues(i, 1) & "&postalCode=" & sourceValues(i, 3) & "&CSRFToken=" & csrft

                html.body.innerHTML = .responseText

                Dim shipping As String, order As String, items() As String

                shipping = html.querySelector("[data-label=Shipping]").innerText
                order = html.querySelector(".order-history__item-descript--min").innerText
                items = Split(order, vbNewLine)

                Dim qtyOrdered As Long, qtyShipped As String, product As String

                qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
                qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))
                product = html.querySelector(".details-table a").Title

                results(i, 1) = shipping
                results(i, 2) = qtyOrdered
                results(i, 3) = qtyShipped
                results(i, 4) = product
            End If
            'Application.Wait Now + TimeSerial(0, 0, 1)
        Next
    End With
    'results written out from row 2 column E
    ws.Cells(2, 5).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM