[英]Scraping data from website to Excel using a macro...lost
我對此完全陌生,但這是我的范圍。 我正在運行一個宏來從業務系統中提取數據。 提取此信息后,我想要一個宏來獲取某些字段,將它們放入網站表單中,單擊提交,然后將某些數據結果刮回並粘貼回 excel。 一切正常,減去刮取並粘貼回 excel。
請幫忙!
我搜索了整個堆棧溢出並觀看了視頻以試圖弄清楚我需要做什么,但我一定是誤解了一些東西。
Sub Track()
Range("B2").Select
'This should call to PT and deliver tracking info
Dim IE As Object
Dim tbl As Object, td As Object
Set IE = CreateObject("InternetExplorer.Application") 'Set IEapp =
InternetExplorer
IE.Visible = True
IE.Navigate "https://www.partstown.com/track-my-order"
With IEapp
Do
DoEvents
Loop Until IE.readyState = 4
'Input PO and zip
Call IE.Document.getElementById("orderNo").SetAttribute("value",
"4500969111")
'ActiveCell.Offset(0, 2).Select
Call IE.Document.getElementById("postalCode").SetAttribute("value",
"37040")
IE.Document.forms(7).Submit
Application.Wait Now + TimeValue("00:00:09")
'this is where i am stuck. I know this isnt right but tried to piece it
together
Set elemCollection = IE.Document.getelElementsByTagname("table.account-
table details _tc_table_highlighted")
For t = 0 To (elemCollection.Length - 1)
For r = 0 To (elemCollection(t).Rows.Length - 1)
For c = 0 To (elemCollection(t).Rows(r).Cells.Length - 1)
ThisWorkbook.Worksheets(1).Cells(r + 1, c + 1) =
elemCollection(t).Rows.Cells(c).innertext
Next c
Next r
Next t
End With
End Sub
這是我想要它拉什么:航運列 QTY 已訂購 QTY 已發貨 產品 並以線性方式顯示:Shipping、QTY 已訂購、QTY 已發貨、產品
我把它做得比平時更冗長,所以你可以看到每一步。
關鍵事項:
1) 正確的頁面加載等待While .Busy Or .readyState < 4: DoEvents: Wend
2) 盡可能按 id 選擇元素。 #
是一個 css id 選擇器。 css 選擇器通過 .document 的querySelector
方法應用並檢索頁面中與指定模式匹配的第一個元素
3)需要一個定時循環來等待結果出現
4)訂單數量等信息是一個換行符分隔的字符串。 似乎最容易在這些換行符上拆分,然后通過索引訪問結果數組中的各個項目
5)我根據您的規范將結果排列在一個數組中,然后將該數組一次性寫入工作表
6)“。” 是.order-history__item-descript--min
的類選擇器,即返回具有order-history__item-descript--min
class
的第一個元素
7) [x=y] 是[data-label=Shipping]
的屬性 = 值選擇器,即返回具有值為Shipping
data-label
屬性的第一個元素
8) .details-table a
組合使用后代組合.details-table a
“”來指定我想要a
標簽元素,它的父元素為.details-table
VBA:
Option Explicit
'VBE > Tools > References:
' Microsoft Internet Controls
Public Sub RetrieveInfo()
Dim ie As InternetExplorer, ele As Object, t As Date
Const MAX_WAIT_SEC As Long = 5
Set ie = New InternetExplorer
With ie
.Visible = True
.Navigate2 "https://www.partstown.com/track-my-order"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
.querySelector("#orderNo").Value = "4500969111"
.querySelector("#postalCode").Value = "37040"
.querySelector("#orderLookUpForm").submit
End With
While .Busy Or .readyState < 4: DoEvents: Wend
Dim shipping As String, order As String, items() As String
With .document
t = Timer
Do
On Error Resume Next
Set ele = .querySelector("[data-label=Shipping]")
On Error GoTo 0
If Timer - t > MAX_WAIT_SEC Then Exit Do
Loop While ele Is Nothing
If ele Is Nothing Then Exit Sub
shipping = ele.innerText
order = .querySelector(".order-history__item-descript--min").innerText
items = Split(order, vbNewLine)
Dim qtyOrdered As Long, qtyShipped As String, product As String
qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))
product = .querySelector(".details-table a").Title
Dim results()
results = Array(shipping, qtyOrdered, qtyShipped, product)
ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).Resize(1, UBound(results) + 1) = results
End With
.Quit
End With
End Sub
如果不熟悉 HTML,請查看:
https://developer.mozilla.org/en-US/docs/Web/HTML
如果不熟悉 css 選擇器,請查看:
整個事情也可以用XHR來完成。 這比打開瀏覽器要快得多。
使用 XMLHttpRequest (XHR) 對象與服務器交互。 您可以從 URL 檢索數據,而無需進行整頁 [render]
在這種情況下,我對登錄頁面執行初始GET
請求以檢索CSRFToken
,以便在您手動輸入數據並按提交時重新制定頁面向服務器發出的POST
請求。 您可以在服務器響應中獲得所需的數據。 我在 POST 發送行的正文中傳遞一個查詢字符串.send "orderNo=4500969111&postalCode=37040&CSRFToken=" & csrft
; 你可以在那里看到你的參數。
Option Explicit
Public Sub GetInfo()
Dim html As HTMLDocument, csrft As String '< VBE > Tools > References > Microsoft HTML Object Library
Set html = New HTMLDocument
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.partstown.com", False
.send
html.body.innerHTML = .responseText
csrft = html.querySelector("[name=CSRFToken]").Value
.Open "POST", "https://www.partstown.com/track-my-order", False
.setRequestHeader "Referer", "https://www.partstown.com/track-my-order"
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
.setRequestHeader "Accept-Encoding", "gzip, deflate"
.setRequestHeader "Accept-Language", "en-US,en;q=0.9"
.send "orderNo=4500969111&postalCode=37040&CSRFToken=" & csrft
html.body.innerHTML = .responseText
End With
Dim shipping As String, order As String, items() As String
shipping = html.querySelector("[data-label=Shipping]").innerText
order = html.querySelector(".order-history__item-descript--min").innerText
items = Split(order, vbNewLine)
Dim qtyOrdered As Long, qtyShipped As String, product As String
qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))
product = html.querySelector(".details-table a").Title
Dim results()
results = Array(shipping, qtyOrdered, qtyShipped, product)
ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).Resize(1, UBound(results) + 1) = results
End Sub
循環示例:
Option Explicit
Public Sub GetInfo()
Dim html As HTMLDocument, csrft As String, lastRow As Long, sourceValues() '< VBE > Tools > References > Microsoft HTML Object Library
Set html = New HTMLDocument
Dim ws As Worksheet, i As Long
Set ws = ThisWorkbook.Worksheets("Sheet4")
lastRow = ws.Cells(ws.rows.Count, "B").End(xlUp).Row
sourceValues = ws.Range("B2:D" & lastRow).Value
Dim results()
ReDim results(1 To UBound(sourceValues, 1), 1 To 4)
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", "https://www.partstown.com", False
.send
html.body.innerHTML = .responseText
csrft = html.querySelector("[name=CSRFToken]").Value
Stop
For i = LBound(sourceValues, 1) To UBound(sourceValues, 1)
If sourceValues(i, 1) <> vbNullString And sourceValues(i, 3) <> vbNullString Then
DoEvents
.Open "POST", "https://www.partstown.com/track-my-order", False
.setRequestHeader "Referer", "https://www.partstown.com/track-my-order"
.setRequestHeader "User-Agent", "Mozilla/5.0"
.setRequestHeader "Content-Type", "application/x-www-form-urlencoded"
.setRequestHeader "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
.setRequestHeader "Accept-Encoding", "gzip, deflate"
.setRequestHeader "Accept-Language", "en-US,en;q=0.9"
.send "orderNo=" & sourceValues(i, 1) & "&postalCode=" & sourceValues(i, 3) & "&CSRFToken=" & csrft
html.body.innerHTML = .responseText
Dim shipping As String, order As String, items() As String
shipping = html.querySelector("[data-label=Shipping]").innerText
order = html.querySelector(".order-history__item-descript--min").innerText
items = Split(order, vbNewLine)
Dim qtyOrdered As Long, qtyShipped As String, product As String
qtyOrdered = CLng(Replace$(items(0), "Qty Ordered: ", vbNullString))
qtyShipped = CLng(Replace$(items(1), "Qty Shipped: ", vbNullString))
product = html.querySelector(".details-table a").Title
results(i, 1) = shipping
results(i, 2) = qtyOrdered
results(i, 3) = qtyShipped
results(i, 4) = product
End If
'Application.Wait Now + TimeSerial(0, 0, 1)
Next
End With
'results written out from row 2 column E
ws.Cells(2, 5).Resize(UBound(results, 1), UBound(results, 2)) = results
End Sub
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.