简体   繁体   中英

VBA - Extract data from html to excel

I am having a problem with coding. I used excel VBA to extract data from a webpage to excel. The webpage is https://proptx.midland.com.hk/utx/index.jsp?est_id=E12837&lang=en Click "All transaction" and it displays a second table at the bottom of the first table. I would like to extract data from the bottom table (not the top one).

Here is the code:

Sub PropertyTransactions()
Dim ieObj As InternetExplorer
Dim htmlEle As IHTMLElement
Dim i As Integer

i = 1

Set ieObj = New InternetExplorer
ieObj.Visible = True
ieObj.navigate "https://proptx.midland.com.hk/utx/index.jsp?est_id=E12837&lang=en"

Application.Wait Now + TimeValue("00:00:05")

For Each htmlEle In ieObj.document.getElementsByClassName("tablesorter")(0).getElementsByTagName("tr")
Set htmlEle = ActiveDocument.all.tags("head").Item(0)
    With ActiveSheet
        .Range("A" & i).Value = htmlEle.Children(0).textContent
        .Range("B" & i).Value = htmlEle.Children(1).textContent
        .Range("C" & i).Value = htmlEle.Children(2).textContent
        .Range("D" & i).Value = htmlEle.Children(3).textContent
        .Range("E" & i).Value = htmlEle.Children(4).textContent
        .Range("F" & i).Value = htmlEle.Children(5).textContent
        .Range("G" & i).Value = htmlEle.Children(6).textContent
        .Range("H" & i).Value = htmlEle.Children(7).textContent
        .Range("I" & i).Value = htmlEle.Children(8).textContent
        .Range("J" & i).Value = htmlEle.Children(9).textContent
    End With

    i = i + 1
Next htmlEle

End Sub

However, there is an error in this line:
For Each htmlEle In ieObj.document.getElementsByClassName("tablesorter")(0).getElementsByTagName("tr")

It displays a run-time error 91: object variable or with block variable not set. How can I fix it? Thank you very much!

I use late binding but that doesn't matter. Please read the comments in the macro:

Sub PropertyTransactions()

  Dim url As String
  Dim browser As Object
  Dim nodeTransactionTab As Object
  Dim nodeTransactionTable As Object
  Dim nodesHeaderAll As Object
  Dim nodeHeaderOne As Object
  Dim nodesTrAll As Object
  Dim nodeTrOne As Object
  Dim nodesTdAll As Object
  Dim nodeTdOne As Object
  Dim currentRow As Long
  Dim currentColumn As Long

  'Initialize variables
  currentRow = 1
  currentColumn = 1
  url = "https://proptx.midland.com.hk/utx/index.jsp?est_id=E12837&lang=en"

  'Initialize Internet Explorer, set visibility,
  'call URL and wait until page is fully loaded
  Set browser = CreateObject("internetexplorer.application")
  browser.Visible = True 'You can set this to False to make the IE invisible
  browser.navigate url
  Do Until browser.ReadyState = 4: DoEvents: Loop

  'First we must click the 'All Transaction' tab to load the table you want
  'The tab works like a button and has the id 'tx_record_3'
  'The nature of an id is that it is unique
  'With the following lines you can try to get elements by them ids
  'The method getElementByID() doesn't build a node collection
  'We switch off all runtime errors when we use it because we get
  'a runtime error if there is nor element with that id
  On Error Resume Next
  Set nodeTransactionTab = browser.document.getElementByID("tx_record_3")
  On Error GoTo 0

  'Now we check if we have an html element 'nodeTransactionTab'
  If Not nodeTransactionTab Is Nothing Then
    'Click the tab and give a short break to load the transaction table
    nodeTransactionTab.Click
    Application.Wait Now + TimeValue("00:00:02")

    'Now we can get the transaction table
    'You try that with the css class 'tablesorter' but the table has also an id
    '(The CSS class 'tablesorter' can also only be found after the transaction
    'table has been loaded)
    On Error Resume Next
    Set nodeTransactionTable = browser.document.getElementByID("Tx_hist_table")
    On Error GoTo 0
  Else
    'The object nodeTransactionTab couldn't be build
    MsgBox "No transaction tab found"
  End If

  'To avoid deep if nesting, we can check at this point whether the transaction
  'table was found, because an object is nothing by default
  If Not nodeTransactionTable Is Nothing Then
    'We trust the header is present
    Set nodesHeaderAll = nodeTransactionTable.getElementsByTagName("th")

    'Get header of transaction table
    For Each nodeHeaderOne In nodesHeaderAll
      ActiveSheet.Cells(currentRow, currentColumn).Value = Trim(nodeHeaderOne.innertext)
      currentColumn = currentColumn + 1
    Next nodeHeaderOne

    'Prepare next line
    currentRow = currentRow + 1
    currentColumn = 1

    'We trust the values are present too
    'Get all value table rows
    Set nodesTrAll = nodeTransactionTable.getElementsByTagName("tbody")(0).getElementsByTagName("tr")

    'Get one row of values after the other from transaction table
    For Each nodeTrOne In nodesTrAll
      'Get all cells of next row
      Set nodesTdAll = nodeTrOne.getElementsByTagName("td")

      'Get values cell by cell
      For Each nodeTdOne In nodesTdAll
        ActiveSheet.Cells(currentRow, currentColumn).Value = Trim(nodeTdOne.innertext)
        currentColumn = currentColumn + 1
      Next nodeTdOne
    'Prepare next line
    currentRow = currentRow + 1
    currentColumn = 1
    Next nodeTrOne
  Else
    'The object nodeTransactionTable couldn't be build
    MsgBox "No transaction table found"
  End If

  'Clean up
  browser.Quit
  Set browser = Nothing
  Set nodeTransactionTab = Nothing
  Set nodesHeaderAll = Nothing
  Set nodeHeaderOne = Nothing
  Set nodesTrAll = Nothing
  Set nodeTrOne = Nothing
  Set nodesTdAll = Nothing
  Set nodeTdOne = Nothing
End Sub

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM