繁体   English   中英

使用Excel 2010 VBA从XML文档获取数据,如果存在节点,则节点长度返回0

[英]Getting data from XML doc using Excel 2010 VBA, node length returns 0 when node is present

我正在尝试开发一个VBA脚本,该脚本从Excel 2010电子表格中获取ISSN期刊名称,并查询Sherpa / Romeo Web服务(版权政策数据库)。 然后,使用每个标题的自存档策略填充电子表格。

尽管它适用于我的测试电子表格中的大多数行,但似乎无法获取大量期刊标题的信息。 巧合的是,它们都是一个发布者,我不确定它会不会有所作为,因为它是XML。 我认为我的代码肯定有问题,但是当存在日记标记时,我无法确定为什么它认为“ resp.getElementsByTagName(“ journal”)。Length“为0。 这是未按预期方式运行的ISSN的XML结果的链接 我试图将其与可行的结果进行比较,我看不出有什么区别。

我是这一切的新手,任何提示将不胜感激。 我希望有人能帮帮忙 :)

这是ISSN的列表,我在带有“ journal”标签的数据旁边添加了**,但未提取。

0387-7604 **
1864-7782
1932-6203
0927-5568
0302-9743
1687-725X
0302-9743
0387-7604
0302-9743
1871-5192 **
1448-0220
1550-2783
1466-853X **
1438-4639 **
1642-431X
0142-0615
0096-140X
1746-1391
1096-3480
1065-9471
0260-2938
1055-9965
1084-8045 **

我的代码:

Private Sub btnCopyright_Click()
Dim wsISSN As Worksheet
Set wsISSN = ThisWorkbook.Sheets("ISSN_2")
Dim ISSN As String
Dim URL As String
Dim baseURL As String
baseURL = "http://www.sherpa.ac.uk/romeo/api29.php?" 'I removed my API key' 

Dim i As Integer
Dim Last As Integer

i = 1
Last = wsISSN.Range("D6000").End(xlUp).Row
If Last = 1 Then Exit Sub

For i = 2 To Last 'second row to last row'
    Dim req As New xmlhttp
    ISSN = Cells(i, 4).Value

    If ISSN = "Invalid" Or ISSN = "" Then
        GoTo skipISSN
    End If

    URL = baseURL & "&issn=" & ISSN
    req.Open "GET", URL, False
    req.Send
    Debug.Print (req.ResponseText)

    Dim resp As New DOMDocument
    resp.LoadXML req.ResponseText
    Debug.Print (resp.getElementsByTagName("journal").Length)

    If resp.getElementsByTagName("journal").Length = 0 Then
        Cells(i, 5).Value = "unknown"
        Cells(i, 6).Value = "unknown"
        Cells(i, 7).Value = "unknown"
        Cells(i, 8).Value = "unknown"
        Cells(i, 9).Value = "unknown"
        Cells(i, 10).Value = "unknown"
        Cells(i, 11).Value = "unknown"
        GoTo skipISSN
    End If

        Dim preprint As String
        Dim preRest As String

        Debug.Print (resp.getElementsByTagName("prearchiving").Length)
        If resp.getElementsByTagName("prearchiving").Length = 0 Then
            Cells(i, 5).Value = "-"
        Else
            preprint = resp.SelectSingleNode("//preprints/prearchiving").Text
            If preprint = "can" Then
                Cells(i, 5).Value = "Yes"
            ElseIf preprint = "restricted" Then
                Cells(i, 5).Value = "restricted"
            Else
                Cells(i, 5).Value = "unknown"
            End If
        End If

        'any restrictions for archiving preprint?'
        Debug.Print (resp.getElementsByTagName("prerestrictions").Length)
        If resp.getElementsByTagName("prerestrictions").Length = 0 Then
            Cells(i, 6).Value = "-"
        Else
            preRest = resp.SelectSingleNode("//preprints/prerestrictions").Text
            Debug.Print (preRest)
            If preRest <> "" Then
                Cells(i, 6).Value = preRest
            Else
                Cells(i, 6).Value = "none"
            End If
        End If

        'is postprint allowed?'
        Dim postprint As String
        Dim postRest As String
        Debug.Print (resp.getElementsByTagName("postarchiving").Length)
        If resp.getElementsByTagName("postarchiving").Length = 0 Then
            Cells(i, 7).Value = "-"
        Else
            postprint = resp.SelectSingleNode("//postprints/postarchiving").Text
            If postprint = "can" Then
                Cells(i, 7).Value = "Yes"
            ElseIf postprint = "restricted" Then
                Cells(i, 7).Value = "restricted"
            Else
                Cells(i, 7).Value = "unknown"
            End If
        End If

        'any restrictions for archiving postprint?'
        Debug.Print (resp.getElementsByTagName("postrestrictions").Length)
        If resp.getElementsByTagName("postrestrictions").Length = 0 Then
            Cells(i, 8).Value = "-"
        Else
            postRest = resp.SelectSingleNode("//postprints/postrestrictions").Text
            Debug.Print (postRest)
            If postRest <> "" Then
                Cells(i, 8).Value = postRest
            Else
                Cells(i, 8).Value = "none"
            End If
        End If

        'is publishers version allowed?'

        Dim allCond As String

        Debug.Print (resp.getElementsByTagName("condition").Length)
        If resp.getElementsByTagName("condition").Length = 0 Then
            Cells(i, 9).Value = "-"
            Cells(i, 10).Value = "-"
            Cells(i, 11).Value = "-"
        Else
            allCond = resp.SelectSingleNode("//conditions").Text
            Debug.Print (allCond)
                If InStr(allCond, "embargo") > 0 Then
                    Cells(i, 9).Value = "maybe"
                    Cells(i, 10).Value = "yes"
                    Cells(i, 11).Value = allCond
                ElseIf InStr(allCond, "Publisher's version/PDF may be used") = 0 Then
                    Cells(i, 9).Value = "no"
                    Cells(i, 10).Value = "-"
                    Cells(i, 11).Value = allCond
                ElseIf InStr(allCond, "Publisher's version/PDF may be used") > 0 Then
                    Cells(i, 9).Value = "yes"
                    Cells(i, 10).Value = "-"
                    Cells(i, 11).Value = allCond
                ElseIf allCond = "" Then
                    Cells(i, 9).Value = "-"
                    Cells(i, 10).Value = "-"
                    Cells(i, 11).Value = "-"
                Else
                    Cells(i, 9).Value = "-"
                    Cells(i, 10).Value = "-"
                    Cells(i, 11).Value = allCond
                End If
            End If

skipISSN:

    Next i
End Sub

要解决这个特殊的无效编码问题,请尝试修改以下代码:

Sub foo()

Dim URL As String
' access key removed - see link in question for value to use
URL = "http://sherpa.ac.uk/romeo/api29.php?ak=...&issn=0387-7604"

Dim req As New XMLHTTP60
req.Open "GET", URL, False
req.send

Dim resp As New DOMDocument60
resp.validateOnParse = False
resp.setProperty "ProhibitDTD", False

resp.loadXML StrConv(req.responseBody, vbUnicode)
Debug.Print resp.getElementsByTagName("journal").Item(0).XML

Set resp = Nothing
Set req = Nothing

End Sub

重要部分如下:

  • 使用“微软XML,V6.0”对象- XMLHTTP60DOMDocument60 -而不是3.0的对象- XMLHTTPDOMDocument
  • 由于使用DOMDocument60 ,因此需要显式允许DTD。 另外,关闭validateOnParse
  • 而不是使用XMLHTTP60对象中的ResponseTextXMLHTTP60使用responseBody (字节数组),然后使用StrConv将系统的默认代码页转换为Unicode

此代码也应适用于问题中给出的代码已成功检索到的ISSN。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM