简体   繁体   English

从 HTML <a> 标签中提取数据

[英]Extract data from HTML <a> tags

I am trying to extract data from a web page which contains account data.我正在尝试从包含帐户数据的网页中提取数据。 With help I have managed to extract some info from the ID attribute and text but still cannot extract other attributes.在帮助下,我设法从 ID 属性和文本中提取了一些信息,但仍然无法提取其他属性。 I need to extract Class, Codigo and Nombre in addition to the info I already get.除了我已经获得的信息之外,我还需要提取 Class、Codigo 和 Nombre。 In my code the Class column returns 'undefined'.在我的代码中,Class 列返回“未定义”。

code ```代码```

// Every <a> element in the current document.
var x = document.querySelectorAll("a");

// Rows gathered for the output table; the loop below pushes one
// [text, class, id] triple per link that passes the numeric-id filter.
var myarray = []
/**
 * Tells whether a value is a string that represents a number.
 *
 * @param {*} str - Candidate value; anything that is not a string is rejected.
 * @returns {boolean} true only for strings that both coerce to a number as a
 *     whole and parse as a float.
 */
function isNumeric(str) {
  // Non-strings (numbers, null, undefined, objects) never qualify.
  if (typeof str !== "string") {
    return false;
  }
  // Whole-string coercion rejects trailing junk such as "12px".
  if (isNaN(str)) {
    return false;
  }
  // Empty and whitespace-only strings coerce to 0 above, but parseFloat
  // yields NaN for them, so they are rejected here.
  var parsed = parseFloat(str);
  return !isNaN(parsed);
}
// Walk every link and keep only those whose id attribute is numeric.
for (var i=0; i<x.length; i++){
var nametext = x[i].textContent;
// The link text spans several indented lines in the page markup, so collapse
// each run of whitespace to a single space and trim the ends.
var cleantext = nametext.replace(/\s+/g, ' ').trim();
var cleanlink = x[i].id;
    if(isNumeric(cleanlink)){
        // NOTE(review): DOM elements have no `class` property, so this reads
        // undefined (the "Class" column problem described above). The class
        // attribute is exposed as `className`.
        var classid = x[i].class
        myarray.push([cleantext,classid,cleanlink]);// lots of other stuff on page so only want 
    }                                               // items with numeric id
};
/**
 * Renders the rows collected in the global `myarray` as an HTML table and
 * writes it into a newly opened browser window.
 *
 * Each row is expected to be a [name, class, id] triple; every cell value is
 * converted to a string and HTML-escaped before being inserted.
 *
 * @throws {Error} If the browser refuses to open the new window
 *     (window.open returned null, e.g. because of a popup blocker).
 */
function make_table() {
    // Cell values come from page text/attributes; escape them so characters
    // such as "<" or "&" are displayed literally instead of parsed as markup.
    function escapeHtml(value) {
        return String(value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    var rows = [];
    for (var i = 0; i < myarray.length; i++) {
        rows.push(
            '<tr><td>' + escapeHtml(myarray[i][0]) +
            '</td><td>' + escapeHtml(myarray[i][1]) +
            '</td><td>' + escapeHtml(myarray[i][2]) +
            '</td></tr>'
        );
    }

    // Header cells are wrapped in a row, and tbody/table are explicitly closed
    // so the written document is well-formed.
    var table = '<table><thead><tr><th>Name</th><th>Class</th><th>Id</th></tr></thead><tbody>' +
        rows.join('') +
        '</tbody></table>';

    var w = window.open("");
    if (!w) {
        throw new Error('make_table: could not open a new window (popup blocked?)');
    }
    w.document.write(table);
    // Finish the document stream opened by write() so the page stops loading.
    w.document.close();
}
// Render the collected rows right away.
make_table()
HTML sample from Web page
   
             <tr style="display:none;">
          <td class="text-center" style="color:#116eff;font-size:11px;">
           <span class="glyphicon glyphicon-plus" onclick="PlanCuenta.toggleGrupo(this);" style="cursor:pointer;">
           </span>
          </td>
          <td class="pad-cuenta" style="padding-left:105px;font-weight: bold;">
           <a class="cuenta-group" codigo="1.1.4.3" nombre="Anticipo a Proveedores" data-placement="right" data-popover-content="#popover_content_wrapper" data-title="Menú Opciones" data-trigger="focus" desc="1.1.4.3 Anticipo a Proveedores" href="javascript:void(0);" id="619105" padre="619102" rel="" style="color:black;">
            1.1.4.3 Anticipo a Proveedores
           </a>
          </td>
          
          <td style="text-align:right;color:black;" width="160">
           $565.62
          </td>
          
         </tr>
         
         <tr style="display:none;">
          <td class="text-center" style="color:#116eff;font-size:11px;">
           <span class="" onclick="PlanCuenta.toggleGrupo(this);" style="">
           </span>
          </td>
          <td class="pad-cuenta" style="padding-left:140px;font-style: italic;">
           <a class="cuenta" codigo="1.1.4.3.1" nombre="Narvaez Maria, Antic." data-placement="right" data-popover-content="#popover_content_wrapper" data-title="Menú Opciones" data-trigger="focus" desc="1.1.4.3.1 Narvaez Maria, Antic." href="javascript:void(0);" id="903864" padre="619105" rel="" style="color:black;">
            1.1.4.3.1 Narvaez Maria, Antic.
           </a>
          </td>
    

You need to use the className property of the element, rather than class, which is an HTML attribute.您需要使用元素的 className 属性,而不是 class,它是一个 HTML 属性。 See the MDN className docs.请参阅 MDN className 文档。

 // Every <a> element in the current document.
 var x = document.querySelectorAll("a");
 // One [text, class, id] triple per link with a numeric id.
 var myarray = [];

 /**
  * Tells whether a value is a string that represents a number.
  *
  * @param {*} str - Candidate value; non-strings are rejected.
  * @returns {boolean} true when the string coerces to a number and parses as a float.
  */
 function isNumeric(str) {
   if (typeof str !== "string") return false;
   return !isNaN(str) && !isNaN(parseFloat(str));
 }

 // lots of other stuff on page so only want items with numeric id
 for (var i = 0; i < x.length; i++) {
   var nametext = x[i].textContent;
   var cleantext = nametext.replace(/\s+/g, ' ').trim();
   var cleanlink = x[i].id;
   if (isNumeric(cleanlink)) {
     // className is the DOM property that reflects the HTML class attribute;
     // reading `.class` on an element yields undefined.
     var classid = x[i].className;
     myarray.push([cleantext, classid, cleanlink]);
   }
 }

 /**
  * Builds the HTML table markup for the rows collected in `myarray`.
  *
  * @returns {string} A complete <table> element with one row per collected link.
  */
 function make_table() {
   var table = '<table><thead><tr><th>Name</th><th>Class</th><th>Id</th></tr></thead><tbody>';
   for (var i = 0; i < myarray.length; i++) {
     table += '<tr><td>' + myarray[i][0] + '</td><td>' + myarray[i][1] + '</td><td>' + myarray[i][2] + '</td></tr>';
   }
   // Close the elements opened above so the markup is well-formed.
   table += '</tbody></table>';
   return table;
 }

 console.log(make_table());
 <tr style="display:none;"> <td class="text-center" style="color:#116eff;font-size:11px;"> <span class="glyphicon glyphicon-plus" onclick="PlanCuenta.toggleGrupo(this);" style="cursor:pointer;"> </span> </td> <td class="pad-cuenta" style="padding-left:105px;font-weight: bold;"> <a class="cuenta-group" codigo="1.1.4.3" nombre="Anticipo a Proveedores" data-placement="right" data-popover-content="#popover_content_wrapper" data-title="Menú Opciones" data-trigger="focus" desc="1.1.4.3 Anticipo a Proveedores" href="javascript:void(0);" id="619105" padre="619102" rel="" style="color:black;"> 1.1.4.3 Anticipo a Proveedores </a> </td> <td style="text-align:right;color:black;" width="160"> $565.62 </td> </tr> <tr style="display:none;"> <td class="text-center" style="color:#116eff;font-size:11px;"> <span class="" onclick="PlanCuenta.toggleGrupo(this);" style=""> </span> </td> <td class="pad-cuenta" style="padding-left:140px;font-style: italic;"> <a class="cuenta" codigo="1.1.4.3.1" nombre="Narvaez Maria, Antic." data-placement="right" data-popover-content="#popover_content_wrapper" data-title="Menú Opciones" data-trigger="focus" desc="1.1.4.3.1 Narvaez Maria, Antic." href="javascript:void(0);" id="903864" padre="619105" rel="" style="color:black;"> 1.1.4.3.1 Narvaez Maria, Antic. </a> </td> </tr>

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM