简体   繁体   English

Node.js XPath 解析 HTML 表格

[英]node js xpath parsing html table

I would like to use XPath in Node.js to extract information from an HTML file into variables for later use. The information is laid out in several tables. I tried this small piece of code, but I get the following error. 我想在 Node.js 中使用 XPath 从 HTML 文件中提取信息并保存到变量中,以供以后使用。这些信息以多个表格的形式呈现。我尝试了下面这一小段代码,但出现以下错误。 I have included all the code and the attached file: 我附上了全部代码和附件:

titre 标题

titre 标题

<p>
    <FONT COLOR="blue" SIZE='+2'>
        <B><U>
   blabkbkba </U></B>
    </FONT>
</p>


<p>
    <FONT COLOR="red" SIZE='+2'>
        <B> font </B>
    </FONT>
</p>

<TABLE BORDER="1" CELLPADDING="3" CELLSPACING="0">
    <TR bgcolor="#DCDCDC">
        <TD><B> info1  </B></TD><TD><B> info2 </B></TD><TD><B> info3 </B></TD>
        <TD><B> info4  </B></TD><TD><B> info4 </B></TD><TD><B> info5 </B></TD>
        <TD><B> info6  </B></TD><TD><B> info7 </B></TD><TD><B> info7 </B></TD>
        <TD><B> info8  </B></TD><TD><B> info9 </B></TD><TD><B> info10</B></TD>
        <TD><B> info11 </B></TD><TD><B> info12</B></TD>
    </TR>

    <TR bgcolor="#B0C4DE">
        <TD><B> info11  </B></TD><TD><B> info21 </B></TD><TD><B> info31 </B></TD>
        <TD><B> info41  </B></TD><TD><B> info41 </B></TD><TD><B> info51 </B></TD>
        <TD><B> info61  </B></TD><TD><B> info71 </B></TD><TD><B> info71 </B></TD>
        <TD><B> info81  </B></TD><TD><B> info91 </B></TD><TD><B> info101</B></TD>
        <TD><B> info111 </B></TD><TD><B> info121</B></TD>
    </TR>

</TABLE>
<TABLE BORDER="1" CELLPADDING="3" CELLSPACING="0">
    <TR bgcolor="#DCDCDC">
        <TD><B> info1  </B></TD><TD><B> info2 </B></TD><TD><B> info3 </B></TD>
        <TD><B> info4  </B></TD><TD><B> info4 </B></TD><TD><B> info5 </B></TD>
        <TD><B> info6  </B></TD><TD><B> info7 </B></TD><TD><B> info7 </B></TD>
        <TD><B> info8  </B></TD><TD><B> info9 </B></TD><TD><B> info10</B></TD>
        <TD><B> info11 </B></TD><TD><B> info12</B></TD>
    </TR>

    <TR bgcolor="#B0C4DE">
        <TD><B> info11  </B></TD><TD><B> info21 </B></TD><TD><B> info31 </B></TD>
        <TD><B> info41  </B></TD><TD><B> info41 </B></TD><TD><B> info51 </B></TD>
        <TD><B> info61  </B></TD><TD><B> info71 </B></TD><TD><B> info71 </B></TD>
        <TD><B> info81  </B></TD><TD><B> info91 </B></TD><TD><B> info101</B></TD>
        <TD><B> info111 </B></TD><TD><B> info121</B></TD>
    </TR>

</TABLE>
<TABLE BORDER="1" CELLPADDING="3" CELLSPACING="0">
    <TR bgcolor="#DCDCDC">
        <TD><B> info1  </B></TD><TD><B> info2 </B></TD><TD><B> info3 </B></TD>
        <TD><B> info4  </B></TD><TD><B> info4 </B></TD><TD><B> info5 </B></TD>
        <TD><B> info6  </B></TD><TD><B> info7 </B></TD><TD><B> info7 </B></TD>
        <TD><B> info8  </B></TD><TD><B> info9 </B></TD><TD><B> info10</B></TD>
        <TD><B> info11 </B></TD><TD><B> info12</B></TD>
    </TR>

    <TR bgcolor="#B0C4DE">
        <TD><B> info11  </B></TD><TD><B> info21 </B></TD><TD><B> info31 </B></TD>
        <TD><B> info41  </B></TD><TD><B> info41 </B></TD><TD><B> info51 </B></TD>
        <TD><B> info61  </B></TD><TD><B> info71 </B></TD><TD><B> info71 </B></TD>
        <TD><B> info81  </B></TD><TD><B> info91 </B></TD><TD><B> info101</B></TD>
        <TD><B> info111 </B></TD><TD><B> info121</B></TD>
    </TR>

</TABLE>

<p>
    <a href="info.jsp">info</a>
</p>

Here is the code I am using, as a first step, to try to retrieve the contents of the td elements. Can you help me understand my mistake? 这是我用来初步尝试获取 td 元素内容的代码,您能帮我理解我的错误吗?

var  fs        = require('fs'),
     xpath     = require('xpath'),
     dom       = require('xmldom').DOMParser,
     promise   = require('promise');

/**
 * Reads a file as UTF-8 text and exposes the result as a promise.
 *
 * Despite the parameter name, the value is handed straight to
 * `fs.readFile`, so it is treated as a file-system path.
 *
 * @param {string} url - Path of the file to read.
 * @returns {Promise<string>} Fulfils with the file contents; rejects with
 *   the error reported by `fs.readFile`.
 */
function loadDistantFile (url) {
  return new Promise(function (fulfil, fail) {
    // Node-style callback: the first argument is the error, if any.
    var onRead = function (readError, text) {
      return readError ? fail(readError) : fulfil(text);
    };
    fs.readFile(url, 'utf8', onRead);
  });
}

// Reads the HTML file, parses it with xmldom, selects every TABLE element
// and tries to print TD contents for each table. Any failure in the chain
// is reported by the catch handler at the bottom.
loadDistantFile('./CrewSchedulejspShort.htm').then(function (content) {
    var doc = new dom().parseFromString(content);
    // Here the expression is the first argument and the node the second.
    var nodes = xpath.select("//TABLE", doc);

    // NOTE(review): `i` is never declared, so it becomes an implicit global.
    for (i = 0; i < nodes.length; i++) {
    // NOTE(review): the arguments are reversed compared with the call above:
    // a node is passed where the expression string is expected.
    // The result of select is used as an array above (nodes.length), so
    // `.data` on it is presumably undefined.
    var tr     = xpath.select(nodes[i], "//TR").data;
    // NOTE(review): "//TD"[0] indexes the string literal and evaluates to "/",
    // it does not pick the first TD.
    var td     = xpath.select(tr[0], "//TD"[0]).data;
    console.log('td = '+td);
}
console.log(nodes[1].toString());  
    //console.log("Node: " + nodes[0].toString())
}).catch(function (err) {
    console.error('Erreur !');
    console.dir(err);
});

I get this response: 我得到了这个响应:

Erreur ! Erreur!

Error: XPath parse error
    at XPathParser.parse (/Users/user/Desktop/lire html/node_modules/xpath/xpath.js:1185:11)
    at new XPathExpression (/Users/user/Desktop/lire html/node_modules/xpath/xpath.js:4278:17)
    at Object.exports.selectWithResolver (/Users/user/Desktop/lire html/node_modules/xpath/xpath.js:4699:19)
    at Object.exports.select (/Users/user/Desktop/lire html/node_modules/xpath/xpath.js:4682:17)
    at /Users/user/Desktop/lire html/app.js:31:24
user:lire html user$ 

The error occurs inside your for loop where you try to use your xpath expressions to extract the data. 该错误发生在您的for循环内,在for循环中您尝试使用xpath表达式提取数据。

What you have currently is, 您目前拥有的是

// Loop as posted in the question, kept unchanged to show the faults.
// NOTE(review): `i` is never declared, so it becomes an implicit global.
for (i = 0; i < nodes.length; i++) {
    // The node is passed first and the expression string second.
    var tr     = xpath.select(nodes[i], "//TR").data;
    // "//TD"[0] indexes the string literal and evaluates to "/".
    var td     = xpath.select(tr[0], "//TD"[0]).data;
    console.log('td = '+td);
}

You should change it to: 您应该将其更改为:

// xpath.select takes the expression first and the context node second.
// The leading "." makes each path relative to the current table: a bare
// "//TR" is an absolute path evaluated from the document root, so it would
// return the rows of every table on every iteration.
for (var i = 0; i < nodes.length; i++) {
    // Rows of this table only (kept for per-row processing).
    var tr     = xpath.select(".//TR", nodes[i]);
    // Cells of this table only.
    var td     = xpath.select(".//TD", nodes[i]);
    console.log('td = '+td);
}

Here is the working code for your reference 这是工作代码供您参考

var  fs        = require('fs'),
     xpath     = require('xpath'),
     dom       = require('xmldom').DOMParser,
     promise   = require('promise');

/**
 * Promise wrapper around `fs.readFile` that reads a file as UTF-8 text.
 *
 * The argument is passed unchanged to `fs.readFile`, so although it is
 * called `url` it is used as a file-system path.
 *
 * @param {string} url - Path of the file to read.
 * @returns {Promise<string>} Fulfils with the decoded file contents;
 *   rejects with the error reported by `fs.readFile`.
 */
function loadDistantFile (url) {
  var executor = function (resolve, reject) {
    fs.readFile(url, 'utf8', function (err, data) {
      // Success path first; anything else is a read failure.
      if (!err) {
        resolve(data);
        return;
      }
      reject(err);
    });
  };
  return new Promise(executor);
}

// Loads the HTML file, parses it with xmldom, and prints the TD elements
// of each TABLE separately. Any failure in the chain (file read, parsing,
// XPath evaluation) is reported by the catch handler.
loadDistantFile('./CrewSchedulejspShort.htm').then(function (content) {
    var doc = new dom().parseFromString(content);
    var nodes = xpath.select("//TABLE", doc);

    for (var i = 0; i < nodes.length; i++) {
        // xpath.select takes the expression first and the context node
        // second. The leading "." keeps the search inside the current
        // table: a bare "//TR" is an absolute path evaluated from the
        // document root and would match the rows of every table.
        var tr = xpath.select(".//TR", nodes[i]);   // rows of this table
        var td = xpath.select(".//TD", nodes[i]);   // cells of this table
        console.log('td = '+td);
    }
}).catch(function (err) {
    console.error('Erreur !');
    console.dir(err);
});

Hope this helps! 希望这对您有帮助!

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM