[英]jQuery/cheerio get combined HTML between tags
您可以不借助任何库,仅用常见的字符串操作来提取两个 <hr class="cl-right"> 标签之间的 HTML 内容。也许下面的解决方法对您有帮助:
// Sample markup for the demo: a news item containing two
// <hr class="cl-right"> rules. The fragments are joined with no separator, so
// the result is a single continuous string. The markup is kept verbatim,
// including the final unmatched </div>.
const content = [
  '<!-- Plugin inserted: [begin] -->',
  '<div class="news-single-item">',
  '<div style="float: right;"></div>',
  '<a href="nachrichten/einzelansicht/article/im-dienst.html" title="Neue Schulsanitate im Dienst">Neue Schulsanitater im Dienst</a>',
  '<div class="news-single-rightbox"></div>',
  '<h1>Exkursion der Literaturkurse</h1>',
  '<hr class="cl-right">',
  '<div class="news-single-img"></div>',
  '<p></p>',
  '<p></p>',
  '<p>(Sophie Daubenspeck fOr die Literaturk </p>',
  '<p> </p>',
  '<p> </p>',
  '<hr class="cl-right">',
  '<div class="news-single-backlink"></div>',
  '</div>',
  '<!-- Plugin inserted: [end] -->',
  '</div>',
].join('');
/**
 * Returns the text located between the first two occurrences of a delimiter.
 *
 * @param {string} html - Markup to search.
 * @param {string} [delimiter='<hr class="cl-right">'] - Exact marker that
 *   opens and closes the wanted section.
 * @returns {string} The text between the first and second delimiter, or an
 *   empty string when the section is not closed by a second delimiter.
 */
const getContentInside = (html, delimiter = '<hr class="cl-right">') => {
  const openIndex = html.indexOf(delimiter);
  if (openIndex === -1) {
    return '';
  }

  const sectionStart = openIndex + delimiter.length;
  const closeIndex = html.indexOf(delimiter, sectionStart);

  // An unclosed section counts as empty instead of leaking the rest of the
  // document to the caller.
  return closeIndex === -1 ? '' : html.slice(sectionStart, closeIndex);
};
// Extract the section from the sample markup, then print it.
const sectionBetweenRules = getContentInside(content);
console.log(sectionBetweenRules);
然后,如果您从 URL 请求内容:
// Address of the page to download (placeholder value).
const url = 'http://www.example.com/your.desired.url.here';

// Third-party HTTP client; the script awaits request(url) to obtain the page
// markup.
const request = require('request-promise');
/**
 * Returns the text located between the first two occurrences of a delimiter.
 *
 * @param {string} html - Markup to search.
 * @param {string} [delimiter='<hr class="cl-right">'] - Exact marker that
 *   opens and closes the wanted section.
 * @returns {string} The text between the first and second delimiter, or an
 *   empty string when the section is not closed by a second delimiter.
 */
const getContentInside = (html, delimiter = '<hr class="cl-right">') => {
  const openIndex = html.indexOf(delimiter);
  if (openIndex === -1) {
    return '';
  }

  const sectionStart = openIndex + delimiter.length;
  const closeIndex = html.indexOf(delimiter, sectionStart);

  // An unclosed section counts as empty instead of leaking the rest of the
  // document to the caller.
  return closeIndex === -1 ? '' : html.slice(sectionStart, closeIndex);
};
/**
 * Downloads the page at `url` and logs whatever `getContentInside` extracts
 * from its markup.
 *
 * @param {string} url - Address of the page to request.
 * @returns {Promise<void>} Settles after the extracted section has been
 *   logged; rejects when the request fails.
 */
const startParse = async (url) => {
  const html = await request(url);
  const contentHR = getContentInside(html);
  console.log(contentHR);
};

// Handle the rejection here so a failed request is reported with context
// instead of surfacing as an unhandled promise rejection.
startParse(url).catch((err) => {
  console.error(`Failed to fetch or parse ${url}:`, err);
  process.exitCode = 1;
});
有几种方法。一种方法是使用 nextUntil:
// Sample fragment: the wanted elements sit between two <hr class="cl-right">
// rules, with unrelated paragraphs before and after.
const html = ` <p>some stuff</p> <hr class="cl-right"> <div class="news-single-img"> want </div> <p>all</p> <p>of</p> <p>this</p> <hr class="cl-right"> <p>some more stuff</p> `;

// jQuery's second argument for an HTML string is an owner document or an
// attributes object, not a filter, so the opening rule is selected explicitly.
// Starting from that single element means nextUntil stops at the closing rule
// and nothing after it has to be trimmed off.
const text = $(html)
  .filter("hr.cl-right")
  .first()
  .nextUntil("hr.cl-right")
  .map((i, e) => $(e).text().trim())
  .get();

console.log(text);
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
如果你想要 HTML 作为一个字符串,你可以使用一个 outerHTML 策略,比如:
// Sample fragment: the wanted elements sit between two <hr class="cl-right">
// rules, with unrelated paragraphs before and after.
const html = ` <p>some stuff</p> <hr class="cl-right"> <div class="news-single-img"> want </div> <p>all</p> <p>of</p> <p>this</p> <hr class="cl-right"> <p>some more stuff</p> `;

// jQuery's second argument for an HTML string is an owner document or an
// attributes object, not a filter, so the opening rule is selected explicitly.
// Starting from that single element means nextUntil stops at the closing rule
// and nothing after it has to be trimmed off.
const result = $(html)
  .filter("hr.cl-right")
  .first()
  .nextUntil("hr.cl-right")
  // Move the matched elements into a temporary <div> so their combined markup
  // can be read back as that wrapper's inner HTML.
  .wrapAll("<div>")
  .parent()
  .html();

console.log(result);
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
对于 cheerio,请尝试:
const cheerio = require("cheerio"); // 1.0.0-rc.12

// Sample document: the wanted elements sit between two <hr class="cl-right">
// rules.
const html = `
<p>some stuff</p>
<hr class="cl-right">
<div class="news-single-img">
want
</div>
<p>all</p>
<p>of</p>
<p>this</p>
<hr class="cl-right">
<p>some more stuff</p>
`;

const $ = cheerio.load(html);

// Siblings that follow the first rule, up to (not including) the next rule.
const openingRule = $("hr.cl-right").first();
const siblingsBetween = openingRule.nextUntil("hr.cl-right");

// Trimmed text of each sibling, in document order.
const text = siblingsBetween.toArray().map((node) => $(node).text().trim());

console.log(text);
对于 HTML:
// Sample document: the wanted elements sit between two <hr class="cl-right">
// rules.
const html = `
<p>some stuff</p>
<hr class="cl-right">
<div class="news-single-img">
want
</div>
<p>all</p>
<p>of</p>
<p>this</p>
<hr class="cl-right">
<p>some more stuff</p>
`;

const $ = cheerio.load(html);

// Siblings that follow the first rule, up to (not including) the next rule.
const firstRule = $("hr.cl-right").first();
const elementsBetween = firstRule.nextUntil("hr.cl-right");

// Serialize the selected elements back to a single HTML string.
const htmlOut = $.html(elementsBetween);

console.log(htmlOut);
也可以看看:
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.