[英]C# HTML Tag in a Tag
我有点泡菜。 我要在网站上获取一张图片列表。 我知道该怎么做,但是我必须过滤掉图像的位置。
例如,我想在id为“ theseImages”的div标签中抓取图像,但是另一个div标签在id为“ notTheseImages”的标签中还有另一组图像。 通过标签“ img”将每个标签循环到HtmlElementCollection中将忽略div,因为它还会从“ notTheseImages”中获取图像。
有没有一种方法可以在检查检查那些图像在div标签中的位置时遍历图像?
这可以帮助您选择当前的HTML,并可能在将来使用:)
protected HtmlElement[] GetElementsByParent(HtmlDocument document, HtmlElement baseElement = null, params string[] singleSelectors)
{
if (singleSelectors == null || singleSelectors.Length == 0)
{
throw new Exception("Please give at least 1 selector!");
}
IList<HtmlElement> result = new List<HtmlElement>();
bool last = singleSelectors.Length == 1;
string singleSelector = singleSelectors[0];
if (string.IsNullOrWhiteSpace(singleSelector) || string.IsNullOrWhiteSpace(singleSelector.Trim()))
{
return null;
}
singleSelector = singleSelector.Trim();
if (singleSelector.StartsWith("#"))
{
var item = document.GetElementById(singleSelector.Substring(1));
if (item == null)
{
return null;
}
if (last)
{
result.Add(item);
}
else
{
var results = GetElementsByParent(document, item, singleSelectors.Skip(1).ToArray());
if (results != null && results.Length > 0)
{
foreach (var res in results)
{
result.Add(res);
}
}
}
}
else if (singleSelector.StartsWith("."))
{
if (baseElement == null)
{
baseElement = document.Body;
}
foreach (HtmlElement child in baseElement.Children)
{
string cls;
if (!string.IsNullOrWhiteSpace((cls = child.GetAttribute("class"))))
{
if (cls.Split(' ').Contains(singleSelector.Substring(1)))
{
if (last)
{
result.Add(child);
}
else
{
var results = GetElementsByParent(document, child, singleSelectors.Skip(1).ToArray());
if (results != null && results.Length > 0)
{
foreach (var res in results)
{
result.Add(res);
}
}
}
}
}
}
}
else
{
HtmlElementCollection elements = null;
if (baseElement != null)
{
elements = baseElement.GetElementsByTagName(singleSelector);
}
else
{
elements = document.GetElementsByTagName(singleSelector);
}
foreach (HtmlElement item in elements)
{
if (last)
{
result.Add(item);
}
else
{
var results = GetElementsByParent(document, item, singleSelectors.Skip(1).ToArray());
if (results != null && results.Length > 0)
{
foreach (var res in results)
{
result.Add(res);
}
}
}
}
}
return result.ToArray();
}
private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
// here we can query
var result = GetElementsByParent(webBrowser1.Document, null, "#theseImages", "img");
}
结果将包含#theseImages下的图像
请注意,GetElementsByParent尚未经过测试,我只是针对您的用例进行了测试,似乎还可以。
确定文档完成后,别忘了只开始查询;)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.