[英]Create XML Nodes based on XPath?
有没有人知道从XPath表达式以编程方式创建XML层次结构的现有方法?
例如,如果我有一个XML片段,例如:
<feed>
<entry>
<data></data>
<content></content>
</entry>
</feed>
给定XPath表达式/ feed / entry / content / @ source我会:
<feed>
<entry>
<data></data>
<content @source=""></content>
</entry>
</feed>
我意识到使用XSLT是可能的,但由于我正在尝试完成的动态特性,固定转换将无法工作。
我在C#工作,但如果有人使用其他语言的解决方案,请加入。
谢谢您的帮助!
在您提供的示例中,唯一创建的是属性...
XmlElement element = (XmlElement)doc.SelectSingleNode("/feed/entry/content");
if (element != null)
element.SetAttribute("source", "");
如果您真正想要的是能够创建不存在的层次结构,那么您可以使用自己的简单xpath解析器。 我不知道将属性保留在xpath中。 我宁愿将节点作为一个元素进行投射,并在.SetAttribute上进行操作,就像我在这里所做的那样:
static private XmlNode makeXPath(XmlDocument doc, string xpath)
{
return makeXPath(doc, doc as XmlNode, xpath);
}
static private XmlNode makeXPath(XmlDocument doc, XmlNode parent, string xpath)
{
// grab the next node name in the xpath; or return parent if empty
string[] partsOfXPath = xpath.Trim('/').Split('/');
string nextNodeInXPath = partsOfXPath.First();
if (string.IsNullOrEmpty(nextNodeInXPath))
return parent;
// get or create the node from the name
XmlNode node = parent.SelectSingleNode(nextNodeInXPath);
if (node == null)
node = parent.AppendChild(doc.CreateElement(nextNodeInXPath));
// rejoin the remainder of the array as an xpath expression and recurse
string rest = String.Join("/", partsOfXPath.Skip(1).ToArray());
return makeXPath(doc, node, rest);
}
static void Main(string[] args)
{
XmlDocument doc = new XmlDocument();
doc.LoadXml("<feed />");
makeXPath(doc, "/feed/entry/data");
XmlElement contentElement = (XmlElement)makeXPath(doc, "/feed/entry/content");
contentElement.SetAttribute("source", "");
Console.WriteLine(doc.OuterXml);
}
这是我的快速入侵,只要您使用/configuration/appSettings/add[@key='name']/@value
等格式,也可以创建属性。
static XmlNode createXPath(XmlDocument doc, string xpath)
{
XmlNode node=doc;
foreach (string part in xpath.Substring(1).Split('/'))
{
XmlNodeList nodes=node.SelectNodes(part);
if (nodes.Count>1) throw new ComponentException("Xpath '"+xpath+"' was not found multiple times!");
else if (nodes.Count==1) { node=nodes[0]; continue; }
if (part.StartsWith("@"))
{
var anode=doc.CreateAttribute(part.Substring(1));
node.Attributes.Append(anode);
node=anode;
}
else
{
string elName, attrib=null;
if (part.Contains("["))
{
part.SplitOnce("[", out elName, out attrib);
if (!attrib.EndsWith("]")) throw new ComponentException("Unsupported XPath (missing ]): "+part);
attrib=attrib.Substring(0, attrib.Length-1);
}
else elName=part;
XmlNode next=doc.CreateElement(elName);
node.AppendChild(next);
node=next;
if (attrib!=null)
{
if (!attrib.StartsWith("@")) throw new ComponentException("Unsupported XPath attrib (missing @): "+part);
string name, value;
attrib.Substring(1).SplitOnce("='", out name, out value);
if (string.IsNullOrEmpty(value) || !value.EndsWith("'")) throw new ComponentException("Unsupported XPath attrib: "+part);
value=value.Substring(0, value.Length-1);
var anode=doc.CreateAttribute(name);
anode.Value=value;
node.Attributes.Append(anode);
}
}
}
return node;
}
SplitOnce是一种扩展方法:
public static void SplitOnce(this string value, string separator, out string part1, out string part2)
{
if (value!=null)
{
int idx=value.IndexOf(separator);
if (idx>=0)
{
part1=value.Substring(0, idx);
part2=value.Substring(idx+separator.Length);
}
else
{
part1=value;
part2=null;
}
}
else
{
part1="";
part2=null;
}
}
样品:
public static void Set(XmlDocument doc, string xpath, string value)
{
if (doc==null) throw new ArgumentNullException("doc");
if (string.IsNullOrEmpty(xpath)) throw new ArgumentNullException("xpath");
XmlNodeList nodes=doc.SelectNodes(xpath);
if (nodes.Count>1) throw new ComponentException("Xpath '"+xpath+"' was not found multiple times!");
else if (nodes.Count==0) createXPath(doc, xpath).InnerText=value;
else nodes[0].InnerText=value;
}
例如
Set(doc, "/configuration/appSettings/add[@key='Server']/@value", "foobar");
这个想法的一个问题是xpath“破坏”信息。
有无数个xml树可以匹配许多xpath。 现在在某些情况下,就像你给出的例子一样,有一个明显的最小xml树,它匹配你的xpath,你有一个使用“=”的谓词。
但是,例如,如果谓词使用不相等,或者除了相等之外的任何其他算术运算符,则存在无限多种可能性。 您可以尝试选择一个“规范”的xml树,例如,需要最少的位来表示。
例如,假设你有xpath /feed/entry/content[@source > 0]
。 现在任何适当结构的xml树,其中节点内容具有值> 0的属性源将匹配,但是存在大于零的无限数量的数字。 通过选择“最小”值(大概为1),您可以尝试规范化xml。
Xpath谓词可以包含非常任意的算术表达式,因此对此的一般解决方案即使不是不可能也是非常困难的。 你可以想象那里有一个巨大的等式,它必须反过来解决,才能得出与这个等式匹配的数值; 但由于可能存在无限数量的匹配值(只要它实际上是不等式而不是等式),就需要找到规范的解决方案。
其他形式的许多表达也会破坏信息。 例如,像“或”这样的运营商总是破坏信息。 如果你知道(X or Y) == 1
,你不知道X是1,Y是1,还是两者都是1; 所有你肯定知道的是其中一个是1! 因此,如果你有一个使用OR的表达式,你无法分辨哪个节点或输入到OR的值应该是1(你可以做出任意选择并将它们都设置为1,因为这样可以满足表达式,因为两种选择,其中只有一种是1)。
现在假设xpath中有几个表达式引用同一组值。 然后,你最终会得到一个几乎不可能解决的联立方程或不等式系统。 同样,如果将允许的xpath限制为其全部功率的一小部分,则可以解决此问题。 我怀疑完全一般的情况类似于图灵停止问题; 在这种情况下,给定一个任意程序(xpath),找出一组与程序匹配的一致数据,并且在某种意义上是最小的。
这是我的版本。 希望这也会对某人有所帮助。
public static void Main(string[] args)
{
XmlDocument doc = new XmlDocument();
XmlNode rootNode = GenerateXPathXmlElements(doc, "/RootNode/FirstChild/SecondChild/ThirdChild");
Console.Write(rootNode.OuterXml);
}
private static XmlDocument GenerateXPathXmlElements(XmlDocument xmlDocument, string xpath)
{
XmlNode parentNode = xmlDocument;
if (xmlDocument != null && !string.IsNullOrEmpty(xpath))
{
string[] partsOfXPath = xpath.Split('/');
string xPathSoFar = string.Empty;
foreach (string xPathElement in partsOfXPath)
{
if(string.IsNullOrEmpty(xPathElement))
continue;
xPathSoFar += "/" + xPathElement.Trim();
XmlNode childNode = xmlDocument.SelectSingleNode(xPathSoFar);
if(childNode == null)
{
childNode = xmlDocument.CreateElement(xPathElement);
}
parentNode.AppendChild(childNode);
parentNode = childNode;
}
}
return xmlDocument;
}
Mark Miller的Java解决方案的C#版本
/// <summary>
/// Makes the X path. Use a format like //configuration/appSettings/add[@key='name']/@value
/// </summary>
/// <param name="doc">The doc.</param>
/// <param name="xpath">The xpath.</param>
/// <returns></returns>
public static XmlNode createNodeFromXPath(XmlDocument doc, string xpath)
{
// Create a new Regex object
Regex r = new Regex(@"/+([\w]+)(\[@([\w]+)='([^']*)'\])?|/@([\w]+)");
// Find matches
Match m = r.Match(xpath);
XmlNode currentNode = doc.FirstChild;
StringBuilder currentPath = new StringBuilder();
while (m.Success)
{
String currentXPath = m.Groups[0].Value; // "/configuration" or "/appSettings" or "/add"
String elementName = m.Groups[1].Value; // "configuration" or "appSettings" or "add"
String filterName = m.Groups[3].Value; // "" or "key"
String filterValue = m.Groups[4].Value; // "" or "name"
String attributeName = m.Groups[5].Value; // "" or "value"
StringBuilder builder = currentPath.Append(currentXPath);
String relativePath = builder.ToString();
XmlNode newNode = doc.SelectSingleNode(relativePath);
if (newNode == null)
{
if (!string.IsNullOrEmpty(attributeName))
{
((XmlElement)currentNode).SetAttribute(attributeName, "");
newNode = doc.SelectSingleNode(relativePath);
}
else if (!string.IsNullOrEmpty(elementName))
{
XmlElement element = doc.CreateElement(elementName);
if (!string.IsNullOrEmpty(filterName))
{
element.SetAttribute(filterName, filterValue);
}
currentNode.AppendChild(element);
newNode = element;
}
else
{
throw new FormatException("The given xPath is not supported " + relativePath);
}
}
currentNode = newNode;
m = m.NextMatch();
}
// Assure that the node is found or created
if (doc.SelectSingleNode(xpath) == null)
{
throw new FormatException("The given xPath cannot be created " + xpath);
}
return currentNode;
}
这是Christian Peeters解决方案的改进版本,支持xpath表达式中的命名空间。
public static XNode CreateNodeFromXPath(XElement elem, string xpath)
{
// Create a new Regex object
Regex r = new Regex(@"/*([a-zA-Z0-9_\.\-\:]+)(\[@([a-zA-Z0-9_\.\-]+)='([^']*)'\])?|/@([a-zA-Z0-9_\.\-]+)");
xpath = xpath.Replace("\"", "'");
// Find matches
Match m = r.Match(xpath);
XNode currentNode = elem;
StringBuilder currentPath = new StringBuilder();
XPathNavigator XNav = elem.CreateNavigator();
while (m.Success)
{
String currentXPath = m.Groups[0].Value; // "/ns:configuration" or "/appSettings" or "/add"
String NamespaceAndElementName = m.Groups[1].Value; // "ns:configuration" or "appSettings" or "add"
String filterName = m.Groups[3].Value; // "" or "key"
String filterValue = m.Groups[4].Value; // "" or "name"
String attributeName = m.Groups[5].Value; // "" or "value"
XNamespace nspace = "";
string elementName;
int p = NamespaceAndElementName.IndexOf(':');
if (p >= 0)
{
string ns = NamespaceAndElementName.Substring(0, p);
elementName = NamespaceAndElementName.Substring(p + 1);
nspace = XNav.GetNamespace(ns);
}
else
elementName = NamespaceAndElementName;
StringBuilder builder = currentPath.Append(currentXPath);
String relativePath = builder.ToString();
XNode newNode = (XNode)elem.XPathSelectElement(relativePath, XNav);
if (newNode == null)
{
if (!string.IsNullOrEmpty(attributeName))
{
((XElement)currentNode).Attribute(attributeName).Value = "";
newNode = (XNode)elem.XPathEvaluate(relativePath, XNav);
}
else if (!string.IsNullOrEmpty(elementName))
{
XElement newElem = new XElement(nspace + elementName);
if (!string.IsNullOrEmpty(filterName))
{
newElem.Add(new XAttribute(filterName, filterValue));
}
((XElement)currentNode).Add(newElem);
newNode = newElem;
}
else
{
throw new FormatException("The given xPath is not supported " + relativePath);
}
}
currentNode = newNode;
m = m.NextMatch();
}
// Assure that the node is found or created
if (elem.XPathEvaluate(xpath, XNav) == null)
{
throw new FormatException("The given xPath cannot be created " + xpath);
}
return currentNode;
}
这是基于Mark Miller代码的增强型RegEx
/([\w]+)(?:(?:[\[])(@|)([\w]+)(?:([!=<>]+)(?:(?:(?:')([^']+)(?:'))|([^']+))|)(?:[]])|)|([.]+))
Group 1: Node name
Group 2: @ (or Empty, for non attributes)
Group 3: Attribute Key
Group 4: Attribute Value (if string)
Group 5: Attribute Value (if number)
Group 6: .. (dots, one or more)
我需要一个XNode而不是XmlNode实现,并且RegEx不适合我(因为带有。或 - 的元素名称不起作用)
那么这对我有用的是什么:
public static XNode createNodeFromXPath(XElement elem, string xpath)
{
// Create a new Regex object
Regex r = new Regex(@"/*([a-zA-Z0-9_\.\-]+)(\[@([a-zA-Z0-9_\.\-]+)='([^']*)'\])?|/@([a-zA-Z0-9_\.\-]+)");
xpath = xpath.Replace("\"", "'");
// Find matches
Match m = r.Match(xpath);
XNode currentNode = elem;
StringBuilder currentPath = new StringBuilder();
while (m.Success)
{
String currentXPath = m.Groups[0].Value; // "/configuration" or "/appSettings" or "/add"
String elementName = m.Groups[1].Value; // "configuration" or "appSettings" or "add"
String filterName = m.Groups[3].Value; // "" or "key"
String filterValue = m.Groups[4].Value; // "" or "name"
String attributeName = m.Groups[5].Value; // "" or "value"
StringBuilder builder = currentPath.Append(currentXPath);
String relativePath = builder.ToString();
XNode newNode = (XNode)elem.XPathSelectElement(relativePath);
if (newNode == null)
{
if (!string.IsNullOrEmpty(attributeName))
{
((XElement)currentNode).Attribute(attributeName).Value = "";
newNode = (XNode)elem.XPathEvaluate(relativePath);
}
else if (!string.IsNullOrEmpty(elementName))
{
XElement newElem = new XElement(elementName);
if (!string.IsNullOrEmpty(filterName))
{
newElem.Add(new XAttribute(filterName, filterValue));
}
((XElement)currentNode).Add(newElem);
newNode = newElem;
}
else
{
throw new FormatException("The given xPath is not supported " + relativePath);
}
}
currentNode = newNode;
m = m.NextMatch();
}
// Assure that the node is found or created
if (elem.XPathEvaluate(xpath) == null)
{
throw new FormatException("The given xPath cannot be created " + xpath);
}
return currentNode;
}
如果从后到前处理XPath字符串,则更容易处理非root用户的XPath,例如。 // a / b / c ...它应该支持Gordon的XPath语法,尽管我还没试过......
static private XmlNode makeXPath(XmlDocument doc, string xpath)
{
string[] partsOfXPath = xpath.Split('/');
XmlNode node = null;
for (int xpathPos = partsOfXPath.Length; xpathPos > 0; xpathPos--)
{
string subXpath = string.Join("/", partsOfXPath, 0, xpathPos);
node = doc.SelectSingleNode(subXpath);
if (node != null)
{
// append new descendants
for (int newXpathPos = xpathPos; newXpathPos < partsOfXPath.Length; newXpathPos++)
{
node = node.AppendChild(doc.CreateElement(partsOfXPath[newXpathPos]));
}
break;
}
}
return node;
}
使用
var xDoc = new XDocument(new XElement("root",
new XElement("child1"),
new XElement("child2")));
CreateElement(xDoc, "/root/child3");
CreateElement(xDoc, "/root/child4[@year=32][@month=44]");
CreateElement(xDoc, "/root/child4[@year=32][@month=44]/subchild1");
CreateElement(xDoc, "/root/child4[@year=32][@month=44]/subchild1/subchild[@name='jon']");
CreateElement(xDoc, "/root/child1");
限定
public static XDocument CreateElement(XDocument document, string xpath)
{
if (string.IsNullOrEmpty(xpath))
throw new InvalidOperationException("Xpath must not be empty");
var xNodes = Regex.Matches(xpath, @"\/[^\/]+").Cast<Match>().Select(it => it.Value).ToList();
if (!xNodes.Any())
throw new InvalidOperationException("Invalid xPath");
var parent = document.Root;
var currentNodeXPath = "";
foreach (var xNode in xNodes)
{
currentNodeXPath += xNode;
var nodeName = Regex.Match(xNode, @"(?<=\/)[^\[]+").Value;
var existingNode = parent.XPathSelectElement(currentNodeXPath);
if (existingNode != null)
{
parent = existingNode;
continue;
}
var attributeNames =
Regex.Matches(xNode, @"(?<=@)([^=]+)\=([^]]+)")
.Cast<Match>()
.Select(it =>
{
var groups = it.Groups.Cast<Group>().ToList();
return new { AttributeName = groups[1].Value, AttributeValue = groups[2].Value };
});
parent.Add(new XElement(nodeName, attributeNames.Select(it => new XAttribute(it.AttributeName, it.AttributeValue)).ToArray()));
parent = parent.Descendants().Last();
}
return document;
}
我知道这是一个非常古老的线程...但我刚刚尝试了同样的事情,并提出了以下正则表达式,这是不完美但我发现更通用
/+([\w]+)(\[@([\w]+)='([^']*)'\])?|/@([\w]+)
字符串/ configuration / appSettings / add [@ key ='name'] / @ value
应该解析为
共找到14场比赛:
start = 0,end = 14 Group(0)= / configuration Group(1)= configuration Group(2)= null Group(3)= null Group(4)= null Group(5)= null
start = 14,end = 26 Group(0)= / appSettings Group(1)= appSettings Group(2)= null Group(3)= null Group(4)= null Group(5)= null
start = 26,end = 43 Group(0)= / add [@ key ='name'] Group(1)= add Group(2)= [@ key ='name'] Group(3)= key group(4 )= name Group(5)= null
start = 43,end = 50 Group(0)= / @ value Group(1)= null Group(2)= null Group(3)= null Group(4)= null Group(5)= value
这意味着我们拥有
Group(0)= Ignored Group(1)=元素名称Group(2)= Ignored Group(3)= Filter属性名称Group(4)= Filter属性值
这是一个可以使用该模式的java方法
public static Node createNodeFromXPath(Document doc, String expression) throws XPathExpressionException {
StringBuilder currentPath = new StringBuilder();
Matcher matcher = xpathParserPattern.matcher(expression);
Node currentNode = doc.getFirstChild();
while (matcher.find()) {
String currentXPath = matcher.group(0);
String elementName = matcher.group(1);
String filterName = matcher.group(3);
String filterValue = matcher.group(4);
String attributeName = matcher.group(5);
StringBuilder builder = currentPath.append(currentXPath);
String relativePath = builder.toString();
Node newNode = selectSingleNode(doc, relativePath);
if (newNode == null) {
if (attributeName != null) {
((Element) currentNode).setAttribute(attributeName, "");
newNode = selectSingleNode(doc, relativePath);
} else if (elementName != null) {
Element element = doc.createElement(elementName);
if (filterName != null) {
element.setAttribute(filterName, filterValue);
}
currentNode.appendChild(element);
newNode = element;
} else {
throw new UnsupportedOperationException("The given xPath is not supported " + relativePath);
}
}
currentNode = newNode;
}
if (selectSingleNode(doc, expression) == null) {
throw new IllegalArgumentException("The given xPath cannot be created " + expression);
}
return currentNode;
}
我喜欢Chris的版本,因为它处理xpaths中的属性而其他解决方案没有(虽然它没有在我修复的路径中处理“text()”)。 我很遗憾不得不在VB应用程序中使用它,所以这里是转换:
Private Sub SplitOnce(ByVal value As String, ByVal separator As String, ByRef part1 As String, ByRef part2 As String)
If (value IsNot Nothing) Then
Dim idx As Integer = value.IndexOf(separator)
If (idx >= 0) Then
part1 = value.Substring(0, idx)
part2 = value.Substring(idx + separator.Length)
Else
part1 = value
part2 = Nothing
End If
Else
part1 = ""
part2 = Nothing
End If
End Sub
Private Function createXPath(ByVal doc As XmlDocument, ByVal xpath As String) As XmlNode
Dim node As XmlNode = doc
Dim part As String
For Each part In xpath.Substring(1).Split("/")
Dim nodes As XmlNodeList = node.SelectNodes(part)
If (nodes.Count > 1) Then
Throw New Exception("Xpath '" + xpath + "' was not found multiple times!")
ElseIf (nodes.Count = 1) Then
node = nodes(0)
Continue For
End If
If (part.EndsWith("text()")) Then
' treat this the same as previous node since this is really innertext
Exit For
ElseIf (part.StartsWith("@")) Then
Dim anode As XmlAttribute = doc.CreateAttribute(part.Substring(1))
node.Attributes.Append(anode)
node = anode
Else
Dim elName As String = Nothing
Dim attrib As String = Nothing
If (part.Contains("[")) Then
SplitOnce(part, "[", elName, attrib)
If (Not attrib.EndsWith("]")) Then
Throw New Exception("Unsupported XPath (missing ]): " + part)
End If
attrib = attrib.Substring(0, attrib.Length - 1)
Else
elName = part
End If
Dim nextnode As XmlNode = doc.CreateElement(elName)
node.AppendChild(nextnode)
node = nextnode
If (attrib IsNot Nothing) Then
If (Not attrib.StartsWith("@")) Then
Throw New Exception("Unsupported XPath attrib (missing @): " + part)
End If
Dim name As String = ""
Dim value As String = ""
SplitOnce(attrib.Substring(1), "='", name, value)
If (String.IsNullOrEmpty(value) Or Not value.EndsWith("'")) Then
Throw New Exception("Unsupported XPath attrib: " + part)
End If
value = value.Substring(0, value.Length - 1)
Dim anode As XmlAttribute = doc.CreateAttribute(name)
anode.Value = value
node.Attributes.Append(anode)
End If
End If
Next
Return node
End Function
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.