[英]How to parse a xml-file using C# and Linq by finding a specific attributes value
我有一个文件夹。 在该文件夹中,有pdf,图片等。此外,还有一个xmlfile。
该xmlfile具有每个其他文件的元数据。
我想从xml提取数据并将其保存在c#类中,以便以后使用
我搜索了一种使用linq解析文件的方法。 但是我无法使其按我想要的方式工作。
我希望它像这样工作:
我有一个存储在应用程序中的文件列表。 然后,我想遍历每个文件,并从xml中获取该文件的数据。
xmlfile看起来像这样:
<?xml version='1.0' encoding='ISO-8859-1' ?>
<FOLDERS Name="XXXXXXX" >
<FOLDER Date="12/15/2015 15:25:04" ByUser="" Name="some folders name" Type="" MemberOf="">
<![CDATA[FOLDERID111]]>
<VISUALFOLDER Date="02/16/2016 14:25:00" ByUser="" Name="some folders name" Type="" StartView="UNKNOWN" ScreenOffset="0"/>
<TABSHEET Date="02/16/2016 14:25:00" Name="Fields" Type="IdxFields">
<![CDATA[TABSHEETID521]]>
<VISUALTABSHEET Date="02/16/2016 14:25:00" Name="Fields" Type="IdxFields"/>
<INDEXFIELD Date="02/16/2016 14:25:00" Name="DocuName">
<![CDATA[Something thats not the documents name]]>
<VISUALINDEXFIELD Date="02/16/2016 14:25:00" Name="DocuName"/>
</INDEXFIELD>
<INDEXFIELD Date="02/16/2016 14:25:00" Name="DocuDate">
<![CDATA[09.12.2015]]>
<VISUALINDEXFIELD Date="02/16/2016 14:25:00" Name="DocuDate"/>
</INDEXFIELD>
<INDEXFIELD Date="02/16/2016 14:25:00" Name="Object">
<![CDATA[OBJECT1]]>
<VISUALINDEXFIELD Date="02/16/2016 14:25:00" Name="Object"/>
</INDEXFIELD>
<INDEXFIELD Date="02/16/2016 14:25:00" Name="Tag">
<![CDATA[LETTER]]>
<VISUALINDEXFIELD Date="02/16/2016 14:25:00" Name="Tag"/>
</INDEXFIELD>
<INDEXFIELD Date="02/16/2016 14:25:00" Name="User">
<![CDATA[USER1]]>
<VISUALINDEXFIELD Date="02/16/2016 14:25:00" Name="User"/>
</INDEXFIELD>
<INDEXFIELD Date="02/16/2016 14:25:00" Name="Note">
<VISUALINDEXFIELD Date="02/16/2016 14:25:00" Name="Note"/>
</INDEXFIELD>
<INDEXFIELD Date="02/16/2016 14:25:00" Name="Barcode">
<VISUALINDEXFIELD Date="02/16/2016 14:25:00" Name="Barcode"/>
</INDEXFIELD>
</TABSHEET>
<TABSHEET Date="02/16/2016 14:25:00" Name="Documents" Type="Documents" Data="" SeqNo="0" Title="" Password="">
<![CDATA[TABSHEETID522]]>
<VISUALTABSHEET Date="02/16/2016 14:25:00" Name="Documents" Type="Documents"/>
<DOCUMENT Date="02/16/2016 14:25:00" Name="Document" Type="" Data="" FileName="C:\ProgramData\Import\file1.pdf" FileOffset="5712054" FileSize="128509" BinaryType="PDF">
<VISUALDOCUMENT Date="02/16/2016 14:25:00" Name="Document" Type="" Height="148" Width="105"/>
</DOCUMENT>
<DOCUMENT Date="02/16/2016 14:25:00" Name="Document" Type="" Data="" FileName="C:\ProgramData\Import\file2.pdf" FileOffset="5840563" FileSize="129847" BinaryType="PDF">
<VISUALDOCUMENT Date="02/16/2016 14:25:00" Name="Document" Type="" Height="148" Width="105"/>
</DOCUMENT>
</TABSHEET>
</FOLDER>
<FOLDER Date="12/30/2015 15:25:04" ByUser="" Name="some other folders name" Type="" MemberOf="">
<![CDATA[FOLDERID111]]>
<VISUALFOLDER Date="02/16/2016 14:25:00" ByUser="" Name="some other folders name" Type="" StartView="UNKNOWN" ScreenOffset="0"/>
<TABSHEET Date="02/16/2016 14:25:00" Name="Fields" Type="IdxFields">
<![CDATA[TABSHEETID521]]>
<VISUALTABSHEET Date="02/16/2016 14:25:00" Name="Fields" Type="IdxFields"/>
<INDEXFIELD Date="02/16/2016 14:25:00" Name="DocuName">
<![CDATA[Something thats not the documents name]]>
<VISUALINDEXFIELD Date="02/16/2016 14:25:00" Name="DocuName"/>
</INDEXFIELD>
<INDEXFIELD Date="02/16/2016 14:25:00" Name="DocuDate">
<![CDATA[09.12.2015]]>
<VISUALINDEXFIELD Date="02/16/2016 14:25:00" Name="DocuDate"/>
</INDEXFIELD>
<INDEXFIELD Date="02/16/2016 14:25:00" Name="Object">
<![CDATA[OBJECT1]]>
<VISUALINDEXFIELD Date="02/16/2016 14:25:00" Name="Object"/>
</INDEXFIELD>
<INDEXFIELD Date="02/16/2016 14:25:00" Name="Tag">
<![CDATA[LETTER]]>
<VISUALINDEXFIELD Date="02/16/2016 14:25:00" Name="Tag"/>
</INDEXFIELD>
<INDEXFIELD Date="02/16/2016 14:25:00" Name="User">
<![CDATA[USER1]]>
<VISUALINDEXFIELD Date="02/16/2016 14:25:00" Name="User"/>
</INDEXFIELD>
<INDEXFIELD Date="02/16/2016 14:25:00" Name="Note">
<VISUALINDEXFIELD Date="02/16/2016 14:25:00" Name="Note"/>
</INDEXFIELD>
<INDEXFIELD Date="02/16/2016 14:25:00" Name="Barcode">
<VISUALINDEXFIELD Date="02/16/2016 14:25:00" Name="Barcode"/>
</INDEXFIELD>
</TABSHEET>
<TABSHEET Date="02/16/2016 14:25:00" Name="Documents" Type="Documents" Data="" SeqNo="0" Title="" Password="">
<![CDATA[TABSHEETID522]]>
<VISUALTABSHEET Date="02/16/2016 14:25:00" Name="Documents" Type="Documents"/>
<DOCUMENT Date="02/16/2016 14:25:00" Name="Document" Type="" Data="" FileName="C:\ProgramData\Import\file3.pdf" FileOffset="5712054" FileSize="128509" BinaryType="PDF">
<VISUALDOCUMENT Date="02/16/2016 14:25:00" Name="Document" Type="" Height="148" Width="105"/>
</DOCUMENT>
</TABSHEET>
</FOLDER>
</FOLDERS>
xml由另一个应用程序生成。
每个“文件夹”都有两个“ TABSHEET”。 一个包含数据(可通过“名称”属性标识),另一个包含文件名。
数据包含在CDATA块中。 有些字段有数据,有些则没有。 并非每个文档都有“条形码”。
要实现我想要的功能,Linq查询应该怎么写?
好的,我已经把查询修改得几乎可以实现我想要的功能了:
// Names of the index fields whose text values are extracted from each matching folder.
var wantedFieldNames = new[] { "DocuName", "Note", "User" };

var test1 = xdoc
    .Element("FOLDERS")
    .Elements("FOLDER")
    // Keep only the folders that list the current file among their DOCUMENT entries.
    .Where(folder =>
    {
        var documentPaths = folder
            .Elements("TABSHEET")
            .Elements("DOCUMENT")
            .Select(document => document.Attribute("FileName").Value)
            .ToList();
        return documentPaths.Contains(file.FilePath);
    })
    // Produce one inner sequence of field values per matching folder.
    .Select(folder => folder
        .Elements("TABSHEET")
        .Elements("INDEXFIELD")
        .Where(field => wantedFieldNames.Contains(field.Attribute("Name").Value))
        .Select(field => (string)field.Value));
现在唯一的问题是如何区分结果。
我的意思是:查询会返回一个 IEnumerable<IEnumerable<string>>,其中每个匹配的文件对应3个值。 但是因为内层只是 IEnumerable<string>,所以我无法确定某个字符串对应的是“DocuName”、“Note”还是“User”。
有没有办法从该查询中获取具有正确键的字典?
有许多方法可以解决此问题,因为您提到要使用等效的C#
实体,所以我更喜欢这种方法。
为您的xml生成C#实体(有很多工具)
/// <summary>
/// Serialization model for the <c>VISUALFOLDER</c> element.
/// Every XML attribute is exposed as a raw string property of the same name.
/// </summary>
[XmlRoot("VISUALFOLDER")]
public class VISUALFOLDER
{
    /// <summary>Value of the <c>Date</c> attribute.</summary>
    [XmlAttribute("Date")]
    public string Date { get; set; }

    /// <summary>Value of the <c>ByUser</c> attribute.</summary>
    [XmlAttribute("ByUser")]
    public string ByUser { get; set; }

    /// <summary>Value of the <c>Name</c> attribute.</summary>
    [XmlAttribute("Name")]
    public string Name { get; set; }

    /// <summary>Value of the <c>Type</c> attribute.</summary>
    [XmlAttribute("Type")]
    public string Type { get; set; }

    /// <summary>Value of the <c>StartView</c> attribute.</summary>
    [XmlAttribute("StartView")]
    public string StartView { get; set; }

    /// <summary>Value of the <c>ScreenOffset</c> attribute.</summary>
    [XmlAttribute("ScreenOffset")]
    public string ScreenOffset { get; set; }
}
/// <summary>
/// Serialization model for the <c>VISUALTABSHEET</c> element.
/// Every XML attribute is exposed as a raw string property of the same name.
/// </summary>
[XmlRoot("VISUALTABSHEET")]
public class VISUALTABSHEET
{
    /// <summary>Value of the <c>Date</c> attribute.</summary>
    [XmlAttribute("Date")]
    public string Date { get; set; }

    /// <summary>Value of the <c>Name</c> attribute.</summary>
    [XmlAttribute("Name")]
    public string Name { get; set; }

    /// <summary>Value of the <c>Type</c> attribute.</summary>
    [XmlAttribute("Type")]
    public string Type { get; set; }
}
/// <summary>
/// Serialization model for the <c>VISUALINDEXFIELD</c> element.
/// Both XML attributes are exposed as raw string properties of the same name.
/// </summary>
[XmlRoot("VISUALINDEXFIELD")]
public class VISUALINDEXFIELD
{
    /// <summary>Value of the <c>Date</c> attribute.</summary>
    [XmlAttribute("Date")]
    public string Date { get; set; }

    /// <summary>Value of the <c>Name</c> attribute.</summary>
    [XmlAttribute("Name")]
    public string Name { get; set; }
}
/// <summary>
/// Serialization model for the <c>INDEXFIELD</c> element: its attributes, its text
/// (CDATA) content and its nested <c>VISUALINDEXFIELD</c> child.
/// </summary>
[XmlRoot(ElementName="INDEXFIELD")]
public class INDEXFIELD {
    /// <summary>
    /// Text content of the element (in the sample document this is the CDATA section
    /// holding the field's value). Stays <c>null</c> when the element has no text.
    /// Without this member the serializer silently discards the field value.
    /// </summary>
    // NOTE(review): the whitespace around the CDATA section is expected to be skipped by
    // the reader XmlSerializer creates - confirm, or Trim() at the call site.
    [XmlText]
    public string Text { get; set; }

    /// <summary>The nested <c>VISUALINDEXFIELD</c> child element.</summary>
    [XmlElement(ElementName="VISUALINDEXFIELD")]
    public VISUALINDEXFIELD VISUALINDEXFIELD { get; set; }

    /// <summary>Value of the <c>Date</c> attribute.</summary>
    [XmlAttribute(AttributeName="Date")]
    public string Date { get; set; }

    /// <summary>Value of the <c>Name</c> attribute.</summary>
    [XmlAttribute(AttributeName="Name")]
    public string Name { get; set; }
}
/// <summary>
/// Serialization model for the <c>TABSHEET</c> element. In the sample document a tab
/// sheet carries either <c>INDEXFIELD</c> children or <c>DOCUMENT</c> children, so both
/// lists are mapped here.
/// </summary>
[XmlRoot(ElementName="TABSHEET")]
public class TABSHEET {
    /// <summary>
    /// Text content of the element (in the sample document a CDATA section such as
    /// <c>TABSHEETID521</c>). Without this member the serializer discards it.
    /// </summary>
    [XmlText]
    public string Text { get; set; }

    /// <summary>The nested <c>VISUALTABSHEET</c> child element.</summary>
    [XmlElement(ElementName="VISUALTABSHEET")]
    public VISUALTABSHEET VISUALTABSHEET { get; set; }

    /// <summary>All <c>INDEXFIELD</c> child elements.</summary>
    [XmlElement(ElementName="INDEXFIELD")]
    public List<INDEXFIELD> INDEXFIELD { get; set; }

    /// <summary>Value of the <c>Date</c> attribute.</summary>
    [XmlAttribute(AttributeName="Date")]
    public string Date { get; set; }

    /// <summary>Value of the <c>Name</c> attribute.</summary>
    [XmlAttribute(AttributeName="Name")]
    public string Name { get; set; }

    /// <summary>Value of the <c>Type</c> attribute.</summary>
    [XmlAttribute(AttributeName="Type")]
    public string Type { get; set; }

    /// <summary>All <c>DOCUMENT</c> child elements.</summary>
    [XmlElement(ElementName="DOCUMENT")]
    public List<DOCUMENT> DOCUMENT { get; set; }

    /// <summary>Value of the <c>Data</c> attribute.</summary>
    [XmlAttribute(AttributeName="Data")]
    public string Data { get; set; }

    /// <summary>Value of the <c>SeqNo</c> attribute.</summary>
    [XmlAttribute(AttributeName="SeqNo")]
    public string SeqNo { get; set; }

    /// <summary>Value of the <c>Title</c> attribute.</summary>
    [XmlAttribute(AttributeName="Title")]
    public string Title { get; set; }

    /// <summary>Value of the <c>Password</c> attribute.</summary>
    [XmlAttribute(AttributeName="Password")]
    public string Password { get; set; }
}
/// <summary>
/// Serialization model for the <c>VISUALDOCUMENT</c> element.
/// Every XML attribute is exposed as a raw string property of the same name.
/// </summary>
[XmlRoot("VISUALDOCUMENT")]
public class VISUALDOCUMENT
{
    /// <summary>Value of the <c>Date</c> attribute.</summary>
    [XmlAttribute("Date")]
    public string Date { get; set; }

    /// <summary>Value of the <c>Name</c> attribute.</summary>
    [XmlAttribute("Name")]
    public string Name { get; set; }

    /// <summary>Value of the <c>Type</c> attribute.</summary>
    [XmlAttribute("Type")]
    public string Type { get; set; }

    /// <summary>Value of the <c>Height</c> attribute, kept as text.</summary>
    [XmlAttribute("Height")]
    public string Height { get; set; }

    /// <summary>Value of the <c>Width</c> attribute, kept as text.</summary>
    [XmlAttribute("Width")]
    public string Width { get; set; }
}
/// <summary>
/// Serialization model for the <c>DOCUMENT</c> element: its nested
/// <c>VISUALDOCUMENT</c> child plus all attributes as raw strings.
/// </summary>
[XmlRoot("DOCUMENT")]
public class DOCUMENT
{
    /// <summary>The nested <c>VISUALDOCUMENT</c> child element.</summary>
    [XmlElement("VISUALDOCUMENT")]
    public VISUALDOCUMENT VISUALDOCUMENT { get; set; }

    /// <summary>Value of the <c>Date</c> attribute.</summary>
    [XmlAttribute("Date")]
    public string Date { get; set; }

    /// <summary>Value of the <c>Name</c> attribute.</summary>
    [XmlAttribute("Name")]
    public string Name { get; set; }

    /// <summary>Value of the <c>Type</c> attribute.</summary>
    [XmlAttribute("Type")]
    public string Type { get; set; }

    /// <summary>Value of the <c>Data</c> attribute.</summary>
    [XmlAttribute("Data")]
    public string Data { get; set; }

    /// <summary>Value of the <c>FileName</c> attribute (a full file path in the sample document).</summary>
    [XmlAttribute("FileName")]
    public string FileName { get; set; }

    /// <summary>Value of the <c>FileOffset</c> attribute, kept as text.</summary>
    [XmlAttribute("FileOffset")]
    public string FileOffset { get; set; }

    /// <summary>Value of the <c>FileSize</c> attribute, kept as text.</summary>
    [XmlAttribute("FileSize")]
    public string FileSize { get; set; }

    /// <summary>Value of the <c>BinaryType</c> attribute (e.g. <c>PDF</c> in the sample document).</summary>
    [XmlAttribute("BinaryType")]
    public string BinaryType { get; set; }
}
/// <summary>
/// Serialization model for the <c>FOLDER</c> element: its attributes, its text (CDATA)
/// content, the <c>VISUALFOLDER</c> child and all <c>TABSHEET</c> children.
/// </summary>
[XmlRoot(ElementName="FOLDER")]
public class FOLDER {
    /// <summary>
    /// Text content of the element (in the sample document a CDATA section such as
    /// <c>FOLDERID111</c>). Without this member the serializer discards it.
    /// </summary>
    [XmlText]
    public string Text { get; set; }

    /// <summary>The nested <c>VISUALFOLDER</c> child element.</summary>
    [XmlElement(ElementName="VISUALFOLDER")]
    public VISUALFOLDER VISUALFOLDER { get; set; }

    /// <summary>All <c>TABSHEET</c> child elements.</summary>
    [XmlElement(ElementName="TABSHEET")]
    public List<TABSHEET> TABSHEET { get; set; }

    /// <summary>Value of the <c>Date</c> attribute.</summary>
    [XmlAttribute(AttributeName="Date")]
    public string Date { get; set; }

    /// <summary>Value of the <c>ByUser</c> attribute.</summary>
    [XmlAttribute(AttributeName="ByUser")]
    public string ByUser { get; set; }

    /// <summary>Value of the <c>Name</c> attribute.</summary>
    [XmlAttribute(AttributeName="Name")]
    public string Name { get; set; }

    /// <summary>Value of the <c>Type</c> attribute.</summary>
    [XmlAttribute(AttributeName="Type")]
    public string Type { get; set; }

    /// <summary>Value of the <c>MemberOf</c> attribute.</summary>
    [XmlAttribute(AttributeName="MemberOf")]
    public string MemberOf { get; set; }
}
/// <summary>
/// Serialization model for the <c>FOLDERS</c> root element: the list of
/// <c>FOLDER</c> children plus the root's <c>Name</c> attribute.
/// </summary>
[XmlRoot("FOLDERS")]
public class FOLDERS
{
    /// <summary>All <c>FOLDER</c> child elements.</summary>
    [XmlElement("FOLDER")]
    public List<FOLDER> FOLDER { get; set; }

    /// <summary>Value of the <c>Name</c> attribute.</summary>
    [XmlAttribute("Name")]
    public string Name { get; set; }
}
现在,我们可以使用以下代码片段将该xml文件反序列化为上面的实体。
// Serializer for the FOLDERS root type; the original snippet used it without declaring it.
var serializer = new XmlSerializer(typeof(FOLDERS));

FOLDERS folders;
// Hand the serializer the raw stream instead of a StreamReader: the XML reader then
// honours the encoding named in the XML declaration (ISO-8859-1 in the sample file),
// whereas StreamReader would decode the bytes as UTF-8. The using block closes the file.
using (FileStream stream = File.OpenRead(filepath))
{
    folders = (FOLDERS)serializer.Deserialize(stream);
}
工作Demo
我想出了以下解决方案:
其中 file 是我用来保存单个文件全部数据的类的实例。
// Locate the <FOLDERS> root; without it there is nothing to look the file up in.
var elements = xdoc.Element("FOLDERS");
if (elements == null)
{
    throw new KeyNotFoundException();
}

// For every FOLDER that lists the current file among its DOCUMENT entries,
// collect the wanted INDEXFIELD entries as { name, value } pairs.
var data = elements
    .Elements("FOLDER")
    .Where(folder => folder
        .Elements("TABSHEET")
        .Elements("DOCUMENT")
        .Any(document =>
        {
            // The string cast yields null (instead of throwing) when FileName is absent.
            var documentFileName = (string)document.Attribute("FileName");
            return documentFileName != null && documentFileName == file.FileName;
        }))
    .Select(folder => folder
        .Elements("TABSHEET")
        .Elements("INDEXFIELD")
        .Where(field =>
        {
            // Null-safe: an INDEXFIELD without a Name attribute is simply skipped.
            var fieldName = (string)field.Attribute("Name");
            return fieldName == "Date" || fieldName == "Note";
        })
        .Select(field => new string[] { (string)field.Attribute("Name"), (string)field.Value }))
    .ToList();

// Exactly one folder must reference the file; with zero or several matches
// the file is marked as not to be uploaded and the enclosing loop moves on.
if (data.Count != 1)
{
    file.Upload = false;
    continue;
}

// Field name -> field value for the single matching folder.
var dataDictionary = data[0].ToDictionary(item => item[0],
                                          item => item[1]);

// A missing or blank "Date" field falls back to default(DateTime).
// NOTE(review): DateTime.Parse uses the current culture; the sample document stores
// a value like "09.12.2015", so confirm the expected format/culture.
string rawDate;
file.Date = dataDictionary.TryGetValue("Date", out rawDate) && !string.IsNullOrWhiteSpace(rawDate)
    ? DateTime.Parse(rawDate)
    : new DateTime();
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.