简体   繁体   中英

A fast way to compare two XML files and create another one with differences

My friend wants to upload only product differences to his web shop. So my idea is to compare XML files and extract only changes. Thus I've created this:

Part of XML file (note that this XML have more elements, but I've excluded them):

<?xml version="1.0" encoding="UTF-8"?>
<artikli>
    <artikal>
        <id>1039282</id>
        <sifra>42640</sifra>
        <naziv><![CDATA[Bluetooth zvucnik za tablet IYIGLE X7 crni]]></naziv>
    </artikal>
    <artikal>
        <id>1048331</id>
        <sifra>48888</sifra>
        <naziv><![CDATA[Bluetooth zvucnik REMAX RB-M15 crni]]></naziv>
    </artikal>
</artikli>

C# script

    static IEnumerable<XElement> StreamRootChildDoc(string uri)
    {
      using (XmlReader reader = XmlReader.Create(uri))
      {
        reader.MoveToContent();

        while (!reader.EOF)
        {
          if (reader.NodeType == XmlNodeType.Element && reader.Name == "artikal")
          {
            XElement el = XElement.ReadFrom(reader) as XElement;
            if (el != null)
              yield return el;
          }
          else
          {
            reader.Read();
          }
        }
      }
    }

    void ProcessFiles()
    {

      try
      {

        IEnumerable<XElement> posle = from el in StreamRootChildDoc(@"lisic2.xml")
                                      select el;

        IEnumerable<XElement> pre = from el in StreamRootChildDoc(@"lisic1.xml")
                                    select el;

        XmlDocument doc = new XmlDocument();

        //(1) the xml declaration is recommended, but not mandatory
        XmlDeclaration xmlDeclaration = doc.CreateXmlDeclaration("1.0", "UTF-8", null);
        XmlElement root = doc.DocumentElement;
        doc.InsertBefore(xmlDeclaration, root);

        //(2) string.Empty makes cleaner code
        XmlElement element1 = doc.CreateElement(string.Empty, "artikli", string.Empty);
        doc.AppendChild(element1);

        int count_files = 0;

        foreach (XElement node_posle in posle)
        {
          count_files++;

          var node_pre = pre.First(child => child.Element("id").Value == node_posle.Element("id").Value);
          if (node_pre != null)
          {
            string pre_Value = node_pre.Value.Replace("\t", ""); ;
            string posle_Value = node_posle.Value.Replace("\t", ""); ;
            if (pre_Value != posle_Value)
            {
              var reader = node_posle.CreateReader();
              reader.MoveToContent();

              XmlElement element2 = doc.CreateElement(string.Empty, "artikal", reader.ReadInnerXml());
              element1.AppendChild(element2);
            }
          }
        }
        doc.Save("document.xml");
      }
      finally
      {

      }
    }

This works but after 10000 passed records the speed is 18 records per second, after 14000 - 12 records/sec. Is there any other approach to speed this up?

UPDATE

Now, I will try to move faster to the corresponding ID of checked XML.

One way to do it is with XmlDocument, just because the XML is small (22000 products) it is possible to use it.

   void ProcessXMLDocument()
    {
      SetControlEnabled(btStart, false);
      Stopwatch sw = new Stopwatch();
      sw.Start();
      try
      {
        XmlDocument sada = new XmlDocument();
        sada.Load(tbPathSada.Text);

        XmlDocument pre = new XmlDocument();
        pre.Load(tbPathOdPre.Text);

        XmlDocument doc = new XmlDocument();

        //(1) the xml declaration is recommended, but not mandatory
        XmlDeclaration xmlDeclaration = doc.CreateXmlDeclaration("1.0", "UTF-8", null);
        XmlElement root = doc.DocumentElement;
        doc.InsertBefore(xmlDeclaration, root);

        //(2) string.Empty makes cleaner code
        XmlElement element1 = doc.CreateElement(string.Empty, "artikli", string.Empty);
        doc.AppendChild(element1);

        root = sada.DocumentElement;
        XmlNodeList nodes = root.SelectNodes("artikal"); 
        int count_files = 0;
        foreach (XmlNode nodeSada in nodes)
        {
          count_files++;
          try
          {
            SetControlText(lbBlokova, count_files.ToString());
            TimeSpan elapsed = sw.Elapsed;
            var files_per_sec = Math.Floor((double)count_files / (double)elapsed.TotalSeconds);
            SetControlText(lbPerSecond, files_per_sec.ToString());
            SetControlText(lbTime, elapsed.ToString(@"hh\:mm\:ss"));
          }
          catch (Exception ex2)
          {

          }

          var idSada = nodeSada.SelectSingleNode("id").InnerText.Trim();
          var nodePre = pre.DocumentElement.SelectSingleNode("artikal[id='" + idSada + "']");
          if (nodePre != null)
          {
            string pre_Value = nodePre.InnerXml.Replace("\t", ""); ;
            string posle_Value = nodeSada.InnerXml.Replace("\t", ""); ;
            if (pre_Value != posle_Value)
            {
              XmlNode importNode = doc.ImportNode(nodeSada, true);
              element1.AppendChild(importNode);
            }
          }
          else
          {
            XmlNode importNode = doc.ImportNode(nodeSada, true);
            element1.AppendChild(importNode);
          }
        }
        doc.Save("razlika.xml");
      }
      finally
      {
        sw.Stop();
        SetControlEnabled(btStart, true);
      }
    }

This way I've managed to improve @10000 records => 140 rec/sec and @14000 => 104 rec/sec

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM