简体   繁体   中英

XSL Merging Multiple XML Records in the Same File

I have a single XML file containing multiple records. Each record has a key. I'd like to select all of the records by key and collapse each into one XML record. Some of the data in each XML record is repeated and there are empty elements. I'd also like to remove duplicates and empty tags.

Input

<Data>
    <Record>
        <Key>12345</Key>
        <Number>09095I</Number>
        <Text_Field_1>Record 1: This is Text Field 1</Text_Field_1>
        <Text_Field_2>This is Text Field 2</Text_Field_2>
        <Author>A1</Author>
        <Author>A2</Author>
        <Author></Author>
        <Author>A1</Author>
        <Author>A2</Author>
        <Author>A3</Author>
        <Author></Author>
        <Author>A1</Author>
        <Date>10/12/2019</Date>
        <Summary>Record 1: Summary 1 Text</Summary>
    </Record>
    <Record>
        <Key>12345</Key>
        <Number>09095I</Number>
        <Text_Field_1>Record 2: This is Text Field 1</Text_Field_1>
        <Text_Field_2>This is Text_Field_2</Text_Field_2>
        <Author>A2</Author>
        <Author></Author>
        <Author>A1</Author>
        <Author>A3</Author>
        <Author></Author>
        <Author>B2</Author>
        <Author></Author>
        <Author>B2</Author>
        <Date>10/12/2019</Date>
        <Summary>Record 2: Summary 1 Text</Summary>
    </Record>
    <Record>
        <Key>23456</Key>
        <Number>43095I</Number>
        <Text_Field_1>Record 1: This is Text Field 1</Text_Field_1>
        <Text_Field_2>This is Text_Field_2</Text_Field_2>
        <Author>AA2</Author>
        <Author></Author>
        <Author>AA1</Author>
        <Author>AA3</Author>
        <Author></Author>
        <Author>AA3</Author>
        <Author>BB2</Author>
        <Author></Author>
        <Author>AA3</Author>
        <Date>01/12/2020</Date>
        <Summary>Record 1: Summary 1 Text</Summary>
    </Record>
    <Record>
        <Key>23456</Key>
        <Number>43095I</Number>
        <Text_Field_1>Record 2: This is Text Field 1</Text_Field_1>
        <Text_Field_2>This is Text_Field_2</Text_Field_2>
        <Author>AA1</Author>
        <Author>AA3</Author>
        <Author></Author>
        <Author>CC2</Author>
        <Author></Author>
        <Author>AA1</Author>
        <Author>CC2</Author>
        <Date>01/12/2020</Date>
        <Summary>Record 2: Summary 1 Text</Summary>
    </Record>
    <Record>
        <Key>23456</Key>
        <Number>43095I</Number>
        <Text_Field_1>Record 3: This is Text Field 1</Text_Field_1>
        <Text_Field_2>This is Text_Field_2</Text_Field_2>
        <Author>AA1</Author>
        <Author>AA3</Author>
        <Author></Author>
        <Author>CC2</Author>
        <Author></Author>
        <Author>AA1</Author>
        <Author>CC3</Author>
        <Date>01/12/2020</Date>
        <Summary>Record 3: Summary 1 Text</Summary>
    </Record>
    <Record>
        <Key>778899</Key>
        <Number>998822I</Number>
        <Text_Field_1>Record 1: This is Text_Field_1</Text_Field_1>
        <Text_Field_2>This is Text_Field_2</Text_Field_2>
        <Author>A2</Author>
        <Author></Author>
        <Author>D1</Author>
        <Author>D2</Author>
        <Author></Author>
        <Author>D3</Author>
        <Author>D33</Author>
        <Author></Author>
        <Author>D33</Author>
        <Date>10/12/2019</Date>
        <Summary>Record 1: Summary 1 Text</Summary>
    </Record>
</Data>

Desired Output

<Data>
    <Record>
        <Key>12345</Key>
        <Number>09095I</Number>
        <Text_Field_1>Record 1: This is Text Field 1</Text_Field_1>
        <Text_Field_1>Record 2: This is Text Field 1</Text_Field_1>
        <Text_Field_2>This is Text Field 2</Text_Field_2>
        <Author>A1</Author>
        <Author>A2</Author>
        <Author>A3</Author>
        <Author>B2</Author>
        <Date>10/12/2019</Date>
        <Summary>Record 1: Summary 1 Text</Summary>
        <Summary>Record 2: Summary 1 Text</Summary>
    </Record>

    <Record>
        <Key>23456</Key>
        <Number>43095I</Number>
        <Text_Field_1>Record 1: This is Text Field 1</Text_Field_1>
        <Text_Field_1>Record 2: This is Text Field 1</Text_Field_1>
        <Text_Field_1>Record 3: This is Text Field 1</Text_Field_1>
        <Text_Field_2>This is Text_Field_2</Text_Field_2>
        <Author>AA1</Author>
        <Author>AA2</Author>
        <Author>AA3</Author>
        <Author>BB2</Author>
        <Author>CC2</Author>
        <Author>CC3</Author>
        <Date>01/12/2020</Date>
        <Summary>Record 1: Summary 1 Text</Summary>
        <Summary>Record 2: Summary 1 Text</Summary>
        <Summary>Record 3: Summary 1 Text</Summary>
    </Record>
    <Record>
        <Key>778899</Key>
        <Number>998822I</Number>
        <Text_Field_1>Record 1: This is Text_Field_1</Text_Field_1>
        <Text_Field_2>This is Text_Field_2</Text_Field_2>
        <Author>A2</Author>
        <Author>D1</Author>
        <Author>D2</Author>
        <Author>D3</Author>
        <Author>D33</Author>
        <Date>10/12/2019</Date>
        <Summary>Record 1: Summary 1 Text</Summary>
    </Record>
</Data>

I've used this code, but I'm not sure it is the correct approach.

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Attempted merge stylesheet from the question.

  It collapses same-named sibling elements under each parent, but no template
  ever looks at the Key value. All Record elements are same-named siblings
  under Data, so they are all folded into the first Record.
-->
<xsl:stylesheet version="2.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:fn="http://www.w3.org/2005/xpath-functions">
    <xsl:output method="xml" indent="yes"/>
    <xsl:strip-space elements="*" />

    <!-- Indexes Record elements by their Key child. Declared, but not
         referenced by any template in this stylesheet. -->
    <xsl:key name="key" match="Record" use="Key"/>
    <!-- Indexes every element by "identity of its parent + its own name",
         i.e. one bucket per set of same-named siblings. -->
    <xsl:key name="kNamedSiblings" match="*" 
           use="concat(generate-id(..), '+', name())"/>

    <!-- Generic rule: copy the element, then process the child nodes of ALL
         of its same-named siblings (itself included). For repeated leaf
         elements such as Author this pulls the text nodes of every sibling
         into the single copied element, which is why their values appear
         concatenated in the output. -->
    <xsl:template match="*">
      <xsl:copy>
        <xsl:apply-templates select="key('kNamedSiblings', 
                                         concat(generate-id(..), '+', name())
                                        )/node()" />
        </xsl:copy>
    </xsl:template>
    <!-- Suppress elements that have no element children and an empty string
         value (the empty Author tags). -->
    <xsl:template match="*[not(*) and . = '']" />
    <!-- Suppress every element that is not the first one in its bucket of
         same-named siblings; only the first sibling reaches the generic rule
         above. Applied to Record, this leaves a single Record in the result. -->
    <xsl:template match="*[generate-id() != 
                         generate-id(key('kNamedSiblings', 
                                         concat(generate-id(..), '+', name()))[1]
                                    )]" />
</xsl:stylesheet>

Current output

<?xml version="1.0"?>
<Data>
  <Record>

    <Key>12345</Key>
    <Number>09095I</Number>
    <Text_Field_1>Record 1: This is Text Field 1</Text_Field_1>
    <Text_Field_2>This is Text Field 2</Text_Field_2>
    <Author>A1A2A1A2A3A1</Author>
    <Date>10/12/2019</Date>
    <Summary>Record 1: Summary 1 Text</Summary>

    <Key>12345</Key>
    <Number>09095I</Number>
    <Text_Field_1>Record 2: This is Text Field 1</Text_Field_1>
    <Text_Field_2>This is Text_Field_2</Text_Field_2>
    <Author>A2A1A3B2B2</Author>
    <Date>10/12/2019</Date>
    <Summary>Record 2: Summary 1 Text</Summary>

    <Key>23456</Key>
    <Number>43095I</Number>
    <Text_Field_1>Record 1: This is Text Field 1</Text_Field_1>
    <Text_Field_2>This is Text_Field_2</Text_Field_2>
    <Author>AA2AA1AA3AA3BB2AA3</Author>
    <Date>01/12/2020</Date>
    <Field_Text_1>This is the Text 1</Field_Text_1>

    <Key>23456</Key>
    <Number>43095I</Number>
    <Text_Field_1>Record 2: This is Text Field 1</Text_Field_1>
    <Text_Field_2>This is Text_Field_2</Text_Field_2>
    <Author>AA1AA3CC2AA1CC2</Author>
    <Date>01/12/2020</Date>
    <Field_Text_1>This is the Text 1</Field_Text_1>

    <Key>23456</Key>
    <Number>43095I</Number>
    <Text_Field_1>Record 3: This is Text Field 1</Text_Field_1>
    <Text_Field_2>This is Text_Field_2</Text_Field_2>
    <Author>AA1AA3CC2AA1CC3</Author>
    <Date>01/12/2020</Date>
    <Field_Text_1>This is the Text 1</Field_Text_1>

    <Key>778899</Key>
    <Number>998822I</Number>
    <Text_Field_1>Record 1: This is Text_Field_1</Text_Field_1>
    <Text_Field_2>This is Text_Field_2</Text_Field_2>
    <Author>A2A3A3A3</Author>
    <Date>10/12/2019</Date>
    <Field_Text_1>This is the Text 1</Field_Text_1>
  </Record>
</Data>

My current code creates one large record instead of three separate ones. In addition, the individual Author elements aren't preserved: a single Author element is created and all of the values are lumped together. I understand this is a phased solution involving:
- merging multiple records into one per key
- removing empty tags
- removing duplicate tags with the same value
- maintaining the original XML structure

Understanding the solution would also be a big help.

Because your stylesheet indicates that you are able to use XSLT-2.0, you can simplify your approach from using the complicated xsl:key one to a more straightforward xsl:for-each-group one:

<!--
  Merges all Record children of Data that share the same Key into a single
  Record per key.

  Within each merged Record, children with no non-whitespace text are dropped,
  exact duplicates (same element name and same string value) are reduced to
  their first occurrence, and the survivors are emitted sorted by element name.
-->
<xsl:template match="Data">
  <xsl:copy>
    <!-- One output Record per distinct Key value. -->
    <xsl:for-each-group select="Record" group-by="Key">
      <!-- The context item is the first Record of the group; xsl:copy makes a
           shallow copy of it to act as the merged Record. -->
      <xsl:copy>
        <!-- Group the non-empty children of every Record in this key group by
             name and value. The '|' separator cannot occur in an element
             name, so a name/value pair can never be mistaken for a different
             pair whose plain concatenation happens to be the same string
             (for example Text + "1abc" versus Text1 + "abc"). -->
        <xsl:for-each-group select="current-group()/*[normalize-space()]" group-by="concat(name(), '|', .)">
          <xsl:sort select="name()" order="ascending" />
          <!-- All members of a group are identical in name and value, so
               emitting the first one removes the duplicates. -->
          <xsl:copy-of select="current-group()[1]" />
        </xsl:for-each-group>
      </xsl:copy>
    </xsl:for-each-group>
  </xsl:copy>
</xsl:template>

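This template groups the Record elements by Key and then groups the children of each group by a string consisting of the element name and its content. The groups are sorted alphabetically by element name so that elements with the same name end up next to each other.
Then the first element of each group (which is therefore unique) is output. Note that this emits the children in alphabetical order of element name rather than in their original document order, and that both Text_Field_2 variants of the first record are kept because their text differs ("Text Field 2" vs. "Text_Field_2").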

The output is:

<?xml version="1.0" encoding="UTF-8"?>
<Data>
   <Record>
      <Author>A1</Author>
      <Author>A2</Author>
      <Author>A3</Author>
      <Author>B2</Author>
      <Date>10/12/2019</Date>
      <Key>12345</Key>
      <Number>09095I</Number>
      <Summary>Record 1: Summary 1 Text</Summary>
      <Summary>Record 2: Summary 1 Text</Summary>
      <Text_Field_1>Record 1: This is Text Field 1</Text_Field_1>
      <Text_Field_1>Record 2: This is Text Field 1</Text_Field_1>
      <Text_Field_2>This is Text Field 2</Text_Field_2>
      <Text_Field_2>This is Text_Field_2</Text_Field_2>
   </Record>
   <Record>
      <Author>AA2</Author>
      <Author>AA1</Author>
      <Author>AA3</Author>
      <Author>BB2</Author>
      <Author>CC2</Author>
      <Author>CC3</Author>
      <Date>01/12/2020</Date>
      <Key>23456</Key>
      <Number>43095I</Number>
      <Summary>Record 1: Summary 1 Text</Summary>
      <Summary>Record 2: Summary 1 Text</Summary>
      <Summary>Record 3: Summary 1 Text</Summary>
      <Text_Field_1>Record 1: This is Text Field 1</Text_Field_1>
      <Text_Field_1>Record 2: This is Text Field 1</Text_Field_1>
      <Text_Field_1>Record 3: This is Text Field 1</Text_Field_1>
      <Text_Field_2>This is Text_Field_2</Text_Field_2>
   </Record>
   <Record>
      <Author>A2</Author>
      <Author>D1</Author>
      <Author>D2</Author>
      <Author>D3</Author>
      <Author>D33</Author>
      <Date>10/12/2019</Date>
      <Key>778899</Key>
      <Number>998822I</Number>
      <Summary>Record 1: Summary 1 Text</Summary>
      <Text_Field_1>Record 1: This is Text_Field_1</Text_Field_1>
      <Text_Field_2>This is Text_Field_2</Text_Field_2>
   </Record>
</Data>

Besides zx485's good XSLT 2.0 answer, here is an XSLT 1.0 stylesheet with a double key grouping:

<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
    <xsl:output method="xml" indent="yes"/>
    <xsl:strip-space elements="*"/>

    <!-- All Record elements that carry a given Key value. -->
    <xsl:key name="Record-by-Key" match="Record" use="Key"/>
    <!-- All Record children that share the owning Key, element name and
         string value, i.e. one bucket per distinct field within a key group. -->
    <xsl:key name="Record-by-Key-child-by-name-value" match="Record/*"
             use="concat(../Key,'+',name(),'+',.)"/>

    <!-- Entry point: visit only the first Record of each Key group. -->
    <xsl:template match="Data">
        <Data>
            <xsl:apply-templates mode="merged-record"
                select="*[generate-id()=generate-id(key('Record-by-Key',Key)[1])]"/>
        </Data>
    </xsl:template>

    <!-- Builds one merged Record: gathers the children of every Record with
         the same Key, keeps the first representative of each name/value
         bucket, and hands them on sorted by element name. -->
    <xsl:template match="*" mode="merged-record">
        <Record>
            <xsl:apply-templates mode="distinct-field"
                select="key('Record-by-Key',Key)
                        /*[generate-id()
                            =generate-id(
                                key('Record-by-Key-child-by-name-value',
                                    concat(../Key,'+',name(),'+',.))[1])]">
                <xsl:sort select="name()"/>
            </xsl:apply-templates>
        </Record>
    </xsl:template>

    <!-- Copies a field only when it has at least one child node, which drops
         the empty elements. -->
    <xsl:template match="*" mode="distinct-field">
        <xsl:copy-of select="self::*[node()]"/>
    </xsl:template>
</xsl:stylesheet>

Output:

<Data>
   <Record>
      <Author>A1</Author>
      <Author>A2</Author>
      <Author>A3</Author>
      <Author>B2</Author>
      <Date>10/12/2019</Date>
      <Key>12345</Key>
      <Number>09095I</Number>
      <Summary>Record 1: Summary 1 Text</Summary>
      <Summary>Record 2: Summary 1 Text</Summary>
      <Text_Field_1>Record 1: This is Text Field 1</Text_Field_1>
      <Text_Field_1>Record 2: This is Text Field 1</Text_Field_1>
      <Text_Field_2>This is Text Field 2</Text_Field_2>
      <Text_Field_2>This is Text_Field_2</Text_Field_2>
   </Record>
   <Record>
      <Author>AA2</Author>
      <Author>AA1</Author>
      <Author>AA3</Author>
      <Author>BB2</Author>
      <Author>CC2</Author>
      <Author>CC3</Author>
      <Date>01/12/2020</Date>
      <Key>23456</Key>
      <Number>43095I</Number>
      <Summary>Record 1: Summary 1 Text</Summary>
      <Summary>Record 2: Summary 1 Text</Summary>
      <Summary>Record 3: Summary 1 Text</Summary>
      <Text_Field_1>Record 1: This is Text Field 1</Text_Field_1>
      <Text_Field_1>Record 2: This is Text Field 1</Text_Field_1>
      <Text_Field_1>Record 3: This is Text Field 1</Text_Field_1>
      <Text_Field_2>This is Text_Field_2</Text_Field_2>
   </Record>
   <Record>
      <Author>A2</Author>
      <Author>D1</Author>
      <Author>D2</Author>
      <Author>D3</Author>
      <Author>D33</Author>
      <Date>10/12/2019</Date>
      <Key>778899</Key>
      <Number>998822I</Number>
      <Summary>Record 1: Summary 1 Text</Summary>
      <Text_Field_1>Record 1: This is Text_Field_1</Text_Field_1>
      <Text_Field_2>This is Text_Field_2</Text_Field_2>
   </Record>
</Data>

Addendum: it's also possible to enforce children order by name...

As we already have XSLT 1 and XSLT 2 solutions, for completeness, here is an XSLT 3 one using xsl:merge:

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    exclude-result-prefixes="#all"
    version="3.0">

  <xsl:output indent="yes"/>

  <!-- Anything not matched by an explicit template is copied through. -->
  <xsl:mode on-no-match="shallow-copy"/>

  <!--
    Merges the Record children of Data into one Record per Key value, dropping
    children without non-whitespace text and duplicate name/value pairs.
  -->
  <xsl:template match="Data">
      <xsl:copy>
          <!-- Outer merge: one merge group per distinct Key. -->
          <xsl:merge>
              <!-- xsl:merge expects each source to be ordered by its merge
                   keys. sort-before-merge="yes" lets the processor do the
                   ordering itself, so Records with the same Key do not have
                   to be adjacent or pre-sorted in the input. -->
              <xsl:merge-source select="Record" sort-before-merge="yes">
                  <xsl:merge-key select="Key"/>
              </xsl:merge-source>
              <xsl:merge-action>
                  <!-- Shallow copy of the first Record of the merge group
                       serves as the merged Record element. -->
                  <xsl:copy>
                      <!-- Inner merge: one merge group per distinct
                           (element name, string value) pair among the
                           non-empty children of all Records in this Key
                           group. -->
                      <xsl:merge>
                          <xsl:merge-source select="current-merge-group()/*[normalize-space()]" sort-before-merge="yes">
                              <xsl:merge-key select="name()"/>
                              <xsl:merge-key select="."/>
                          </xsl:merge-source>
                          <xsl:merge-action>
                              <!-- Emit a single representative of the group,
                                   which removes the duplicates. -->
                              <xsl:copy-of select="."/>
                          </xsl:merge-action>
                      </xsl:merge>
                  </xsl:copy>
              </xsl:merge-action>
          </xsl:merge>
      </xsl:copy>
  </xsl:template>

</xsl:stylesheet>

https://xsltfiddle.liberty-development.net/gWEaSv5

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM