繁体   English   中英

如何使用 XSLT 精简 XML 中的重复元素

[英]How to reduce repeating XML elements using XSLT

我有以下 XML:

   <!-- Sample input: one table-data block with a table-max value and nine
        table elements. Only the first daten carries content; the remaining
        eight contain nothing but whitespace. -->
   <test-dump>
        <table-data>
            <table-max>1000</table-max>
            <table>
                <daten>XXXXXXXXXXXXXXXXXXXXXXXXXX
                </daten>
            </table>
            <table>
                <daten>
                </daten>
            </table>
            <table>
                <daten>
                </daten>
            </table>
            <table>
                <daten>
                </daten>
            </table>
            <table>
                <daten>
                </daten>
            </table>
            <table>
                <daten>
                </daten>
            </table>
            <table>
                <daten>
                </daten>
            </table>
            <table>
                <daten>
                </daten>
            </table>
            <table>
                <daten>
                </daten>
            </table>
        </table-data>
   </test-dump>

我想使用 XSLT 生成以下输出

转储文件有 6 GB,我必须缩减它。同一级别中值相同的每一组元素只应写出两次:第一次写出值,第二次写出从第几个到第几个与上面的值相同,输出如下:

table-max = 1000
table(001)  = 
    daten = XXXXXXXXXXXXXXXXXXXXXXXXXX
table(002)  = 
    daten = 
table(003 - 009)  = as above

谢谢你的帮助。

支持 XSLT 3 和流式处理(streaming)的 Saxon EE 应该能够处理这么大的文件:

<?xml version="1.0" encoding="utf-8"?>
<!--
    Streamable XSLT 3.0 stylesheet that writes a plain-text summary of each
    table-data element.

    Output per table-data:
      * one "table-max = N" line;
      * for every run of adjacent tables whose daten text is equal, the first
        table in full ("table(NNN) = " followed by "daten = value");
      * if the run holds more than one table, a single
        "table(NNN - MMM) = as above" line covering the remaining tables.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    version="3.0"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    exclude-result-prefixes="#all"
    expand-text="yes">
    
    <xsl:output method="text"/>
    
    <!-- Nodes without a matching template are skipped (their children are still
         visited); the mode is declared streamable and every accumulator applies. -->
    <xsl:mode on-no-match="shallow-skip" streamable="yes" use-accumulators="#all"/>
    
    <!-- Running table counter: reset to 0 at each table-data, incremented by one
         for each table child, so it holds the 1-based number of the current table. -->
    <xsl:accumulator name="table-no" as="xs:integer" streamable="yes" initial-value="0">
        <xsl:accumulator-rule match="table-data" select="0"/>
        <xsl:accumulator-rule match="table-data/table" select="$value + 1"/>
    </xsl:accumulator>
    
    <!-- Remembers the integer value of table-max; empty until its text node is seen. -->
    <xsl:accumulator name="max" as="xs:integer?" streamable="yes" initial-value="()">
        <xsl:accumulator-rule match="table-data/table-max/text()" select="xs:integer(.)"/>
    </xsl:accumulator>
    
    <xsl:template match="table-data">
        <!-- Group the daten text nodes; adjacent nodes with the same value share a group.
             A daten element without any text node contributes nothing to the selection. -->
        <xsl:for-each-group select="table/daten/text()" group-adjacent=".">
            <!-- position() here is the group position: emit the header once, before the first group. -->
            <xsl:if test="position() eq 1">
                <xsl:text>table-max = {accumulator-before('max')}&#10;</xsl:text>
            </xsl:if>
            <xsl:iterate select="current-group()">
                <!-- table-no: table number recorded for the previously processed item. -->
                <xsl:param name="table-no" as="xs:integer" select="0"/>
                <!-- pos: position of the previously processed item within the group. -->
                <xsl:param name="pos" as="xs:integer" select="1"/>
                <!-- When the group had more than one item, finish the range line that was
                     opened at item 2 with the number of the last table in the group. -->
                <xsl:on-completion select="if ($pos gt 1) then ' - ' || format-integer($table-no, '001') || ') = as above&#10;' else ()"/>
                <xsl:choose>
                    <!-- First item of the group: write the table number and its value in full. -->
                    <xsl:when test="position() eq 1">
                        <xsl:text>table({format-integer(accumulator-before('table-no'), '001')}) = &#10;    daten = {current-grouping-key()}&#10;</xsl:text>
                    </xsl:when>
                    <!-- Second item: open the range line with this table's number; no newline
                         is written here because xsl:on-completion supplies the rest of the line. -->
                    <xsl:when test="position() eq 2">table({format-integer(accumulator-before('table-no'), '001')}</xsl:when>
                </xsl:choose>
                <!-- Carry the current table number and position into the next iteration
                     (and ultimately into xsl:on-completion). -->
                <xsl:next-iteration>
                    <xsl:with-param name="table-no" select="accumulator-before('table-no')"/>
                    <xsl:with-param name="pos" select="position()"/>
                </xsl:next-iteration>
            </xsl:iterate>
        </xsl:for-each-group>
    </xsl:template>
    
</xsl:stylesheet>

如果您想生成 XML 输出,您可以将上述内容更改为

<?xml version="1.0" encoding="utf-8"?>
<!--
    Streamable XSLT 3.0 stylesheet that rewrites each table-data element as XML.

    For every run of adjacent tables whose daten text is equal it writes the
    first table as <table no="NNN"> with its daten value and, if the run holds
    more than one table, one <table range="NNN - MMM"> element whose daten is
    "as before". Everything outside table-data is copied unchanged.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="3.0"
    xmlns:xs="http://www.w3.org/2001/XMLSchema" exclude-result-prefixes="#all" expand-text="yes">

    <xsl:output method="xml" indent="yes"/>

    <!-- Nodes without a matching template are copied through; the mode is declared
         streamable and every accumulator applies. -->
    <xsl:mode on-no-match="shallow-copy" streamable="yes" use-accumulators="#all"/>

    <!-- Running table counter: reset to 0 at each table-data, incremented by one
         for each table child, so it holds the 1-based number of the current table. -->
    <xsl:accumulator name="table-no" as="xs:integer" streamable="yes" initial-value="0">
        <xsl:accumulator-rule match="table-data" select="0"/>
        <xsl:accumulator-rule match="table-data/table" select="$value + 1"/>
    </xsl:accumulator>

    <!-- Remembers the integer value of table-max; empty until its text node is seen. -->
    <xsl:accumulator name="max" as="xs:integer?" streamable="yes" initial-value="()">
        <xsl:accumulator-rule match="table-data/table-max/text()" select="xs:integer(.)"/>
    </xsl:accumulator>

    <xsl:template match="table-data">
        <xsl:copy>
            <!-- Group the daten text nodes; adjacent nodes with the same value share a group.
                 A daten element without any text node contributes nothing to the selection. -->
            <xsl:for-each-group select="table/daten/text()" group-adjacent=".">
                <!-- position() here is the group position: table-max is re-created once,
                     before the first group, from the accumulator value. -->
                <xsl:if test="position() eq 1">
                    <table-max>{accumulator-before('max')}</table-max>
                </xsl:if>
                <xsl:iterate select="current-group()">
                    <!-- table-start-no: number of the second table of the group (start of the range). -->
                    <xsl:param name="table-start-no" as="xs:integer" select="0"/>
                    <!-- table-end-no: number of the most recently processed table (end of the range). -->
                    <xsl:param name="table-end-no" as="xs:integer" select="0"/>
                    <!-- pos: position of the previously processed item within the group. -->
                    <xsl:param name="pos" as="xs:integer" select="1"/>
                    <!-- After the last item: if the group had more than one table, summarise
                         tables 2..last as a single range element. A group of exactly two
                         tables yields a range whose start and end are the same number. -->
                    <xsl:on-completion>
                        <xsl:if test="$pos gt 1">
                            <table range="{format-integer($table-start-no, '001')} - {format-integer($table-end-no, '001')}">
                                <daten>as before</daten>
                            </table>
                        </xsl:if>
                    </xsl:on-completion>
                    <!-- First item of the group: write the table with its number and value. -->
                    <xsl:if test="position() eq 1">
                            <table no="{format-integer(accumulator-before('table-no'), '001')}">
                                <daten>{current-grouping-key()}</daten>
                            </table>
                    </xsl:if>
                    <xsl:next-iteration>
                        <!-- The range start is fixed at item 2 and carried forward unchanged afterwards. -->
                        <xsl:with-param name="table-start-no" select="if (position() eq 2) then accumulator-before('table-no') else $table-start-no"/>
                        <xsl:with-param name="table-end-no" select="accumulator-before('table-no')"/>
                        <xsl:with-param name="pos" select="position()"/>
                    </xsl:next-iteration>
                </xsl:iterate>
            </xsl:for-each-group>
        </xsl:copy>
    </xsl:template>

</xsl:stylesheet>

不使用流式处理的代码更紧凑,但我不确定它能否处理 6 GB 的输入:

<?xml version="1.0" encoding="utf-8"?>
<!--
    Non-streaming XSLT 3.0 stylesheet that collapses runs of adjacent table
    elements with equal daten values inside each table-data element.

    The first table of every run is written as <table no="NNN"> with its daten
    value; any further tables of the run are summarised by one
    <table range="NNN - MMM"> element whose daten is "as before".
    All other nodes are copied unchanged.
-->
<xsl:stylesheet version="3.0"
    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema"
    exclude-result-prefixes="#all"
    expand-text="yes">

    <xsl:output method="xml" indent="yes"/>

    <!-- Identity behaviour for everything that has no dedicated template. -->
    <xsl:mode on-no-match="shallow-copy" use-accumulators="#all"/>

    <!-- 1-based number of the current table within its table-data parent:
         reset at table-data, incremented on every table child. -->
    <xsl:accumulator name="table-no" as="xs:integer" initial-value="0">
        <xsl:accumulator-rule match="table-data" select="0"/>
        <xsl:accumulator-rule match="table-data/table" select="$value + 1"/>
    </xsl:accumulator>

    <xsl:template match="table-data">
        <xsl:copy>
            <xsl:apply-templates select="table-max"/>
            <xsl:for-each-group select="table" group-adjacent="daten">
                <!-- Every table of the run except the first one. -->
                <xsl:variable name="repeats" select="tail(current-group())"/>

                <!-- The context item is the first table of the run. -->
                <table>
                    <xsl:attribute name="no"
                        select="format-integer(accumulator-before('table-no'), '001')"/>
                    <daten>
                        <xsl:value-of select="current-grouping-key()"/>
                    </daten>
                </table>

                <!-- Summarise the remaining tables, if any, as one numbered range. -->
                <xsl:if test="exists($repeats)">
                    <table>
                        <xsl:attribute name="range"
                            select="format-integer($repeats[1] ! accumulator-before('table-no'), '001')
                                    || ' - '
                                    || format-integer($repeats[last()] ! accumulator-before('table-no'), '001')"/>
                        <daten>as before</daten>
                    </table>
                </xsl:if>
            </xsl:for-each-group>
        </xsl:copy>
    </xsl:template>

</xsl:stylesheet>

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM