繁体   English   中英

无法使用xslt将所需的数据从xml获取到CSV

[英]Unable to get the required data from xml to csv using xslt

我正在尝试将数据从XML加载到csv,但是我缺少一些节点数据。 下面是我的Java代码,

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.ParseException;

import javax.xml.namespace.QName;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.transform.Result;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stax.StAXSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;


/**
 * Command-line tool that runs an XSLT stylesheet over an XML file and writes the
 * result (typically CSV text) to an output file.
 *
 * <p>The input is fed to the transformer through a StAX stream reader. While the
 * transformer pulls events from that reader, start tags matching a configurable
 * element name are counted as "rows" and progress is printed every
 * {@value #PROGRESS_INTERVAL} rows.
 *
 * <p>Usage:
 * {@code XML2CSV -f <input file> -o <output file> -x <XSLT stylesheet> -t <Tag name> [-n <namespace URL>]}
 */
public class XML2CSV {

    /** Number of counted rows between two progress messages. */
    private static final int PROGRESS_INTERVAL = 1000;

    /**
     * Transforms the XML read from {@code is} with {@code transformer}, writing the
     * result to {@code os}, and counts the start tags named {@code name}.
     *
     * <p>The transformer consumes the stream reader, so counting has to happen while
     * events pass through it; a loop placed after the transformation would find the
     * reader already exhausted and always report zero rows.
     *
     * @param is          XML input; not closed by this method
     * @param os          destination of the transformation result; not closed by this method
     * @param transformer transformer applied to the whole document
     * @param name        qualified name of the element that represents one row
     * @return number of start tags named {@code name} encountered in the input
     * @throws XMLStreamException   if the input cannot be read as XML
     * @throws TransformerException if the transformation fails
     */
    public static int transform(InputStream is, OutputStream os, Transformer transformer, QName name) throws XMLStreamException, TransformerException {
        long time1 = System.nanoTime();

        XMLInputFactory factory = XMLInputFactory.newInstance();
        factory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, true);
        factory.setProperty(XMLInputFactory.IS_VALIDATING, false);

        RowCountingReader reader = new RowCountingReader(factory.createXMLStreamReader(is), name, time1);
        try {
            Result result = new StreamResult(os);
            transformer.transform(new StAXSource(reader), result);

            // Count whatever the transformer left unread, if anything.
            while (reader.hasNext()) {
                reader.next();
            }
        } finally {
            // Releases parser resources; the underlying InputStream stays open for the caller.
            reader.close();
        }

        int count = reader.getCount();
        long time2 = System.nanoTime();
        double ms = (time2 - time1) / 1000000.0;
        System.out.format("Total Time=%.2fms Total rows=%d%n", ms, count);
        return count;
    }

    /**
     * Entry point: parses the command line, loads the stylesheet and runs the
     * transformation from the input file to the output file.
     *
     * <p>Exits with status 1 when a required option is missing or when the
     * stylesheet or input file does not exist.
     *
     * @param arg command-line arguments, see the class description for the syntax
     * @throws Exception if the stylesheet cannot be compiled, a file cannot be
     *                   opened, or the transformation fails
     */
    public static void main(String arg[]) throws Exception {
        // Parse command line options
        File xsltFile;
        File inputFile;
        File outputFile;
        String tagName;
        String namespace;
        try {
            String xsltFileName = parse("-x", arg, "XSLT sheet", true);
            String inputFileName = parse("-f", arg, "Input file", true);
            String outputFileName = parse("-o", arg, "Output file", true);
            tagName = parse("-t", arg, "Tag name", true);
            namespace = parse("-n", arg, "Tag Namespace URL", false);
            xsltFile = new File(xsltFileName);
            inputFile = new File(inputFileName);
            outputFile = new File(outputFileName);
        } catch (ParseException e) {
            System.err.println(e.getMessage());
            System.err.println("Syntax: XML2CSV -f <input file> -o <output file> -x <XSLT stylesheet> -t <Tag name> [-n <namespace URL>]");
            System.err.println("Will split given file on given tag with given namespace.");
            System.err.println("Will process contents of each tag using given XSLT.");
            System.exit(1);
            return;
        }
        if (!xsltFile.exists()) {
            System.err.println("File not found " + xsltFile.getAbsolutePath());
            System.exit(1);
        }
        if (!inputFile.exists()) {
            System.err.println("File not found " + inputFile.getAbsolutePath());
            System.exit(1);
        }

        // Open XSLT stylesheet
        StreamSource stylesource = new StreamSource(xsltFile);
        Transformer transformer = TransformerFactory.newInstance().newTransformer(stylesource);

        // Create XML tag name which is used to count rows
        final QName name;
        if (namespace != null) {
            name = new QName(namespace, tagName);
        } else {
            name = new QName(tagName);
        }
        System.out.println("Will look for tag " + name + " in namespace " + namespace);

        // try-with-resources closes both streams even if closing one of them fails.
        try (InputStream fis = new FileInputStream(inputFile);
             OutputStream fos = new FileOutputStream(outputFile)) {
            transform(fis, fos, transformer, name);
        }
    }

    /**
     * Looks up the value that follows {@code option} on the command line.
     *
     * @param option   option switch to search for, e.g. {@code "-f"}
     * @param arg      command-line arguments
     * @param desc     human-readable option description used in the error message
     * @param required whether a missing option is an error
     * @return the trimmed argument following the first occurrence of {@code option},
     *         or {@code null} if the option is absent and not required
     * @throws ParseException if the option is the last argument, or if it is
     *                        required but absent
     */
    private static String parse(String option, String[] arg, String desc, boolean required) throws ParseException {
        // A linear scan per option is fine for a handful of arguments.
        for (int i = 0; i < arg.length; i++) {
            if (option.equals(arg[i])) {
                if (i + 1 < arg.length) {
                    return arg[i + 1].trim();
                }
                throw new ParseException(option + " must be followed by an argument", i);
            }
        }
        if (required) {
            throw new ParseException(desc + " is required", 0);
        }
        return null;
    }

    /**
     * Stream reader wrapper that counts start tags with a given name as the
     * consumer advances through the document, printing progress periodically.
     */
    private static final class RowCountingReader extends javax.xml.stream.util.StreamReaderDelegate {

        private final QName rowName;
        private final long startNanos;
        private int count;

        RowCountingReader(XMLStreamReader delegate, QName rowName, long startNanos) {
            super(delegate);
            this.rowName = rowName;
            this.startNanos = startNanos;
        }

        @Override
        public int next() throws XMLStreamException {
            int event = super.next();
            record(event);
            return event;
        }

        @Override
        public int nextTag() throws XMLStreamException {
            int event = super.nextTag();
            record(event);
            return event;
        }

        /** Returns the number of matching start tags seen so far. */
        int getCount() {
            return count;
        }

        /** Counts the event the reader has just moved to if it is a matching start tag. */
        private void record(int event) {
            if (event != XMLStreamReader.START_ELEMENT || !rowName.equals(getName())) {
                return;
            }
            count++;
            if (count % PROGRESS_INTERVAL == 0) {
                double ms = (System.nanoTime() - startNanos) / 1000000.0;
                System.out.format("Time=%.2fms Rows=%d%n", ms, count);
            }
        }
    }
}

这是我的XSL文件,

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Streaming XSLT 3.0 stylesheet that flattens FIXML reference data into CSV.

  One CSV row is written per x:Evnt element. Columns come from the event
  itself, its parent element (ID, Src, Exch), its grandparent element
  (PriSetPx, TxnTm) and the MktSegID of the most recently seen
  x:Batch/x:MktDef, which is remembered in an accumulator because a
  preceding sibling cannot be reached by navigation while streaming.
-->
<xsl:stylesheet version="3.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                xmlns:x="http://www.fixprotocol.org/FIXML-5-0-SP2"
                xsi:schemaLocation="http://www.fixprotocol.org/FIXML-5-0-SP2 fixml-main-5-0-SP2_.xsd"
                xmlns:math="http://www.w3.org/2005/xpath-functions/math"
                exclude-result-prefixes="xs math">

  <!-- Holds the MktSegID attribute of the latest x:MktDef encountered in document order.
       The xs prefix used in the "as" attribute must be declared on xsl:stylesheet. -->
  <xsl:accumulator name="MktSegID" streamable="yes" as="xs:string?" initial-value="()">
    <xsl:accumulator-rule match="x:Batch/x:MktDef" select="string(@MktSegID)"/>
  </xsl:accumulator>

  <xsl:mode streamable="yes" use-accumulators="MktSegID"/>
  <xsl:output method="text" encoding="utf-8" />

  <!-- Field separator, field quote character and row terminator. -->
  <xsl:param name="delim" select="','" />
  <xsl:param name="quote" select="'&quot;'" />
  <xsl:param name="break" select="'&#xA;'" />

  <!-- Writes the header line, then one row per x:Evnt descendant. -->
  <xsl:template match="/">
    <xsl:text>PriSetPx,TxnTm,ID,Src,EventTyp,Dt,Exch,MktSegID</xsl:text>
    <xsl:text>&#xA;</xsl:text>
    <xsl:apply-templates select="descendant::x:Evnt"/>
  </xsl:template>

  <!-- Writes one quoted CSV row; the last field is followed by the row break only,
       so every row has the same eight fields as the header. -->
  <xsl:template match="x:Evnt">
    <xsl:value-of select="concat($quote, normalize-space(../../@PriSetPx), $quote)" /><xsl:value-of select="$delim" />
    <xsl:value-of select="concat($quote, normalize-space(../../@TxnTm), $quote)" /><xsl:value-of select="$delim" />
    <xsl:value-of select="concat($quote, normalize-space(../@ID), $quote)" /><xsl:value-of select="$delim" />
    <xsl:value-of select="concat($quote, normalize-space(../@Src), $quote)" /><xsl:value-of select="$delim" />

    <xsl:value-of select="concat($quote, normalize-space(@EventTyp), $quote)" /><xsl:value-of select="$delim" />
    <xsl:value-of select="concat($quote, normalize-space(@Dt), $quote)" /><xsl:value-of select="$delim" />

    <xsl:value-of select="concat($quote, normalize-space(../@Exch), $quote)" /><xsl:value-of select="$delim" />
    <xsl:value-of select="concat($quote, accumulator-before('MktSegID'), $quote)" />
    <xsl:value-of select="$break" />
  </xsl:template>
</xsl:stylesheet>

这是我的示例xml,

<?xml version="1.0" encoding="ISO-8859-1"?>
<FIXML xsi:schemaLocation="http://www.fixprotocol.org/FIXML-5-0-SP2 fixml-main-5-0-SP2_.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.fixprotocol.org/FIXML-5-0-SP2" s="2012-04-23" v="FIX.5.0SP2">
<Batch ID="RPTTA111PUBLI20170509">
    ************ This is one set of loop************
           <MktDef MktID="XEUR" MktSegID="19699" EfctvBizDt="2017-05-11" NxtEfctvBizDt="2017-05-15" MktSeg="FCEA" MarketSegmentDesc="FUT ON EUR AUD" Sym="DE000A160WW0" ParentMktSegmID="FCUR" Ccy="AUD" MktSegStat="10" USFirmFlag="Y" PartID="1">
        <Undly Exch="XREU" Sym="CEA" ID="EU0009654748" Src="4" PrevClsPx="1.47"/>
    </MktDef>

    <SecDef PriSetPx="68708.52">
        <Instrmt ID="221096" Src="M" SecTyp="FUT" Status="1" Exch="XLDX" ProdCmplx="1" CFI="FFMCSX" MatDt="2024-12-17" MMY="202412" Mult="1" ValMeth="FUT" SettlMeth="C" PxPrcsn="2" MinPxIncr="0.01" MinPxIncrAmt="0.01">
            <Evnt EventTyp="7" Dt="2024-12-17"/>
        </Instrmt>
    </SecDef>
            <SecDef>
             .
             .
             .
            </SecDef>
            <SecDef>
             .
             .
             .
            </SecDef>
            <SecDef>
             .
             .
             .
            </SecDef>
   ************ This is one set of loop************
  ############ This will continue n number of times having millions of line###########        
</Batch>
</FIXML>

输出应包含以下各列及其数据,但 @Exch 和 @MktSegID 两列的数据缺失:

PriSetPx TxnTm ID Src EventTyp Dt Exch MktSegID 

请帮助我了解XSL代码中的错误地方以及如何获取列数据。

谢谢!

尝试设置一个累加器

<xsl:accumulator name="MktSegID" streamable="yes" as="xs:string?" initial-value="()">
  <xsl:accumulator-rule match="x:Batch/x:MktDef" select="string(@MktSegID)"/>
</xsl:accumulator>

<xsl:mode streamable="yes" use-accumulators="MktSegID"/>

(即在 xsl:mode 上通过 use-accumulators 声明该累加器),然后不要使用

<xsl:value-of select="concat($quote, normalize-space(../../../@MktSegID), $quote)" />

采用

<xsl:value-of select="concat($quote, accumulator-before('MktSegID'), $quote)" />

正如一条评论已经指出的,您似乎不应使用 <xsl:value-of select="concat($quote, normalize-space(../../@Exch), $quote)" />,而应使用 <xsl:value-of select="concat($quote, normalize-space(../@Exch), $quote)" />。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM