簡體   English   中英

Scala:將 xml 數據框轉換為 csv 文件

[英]Scala: Convert xml dataframe to csv file

使用 Scala 和 IntelliJ,

我有一個 xml 文件,並將其讀入數據幀,如下所示:

var dftest = spark.read.format("com.databricks.spark.xml").option("rowTag","transferBatch").load(file)

模式很長,有很多序列元素節點。 某些列還具有不同的數據類型。

root
 |-- accountingInfo: struct (nullable = true)
 |    |-- currencyConversion: struct (nullable = true)
 |    |    |-- ExchangeRateDefinition: struct (nullable = true)
 |    |    |    |-- exchangeRate: long (nullable = true)
 |    |    |    |-- exchangeRateCode: long (nullable = true)
 |    |    |    |-- numberOfDecimalPlaces: long (nullable = true)
 |    |-- localCurrency: string (nullable = true)
 |    |-- tapDecimalPlaces: long (nullable = true)
 |-- auditControlInfo: struct (nullable = true)
 |    |-- callEventDetailsCount: long (nullable = true)
 |    |-- earliestCallTimeStamp: struct (nullable = true)
 |    |    |-- localTimeStamp: string (nullable = true)
 |    |    |-- utcTimeOffset: string (nullable = true)
 |    |-- latestCallTimeStamp: struct (nullable = true)
 |    |    |-- localTimeStamp: string (nullable = true)
 |    |    |-- utcTimeOffset: string (nullable = true)
 |    |-- operatorSpecInformation: struct (nullable = true)
 |    |    |-- OperatorSpecInformation: array (nullable = true)
 |    |    |    |-- element: string (containsNull = true)
 |    |-- totalChargeValueList: struct (nullable = true)
 |    |    |-- TotalChargeValue: struct (nullable = true)
 |    |    |    |-- chargeType: string (nullable = true)
 |    |    |    |-- totalCharge: long (nullable = true)
 |    |-- totalDiscountValue: long (nullable = true)
 |    |-- totalTaxValue: long (nullable = true)
 |-- batchControlInfo: struct (nullable = true)
 |    |-- fileAvailableTimeStamp: struct (nullable = true)
 |    |    |-- localTimeStamp: string (nullable = true)
 |    |    |-- utcTimeOffset: string (nullable = true)
 |    |-- fileCreationTimeStamp: struct (nullable = true)
 |    |    |-- localTimeStamp: string (nullable = true)
 |    |    |-- utcTimeOffset: string (nullable = true)
 |    |-- fileSequenceNumber: string (nullable = true)
 |    |-- recipient: string (nullable = true)
 |    |-- releaseVersionNumber: long (nullable = true)
 |    |-- sender: string (nullable = true)
 |    |-- specificationVersionNumber: long (nullable = true)
 |    |-- transferCutOffTimeStamp: struct (nullable = true)
 |    |    |-- localTimeStamp: string (nullable = true)
 |    |    |-- utcTimeOffset: string (nullable = true)
 |-- callEventDetails: struct (nullable = true)
 |    |-- gprsCall: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- equipmentInformation: struct (nullable = true)
 |    |    |    |    |-- imeiOrEsn: struct (nullable = true)
 |    |    |    |    |    |-- imei: string (nullable = true)
 |    |    |    |-- gprsBasicCallInformation: struct (nullable = true)
 |    |    |    |    |-- callEventStartTimeStamp: struct (nullable = true)
 |    |    |    |    |    |-- localTimeStamp: string (nullable = true)
 |    |    |    |    |    |-- utcTimeOffsetCode: long (nullable = true)
 |    |    |    |    |-- chargeableSubscriber: struct (nullable = true)
 |    |    |    |    |    |-- chargeableSubscriber: struct (nullable = true)
 |    |    |    |    |    |    |-- simChargeableSubscriber: struct (nullable = true)
 |    |    |    |    |    |    |    |-- imsi: string (nullable = true)
 |    |    |    |    |    |    |    |-- msisdn: string (nullable = true)
 |    |    |    |    |    |-- pdpAddress: string (nullable = true)
 |    |    |    |    |    |-- pdpType: long (nullable = true)
 |    |    |    |    |-- chargingId: string (nullable = true)
 |    |    |    |    |-- gprsDestination: struct (nullable = true)
 |    |    |    |    |    |-- accessPointNameNI: string (nullable = true)
 |    |    |    |    |    |-- accessPointNameOI: string (nullable = true)
 |    |    |    |    |-- totalCallEventDuration: long (nullable = true)
 |    |    |    |-- gprsLocationInformation: struct (nullable = true)
 |    |    |    |    |-- gprsNetworkLocation: struct (nullable = true)
 |    |    |    |    |    |-- cellId: long (nullable = true)
 |    |    |    |    |    |-- locationArea: long (nullable = true)
 |    |    |    |    |    |-- recEntity: struct (nullable = true)
 |    |    |    |    |    |    |-- RecEntityCode: array (nullable = true)
 |    |    |    |    |    |    |    |-- element: long (containsNull = true)
 |    |    |    |-- gprsServiceUsed: struct (nullable = true)
 |    |    |    |    |-- chargeInformationList: struct (nullable = true)
 |    |    |    |    |    |-- ChargeInformation: struct (nullable = true)
 |    |    |    |    |    |    |-- chargeDetailList: struct (nullable = true)
 |    |    |    |    |    |    |    |-- ChargeDetail: struct (nullable = true)
 |    |    |    |    |    |    |    |    |-- charge: long (nullable = true)
 |    |    |    |    |    |    |    |    |-- chargeType: string (nullable = true)
 |    |    |    |    |    |    |    |    |-- chargeableUnits: long (nullable = true)
 |    |    |    |    |    |    |    |    |-- chargedUnits: long (nullable = true)
 |    |    |    |    |    |    |    |    |-- dayCategory: long (nullable = true)
 |    |    |    |    |    |    |    |    |-- timeBand: long (nullable = true)
 |    |    |    |    |    |    |-- chargedItem: long (nullable = true)
 |    |    |    |    |    |    |-- exchangeRateCode: long (nullable = true)
 |    |    |    |    |-- gprsServiceUsageList: struct (nullable = true)
 |    |    |    |    |    |-- GprsServiceUsage: struct (nullable = true)
 |    |    |    |    |    |    |-- dataVolumeIncoming: long (nullable = true)
 |    |    |    |    |    |    |-- dataVolumeOutgoing: long (nullable = true)
 |    |    |    |-- operatorSpecInformation: struct (nullable = true)
 |    |    |    |    |-- OperatorSpecInformation: array (nullable = true)
 |    |    |    |    |    |-- element: string (containsNull = true)
 |    |    |    |-- typeOfControllingNode: long (nullable = true)
 |    |-- mobileOriginatedCall: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- basicCallInformation: struct (nullable = true)
 |    |    |    |    |-- callEventStartTimeStamp: struct (nullable = true)
 |    |    |    |    |    |-- localTimeStamp: string (nullable = true)
 |    |    |    |    |    |-- utcTimeOffsetCode: long (nullable = true)
 |    |    |    |    |-- chargeableSubscriber: struct (nullable = true)
 |    |    |    |    |    |-- simChargeableSubscriber: struct (nullable = true)
 |    |    |    |    |    |    |-- imsi: string (nullable = true)
 |    |    |    |    |    |    |-- msisdn: string (nullable = true)
 |    |    |    |    |-- destination: struct (nullable = true)
 |    |    |    |    |    |-- calledNumber: string (nullable = true)
 |    |    |    |    |-- totalCallEventDuration: long (nullable = true)
 |    |    |    |-- basicServiceUsedList: struct (nullable = true)
 |    |    |    |    |-- BasicServiceUsed: struct (nullable = true)
 |    |    |    |    |    |-- basicService: struct (nullable = true)
 |    |    |    |    |    |    |-- serviceCode: struct (nullable = true)
 |    |    |    |    |    |    |    |-- teleServiceCode: string (nullable = true)
 |    |    |    |    |    |-- chargeInformationList: struct (nullable = true)
 |    |    |    |    |    |    |-- ChargeInformation: struct (nullable = true)
 |    |    |    |    |    |    |    |-- callTypeGroup: struct (nullable = true)
 |    |    |    |    |    |    |    |    |-- callTypeLevel1: long (nullable = true)
 |    |    |    |    |    |    |    |    |-- callTypeLevel2: long (nullable = true)
 |    |    |    |    |    |    |    |    |-- callTypeLevel3: long (nullable = true)
 |    |    |    |    |    |    |    |    |-- calledCountryCode: string (nullable = true)
 |    |    |    |    |    |    |    |-- chargeDetailList: struct (nullable = true)
 |    |    |    |    |    |    |    |    |-- ChargeDetail: struct (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- charge: long (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- chargeType: string (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- chargeableUnits: long (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- chargedUnits: long (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- dayCategory: long (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- timeBand: long (nullable = true)
 |    |    |    |    |    |    |    |-- chargedItem: long (nullable = true)
 |    |    |    |    |    |    |    |-- exchangeRateCode: long (nullable = true)
 |    |    |    |-- equipmentInformation: struct (nullable = true)
 |    |    |    |    |-- imeiOrEsn: struct (nullable = true)
 |    |    |    |    |    |-- imei: string (nullable = true)
 |    |    |    |-- locationInformation: struct (nullable = true)
 |    |    |    |    |-- networkLocation: struct (nullable = true)
 |    |    |    |    |    |-- callReference: string (nullable = true)
 |    |    |    |    |    |-- cellId: long (nullable = true)
 |    |    |    |    |    |-- locationArea: long (nullable = true)
 |    |    |    |    |    |-- recEntityCode: long (nullable = true)
 |    |    |    |-- operatorSpecInformation: struct (nullable = true)
 |    |    |    |    |-- OperatorSpecInformation: array (nullable = true)
 |    |    |    |    |    |-- element: string (containsNull = true)
 |    |-- mobileTerminatedCall: array (nullable = true)
 |    |    |-- element: struct (containsNull = true)
 |    |    |    |-- basicCallInformation: struct (nullable = true)
 |    |    |    |    |-- callEventStartTimeStamp: struct (nullable = true)
 |    |    |    |    |    |-- localTimeStamp: string (nullable = true)
 |    |    |    |    |    |-- utcTimeOffsetCode: long (nullable = true)
 |    |    |    |    |-- callOriginator: struct (nullable = true)
 |    |    |    |    |    |-- callingNumber: string (nullable = true)
 |    |    |    |    |-- chargeableSubscriber: struct (nullable = true)
 |    |    |    |    |    |-- simChargeableSubscriber: struct (nullable = true)
 |    |    |    |    |    |    |-- imsi: string (nullable = true)
 |    |    |    |    |    |    |-- msisdn: string (nullable = true)
 |    |    |    |    |-- totalCallEventDuration: long (nullable = true)
 |    |    |    |-- basicServiceUsedList: struct (nullable = true)
 |    |    |    |    |-- BasicServiceUsed: struct (nullable = true)
 |    |    |    |    |    |-- basicService: struct (nullable = true)
 |    |    |    |    |    |    |-- serviceCode: struct (nullable = true)
 |    |    |    |    |    |    |    |-- teleServiceCode: string (nullable = true)
 |    |    |    |    |    |-- chargeInformationList: struct (nullable = true)
 |    |    |    |    |    |    |-- ChargeInformation: struct (nullable = true)
 |    |    |    |    |    |    |    |-- chargeDetailList: struct (nullable = true)
 |    |    |    |    |    |    |    |    |-- ChargeDetail: struct (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- charge: long (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- chargeType: string (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- chargeableUnits: long (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- chargedUnits: long (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- dayCategory: long (nullable = true)
 |    |    |    |    |    |    |    |    |    |-- timeBand: long (nullable = true)
 |    |    |    |    |    |    |    |-- chargedItem: long (nullable = true)
 |    |    |    |    |    |    |    |-- exchangeRateCode: long (nullable = true)
 |    |    |    |-- equipmentInformation: struct (nullable = true)
 |    |    |    |    |-- imeiOrEsn: struct (nullable = true)
 |    |    |    |    |    |-- imei: string (nullable = true)
 |    |    |    |-- locationInformation: struct (nullable = true)
 |    |    |    |    |-- networkLocation: struct (nullable = true)
 |    |    |    |    |    |-- callReference: string (nullable = true)
 |    |    |    |    |    |-- cellId: long (nullable = true)
 |    |    |    |    |    |-- locationArea: long (nullable = true)
 |    |    |    |    |    |-- recEntityCode: long (nullable = true)
 |    |    |    |-- operatorSpecInformation: struct (nullable = true)
 |    |    |    |    |-- OperatorSpecInformation: array (nullable = true)
 |    |    |    |    |    |-- element: string (containsNull = true)
 |-- networkInfo: struct (nullable = true)
 |    |-- calledNumAnalysis: struct (nullable = true)
 |    |    |-- CalledNumAnalysis: struct (nullable = true)
 |    |    |    |-- calledNumAnalysisCode: long (nullable = true)
 |    |    |    |-- countryCodeTable: struct (nullable = true)
 |    |    |    |    |-- CountryCode: string (nullable = true)
 |    |    |    |-- iacTable: struct (nullable = true)
 |    |    |    |    |-- Iac: string (nullable = true)
 |    |-- networkType: long (nullable = true)
 |    |-- recEntityInfo: struct (nullable = true)
 |    |    |-- RecEntityDefinition: array (nullable = true)
 |    |    |    |-- element: struct (containsNull = true)
 |    |    |    |    |-- recEntityCode: long (nullable = true)
 |    |    |    |    |-- recEntityId: struct (nullable = true)
 |    |    |    |    |    |-- gsnaddress: struct (nullable = true)
 |    |    |    |    |    |    |-- iPTextV4Address: string (nullable = true)
 |    |    |    |    |    |-- mscId: string (nullable = true)
 |    |    |    |    |    |-- msisdn: string (nullable = true)
 |    |    |    |    |-- recEntityType: long (nullable = true)
 |    |-- utcTimeOffsetInfo: struct (nullable = true)
 |    |    |-- UtcTimeOffsetDefinition: struct (nullable = true)
 |    |    |    |-- utcTimeOffset: string (nullable = true)
 |    |    |    |-- utcTimeOffsetCode: long (nullable = true)

當我查看數據幀中的元素時,它顯示為這樣一個表格:

我不確定如何將此數據幀寫入 csv 文件。

有什么建議嗎? 謝謝

請查看 Databricks 中的 Spark-csv 庫:

這是一個簡單的例子(注意:CSV 是扁平格式,struct 或 array 類型的嵌套列通常需要先用 select 或 explode 展開為普通列,然後才能寫入):

mydf.write.
    format("com.databricks.spark.csv").
    option("header", "true").
    save("out.csv")

你可以在這里找到它: https://mvnrepository.com/artifact/com.databricks/spark-csv_2.10

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM