[英]Scala: Convert xml dataframe to csv file
使用 Scala 和 IntelliJ,
我有一個 xml 文件,並將其讀入數據幀(DataFrame),如下所示:
var dftest = spark.read.format("com.databricks.spark.xml").option("rowTag","transferBatch").load(file)
模式(schema)很長,有很多序列元素節點。某些列還具有不同的數據類型。
root
|-- accountingInfo: struct (nullable = true)
| |-- currencyConversion: struct (nullable = true)
| | |-- ExchangeRateDefinition: struct (nullable = true)
| | | |-- exchangeRate: long (nullable = true)
| | | |-- exchangeRateCode: long (nullable = true)
| | | |-- numberOfDecimalPlaces: long (nullable = true)
| |-- localCurrency: string (nullable = true)
| |-- tapDecimalPlaces: long (nullable = true)
|-- auditControlInfo: struct (nullable = true)
| |-- callEventDetailsCount: long (nullable = true)
| |-- earliestCallTimeStamp: struct (nullable = true)
| | |-- localTimeStamp: string (nullable = true)
| | |-- utcTimeOffset: string (nullable = true)
| |-- latestCallTimeStamp: struct (nullable = true)
| | |-- localTimeStamp: string (nullable = true)
| | |-- utcTimeOffset: string (nullable = true)
| |-- operatorSpecInformation: struct (nullable = true)
| | |-- OperatorSpecInformation: array (nullable = true)
| | | |-- element: string (containsNull = true)
| |-- totalChargeValueList: struct (nullable = true)
| | |-- TotalChargeValue: struct (nullable = true)
| | | |-- chargeType: string (nullable = true)
| | | |-- totalCharge: long (nullable = true)
| |-- totalDiscountValue: long (nullable = true)
| |-- totalTaxValue: long (nullable = true)
|-- batchControlInfo: struct (nullable = true)
| |-- fileAvailableTimeStamp: struct (nullable = true)
| | |-- localTimeStamp: string (nullable = true)
| | |-- utcTimeOffset: string (nullable = true)
| |-- fileCreationTimeStamp: struct (nullable = true)
| | |-- localTimeStamp: string (nullable = true)
| | |-- utcTimeOffset: string (nullable = true)
| |-- fileSequenceNumber: string (nullable = true)
| |-- recipient: string (nullable = true)
| |-- releaseVersionNumber: long (nullable = true)
| |-- sender: string (nullable = true)
| |-- specificationVersionNumber: long (nullable = true)
| |-- transferCutOffTimeStamp: struct (nullable = true)
| | |-- localTimeStamp: string (nullable = true)
| | |-- utcTimeOffset: string (nullable = true)
|-- callEventDetails: struct (nullable = true)
| |-- gprsCall: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- equipmentInformation: struct (nullable = true)
| | | | |-- imeiOrEsn: struct (nullable = true)
| | | | | |-- imei: string (nullable = true)
| | | |-- gprsBasicCallInformation: struct (nullable = true)
| | | | |-- callEventStartTimeStamp: struct (nullable = true)
| | | | | |-- localTimeStamp: string (nullable = true)
| | | | | |-- utcTimeOffsetCode: long (nullable = true)
| | | | |-- chargeableSubscriber: struct (nullable = true)
| | | | | |-- chargeableSubscriber: struct (nullable = true)
| | | | | | |-- simChargeableSubscriber: struct (nullable = true)
| | | | | | | |-- imsi: string (nullable = true)
| | | | | | | |-- msisdn: string (nullable = true)
| | | | | |-- pdpAddress: string (nullable = true)
| | | | | |-- pdpType: long (nullable = true)
| | | | |-- chargingId: string (nullable = true)
| | | | |-- gprsDestination: struct (nullable = true)
| | | | | |-- accessPointNameNI: string (nullable = true)
| | | | | |-- accessPointNameOI: string (nullable = true)
| | | | |-- totalCallEventDuration: long (nullable = true)
| | | |-- gprsLocationInformation: struct (nullable = true)
| | | | |-- gprsNetworkLocation: struct (nullable = true)
| | | | | |-- cellId: long (nullable = true)
| | | | | |-- locationArea: long (nullable = true)
| | | | | |-- recEntity: struct (nullable = true)
| | | | | | |-- RecEntityCode: array (nullable = true)
| | | | | | | |-- element: long (containsNull = true)
| | | |-- gprsServiceUsed: struct (nullable = true)
| | | | |-- chargeInformationList: struct (nullable = true)
| | | | | |-- ChargeInformation: struct (nullable = true)
| | | | | | |-- chargeDetailList: struct (nullable = true)
| | | | | | | |-- ChargeDetail: struct (nullable = true)
| | | | | | | | |-- charge: long (nullable = true)
| | | | | | | | |-- chargeType: string (nullable = true)
| | | | | | | | |-- chargeableUnits: long (nullable = true)
| | | | | | | | |-- chargedUnits: long (nullable = true)
| | | | | | | | |-- dayCategory: long (nullable = true)
| | | | | | | | |-- timeBand: long (nullable = true)
| | | | | | |-- chargedItem: long (nullable = true)
| | | | | | |-- exchangeRateCode: long (nullable = true)
| | | | |-- gprsServiceUsageList: struct (nullable = true)
| | | | | |-- GprsServiceUsage: struct (nullable = true)
| | | | | | |-- dataVolumeIncoming: long (nullable = true)
| | | | | | |-- dataVolumeOutgoing: long (nullable = true)
| | | |-- operatorSpecInformation: struct (nullable = true)
| | | | |-- OperatorSpecInformation: array (nullable = true)
| | | | | |-- element: string (containsNull = true)
| | | |-- typeOfControllingNode: long (nullable = true)
| |-- mobileOriginatedCall: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- basicCallInformation: struct (nullable = true)
| | | | |-- callEventStartTimeStamp: struct (nullable = true)
| | | | | |-- localTimeStamp: string (nullable = true)
| | | | | |-- utcTimeOffsetCode: long (nullable = true)
| | | | |-- chargeableSubscriber: struct (nullable = true)
| | | | | |-- simChargeableSubscriber: struct (nullable = true)
| | | | | | |-- imsi: string (nullable = true)
| | | | | | |-- msisdn: string (nullable = true)
| | | | |-- destination: struct (nullable = true)
| | | | | |-- calledNumber: string (nullable = true)
| | | | |-- totalCallEventDuration: long (nullable = true)
| | | |-- basicServiceUsedList: struct (nullable = true)
| | | | |-- BasicServiceUsed: struct (nullable = true)
| | | | | |-- basicService: struct (nullable = true)
| | | | | | |-- serviceCode: struct (nullable = true)
| | | | | | | |-- teleServiceCode: string (nullable = true)
| | | | | |-- chargeInformationList: struct (nullable = true)
| | | | | | |-- ChargeInformation: struct (nullable = true)
| | | | | | | |-- callTypeGroup: struct (nullable = true)
| | | | | | | | |-- callTypeLevel1: long (nullable = true)
| | | | | | | | |-- callTypeLevel2: long (nullable = true)
| | | | | | | | |-- callTypeLevel3: long (nullable = true)
| | | | | | | | |-- calledCountryCode: string (nullable = true)
| | | | | | | |-- chargeDetailList: struct (nullable = true)
| | | | | | | | |-- ChargeDetail: struct (nullable = true)
| | | | | | | | | |-- charge: long (nullable = true)
| | | | | | | | | |-- chargeType: string (nullable = true)
| | | | | | | | | |-- chargeableUnits: long (nullable = true)
| | | | | | | | | |-- chargedUnits: long (nullable = true)
| | | | | | | | | |-- dayCategory: long (nullable = true)
| | | | | | | | | |-- timeBand: long (nullable = true)
| | | | | | | |-- chargedItem: long (nullable = true)
| | | | | | | |-- exchangeRateCode: long (nullable = true)
| | | |-- equipmentInformation: struct (nullable = true)
| | | | |-- imeiOrEsn: struct (nullable = true)
| | | | | |-- imei: string (nullable = true)
| | | |-- locationInformation: struct (nullable = true)
| | | | |-- networkLocation: struct (nullable = true)
| | | | | |-- callReference: string (nullable = true)
| | | | | |-- cellId: long (nullable = true)
| | | | | |-- locationArea: long (nullable = true)
| | | | | |-- recEntityCode: long (nullable = true)
| | | |-- operatorSpecInformation: struct (nullable = true)
| | | | |-- OperatorSpecInformation: array (nullable = true)
| | | | | |-- element: string (containsNull = true)
| |-- mobileTerminatedCall: array (nullable = true)
| | |-- element: struct (containsNull = true)
| | | |-- basicCallInformation: struct (nullable = true)
| | | | |-- callEventStartTimeStamp: struct (nullable = true)
| | | | | |-- localTimeStamp: string (nullable = true)
| | | | | |-- utcTimeOffsetCode: long (nullable = true)
| | | | |-- callOriginator: struct (nullable = true)
| | | | | |-- callingNumber: string (nullable = true)
| | | | |-- chargeableSubscriber: struct (nullable = true)
| | | | | |-- simChargeableSubscriber: struct (nullable = true)
| | | | | | |-- imsi: string (nullable = true)
| | | | | | |-- msisdn: string (nullable = true)
| | | | |-- totalCallEventDuration: long (nullable = true)
| | | |-- basicServiceUsedList: struct (nullable = true)
| | | | |-- BasicServiceUsed: struct (nullable = true)
| | | | | |-- basicService: struct (nullable = true)
| | | | | | |-- serviceCode: struct (nullable = true)
| | | | | | | |-- teleServiceCode: string (nullable = true)
| | | | | |-- chargeInformationList: struct (nullable = true)
| | | | | | |-- ChargeInformation: struct (nullable = true)
| | | | | | | |-- chargeDetailList: struct (nullable = true)
| | | | | | | | |-- ChargeDetail: struct (nullable = true)
| | | | | | | | | |-- charge: long (nullable = true)
| | | | | | | | | |-- chargeType: string (nullable = true)
| | | | | | | | | |-- chargeableUnits: long (nullable = true)
| | | | | | | | | |-- chargedUnits: long (nullable = true)
| | | | | | | | | |-- dayCategory: long (nullable = true)
| | | | | | | | | |-- timeBand: long (nullable = true)
| | | | | | | |-- chargedItem: long (nullable = true)
| | | | | | | |-- exchangeRateCode: long (nullable = true)
| | | |-- equipmentInformation: struct (nullable = true)
| | | | |-- imeiOrEsn: struct (nullable = true)
| | | | | |-- imei: string (nullable = true)
| | | |-- locationInformation: struct (nullable = true)
| | | | |-- networkLocation: struct (nullable = true)
| | | | | |-- callReference: string (nullable = true)
| | | | | |-- cellId: long (nullable = true)
| | | | | |-- locationArea: long (nullable = true)
| | | | | |-- recEntityCode: long (nullable = true)
| | | |-- operatorSpecInformation: struct (nullable = true)
| | | | |-- OperatorSpecInformation: array (nullable = true)
| | | | | |-- element: string (containsNull = true)
|-- networkInfo: struct (nullable = true)
| |-- calledNumAnalysis: struct (nullable = true)
| | |-- CalledNumAnalysis: struct (nullable = true)
| | | |-- calledNumAnalysisCode: long (nullable = true)
| | | |-- countryCodeTable: struct (nullable = true)
| | | | |-- CountryCode: string (nullable = true)
| | | |-- iacTable: struct (nullable = true)
| | | | |-- Iac: string (nullable = true)
| |-- networkType: long (nullable = true)
| |-- recEntityInfo: struct (nullable = true)
| | |-- RecEntityDefinition: array (nullable = true)
| | | |-- element: struct (containsNull = true)
| | | | |-- recEntityCode: long (nullable = true)
| | | | |-- recEntityId: struct (nullable = true)
| | | | | |-- gsnaddress: struct (nullable = true)
| | | | | | |-- iPTextV4Address: string (nullable = true)
| | | | | |-- mscId: string (nullable = true)
| | | | | |-- msisdn: string (nullable = true)
| | | | |-- recEntityType: long (nullable = true)
| |-- utcTimeOffsetInfo: struct (nullable = true)
| | |-- UtcTimeOffsetDefinition: struct (nullable = true)
| | | |-- utcTimeOffset: string (nullable = true)
| | | |-- utcTimeOffsetCode: long (nullable = true)
當我查看數據幀中的元素時,它顯示為這樣一個表格:[表]
我不確定如何將此數據幀寫入 csv 文件。
有什么建議嗎? 謝謝
請查看 Databricks 中的 Spark-csv 庫:
這是一個簡單的例子(注意:CSV 是扁平格式,上述模式中的 struct/array 嵌套列通常需要先用 select 和 explode 展平為普通列,才能正確寫出):
mydf.write.
format("com.databricks.spark.csv").
option("header", "true").
save("out.csv")
你可以在這里找到它: https://mvnrepository.com/artifact/com.databricks/spark-csv_2.10
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.