簡體   English   中英

使用 jq 將 JSON 行轉換為 JSON 數組

[英]Convert JSON lines to JSON array using jq

首先,我是 jq 新手(才接觸一天),對 JSON 也同樣陌生;我是做 SQL 出身的,雖然學得很快,但這個問題始終解決不了……所以請多多包涵。

我正在運行 Windows,在 PowerShell 上使用 jq v1.5。

我下載了多個 JSON 文件,它們看起來像這樣:

{"Header":{"AssetClass":"Commodities","InstrumentType":"Forward","UseCase":"Forward","Level":"InstRefDataReporting"},"Attributes":{"NotionalCurrency":"EUR","ExpiryDate":"2018-01-01","ReturnorPayoutTrigger":"Contract for Difference (CFD)","DeliveryType":"CASH","BaseProduct":"AGRI","TransactionType":"FUTR","FinalPriceType":"ARGM","ReferenceRate":"10PPM ULTRA LOW SULPHUR DIESEL-CARGOES CIF NWE/BASIS ARA-PLATTS EUROPEAN","SubProduct":"GROS","AdditionalSubProduct":"FWHT"},"ISIN":{"ISIN":"EZX27M86B860","Status":"New"},"TemplateVersion":1,"Derived":{"CommodityDerivativeIndicator":"TRUE","UnderlyingAssetType":"Agriculture","IssuerorOperatoroftheTradingVenueIdentifier":"NA","PriceMultiplier":1,"FullName":"Commodities Forward AGRI GROS FWHT EUR 20180101","ShortName":"NA/Fwd AGRI FWHT EUR 20180101","ClassificationType":"JTAXCC"}}
{"Header":{"AssetClass":"Commodities","InstrumentType":"Swap","UseCase":"Basis_Swap","Level":"InstRefDataReporting"},"Attributes":{"NotionalCurrency":"SOS","ExpiryDate":"2208-12-10","ReturnorPayoutTrigger":"Total Return","DeliveryType":"OPTL","TransactionType":"ORIT","FinalPriceType":"IHSM","ReferenceRate":"NATURAL GAS-MONTHLY INDEX S. TEXAS (TETCO)-GAS DAILY PRICE GUIDE","OtherReferenceRate":"NATURAL GAS-MONTHLY INDEX W. LOUISIANA (TETCO)-GAS DAILY PRICE GUIDE","BaseProduct":"OTHR","OtherBaseProduct":"OTHR","SubProduct":"","AdditionalSubProduct":"","OtherSubProduct":"","OtherAdditionalSubProduct":""},"ISIN":{"ISIN":"EZBBH1XR9GV6","Status":"New"},"TemplateVersion":1,"Derived":{"CommodityDerivativeIndicator":"TRUE","UnderlyingAssetType":"Multi Commodity","IssuerorOperatoroftheTradingVenueIdentifier":"NA","PriceMultiplier":1,"FullName":"Commodities Swap Basis_Swap OTHR   OTHR   SOS 22081210","ShortName":"NA/Swap OTHR   SOS 22081210","ClassificationType":"STQTXE"}}
{"Header":{"AssetClass":"Commodities","InstrumentType":"Swap","UseCase":"Multi_Exotic_Swap","Level":"InstRefDataReporting"},"Attributes":{"NotionalCurrency":"LRD","ExpiryDate":"2200-01-31","ReturnorPayoutTrigger":"Contract for Difference (CFD)","DeliveryType":"CASH","TransactionType":"TAPO","FinalPriceType":"EXOF","UnderlyingInstrumentIndex":["BCOMF6","BCOMNG3"]},"ISIN":{"ISIN":"EZ286HJVY4Q2","Status":"New"},"TemplateVersion":1,"Derived":{"CommodityDerivativeIndicator":"TRUE","IssuerorOperatoroftheTradingVenueIdentifier":"NA","PriceMultiplier":1,"UnderlyingAssetType":"Multi Commodity","BaseProduct":"MCEX","SubProduct":"","AdditionalSubProduct":"","FullName":"Commodities Multi_Exotic_Swap MCEX LRD 22000131","ShortName":"NA/Swap MCEX LRD 22000131","ClassificationType":"STQCXC"}}
{"Header":{"AssetClass":"Commodities","InstrumentType":"Option","UseCase":"Option","Level":"InstRefDataReporting"},"Attributes":{"NotionalCurrency":"TND","ExpiryDate":"2209-10-18","OptionType":"OPTL","OptionExerciseStyle":"AMER","ValuationMethodorTrigger":"Asian","DeliveryType":"CASH","TransactionType":"OTHR","FinalPriceType":"IHSM","ReferenceRate":"NATURAL GAS-NGPL (NICOR, NIPSCO, PGLC CITYGATE), NBPL-NICOR-ICE/10X MONTHLY","BaseProduct":"OTHR","SubProduct":"","AdditionalSubProduct":""},"ISIN":{"ISIN":"EZ2TK5CWL9Y4","Status":"New"},"TemplateVersion":1,"Derived":{"CommodityDerivativeIndicator":"TRUE","UnderlyingAssetType":"Other","IssuerorOperatoroftheTradingVenueIdentifier":"NA","PriceMultiplier":1,"FullName":"Commodities Option OTHR   TND 22091018","ShortName":"NA/O OTHR  OPTL TND 22091018","ClassificationType":"HTMHAC"}}
{"Header":{"AssetClass":"Commodities","InstrumentType":"Option","UseCase":"Multi_Exotic_Option","Level":"InstRefDataReporting"},"Attributes":{"NotionalCurrency":"SOS","ExpiryDate":"2209-10-18","UnderlyingInstrumentIndex":["BCOMSI2","BCOMPR3T"],"OptionType":"CALL","OptionExerciseStyle":"AMER","ValuationMethodorTrigger":"Other Path Dependent","DeliveryType":"CASH","TransactionType":"ORIT","FinalPriceType":"BLTC"},"ISIN":{"ISIN":"EZ82L36B6225","Status":"New"},"TemplateVersion":1,"Derived":{"CommodityDerivativeIndicator":"TRUE","IssuerorOperatoroftheTradingVenueIdentifier":"NA","PriceMultiplier":1,"UnderlyingAssetType":"Multi Commodity","BaseProduct":"MCEX","SubProduct":"","AdditionalSubProduct":"","FullName":"Commodities Multi_Exotic_Option MCEX SOS 22091018","ShortName":"NA/O MCEX Call SOS 22091018","ClassificationType":"HTQBPC"}}

這些文件的大小可以超過 1 GB。

為了有效地使用這些,我需要將 JSON 行轉換為 JSON 數組,包裝文件,在前面加上“[”並附加“]”,並且每一行用逗號 (,) 分隔。

使文件看起來像這樣(理論上):

[
{
"Header": {
  "AssetClass": "Commodities",
  "InstrumentType": "Swap",
  "UseCase": "Basis_Swap",
  "Level": "InstRefDataReporting"
},
"Attributes": {
  "NotionalCurrency": "EUR",
  "ExpiryDate": "2017-08-31",
  "ReturnorPayoutTrigger": "Contract for Difference (CFD)",
  "DeliveryType": "CASH",
  "BaseProduct": "AGRI",
  "OtherBaseProduct": "AGRI",
  "TransactionType": "FUTR",
  "FinalPriceType": "ARGM",
  "ReferenceRate": "10PPM ULTRA LOW SULPHUR DIESEL-CARGOES CIF NWE/BASIS ARA-PLATTS EUROPEAN",
  "OtherReferenceRate": "10PPM ULTRA LOW SULPHUR DIESEL-CARGOES CIF NWE/BASIS ARA-PLATTS EUROPEAN",
  "SubProduct": "GROS",
  "AdditionalSubProduct": "FWHT",
  "OtherSubProduct": "GROS",
  "OtherAdditionalSubProduct": "FWHT"
},
"ISIN": {
  "ISIN": "EZ68CZDRFYY7",
  "Status": "New"
},
"TemplateVersion": 1,
"Derived": {
  "CommodityDerivativeIndicator": "TRUE",
  "UnderlyingAssetType": "Multi Commodity",
  "IssuerorOperatoroftheTradingVenueIdentifier": "NA",
  "PriceMultiplier": 1,
  "FullName": "Commodities Swap Basis_Swap AGRI GROS FWHT AGRI GROS FWHT EUR 20170831",
  "ShortName": "NA/Swap AGRI FWHT FWHT EUR 20170831",
  "ClassificationType": "STQCXC"
  }
},
{
"Header": {
  "AssetClass": "Commodities",
  "InstrumentType": "Swap",
  "UseCase": "Basis_Swap",
  "Level": "InstRefDataReporting"
},
"Attributes": {
  "NotionalCurrency": "EUR",
  "ExpiryDate": "2017-08-31",
  "ReturnorPayoutTrigger": "Contract for Difference (CFD)",
  "DeliveryType": "CASH",
  "BaseProduct": "AGRI",
  "OtherBaseProduct": "AGRI",
  "TransactionType": "FUTR",
  "FinalPriceType": "ARGM",
  "ReferenceRate": "10PPM ULTRA LOW SULPHUR DIESEL-CARGOES CIF NWE/BASIS ARA-PLATTS EUROPEAN",
  "OtherReferenceRate": "10PPM ULTRA LOW SULPHUR DIESEL-CARGOES CIF NWE/BASIS ARA-PLATTS EUROPEAN",
  "SubProduct": "GROS",
  "AdditionalSubProduct": "FWHT",
  "OtherSubProduct": "GROS",
  "OtherAdditionalSubProduct": "FWHT"
},
"ISIN": {
  "ISIN": "EZ68CZDRFYY7",
  "Status": "New"
},
"TemplateVersion": 1,
"Derived": {
  "CommodityDerivativeIndicator": "TRUE",
  "UnderlyingAssetType": "Multi Commodity",
  "IssuerorOperatoroftheTradingVenueIdentifier": "NA",
  "PriceMultiplier": 1,
  "FullName": "Commodities Swap Basis_Swap AGRI GROS FWHT AGRI GROS FWHT EUR 20170831",
  "ShortName": "NA/Swap AGRI FWHT FWHT EUR 20170831",
  "ClassificationType": "STQCXC"
}
}
]

所以我找到了 jq 並且根據我的理解我可以運行它

jq --slurp 'map(select(. >= 2))' Inputfile.json > OutputFile.json

這有效,但是在處理大於 200 MB 的文件時,如果使用 ISE,我會收到“系統內存不足”錯誤;如果使用標準 PowerShell 或 CMD,則需要很長時間(5 分鐘以上)。

如果我從命令中取出--slurp ,它會起作用,它會更快,但結果如下所示:

[
{
"Header": {
  "AssetClass": "Commodities",
  "InstrumentType": "Swap",
  "UseCase": "Basis_Swap",
  "Level": "InstRefDataReporting"
},
"Attributes": {
  "NotionalCurrency": "EUR",
  "ExpiryDate": "2017-08-31",
  "ReturnorPayoutTrigger": "Contract for Difference (CFD)",
  "DeliveryType": "CASH",
  "BaseProduct": "AGRI",
  "OtherBaseProduct": "AGRI",
  "TransactionType": "FUTR",
  "FinalPriceType": "ARGM",
  "ReferenceRate": "10PPM ULTRA LOW SULPHUR DIESEL-CARGOES CIF NWE/BASIS ARA-PLATTS EUROPEAN",
  "OtherReferenceRate": "10PPM ULTRA LOW SULPHUR DIESEL-CARGOES CIF NWE/BASIS ARA-PLATTS EUROPEAN",
  "SubProduct": "GROS",
  "AdditionalSubProduct": "FWHT",
  "OtherSubProduct": "GROS",
  "OtherAdditionalSubProduct": "FWHT"
},
"ISIN": {
  "ISIN": "EZ68CZDRFYY7",
  "Status": "New"
},
"TemplateVersion": 1,
"Derived": {
  "CommodityDerivativeIndicator": "TRUE",
  "UnderlyingAssetType": "Multi Commodity",
  "IssuerorOperatoroftheTradingVenueIdentifier": "NA",
  "PriceMultiplier": 1,
  "FullName": "Commodities Swap Basis_Swap AGRI GROS FWHT AGRI GROS FWHT EUR 20170831",
  "ShortName": "NA/Swap AGRI FWHT FWHT EUR 20170831",
  "ClassificationType": "STQCXC"
  }
}]
[{
"Header": {
  "AssetClass": "Commodities",
  "InstrumentType": "Swap",
  "UseCase": "Basis_Swap",
  "Level": "InstRefDataReporting"
},
"Attributes": {
  "NotionalCurrency": "EUR",
  "ExpiryDate": "2017-08-31",
  "ReturnorPayoutTrigger": "Contract for Difference (CFD)",
  "DeliveryType": "CASH",
  "BaseProduct": "AGRI",
  "OtherBaseProduct": "AGRI",
  "TransactionType": "FUTR",
  "FinalPriceType": "ARGM",
  "ReferenceRate": "10PPM ULTRA LOW SULPHUR DIESEL-CARGOES CIF NWE/BASIS ARA-PLATTS EUROPEAN",
  "OtherReferenceRate": "10PPM ULTRA LOW SULPHUR DIESEL-CARGOES CIF NWE/BASIS ARA-PLATTS EUROPEAN",
  "SubProduct": "GROS",
  "AdditionalSubProduct": "FWHT",
  "OtherSubProduct": "GROS",
  "OtherAdditionalSubProduct": "FWHT"
},
"ISIN": {
  "ISIN": "EZ68CZDRFYY7",
  "Status": "New"
},
"TemplateVersion": 1,
"Derived": {
  "CommodityDerivativeIndicator": "TRUE",
  "UnderlyingAssetType": "Multi Commodity",
  "IssuerorOperatoroftheTradingVenueIdentifier": "NA",
  "PriceMultiplier": 1,
  "FullName": "Commodities Swap Basis_Swap AGRI GROS FWHT AGRI GROS FWHT EUR 20170831",
  "ShortName": "NA/Swap AGRI FWHT FWHT EUR 20170831",
  "ClassificationType": "STQCXC"
}
}
]

它為每一行創建一個數組,但該數組不是逗號分隔的,這不是我想要的。

那麼,如何在不使用 slurp 的情況下處理包含多個 JSON 行的大文件,並為每個輸入文件生成一個輸出文件,其內容是以逗號分隔各元素的單一數組?

我已經閱讀了有關 inputs 的內容,但不確定它是否與我需要做的事情有關?

也許,我遲到了,但這就是你要找的!

jq -s '.' in.json > out.json

看起來inputs與 slurp 存在相同的問題。 我不知道如何在大文件上使用jq完成此操作,但sed可以做到:

sed '1s/^/[/; $!s/$/,/; $s/$/]/' in.json > out.json

輸出:

[{"Header":{"AssetClass":"Commodities","InstrumentType":"Forward","UseCase":"Forward","Level":"InstRefDataReporting"},"Attributes":{"NotionalCurrency":"EUR","ExpiryDate":"2018-01-01","ReturnorPayoutTrigger":"Contract for Difference (CFD)","DeliveryType":"CASH","BaseProduct":"AGRI","TransactionType":"FUTR","FinalPriceType":"ARGM","ReferenceRate":"10PPM ULTRA LOW SULPHUR DIESEL-CARGOES CIF NWE/BASIS ARA-PLATTS EUROPEAN","SubProduct":"GROS","AdditionalSubProduct":"FWHT"},"ISIN":{"ISIN":"EZX27M86B860","Status":"New"},"TemplateVersion":1,"Derived":{"CommodityDerivativeIndicator":"TRUE","UnderlyingAssetType":"Agriculture","IssuerorOperatoroftheTradingVenueIdentifier":"NA","PriceMultiplier":1,"FullName":"Commodities Forward AGRI GROS FWHT EUR 20180101","ShortName":"NA/Fwd AGRI FWHT EUR 20180101","ClassificationType":"JTAXCC"}},
{"Header":{"AssetClass":"Commodities","InstrumentType":"Swap","UseCase":"Basis_Swap","Level":"InstRefDataReporting"},"Attributes":{"NotionalCurrency":"SOS","ExpiryDate":"2208-12-10","ReturnorPayoutTrigger":"Total Return","DeliveryType":"OPTL","TransactionType":"ORIT","FinalPriceType":"IHSM","ReferenceRate":"NATURAL GAS-MONTHLY INDEX S. TEXAS (TETCO)-GAS DAILY PRICE GUIDE","OtherReferenceRate":"NATURAL GAS-MONTHLY INDEX W. LOUISIANA (TETCO)-GAS DAILY PRICE GUIDE","BaseProduct":"OTHR","OtherBaseProduct":"OTHR","SubProduct":"","AdditionalSubProduct":"","OtherSubProduct":"","OtherAdditionalSubProduct":""},"ISIN":{"ISIN":"EZBBH1XR9GV6","Status":"New"},"TemplateVersion":1,"Derived":{"CommodityDerivativeIndicator":"TRUE","UnderlyingAssetType":"Multi Commodity","IssuerorOperatoroftheTradingVenueIdentifier":"NA","PriceMultiplier":1,"FullName":"Commodities Swap Basis_Swap OTHR   OTHR   SOS 22081210","ShortName":"NA/Swap OTHR   SOS 22081210","ClassificationType":"STQTXE"}},
{"Header":{"AssetClass":"Commodities","InstrumentType":"Swap","UseCase":"Multi_Exotic_Swap","Level":"InstRefDataReporting"},"Attributes":{"NotionalCurrency":"LRD","ExpiryDate":"2200-01-31","ReturnorPayoutTrigger":"Contract for Difference (CFD)","DeliveryType":"CASH","TransactionType":"TAPO","FinalPriceType":"EXOF","UnderlyingInstrumentIndex":["BCOMF6","BCOMNG3"]},"ISIN":{"ISIN":"EZ286HJVY4Q2","Status":"New"},"TemplateVersion":1,"Derived":{"CommodityDerivativeIndicator":"TRUE","IssuerorOperatoroftheTradingVenueIdentifier":"NA","PriceMultiplier":1,"UnderlyingAssetType":"Multi Commodity","BaseProduct":"MCEX","SubProduct":"","AdditionalSubProduct":"","FullName":"Commodities Multi_Exotic_Swap MCEX LRD 22000131","ShortName":"NA/Swap MCEX LRD 22000131","ClassificationType":"STQCXC"}},
{"Header":{"AssetClass":"Commodities","InstrumentType":"Option","UseCase":"Option","Level":"InstRefDataReporting"},"Attributes":{"NotionalCurrency":"TND","ExpiryDate":"2209-10-18","OptionType":"OPTL","OptionExerciseStyle":"AMER","ValuationMethodorTrigger":"Asian","DeliveryType":"CASH","TransactionType":"OTHR","FinalPriceType":"IHSM","ReferenceRate":"NATURAL GAS-NGPL (NICOR, NIPSCO, PGLC CITYGATE), NBPL-NICOR-ICE/10X MONTHLY","BaseProduct":"OTHR","SubProduct":"","AdditionalSubProduct":""},"ISIN":{"ISIN":"EZ2TK5CWL9Y4","Status":"New"},"TemplateVersion":1,"Derived":{"CommodityDerivativeIndicator":"TRUE","UnderlyingAssetType":"Other","IssuerorOperatoroftheTradingVenueIdentifier":"NA","PriceMultiplier":1,"FullName":"Commodities Option OTHR   TND 22091018","ShortName":"NA/O OTHR  OPTL TND 22091018","ClassificationType":"HTMHAC"}},
{"Header":{"AssetClass":"Commodities","InstrumentType":"Option","UseCase":"Multi_Exotic_Option","Level":"InstRefDataReporting"},"Attributes":{"NotionalCurrency":"SOS","ExpiryDate":"2209-10-18","UnderlyingInstrumentIndex":["BCOMSI2","BCOMPR3T"],"OptionType":"CALL","OptionExerciseStyle":"AMER","ValuationMethodorTrigger":"Other Path Dependent","DeliveryType":"CASH","TransactionType":"ORIT","FinalPriceType":"BLTC"},"ISIN":{"ISIN":"EZ82L36B6225","Status":"New"},"TemplateVersion":1,"Derived":{"CommodityDerivativeIndicator":"TRUE","IssuerorOperatoroftheTradingVenueIdentifier":"NA","PriceMultiplier":1,"UnderlyingAssetType":"Multi Commodity","BaseProduct":"MCEX","SubProduct":"","AdditionalSubProduct":"","FullName":"Commodities Multi_Exotic_Option MCEX SOS 22091018","ShortName":"NA/O MCEX Call SOS 22091018","ClassificationType":"HTQBPC"}}]

說明

sed 腳本由三個單獨的替換命令組成。下面把它們分行列出:

1  s/^/[/      # Insert a left bracket at the beginning of the first line
$! s/$/,/      # On all but the last line append a comma
$  s/$/]/      # Append a right bracket to the last line

也許 awk 的寫法讀起來更直觀:

awk 'BEGIN{print "["} 
 length(last)>0 {print last ","} {last=$0}
 END {print last, "]"}'

作為記錄,這是一個使用 jq 版本和input的無slurp解決方案:

jq -nr '"[", try (input|tojson, repeat(",\n\(input|tojson)")), "]"'

我使用一行 Node.js 命令:

$ cat input.ndjson | node -e 'const rl = readline.createInterface({ input: process.stdin }); !async function () { let idx = 0; for await (const line of rl) { process.stdout.write((++idx === 1 ? "[" : "\n,") + JSON.stringify(JSON.parse(line))); } process.stdout.write("]"); }()' | tee output.json | jq 'length'
16814

說明:

$ cat input.ndjson |      # pipe the ndjson input, can be a file or any stream
  node -e '
    const rl = readline.createInterface({ input: process.stdin });
    !async function () {  // to be able to use await, this function has to be async, and then `!` is to be able call it right away;
      let idx = 0;
      for await (const line of rl) {
        process.stdout.write((++idx === 1 ? "[" : "\n,") + JSON.stringify(JSON.parse(line)));
      }
      process.stdout.write("]");
    }()
  ' | tee output.json     # save the output json file
    | jq 'length'         # call jq to calculate a length, also validate it, to make sure it's a valid single json file, this optional

Node 的 readline 是按行讀取流的好方法,它是 asyncIterable,可以使用 for await ... of 來逐行獲取內容;然後對每一行調用 JSON.parse 以驗證它是有效的 JSON,再用 JSON.stringify 把它轉換回壓縮(minified)的 JSON。

當然,您也可以改為聲明一個局部變量 const arr = []; 把每一行解析出的 obj 推入其中,最後只調用一次 JSON.stringify,以獲得完全壓縮的 JSON;

但我更喜歡這種“幾乎壓縮”的格式,即每一行的 obj 都被壓縮,但外層數組仍然逐行保留,這樣我就可以通過 wc -l 統計行數,輕鬆得到元素總數:

[{...minified line1obj with no spaces...}
,{...minified line2obj}
,{...minified line3obj}
,...
,{...minified lineNobj}]

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM