簡體   English   中英

用python解析上傳到服務器的奇怪yaml文件

[英]parse weird yaml file uploaded to server with python

我有一個配置服務器,我們從中讀取服務配置。 那裡有一個我需要讀取的 yaml 文件,但它在服務器上的格式很奇怪,如下所示:

            {
                "document[0].Name": "os",
                "document[0].Rules.Rule1": false,
                "document[0].Rules.Rule2": true,
                "document[0].MinScore": 100,
                "document[0].MaxScore": 100,
                "document[0].ClusterId": 22,
                "document[0].Enabled": true,
                "document[0].Module": "device",
                "document[0].Description": "",
                "document[0].Modified": 1577880000000,
                "document[0].Created": 1577880000000,
                "document[0].RequiredReview": false,
                "document[0].Type": "NO_CODE",
                "document[1].Name": "rule with params test",
                "document[1].Rules.Rule": false,
                "document[1].MinScore": 100,
                "document[1].MaxScore": 100,
                "document[1].ClusterId": 29,
                "document[1].Enabled": true,
                "document[1].Module": "device",
                "document[1].Description": "rule with params test",
                "document[1].Modified": 1577880000000,
                "document[1].Created": 1577880000000,
                "document[1].RequiredReview": false,
                "document[1].Type": "NO_CODE",
                "document[1].ParametersRules[0].Features.feature1.op": ">",
                "document[1].ParametersRules[0].Features.feature1.value": 10,
                "document[1].ParametersRules[0].Features.feature2.op": "==",
                "document[1].ParametersRules[0].Features.feature2.value": true,
                "document[1].ParametersRules[0].Features.feature3.op": "range",
                "document[1].ParametersRules[0].Features.feature3.value[0]": 4,
                "document[1].ParametersRules[0].Features.feature3.value[1]": 10,
                "document[1].ParametersRules[0].Features.feature4.op": "!=",
                "document[1].ParametersRules[0].Features.feature4.value": "None",
                "document[1].ParametersRules[0].DecisionType": "all",
                "document[1].ParametersRules[1].Features.feature5.op": "<",
                "document[1].ParametersRules[1].Features.feature5.value": 1000,
                "document[1].ParametersRules[1].DecisionType": "any"
            }

這就是解析後 dict 應該有的樣子(我是手工整理的,可能不完美):

[
{
"Name": "os",
"Rules": { "Rule1": false, "Rule2": true },
"MinScore": 100,
"MaxScore": 100,
"ClusterId": 22,
"Enabled": true,
"Module": "device",
"Description": "",
"Modified": 1577880000000,
"Created": 1577880000000,
"RequiredReview": false,
"Type": "NO_CODE"
},
{
  "Name": "rule with params test",
  "Rules": { "Rule": false},
  "MinScore": 100,
  "MaxScore": 100,
  "ClusterId": 29,
  "Enabled": true,
  "Module": "device",
  "Description": "rule with params test",
  "Modified": 1577880000000,
  "Created": 1577880000000,
  "RequiredReview": false,
  "Type": "NO_CODE",
  "ParametersRules":[
    {"Features": {"feature1": {"op": ">", "value": 10},
                  "feature2": {"op": "==", "value": true},
                  "feature3": {"op": "range", "value": [4,10]},
                  "feature4": {"op": "!=", "value": "None"}} ,
       "DecisionType": "all"},
    {"Features": { "feature5": { "op": "<", "value": 1000 }},
     "DecisionType": "any"}
  ]
}
]

我沒有辦法更改文件上傳到服務器的方式(這是一個不同的團隊,非常令人頭疼),所以我需要使用 python 解析它。 我的想法是可能有人以前遇到過,所以必須有一個解決它的包,我希望這里有人可能知道。

謝謝。

我有一個示例,希望對你有幫助

import yaml
import os
# Directory containing this script; both files are resolved relative to it.
file_dir = os.path.dirname(os.path.abspath(__file__))

# JSON is (almost entirely) a subset of YAML, so the YAML loader can read the
# JSON document directly.  The context manager closes the handle afterwards.
# NOTE(review): consider yaml.safe_load if file.json is not fully trusted.
with open(f"{file_dir}/file.json", encoding="utf-8") as json_file:
    config = yaml.full_load(json_file)

# Write the loaded data back out as YAML.  An explicit UTF-8 encoding is used
# because allow_unicode=True emits non-ASCII characters unescaped; leaving the
# `with` block flushes and closes the output file.
with open(f'{file_dir}/meta.yaml', 'w+', encoding="utf-8") as yaml_file:
    yaml.dump(config, yaml_file, allow_unicode=True)

您當前的輸出是:

- ClusterId: 22
  Created: 1577880000000
  Description: ''
  Enabled: true
  MaxScore: 100
  MinScore: 100
  Modified: 1577880000000
  Module: device
  Name: os
  RequiredReview: false
  Rules:
    Rule1: false
    Rule2: true
  Type: NO_CODE
- ClusterId: 29
  Created: 1577880000000
  Description: rule with params test
  Enabled: true
  MaxScore: 100
  MinScore: 100
  Modified: 1577880000000
  Module: device
  Name: rule with params test
  ParametersRules:
  - DecisionType: all
    Features:
      feature1:
        op: '>'
        value: 10
      feature2:
        op: ==
        value: true
      feature3:
        op: range
        value:
        - 4
        - 10
      feature4:
        op: '!='
        value: None
  - DecisionType: any
    Features:
      feature5:
        op: <
        value: 1000
  RequiredReview: false
  Rules:
    Rule: false
  Type: NO_CODE

到目前為止,這是我的方法。 它遠非完美,但希望它能讓您了解如何解決它。

from __future__ import annotations  # can be removed in Python 3.10+


def clean_value(o: str | bool | int) -> str | bool | int | None:
    """Decode an int, ``None`` or bool value that was encoded as a string.

    Args:
        o: The raw value.  Anything that is not a ``str`` is returned as-is.

    Returns:
        * an ``int`` when the string consists only of decimal digits,
        * ``None`` when the string equals ``'none'`` (case-insensitive),
        * a ``bool`` when the string equals ``'true'``/``'false'``
          (case-insensitive),
        * otherwise the original value, unchanged.
    """
    if not isinstance(o, str):
        return o

    lowercase = o.lower()

    # `isdecimal` only accepts characters that `int` can actually parse.
    # `isnumeric` is also true for text such as '²' or '½', on which `int`
    # raises ValueError; those inputs now simply stay strings.
    if lowercase.isdecimal():
        return int(o)
    if lowercase == 'none':
        return None
    if lowercase in ('true', 'false'):
        return lowercase == 'true'

    return o


def _split_segment(segment: str) -> list[str | int]:
    """Split one dotted-path segment into its name and trailing list indices.

    ``'value[1]'`` becomes ``['value', 1]`` and ``'grid[0][2]'`` becomes
    ``['grid', 0, 2]``.  A segment without a well-formed ``name[digits]`` shape
    is returned unchanged as a single literal key.
    """
    name, bracket, tail = segment.partition('[')
    if not (name and bracket and tail.endswith(']')):
        return [segment]

    index_texts = tail[:-1].split('][')
    if not all(text.isdecimal() for text in index_texts):
        return [segment]

    return [name, *map(int, index_texts)]


def _get_child(node: dict | list, token: str | int):
    """Return the child stored under *token*, or ``None`` when it is absent."""
    if isinstance(token, int):
        return node[token] if token < len(node) else None
    return node.get(token)


def _set_child(node: dict | list, token: str | int, value) -> None:
    """Store *value* under *token*, padding a list with ``None`` if needed."""
    if isinstance(token, int):
        # Indices may arrive out of order or with gaps; grow the list first.
        node.extend([None] * (token + 1 - len(node)))
    node[token] = value


def _assign(doc: dict, tokens: list[str | int], value) -> None:
    """Create the containers along *tokens* inside *doc* and store *value*."""
    node = doc
    for token, following in zip(tokens, tokens[1:]):
        # The type of the *next* token decides which container is needed here:
        # an integer index needs a list, a name needs a dict.
        wanted = list if isinstance(following, int) else dict
        child = _get_child(node, token)
        if child is None:
            # Nothing usable stored yet (a ``None`` leaf counts as vacant).
            child = wanted()
            _set_child(node, token, child)
        elif not isinstance(child, wanted):
            raise ValueError(
                f'path {tokens!r} conflicts with an existing value at {token!r}'
            )
        node = child
    _set_child(node, tokens[-1], value)


def process(o: dict):
    """Rebuild the nested documents from a flattened ``{path: value}`` dict.

    Keys look like ``'document[1].ParametersRules[0].Features.feature1.op'``:
    the first segment carries the document index, the remaining dot-separated
    segments are nested keys, and a ``[N]`` suffix on a segment addresses the
    N-th element of a list.  Every value is passed through ``clean_value``.

    Args:
        o: The flattened mapping, e.g. the result of ``json.loads``.

    Returns:
        A list of document dicts ordered by document index.  Document 0 is
        always present, so an empty input yields ``[{}]``.  List positions
        that never receive a value are left as ``None``.

    Raises:
        ValueError: If a key has no ``name[N]`` root segment or no field path
            after it, or if two keys disagree about whether a node is a
            scalar, a dict or a list.
    """
    # Keyed by document index so the result does not depend on key order.
    docs: dict[int, dict] = {0: {}}

    for flat_key, raw_value in o.items():
        root, dot, field_path = flat_key.partition('.')
        root_tokens = _split_segment(root)
        if not dot or len(root_tokens) != 2:
            raise ValueError(
                f"expected a key like 'document[N].field', got {flat_key!r}"
            )

        doc = docs.setdefault(root_tokens[1], {})

        tokens: list[str | int] = []
        for segment in field_path.split('.'):
            tokens.extend(_split_segment(segment))

        _assign(doc, tokens, clean_value(raw_value))

    return [docs[index] for index in sorted(docs)]


if __name__ == '__main__':
    from pprint import pprint
    import json

    # Demo input: the flattened configuration in the form the server returns it.
    sample_json = """{
        "document[0].Name": "os",
        "document[0].Rules.Rule1": false,
        "document[0].Rules.Rule2": "true",
        "document[0].MinScore": 100,
        "document[0].MaxScore": 100,
        "document[0].ClusterId": 22,
        "document[0].Enabled": true,
        "document[0].Module": "device",
        "document[0].Description": "",
        "document[0].Modified": 1577880000000,
        "document[0].Created": 1577880000000,
        "document[0].RequiredReview": false,
        "document[0].Type": "NO_CODE",
        "document[1].Name": "rule with params test",
        "document[1].Rules.Rule": false,
        "document[1].MinScore": 100,
        "document[1].MaxScore": 100,
        "document[1].ClusterId": 29,
        "document[1].Enabled": true,
        "document[1].Module": "device",
        "document[1].Description": "rule with params test",
        "document[1].Modified": 1577880000000,
        "document[1].Created": 1577880000000,
        "document[1].RequiredReview": false,
        "document[1].Type": "NO_CODE",
        "document[1].ParametersRules[0].Features.feature1.op": ">",
        "document[1].ParametersRules[0].Features.feature1.value": 10,
        "document[1].ParametersRules[0].Features.feature2.op": "==",
        "document[1].ParametersRules[0].Features.feature2.value": true,
        "document[1].ParametersRules[0].Features.feature3.op": "range",
        "document[1].ParametersRules[0].Features.feature3.value[0]": 4,
        "document[1].ParametersRules[0].Features.feature3.value[1]": 10,
        "document[1].ParametersRules[0].Features.feature4.op": "!=",
        "document[1].ParametersRules[0].Features.feature4.value": "None",
        "document[1].ParametersRules[0].DecisionType": "all",
        "document[1].ParametersRules[1].Features.feature5.op": "<",
        "document[1].ParametersRules[1].Features.feature5.value": 1000,
        "document[1].ParametersRules[1].DecisionType": "any"
    }"""

    # Decode the JSON text, rebuild the nested documents and pretty-print them.
    flat_config = json.loads(sample_json)
    pprint(process(flat_config))

結果:

[{'ClusterId': 22,
  'Created': 1577880000000,
  'Description': '',
  'Enabled': True,
  'MaxScore': 100,
  'MinScore': 100,
  'Modified': 1577880000000,
  'Module': 'device',
  'Name': 'os',
  'RequiredReview': False,
  'Rules': {'Rule1': False, 'Rule2': True},
  'Type': 'NO_CODE'},
 {'ClusterId': 29,
  'Created': 1577880000000,
  'Description': 'rule with params test',
  'Enabled': True,
  'MaxScore': 100,
  'MinScore': 100,
  'Modified': 1577880000000,
  'Module': 'device',
  'Name': 'rule with params test',
  'ParametersRules[0]': {'DecisionType': 'all',
                         'Features': {'feature1': {'value': 10},
                                      'feature2': {'op': '==', 'value': True},
                                      'feature3': {'op': 'range',
                                                   'value[0]': 4,
                                                   'value[1]': 10},
                                      'feature4': {'op': '!=', 'value': None}}},
  'ParametersRules[1]': {'DecisionType': 'any',
                         'Features': {'feature5': {'value': 1000}}},
  'RequiredReview': False,
  'Rules': {'Rule': False},
  'Type': 'NO_CODE'}]

當然,其中一個問題是它沒有考慮像document[1].ParametersRules[0].Features.feature1.op這樣的嵌套路徑,理想情況下應該創建一個新的子列表來添加值。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM