繁体   English   中英

Python 将复杂的 Object 正确解析为 JSON 格式

[英]Python Correctly Parse a Complex Object into a JSON format

我有下面这个 class,想把它的实例转换为 JSON。该 class 还包含一个由 Item 对象组成的列表。

class Item(JSONEncoder):
    """Tree node with a type tag, text content, an identifier, a parent and children.

    ``JSONEncoder.__init__`` is never called here; instances only carry the
    five attributes assigned in ``__init__``.
    """

    def __init__(self):
        # Assignment order is kept (Type, Content, N, Parent, Items) because
        # reprJSON() walks __dict__ in insertion order.
        self.Type = self.Content = ''
        self.N = self.Parent = None
        self.Items = []

    def reprJSON(self):
        """Return a dict of this instance's attributes.

        An attribute value that itself exposes ``reprJSON`` is replaced by the
        result of calling it. Every other value -- including the ``Items``
        list and the objects inside it -- is passed through unchanged.
        """
        return {
            name: value.reprJSON() if hasattr(value, "reprJSON") else value
            for name, value in self.__dict__.items()
        }

因此,当我尝试转换 Item class 的实例时,root.reprJSON() 得到以下结果。

    {'Type': 'root',
 'Content': '',
 'N': 'root',
 'Parent': None,
 'Items': [<Item.Item at 0x10575fb3c88>,
  <Item.Item at 0x10575fb3e10>,
  <Item.Item at 0x10575fb3eb8>,
  <Item.Item at 0x10575fbc080>,
  <Item.Item at 0x10575fbc2b0>,
  <Item.Item at 0x10575fc6a20>,
  <Item.Item at 0x10575fc6a58>,
  <Item.Item at 0x10575fc6b70>,
  <Item.Item at 0x10575fc6be0>,
  <Item.Item at 0x10575fc6c50>,
  <Item.Item at 0x10575fc6da0>,
  <Item.Item at 0x10575fc6fd0>,
  <Item.Item at 0x10575fcb128>,
  <Item.Item at 0x10575fcb358>,
  <Item.Item at 0x10575fcba90>,
  <Item.Item at 0x10575fcbb00>,
  <Item.Item at 0x10575fcbb70>,
  <Item.Item at 0x10575fcbc18>,
  <Item.Item at 0x10575fcbda0>,
  <Item.Item at 0x10575fcbfd0>,
  <Item.Item at 0x10575fd3208>,
  <Item.Item at 0x10575fd34a8>,
  <Item.Item at 0x10575fd3550>,
  <Item.Item at 0x10575fd35c0>,
  <Item.Item at 0x10575fd36d8>,
  <Item.Item at 0x10575fd37f0>,
  <Item.Item at 0x10575fd3898>,
  <Item.Item at 0x10575fd3940>,
  <Item.Item at 0x10575fd39b0>,
  <Item.Item at 0x10575fd3a20>,
  <Item.Item at 0x10575fd3ac8>,
  <Item.Item at 0x10575fd3b70>,
  <Item.Item at 0x10575fd3c88>,
  <Item.Item at 0x10575fd3d68>,
  <Item.Item at 0x10575fd3dd8>,
  <Item.Item at 0x10575fd3e10>,
  <Item.Item at 0x10575fd3ef0>,
  <Item.Item at 0x10575fdc080>,
  <Item.Item at 0x10575fdc0b8>,
  <Item.Item at 0x10575fdc128>,
  <Item.Item at 0x10575fdc1d0>,
  <Item.Item at 0x10575fdc240>,
  <Item.Item at 0x10575fdc390>,
  <Item.Item at 0x10575fdc438>,
  <Item.Item at 0x10575fdc550>,
  <Item.Item at 0x10575fdc5c0>,
  <Item.Item at 0x10575fdc630>,
  <Item.Item at 0x10575fdc6a0>,
  <Item.Item at 0x10575fdc6d8>,
  <Item.Item at 0x10575fdc780>,
  <Item.Item at 0x10575fdc908>,
  <Item.Item at 0x10575fdc9e8>,
  <Item.Item at 0x10575fdca58>,
  <Item.Item at 0x10575fdcac8>,
  <Item.Item at 0x10575fdcb00>,
  <Item.Item at 0x10575fdcba8>,
  <Item.Item at 0x10575fdccc0>,
  <Item.Item at 0x10575fdcd30>,
  <Item.Item at 0x10575fdcda0>,
  <Item.Item at 0x10575fdce48>,
  <Item.Item at 0x10575fdceb8>,
  <Item.Item at 0x10575fdcf28>,
  <Item.Item at 0x10575fe22e8>,
  <Item.Item at 0x10575fe2828>,
  <Item.Item at 0x10575fe2940>,
  <Item.Item at 0x10575fe2b70>,
  <Item.Item at 0x10575fe2be0>,
  <Item.Item at 0x10575fe2c88>,
  <Item.Item at 0x10575fe2cc0>,
  <Item.Item at 0x10575fe2cf8>]}

但我想将这些项目的值也放入单个 json object 中。我不知道该怎么做,希望得到任何帮助。 谢谢

编辑

以下代码创建 Item class 的实例并用数据填充它。

    def Crawl(parsedPDF):
        """Parse ``parsedPDF`` with BeautifulSoup and build a tree of Item nodes.

        A root Item (Type and N both "root") is created. Each "head" element
        is turned into a heading via CreateHeading, and each "p" element is
        attached to the most recent heading via AddParagraph. All other
        elements (figure, figdesc, table, anything else) are ignored.

        Returns the root Item.
        """
        soup = BeautifulSoup(parsedPDF, "html.parser")

        root = Item()
        root.Type = "root"
        root.N = "root"

        # Both cursors start at the root: `head` is the heading that receives
        # paragraphs, `parent` is what the next heading is compared against.
        parent = root
        head = root
        body = RemoveEmptyTags(soup.body)

        for tag in body:
            for element in RemoveEmptyChild(tag.contents):
                tag_name = element.name
                if tag_name == "head":
                    head = CreateHeading(root, parent, element)
                    parent = head.Parent
                elif tag_name == "p":
                    AddParagraph(head, element)
                # figure / figdesc / table / unknown elements: nothing to do

        return root


def AddParagraph(head, element):
    """Append one Item per line of ``element.text`` to ``head.Items``.

    The paragraph text is split by ``split_with_AplhabetizeBullets`` using a
    pattern that matches ". " followed by a parenthesised marker and a
    whitespace character. Every resulting line becomes a new Item whose
    Content is the line, whose Type is ``element.name`` and whose Parent is
    ``head``.

    Returns None; ``head.Items`` is extended in place.
    """
    # Raw string: the pattern value is unchanged, but a non-raw literal with
    # "\.", "\s" and "\(" uses invalid escape sequences, which newer Python
    # versions warn about.
    bullet_pattern = r'\.\s(\(.*?\)\s)'

    # split the paragraph into multiple lines based on alphabetize bullet points
    lines = split_with_AplhabetizeBullets(element.text, bullet_pattern)
    for line in lines:
        item = Item()
        item.Content = line
        item.Type = element.name
        item.Parent = head
        head.Items.append(item)




def CreateHeading(root, parent, element):
    """Create a heading Item from ``element`` and attach it to the tree.

    The new Item takes its Content from ``element.text``, its Type from
    ``element.name`` and, when present, its N from the element's "n"
    attribute.

    Placement rules:

    * No "n" attribute: N falls back to the heading text. A heading longer
      than 3 characters with no parenthesised text is appended to
      ``root.Items`` and becomes its own Parent; anything else is appended to
      ``parent.Items`` as a sub-heading.
    * "n" attribute present: if the new N starts with ``parent.N`` the heading
      is appended to ``parent.Items``; otherwise it is appended to
      ``root.Items`` and becomes its own Parent.

    Returns the new Item.
    """
    item = Item()
    item.Content = element.text
    item.Type = element.name
    item.Parent = parent

    try:
        item.N = element["n"]
    except (KeyError, TypeError):
        # The element has no "n" attribute (or does not support item access):
        # keep whatever N the fresh Item started with.
        pass

    if item.N is None:
        # Length of the first "(...)" group in the heading text, 0 if none.
        # The match method must be *called*: len(match.group) would raise
        # TypeError and leave this length permanently at 0.
        try:
            match = re.search(r'\(.*?\)', item.Content)
        except TypeError:
            # Non-text content: treat as "no bracket text".
            match = None
        bracket_text_length = len(match.group()) if match else 0

        item.N = item.Content
        # to check if the heading without 'N' is a heading or its a subheading
        if len(item.Content) > 3 and bracket_text_length == 0:
            root.Items.append(item)
            item.Parent = item
        else:
            parent.Items.append(item)

    else:  # item.N is not None
        if parent.N is None:
            # NOTE(review): in this case the item becomes its own parent and
            # the check below then appends it to its own Items list. Kept
            # as-is; confirm whether this branch is reachable and intended.
            item.Parent = item
            parent = item.Parent

        # if the new heading shares the same reference prefix as its parent
        if parent.N in item.N[:len(parent.N)]:
            parent.Items.append(item)
        else:  # if the new heading has no parent then add it into root
            root.Items.append(item)
            item.Parent = item

    return item

查看您的代码后,您可以参考下面的演示方案:我在 Items 列表中存放的是 Demo class 的对象。您需要在 Item class 中编写 serialize() 和 dumper() 方法,并且修改 reprJSON 方法,使其遍历 Items 列表。

from json import JSONEncoder

class Demo:
    """Minimal sample payload: a name plus an arbitrary list of values."""

    def __init__(self):
        # Each instance gets its own fresh list.
        self.name, self.demolist = '', []

class Item(JSONEncoder):
    """Tree node that can render itself, and the objects in its lists, as plain dicts."""

    def __init__(self):
        # JSONEncoder.__init__ is intentionally not called: it would store the
        # encoder's own settings (skipkeys, ensure_ascii, ...) in __dict__,
        # and reprJSON() would then emit them alongside the node's fields.
        self.Type = ''
        self.Content = ''
        self.N = None
        self.Parent = None
        self.Items = []

    def reprJSON(self):
        """Return a new dict describing this instance.

        Every list attribute is replaced by a new list whose entries have been
        passed through ``dumper``; an empty list stays an empty list, so the
        key is always present. All other attributes (including ``Parent``) are
        copied by reference without being expanded.
        """
        result = {}
        for name, value in self.__dict__.items():
            if isinstance(value, list):
                result[name] = [self.dumper(entry) for entry in value]
            else:
                result[name] = value
        return result

    def serialize(self):
        """Return the dict form of this node, expanding nested list entries.

        Delegates to ``reprJSON`` so that Items nested at any depth are
        converted, not just the first level.
        """
        return self.reprJSON()

    @staticmethod
    def dumper(obj):
        """Convert a single list entry to something dict-like where possible.

        * Objects with a callable ``serialize`` -> the result of calling it.
        * Other objects with a ``__dict__`` -> that ``__dict__``.
        * Anything else (numbers, strings, dicts, ...) -> returned unchanged.
        """
        serialize = getattr(obj, "serialize", None)
        if callable(serialize):
            return serialize()
        return getattr(obj, "__dict__", obj)




# Demo: store two plain Demo objects in an Item's Items list, then
# pretty-print the dict produced by reprJSON().
d1, d2 = Demo(), Demo()
d1.name, d1.demolist = 'akash', [{'good':[4,6,5],'yyy':'why'},{'ho':{'ksks':'333'}}]
d2.name, d2.demolist = 'heheh', [4,6,1111]

itemobj = Item()
itemobj.Items += [d1, d2]

from pprint import pprint
pprint(itemobj.reprJSON())

Output:

{'Content': '',
 'Items': [{'demolist': [{'good': [4, 6, 5], 'yyy': 'why'},
                         {'ho': {'ksks': '333'}}],
            'name': 'akash'},
           {'demolist': [4, 6, 1111], 'name': 'heheh'}],
 'N': None,
 'Parent': None,
 'Type': ''}

先安装依赖:pip install jsonwhatever

from jsonwhatever import jsonwhatever as jw


class Item:
    """Plain record with Type, Content, N, Parent and Items attributes."""

    def __init__(self):
        self.Type = self.Content = ''
        # Parent: not to reference the father object, to avoid infinite recursion.
        # Items: None by default, to stop recursion.
        self.N = self.Parent = self.Items = None



# Demo: one container Item holding a single populated child, rendered to a
# JSON string by jsonwhatever.
obj = Item()

obj01 = Item()
obj01.Type, obj01.Content, obj01.N = '01', 'stuff', 9
obj01.Parent = None

list_objects = [obj01]
obj.Items = list_objects

json_string = jw.jsonwhatever('list_of_items', obj)
print(json_string)

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM