[英]How can I convert a given list of lists to a tree structure in Python?
[英]Convert tree to list of lists
我有一個樹形結構,例如:公司 -> 部門 -> 角色 ->
我有一個這樣的三層嵌套循環結構:
# Walk the company -> department -> role hierarchy.
# NOTE(review): this fragment is taken from inside a method (it reads
# `self`); the enclosing class is not shown here.
for company in self.Companies():
    cName = company.Name()
    for dept in company.Departments():
        # Fixed: the loop variable is `dept`; the original referenced an
        # undefined name `department`, which raises NameError.
        dName = dept.Name()
        for role in dept.Roles():
            rName = role.Name()
            roleID = role.ID()
.Name() 函數返回一個部門名稱,如 Android-Sales。 公司可以有零個或多個部門。
到目前為止,以上就是我所擁有的。 我正在嘗試開發它,以便獲得列表列表:理想情況下,這就是我想要的。 如果有重復,列表中的下一項應將其留空。 或者它可能沒有字段,在這種情況下也將其留空。
[
['Google', 'Android-Sales', 'Marketer', 'A123'],
['','Google-Play','Developer', 'A435'],
['','','Tester','A125'],
['','','','A126'],
['My Small Company','','Super Role','A123'] ]
或者這也可以工作......
[
['Google', 'Android-Sales', 'Marketer', 'A123'],
['Google','Google-Play','Developer','A435'],
['Google','Google-Play','Tester','A125'],
['Google','Google-Play','Tester','A126'],
['My Small Company','','Super Role','A123'] ]  (這里“My Small Company”沒有部門。)
每個內部列表的長度應為 4。
嘗試這樣的事情:
# Example tree: nested dicts whose innermost values are the leaves.
tree = {"Google": {"Android": "yes", "Nexus": "no"}}

# Module-level accumulator that listbuilder() appends finished rows to.
list_of_lists = []


def listbuilder(sub_tree, current_list):
    """Append one row per leaf of ``sub_tree`` to ``list_of_lists``.

    Args:
        sub_tree: A dict whose values are either nested dicts (branches)
            or any other value (leaves).
        current_list: The keys already traversed to reach ``sub_tree``.

    Each appended row is ``current_list + [key, leaf_value]``. The input
    list is never mutated; a new list is built for every recursion step.
    """
    for key, value in sub_tree.items():
        if isinstance(value, dict):
            # Branch: descend with the key added to the path.
            listbuilder(value, current_list + [key])
        else:
            # Leaf: the row is the full key path followed by the value.
            list_of_lists.append(current_list + [key, value])


listbuilder(tree, [])
# Python 3 print() call; the original used the Python 2 print statement.
print(str(list_of_lists))
輸出:
[['Google', 'Nexus', 'no'], ['Google', 'Android', 'yes']]
前段時間我需要以結構化的方式將 JSON 文件插入到數據庫中。 基於@Gillespie描述的解決方案,我能夠將文件層次結構轉換為列表列表。
以下腳本需要讀取一個 JSON 文件,其項目列表位於父鍵 "value" 之下。 這是 data.json 文件的一個簡單示例。
{
"value":
[
{
"A": 0,
"B": 1,
"C": 2
},
{
"C": {
"E": 3,
"F": 4
},
"D": [
{
"G": 5
},
{
"H": 6
}
]
}
]
}
以下腳本讀取 JSON 文件並將其轉換為要插入數據庫的元組列表。 請記住在數據庫中創建表和列之前刪除特殊字符。
import json
def dict_to_list(sub_tree: dict, current_list: list, items_list: list) -> list:
    """Flatten a nested dict/list structure into ``[key, ..., value]`` rows.

    Args:
        sub_tree: The node to flatten. Normally a dict; any non-dict value
            (for example a scalar found inside a list) is treated as a leaf.
        current_list: The keys traversed so far to reach ``sub_tree``.
        items_list: Accumulator that rows are appended to (mutated in place).

    Returns:
        The same ``items_list`` object, for convenience.

    Row shapes:
        * dict value that is a scalar -> ``current_list + [key, value]``
        * non-dict node (leaf)        -> ``current_list + [str(node)]``
    """
    # Leaf detection is explicit. The original relied on a bare ``except:``
    # around the whole loop, which hid unrelated errors and could emit
    # partial rows before the exception fired (e.g. for nested lists).
    if not isinstance(sub_tree, dict):
        items_list.append(current_list + [str(sub_tree)])
        return items_list

    for key, value in sub_tree.items():
        path = current_list + [key]
        if isinstance(value, list):
            # Every list element is flattened under the same key path.
            for sub_item in value:
                dict_to_list(sub_tree=sub_item, current_list=path, items_list=items_list)
        elif isinstance(value, dict):
            dict_to_list(sub_tree=value, current_list=path, items_list=items_list)
        else:
            items_list.append(path + [value])
    return items_list
def json_data_to_samples_list(json_data: dict, data_key: str = 'value', column_sep: str = "_") -> [[(str, str)]]:
    """Convert each record under ``json_data[data_key]`` to (column, value) pairs.

    Every record is flattened with ``dict_to_list``; for each flattened row
    the leading path elements are joined with ``column_sep`` to form the
    column name, and the last element is the value.

    Returns:
        One list of ``(column, value)`` tuples per record, in input order.
    """

    def to_pairs(record):
        # Each flattened row is [key, key, ..., value].
        flattened = dict_to_list(sub_tree=record, current_list=[], items_list=[])
        return [
            (column_sep.join(row[:-1]), row[-1])
            for row in flattened
        ]

    return [to_pairs(record) for record in json_data[data_key]]
def main() -> None:
    """Load ``data.json``, flatten its records, and print every pair."""
    with open("data.json", mode="r", encoding="utf-8") as handle:
        loaded = json.load(handle)

    samples = json_data_to_samples_list(json_data=loaded)

    print(f"\nExtracted {len(samples)} items from the JSON file:")
    for position, pairs in enumerate(samples):
        print(f"\n>>>>> Child #{position}:")
        for pair in pairs:
            print(f"\t\t{pair}")
    print()


if __name__ == '__main__':
    main()
Extracted 2 items from the JSON file:
>>>>> Child #0:
('A', 0)
('B', 1)
('C', 2)
>>>>> Child #1:
('C_E', 3)
('C_F', 4)
('D_G', 5)
('D_H', 6)
除了我之前的回答之外,您可能還希望將 JSON 文件轉換為可以插入數據庫的元組。 在這種情況下,您可以使用以下方法:
{
"value":
[
{
"A": 0,
"B": 1,
"C": 2
},
{
"C": {
"E": 3,
"F": 4
},
"D": [
{
"G": 5
},
{
"H": 6
}
]
}
]
}
from collections import OrderedDict
import logging
import json
# Configure the root logger so DEBUG-level (and higher) records are emitted.
logging.basicConfig(level="DEBUG")
# Module-level logger; JSON.to_tuples() reports its progress through it.
logger = logging.getLogger(__name__)
class JSON:
    """Flatten decoded JSON data into column names and rows of values.

    The instance wraps already-decoded JSON (``self.data``). ``to_tuples``
    is the main entry point; the remaining methods are the steps it is
    built from. ``to_tuples`` uses the module-level ``logger`` and
    ``collections.OrderedDict``.
    """

    def __init__(self, data: dict) -> None:
        """Store the decoded JSON ``data`` for later extraction."""
        self.data = data

    def abbreviate(self, string: str, abbreviations: dict = None) -> str:
        """Replace every known word in ``string`` by its abbreviation.

        Args:
            string: Text to abbreviate.
            abbreviations: Mapping ``word -> abbreviation``. ``None`` (the
                default) or an empty mapping leaves ``string`` unchanged.

        Returns:
            The abbreviated string. Each word is replaced as written and
            also in its ``str.title()`` form.
        """
        # ``None`` default instead of ``{}``: a mutable default would be
        # shared between all calls.
        if not abbreviations:
            return string
        for word, short in abbreviations.items():
            string = string.replace(word, short)
            string = string.replace(word.title(), short.title())
        return string

    def find_string(
        self,
        string: str,
        string_list: list,
        index_start: int = 0
    ) -> list:
        """Return the indexes in ``string_list`` whose item equals ``string``.

        Only positions at or after ``index_start`` are searched. Empty
        items never match, even when ``string`` itself is empty.
        """
        return [
            index
            for index, item in enumerate(
                string_list[index_start:],
                start=index_start
            )
            if item and item == string
        ]

    def disambiguate_columns(
        self,
        columns: list,
        is_case_sentitive: bool = False
    ) -> list:
        """Make duplicated column names unique by appending ``_1``, ``_2``...

        The first occurrence of a name is kept; the n-th later duplicate
        gets the suffix ``_n``. Unless ``is_case_sentitive`` is true, names
        that differ only in case count as duplicates.

        Note:
            ``columns`` is modified in place and also returned. The
            parameter name keeps its original spelling (``sentitive``) so
            existing keyword callers continue to work.
        """
        if is_case_sentitive:
            columns_ref = list(columns)
        else:
            columns_ref = [c.lower() for c in columns]

        for i, c in enumerate(columns_ref):
            duplicates = self.find_string(
                c,
                string_list=columns_ref,
                index_start=i + 1
            )
            for n, j in enumerate(duplicates, start=1):
                columns[j] = f"{columns[j]}_{n}"
                # Blank the reference entry so this position is not
                # treated as a fresh name (or a duplicate) again.
                columns_ref[j] = ""
        return columns

    def dict_to_list(
        self,
        sub_tree: dict,
        current_list: list,
        items_list: list
    ) -> list:
        """Flatten a nested dict/list structure into ``[key, ..., value]`` rows.

        Args:
            sub_tree: Node to flatten. Any non-dict node is a leaf.
            current_list: Keys traversed so far.
            items_list: Accumulator the rows are appended to (mutated).

        Returns:
            The same ``items_list`` object.
        """
        # Explicit leaf test; the original used a bare ``except:`` which
        # swallowed every error and could leave partial rows behind.
        if not isinstance(sub_tree, dict):
            items_list.append(current_list + [str(sub_tree)])
            return items_list

        for key, value in sub_tree.items():
            path = current_list + [key]
            if isinstance(value, list):
                # Each list element is flattened under the same key path.
                for sub_item in value:
                    self.dict_to_list(
                        sub_tree=sub_item,
                        current_list=path,
                        items_list=items_list
                    )
            elif isinstance(value, dict):
                self.dict_to_list(
                    sub_tree=value,
                    current_list=path,
                    items_list=items_list
                )
            else:
                items_list.append(path + [value])
        return items_list

    def extract_entries(
        self,
        data_key: str = None,
        column_sep: str = "_"
    ) -> list:
        """Turn every record into a list of ``(column, value)`` tuples.

        Args:
            data_key: Key of ``self.data`` holding the records. When
                ``None``, ``self.data`` itself is iterated.
            column_sep: Separator used to join nested keys into one
                column name.

        Returns:
            One list of ``(column, value)`` tuples per record.
        """
        records = self.data if data_key is None else self.data[data_key]

        entries = []
        for parent in records:
            flattened = self.dict_to_list(
                sub_tree=parent,
                current_list=[],
                items_list=[]
            )
            # A flattened row is [key, ..., value]; the keys form the column.
            entries.append(
                [(column_sep.join(row[:-1]), row[-1]) for row in flattened]
            )
        return entries

    def get_nth_element(
        self,
        items: list,
        element: str,
        nth: int = 1
    ) -> tuple:
        """Return the ``nth`` (1-based) tuple in ``items`` keyed by ``element``.

        Returns:
            ``(match, index_out_of_bounds)``. When ``element`` occurs fewer
            than ``nth`` times, the last occurrence is returned and the
            flag is ``True``. When it does not occur at all the result is
            ``(None, True)``.
        """
        assert nth >= 1
        occurrences = [item for item in items if item[0] == element]
        if not occurrences:
            return (None, True)
        n_occurrences = len(occurrences)
        # Clamp to the last occurrence when ``nth`` is past the end.
        nth_element = occurrences[min(nth, n_occurrences) - 1]
        return (nth_element, nth > n_occurrences)

    def to_tuples(
        self,
        data_key: str = None,
        is_case_sentitive: bool = False,
        abbreviate_columns: bool = True
    ) -> tuple:
        """Convert the JSON data to ``(columns, rows)``.

        Args:
            data_key: Passed through to ``extract_entries``.
            is_case_sentitive: Passed through to ``disambiguate_columns``.
            abbreviate_columns: When true, every column name is run through
                ``abbreviate`` (called without an abbreviation mapping, so
                names are currently left unchanged).

        Returns:
            ``(columns, values)``: ``columns`` is the list of column names
            in first-seen order, ``values`` is a list of rows with one
            value (or ``None``) per column.
        """
        logger.debug("Extracting values tuples from JSON data...")
        entries = self.extract_entries(data_key=data_key)

        # Unique column names, in the order they are first seen.
        original_columns = list(
            OrderedDict.fromkeys(
                column_value_tuple[0]
                for samples in entries
                for column_value_tuple in samples
            )
        )
        n_entries = len(entries)
        n_columns = len(original_columns)

        values = []
        for index, tuples in enumerate(entries, start=1):
            logger.debug(
                "Processing values from entry %d/%d (%.2f%%)...",
                index, n_entries, (index / n_entries) * 100
            )
            # A column can occur several times within one entry; row ``i``
            # collects the i-th occurrence of every column.
            for i in range(1, len(tuples) + 1):
                row = []
                index_out_of_bounds_count = 0
                for c in original_columns:
                    column_value_tuple, index_out_of_bounds = \
                        self.get_nth_element(items=tuples, element=c, nth=i)
                    if column_value_tuple:
                        row.append(column_value_tuple[1])
                    else:
                        row.append(None)
                    if index_out_of_bounds:
                        index_out_of_bounds_count += 1
                # Every column is exhausted: no further rows for this entry.
                if index_out_of_bounds_count == n_columns:
                    break
                if row.count(None) != n_columns:
                    values.append(row)

        columns = original_columns
        if abbreviate_columns:
            logger.debug("Abbreviating column names...")
            columns_abbreviation = []
            for c in columns:
                abbreviation = self.abbreviate(c)
                columns_abbreviation.append(abbreviation)
                logger.debug("[%s] --> [%s]", c, abbreviation)
            columns = columns_abbreviation

        logger.debug("Disambiguating column names...")
        columns = self.disambiguate_columns(
            columns,
            is_case_sentitive=is_case_sentitive
        )
        return (columns, values)
def main() -> None:
    """Load ``data.json``, convert it to columns/rows, and print each row."""
    with open("data.json", mode="r", encoding="utf-8") as handle:
        loaded = json.load(handle)

    columns, values = JSON(loaded).to_tuples(data_key='value')

    print(f"\nExtracted {len(values)} items from the JSON file:")
    for position, row in enumerate(values):
        print(f"\n>>>>> Child #{position}:")
        # Pair each value with the name of the column it belongs to.
        for name, cell in zip(columns, row):
            print(f"\t{name}: {cell}")
    print()


if __name__ == '__main__':
    main()
DEBUG:__main__:Extracting values tuples from JSON data...
DEBUG:__main__:Processing values from entry 1/2 (50.00%)...
DEBUG:__main__:Processing values from entry 2/2 (100.00%)...
DEBUG:__main__:Abbreviating column names...
DEBUG:__main__:[A] --> [A]
DEBUG:__main__:[B] --> [B]
DEBUG:__main__:[C] --> [C]
DEBUG:__main__:[C_E] --> [C_E]
DEBUG:__main__:[C_F] --> [C_F]
DEBUG:__main__:[D_G] --> [D_G]
DEBUG:__main__:[D_H] --> [D_H]
DEBUG:__main__:Disambiguating column names...
Extracted 2 items from the JSON file:
>>>>> Child #0:
A: 0
B: 1
C: 2
C_E: None
C_F: None
D_G: None
D_H: None
>>>>> Child #1:
A: None
B: None
C: None
C_E: 3
C_F: 4
D_G: 5
D_H: 6
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.