I have a tree structure, something like: Company -> Department -> Roles ->
I have a triple-nested for-loop structure like this:
# Walk the Company -> Department -> Role hierarchy and read the fields
# that should end up in each output row.
for company in self.Companies():
    cName = company.Name()
    for dept in company.Departments():
        # Read the name from the loop variable `dept`.
        dName = dept.Name()
        for role in dept.Roles():
            rName = role.Name()
            roleID = role.ID()
The .Name() function returns a dept name like Android-Sales. Companies can have zero or more departments.
So far the above is all I have. I am trying to develop this so that I get a list of lists. Ideally, this is what I would like: if a value repeats from the previous row, the next row should leave that field blank; if a row has no value for a field, that field should be left blank too.
[
['Google', 'Android-Sales', 'Marketer', 'A123'],
['','Google-Play','Developer', 'A435'],
['','','Tester','A125'],
['','','','A126'],
['My Small Company','','Super Role','A123'] ]
Or this would work too...
[
['Google', 'Android-Sales', 'Marketer', 'A123'],
['Google','Google-Play','Developer', 'A435'],
['Google','Google-Play','Tester','A125'],
['Google','Google-Play','Tester','A126'],
['My Small Company','','Super Role','A123'] ]  (Here "My Small Company" has no departments.)
Each inner list should be of length 4.
Try something like this:
# Example input: a nested dict whose innermost values are the leaves.
tree = {"Google": {"Android": "yes", "Nexus": "no"}}

# Accumulator filled by listbuilder(): one inner list per leaf of the tree.
list_of_lists = []


def listbuilder(sub_tree, current_list):
    """Append one ``path + [leaf]`` list per leaf of *sub_tree* to ``list_of_lists``.

    Args:
        sub_tree: Nested dict to walk. A value that is itself a dict is
            descended into; any other value is treated as a leaf.
        current_list: Keys already traversed to reach *sub_tree*. It is
            never mutated; a new list is built for every level.
    """
    for key, value in sub_tree.items():
        if isinstance(value, dict):
            listbuilder(value, current_list + [key])
        else:
            list_of_lists.append(current_list + [key, value])


listbuilder(tree, [])
# print() as a function: the statement form is a syntax error on Python 3.
print(list_of_lists)
Output:
[['Google', 'Nexus', 'no'], ['Google', 'Android', 'yes']]
A while ago I needed to insert JSON files into a database in a structured way. Based on the solution described by @Gillespie, I was able to convert the file hierarchy into a list of lists.
The following script expects to read a JSON file whose list of items is initially identified by the parent key "value". Here is a simple example of the data.json
file.
{
"value":
[
{
"A": 0,
"B": 1,
"C": 2
},
{
"C": {
"E": 3,
"F": 4
},
"D": [
{
"G": 5
},
{
"H": 6
}
]
}
]
}
The following script reads a JSON file and turns it into lists of (column, value) tuples to be inserted into a database. Remember to remove special characters from the names before creating tables and columns in the database.
import json
def dict_to_list(sub_tree: object, current_list: list, items_list: list) -> list:
    """Flatten a nested JSON-like structure into ``path + [value]`` lists.

    Args:
        sub_tree: The node to flatten. A dict is walked key by key; anything
            else (a scalar found inside a list, for instance) is a leaf.
        current_list: Keys traversed so far to reach *sub_tree*. Not mutated.
        items_list: Accumulator that receives one list per leaf. It is
            mutated in place and also returned.

    Returns:
        *items_list*, extended with one entry per leaf. A value stored
        directly under a dict key keeps its original type; a non-dict
        *sub_tree* is stored as ``str(sub_tree)``.
    """
    # Decide "branch or leaf" with an explicit type check rather than by
    # catching whatever exception iterating a non-dict happens to raise.
    if not isinstance(sub_tree, dict):
        items_list.append(current_list + [str(sub_tree)])
        return items_list

    for key, value in sub_tree.items():
        path = current_list + [key]
        if isinstance(value, list):
            # Every element of a list shares the path of the list itself.
            for sub_item in value:
                dict_to_list(sub_tree=sub_item, current_list=path, items_list=items_list)
        elif isinstance(value, dict):
            dict_to_list(sub_tree=value, current_list=path, items_list=items_list)
        else:
            items_list.append(path + [value])
    return items_list
def json_data_to_samples_list(json_data: dict, data_key: str = 'value', column_sep: str = "_") -> [[(str, str)]]:
    """Convert every record under ``json_data[data_key]`` into (column, value) pairs.

    Each record is flattened with ``dict_to_list``. For every flattened
    item, all elements except the last are joined with *column_sep* to
    form the column name, and the last element is the value.

    Returns:
        One list of ``(column, value)`` tuples per record, in input order.
    """
    samples_list = []
    for record in json_data[data_key]:
        flattened = dict_to_list(sub_tree=record, current_list=[], items_list=[])
        pairs = [(column_sep.join(item[:-1]), item[-1]) for item in flattened]
        samples_list.append(pairs)
    return samples_list
def main() -> None:
    """Load ``data.json``, flatten it, and print the extracted (column, value) pairs."""
    with open("data.json", mode="r", encoding="utf-8") as handle:
        samples_list = json_data_to_samples_list(json_data=json.load(handle))

    print(f"\nExtracted {len(samples_list)} items from the JSON file:")
    for position, sample in enumerate(samples_list):
        print(f"\n>>>>> Child #{position}:")
        for pair in sample:
            print(f"\t\t{pair}")
    print()


if __name__ == '__main__':
    main()
Extracted 2 items from the JSON file:
>>>>> Child #0:
('A', 0)
('B', 1)
('C', 2)
>>>>> Child #1:
('C_E', 3)
('C_F', 4)
('D_G', 5)
('D_H', 6)
In addition to my previous answer, you might also want to convert a JSON file to tuples that can be inserted into a database. In that case, you can use the following approach:
{
"value":
[
{
"A": 0,
"B": 1,
"C": 2
},
{
"C": {
"E": 3,
"F": 4
},
"D": [
{
"G": 5
},
{
"H": 6
}
]
}
]
}
from collections import OrderedDict
import logging
import json
logging.basicConfig(level="DEBUG")
logger = logging.getLogger(__name__)


class JSON:
    """Flatten parsed JSON data into column names and rows of values.

    The wrapped data is walked recursively. Every leaf becomes a
    ``(column, value)`` pair whose column name is the path of keys leading
    to it, joined with a separator. ``to_tuples`` then lines those pairs up
    into rows that share one common column list.
    """

    def __init__(self, data: dict) -> None:
        """Store the already-parsed JSON *data* (as returned by ``json.load``)."""
        self.data = data

    def abbreviate(self, string: str, abbreviations: dict = None) -> str:
        """Replace every word of *abbreviations* found in *string*.

        Each key is replaced by its value, and the title-cased key by the
        title-cased value.

        Args:
            string: Text to abbreviate.
            abbreviations: Mapping of word -> abbreviation. ``None`` (the
                default) or an empty mapping leaves *string* unchanged.

        Returns:
            The abbreviated string.
        """
        # None replaces a mutable ``{}`` default that would be shared
        # between calls.
        if not abbreviations:
            return string
        for word, short in abbreviations.items():
            string = string.replace(word, short)
            string = string.replace(word.title(), short.title())
        return string

    def find_string(
        self,
        string: str,
        string_list: list,
        index_start: int = 0
    ) -> list:
        """Return the indexes in *string_list* whose item equals *string*.

        Only positions at or after *index_start* are examined, and empty
        items never match. The returned indexes refer to the full list.
        """
        return [
            index
            for index, item in enumerate(
                string_list[index_start:],
                start=index_start
            )
            if item and item == string
        ]

    def disambiguate_columns(
        self,
        columns: list,
        is_case_sentitive: bool = False
    ) -> list:
        """Make column names unique by suffixing repeats with ``_1``, ``_2``, ...

        The first occurrence of a name is kept as is; each later occurrence
        gets a numeric suffix counting from 1.

        Args:
            columns: Column names. The list is modified in place.
            is_case_sentitive: When false (the default), names that differ
                only by case count as duplicates.

        Returns:
            The same *columns* list, after renaming.
        """
        if is_case_sentitive:
            columns_ref = list(columns)
        else:
            columns_ref = [c.lower() for c in columns]
        for i, reference in enumerate(columns_ref):
            duplicates = self.find_string(
                reference,
                string_list=columns_ref,
                index_start=i + 1
            )
            for n, j in enumerate(duplicates, start=1):
                columns[j] = f"{columns[j]}_{n}"
                # Blank the reference so this position is not matched again
                # when the outer loop reaches it (empty items never match).
                columns_ref[j] = ""
        return columns

    def dict_to_list(
        self,
        sub_tree: object,
        current_list: list,
        items_list: list
    ) -> list:
        """Flatten a nested JSON-like structure into ``path + [value]`` lists.

        Args:
            sub_tree: The node to flatten. A dict is walked key by key;
                anything else (a scalar found inside a list, for instance)
                is a leaf.
            current_list: Keys traversed so far. Not mutated.
            items_list: Accumulator receiving one list per leaf. It is
                mutated in place and also returned.

        Returns:
            *items_list*. A value stored directly under a dict key keeps
            its type; a non-dict *sub_tree* is stored as ``str(sub_tree)``.
        """
        # Decide "branch or leaf" with an explicit type check rather than
        # by catching whatever exception iterating a non-dict raises.
        if not isinstance(sub_tree, dict):
            items_list.append(current_list + [str(sub_tree)])
            return items_list

        for key, value in sub_tree.items():
            path = current_list + [key]
            if isinstance(value, list):
                # Every element of a list shares the path of the list.
                for sub_item in value:
                    self.dict_to_list(
                        sub_tree=sub_item,
                        current_list=path,
                        items_list=items_list
                    )
            elif isinstance(value, dict):
                self.dict_to_list(
                    sub_tree=value,
                    current_list=path,
                    items_list=items_list
                )
            else:
                items_list.append(path + [value])
        return items_list

    def extract_entries(
        self,
        data_key: str = None,
        column_sep: str = "_"
    ) -> list:
        """Flatten every record of the data into ``(column, value)`` tuples.

        Args:
            data_key: Key of ``self.data`` holding the records. When
                ``None``, ``self.data`` itself is iterated.
            column_sep: Separator used to join a leaf's key path into its
                column name.

        Returns:
            One list of ``(column, value)`` tuples per record.
        """
        records = self.data if data_key is None else self.data[data_key]
        entries = []
        for parent in records:
            flattened = self.dict_to_list(
                sub_tree=parent,
                current_list=[],
                items_list=[]
            )
            entries.append(
                [(column_sep.join(child[:-1]), child[-1]) for child in flattened]
            )
        return entries

    def get_nth_element(
        self,
        items: list,
        element: str,
        nth: int = 1
    ) -> tuple:
        """Return the *nth* (1-based) item of *items* whose first field is *element*.

        Returns:
            ``(item, index_out_of_bounds)``. When fewer than *nth* matches
            exist, the last match is returned with the flag set to true.
            When nothing matches, ``(None, True)`` is returned.
        """
        assert nth >= 1
        occurrences = [item for item in items if item[0] == element]
        if not occurrences:
            return (None, True)
        n_occurrences = len(occurrences)
        index_out_of_bounds = nth > n_occurrences
        # Clamp to the last occurrence when nth is past the end.
        nth_element = occurrences[min(nth, n_occurrences) - 1]
        return (nth_element, index_out_of_bounds)

    def to_tuples(
        self,
        data_key: str = None,
        is_case_sentitive: bool = False,
        abbreviate_columns: bool = True
    ) -> tuple:
        """Turn the data into a column list and rows aligned to it.

        A record in which a column occurs several times yields several
        rows; columns with fewer occurrences repeat their last value, and
        columns absent from the record are ``None``.

        Args:
            data_key: Passed to ``extract_entries``.
            is_case_sentitive: Passed to ``disambiguate_columns``.
            abbreviate_columns: When true, each column name is passed
                through ``abbreviate`` (called without an abbreviation
                mapping) and the result is logged.

        Returns:
            ``(columns, values)``: the final column names and the list of
            rows, each row being a list with one value per column.
        """
        logger.debug("Extracting values tuples from JSON data...")
        entries = self.extract_entries(data_key=data_key)

        # Unique column names in order of first appearance.
        original_columns = list(
            OrderedDict.fromkeys(
                column_value_tuple[0]
                for samples in entries
                for column_value_tuple in samples
            )
        )
        n_entries = len(entries)
        n_columns = len(original_columns)

        values = []
        for index, tuples in enumerate(entries, start=1):
            logger.debug(
                "Processing values from entry %d/%d (%.2f%%)...",
                index, n_entries, (index / n_entries) * 100
            )
            for i in range(1, len(tuples) + 1):
                row = []
                index_out_of_bounds_count = 0
                for c in original_columns:
                    column_value_tuple, index_out_of_bounds = \
                        self.get_nth_element(items=tuples, element=c, nth=i)
                    if column_value_tuple is None:
                        row.append(None)
                    else:
                        row.append(column_value_tuple[1])
                    if index_out_of_bounds:
                        index_out_of_bounds_count += 1
                # Once no column has an i-th occurrence, the record is
                # exhausted and further rows would only repeat values.
                if index_out_of_bounds_count == n_columns:
                    break
                # Skip rows that carry no value at all.
                if row.count(None) != n_columns:
                    values.append(row)

        columns = original_columns
        if abbreviate_columns:
            logger.debug("Abbreviating column names...")
            columns_abbreviation = []
            for c in columns:
                abbreviation = self.abbreviate(c)
                columns_abbreviation.append(abbreviation)
                logger.debug("[%s] --> [%s]", c, abbreviation)
            columns = columns_abbreviation

        logger.debug("Disambiguating column names...")
        columns = self.disambiguate_columns(
            columns,
            is_case_sentitive=is_case_sentitive
        )
        return (columns, values)
def main() -> None:
    """Load ``data.json``, convert it to columns and rows, and print each row."""
    with open("data.json", mode="r", encoding="utf-8") as handle:
        parsed = json.load(handle)

    columns, values = JSON(parsed).to_tuples(data_key='value')

    print(f"\nExtracted {len(values)} items from the JSON file:")
    for position, row in enumerate(values):
        print(f"\n>>>>> Child #{position}:")
        # Pair every value with the name of the column it belongs to.
        for name, cell in zip(columns, row):
            print(f"\t{name}: {cell}")
    print()


if __name__ == '__main__':
    main()
DEBUG:__main__:Extracting values tuples from JSON data...
DEBUG:__main__:Processing values from entry 1/2 (50.00%)...
DEBUG:__main__:Processing values from entry 2/2 (100.00%)...
DEBUG:__main__:Abbreviating column names...
DEBUG:__main__:[A] --> [A]
DEBUG:__main__:[B] --> [B]
DEBUG:__main__:[C] --> [C]
DEBUG:__main__:[C_E] --> [C_E]
DEBUG:__main__:[C_F] --> [C_F]
DEBUG:__main__:[D_G] --> [D_G]
DEBUG:__main__:[D_H] --> [D_H]
DEBUG:__main__:Disambiguating column names...
Extracted 2 items from the JSON file:
>>>>> Child #0:
A: 0
B: 1
C: 2
C_E: None
C_F: None
D_G: None
D_H: None
>>>>> Child #1:
A: None
B: None
C: None
C_E: 3
C_F: 4
D_G: 5
D_H: 6
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.