I'm new to pandas and was wondering if someone can help me out.
I would simply like to set the name of each column in the output DataFrame to the name of the country object (e.g. Germany or France).
Instead of getting this output:
value name value name
tag
capital Paris France Berlin Germany
population 34111000000 France 11233000000 Germany
language French France German Germany
...I would like something like this:
France Germany
tag
capital Paris Berlin
population 34111000000 11233000000
language French German
Any help would be appreciated :-)
Here is my code:
import numpy as np
import pandas as pd
import json
class Country(object):
    """A country identified by name, paired with the JSON file holding its data.

    Attributes:
        name: the country name given to the constructor.
        json: file name of the country's data file, ``name + "_Data.json"``.
    """

    def __init__(self, name):
        # The data file is expected to be named after the country.
        json_filename = name + "_Data.json"
        self.name = name
        self.json = json_filename
def ImportJson(x):
    """Load each country's JSON file into a DataFrame indexed by ``tag``.

    ``x`` is an iterable of objects exposing a ``json`` attribute (the file
    to read) and a ``name`` attribute. The result is a list holding one
    DataFrame per country, in input order. Each frame has a ``value``
    column taken from the file's ``"data"`` records and a ``name`` column
    filled with the country's name.
    """
    frames = []
    for country in x:
        # Only the parsed content is needed once the file has been read.
        with open(country.json) as handle:
            country_data = json.load(handle)
        country_data_table = pd.DataFrame(country_data['data'], columns=['tag', 'value']).set_index('tag')
        country_data_table['name'] = country.name
        frames.append(country_data_table)
    return frames
# Build one Country per data file and load each file into its own DataFrame.
France = Country("France")
Germany = Country("Germany")
All_Countries = [France, Germany]
OpenedJson = ImportJson(All_Countries)
# axis=1 places the per-country frames side by side, aligned on the shared
# `tag` index, so each country contributes its own `value` and `name` columns.
Country_Data = pd.concat(OpenedJson, axis=1)
# Parenthesised call so this line runs on both Python 2 and Python 3
# (the bare `print Country_Data` statement is a SyntaxError on Python 3).
print(Country_Data)
Here are the JSON files:
Germany_Data.json
{
"data": [
{
"tag": "capital",
"value": "Berlin"
},
{
"tag": "population",
"value": 11233000000
},
{
"tag": "language",
"value": "German"
}
],
"result_count": 33,
"page_size": 5000,
"current_page": 1,
"total_pages": 1,
"api_call_credits": 1
}
France_Data.json
{
"data": [
{
"tag": "capital",
"value": "Paris"
},
{
"tag": "population",
"value": 34111000000
},
{
"tag": "language",
"value": "French"
}
],
"result_count": 33,
"page_size": 5000,
"current_page": 1,
"total_pages": 1,
"api_call_credits": 1
}
Script output:
value name value name
tag
capital Paris France Berlin Germany
population 34111000000 France 11233000000 Germany
language French France German Germany
In your function ImportJson
you have the following two lines of code:
country_data_table = pd.DataFrame(country_data['data'], columns=['tag', 'value']).set_index('tag')
country_data_table['name'] = country.name
Replace the second line with the following, so that the `value` column is renamed to the country's name:
country_data_table.rename(columns={'value':country.name}, inplace=True)
I rewrote your class
import numpy as np
import pandas as pd
import json
class Country(object):
    """Country whose tag/value facts are loaded from ``<name>_Data.json``.

    Attributes set by the constructor:
        name:   the country name passed in.
        json:   name of the file that is read, ``name + "_Data.json"``.
        data:   the object stored under the file's ``"data"`` key.
        series: pandas Series of the ``value`` entries, indexed by ``tag``
                and named after the country.
    """

    def __init__(self, name):
        self.name = name
        self.json = name + "_Data.json"
        with open(self.json, 'r') as fp:
            payload = json.load(fp)
        self.data = payload['data']
        # Index the records by tag, then keep only the `value` column. Naming
        # the Series after the country gives it that column label when
        # several countries are concatenated later.
        by_tag = pd.DataFrame.from_records(self.data).set_index('tag')
        self.series = by_tag['value'].rename(self.name)
# One Country per JSON file; each constructor call reads its file.
France = Country("France")
Germany = Country("Germany")
# Each `.series` is named after its country, so concatenating along axis=1
# produces one column per country, aligned on the shared `tag` index.
pd.concat([France.series, Germany.series], axis=1)
France Germany
tag
capital Paris Berlin
population 34111000000 11233000000
language French German
If you insist on manipulating the DataFrame you have already constructed:
# Country_Data (built in the question) has duplicate column labels:
# value, name, value, name. Transpose so those labels become the index,
# number each repeated label with groupby(level=0).cumcount(), and append
# that counter as a second index level. Transposing back leaves CD1 with
# unique two-level column labels, which makes further selection possible.
CD1 = Country_Data.T.set_index(
Country_Data.T.groupby(level=0).cumcount(), append=True).T
# Strategy: keep only the `value` columns, then relabel them.
CD2 = CD1.filter(like='value')
# Each `name` column repeats its country's name on every row, so reading the
# `capital` row yields one name per country, in column order.
# NOTE(review): this relies on a `capital` row being present in the data.
CD2.columns = Country_Data.loc['capital', 'name'].tolist()
# Bare expression: shows the result when run in an interactive session.
CD2
France Germany
tag
capital Paris Berlin
population 34111000000 11233000000
language French German
Setup: create the JSON
files
import json
# Fixture payloads mirroring the two JSON files shown in the question.
germany_payload = {
    "data": [
        {"tag": "capital", "value": "Berlin"},
        {"tag": "population", "value": 11233000000},
        {"tag": "language", "value": "German"},
    ],
    "result_count": 33,
    "page_size": 5000,
    "current_page": 1,
    "total_pages": 1,
    "api_call_credits": 1,
}

france_payload = {
    "data": [
        {"tag": "capital", "value": "Paris"},
        {"tag": "population", "value": 34111000000},
        {"tag": "language", "value": "French"},
    ],
    "result_count": 33,
    "page_size": 5000,
    "current_page": 1,
    "total_pages": 1,
    "api_call_credits": 1,
}

# Write each payload under the "<name>_Data.json" file name that the
# Country class opens, Germany first and then France.
for filename, payload in (
    ('Germany_Data.json', germany_payload),
    ('France_Data.json', france_payload),
):
    with open(filename, 'w') as fp:
        json.dump(payload, fp)
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.