[英]Sum all items of a CSV with the same index name? Python, Pandas
我想把同一个国家的所有省份的数据求和。以澳大利亚为例,我希望结果看起来像这样:
到目前为止,代码看起来像这样
class FileProcessCovid:
    """Load the global confirmed-cases time series and total it per country."""

    def dataLoad(self):
        """Read the time-series CSV and return it as a DataFrame.

        The first CSV column becomes the index (``index_col=0``); the code
        below presumably relies on that being the province/state column.
        """
        data = pd.read_csv("time_series_covid19_confirmed_global.csv", index_col=0)
        return data

    def delete_columns(self, data):
        """Return ``data`` without its index and without the Lat/Long columns.

        Args:
            data: DataFrame containing ``Lat`` and ``Long`` columns.

        Returns:
            A new DataFrame with a fresh 0..n-1 index and the two coordinate
            columns removed. The result is also printed.

        Raises:
            KeyError: if ``Lat`` or ``Long`` is missing from ``data``.
        """
        # Work on the argument that was passed in rather than on a
        # module-level variable, so the method is usable with any frame.
        no_index = data.reset_index(drop=True)
        dropped = no_index.drop(["Lat", "Long"], axis=1)
        print(dropped)
        return dropped

    def sum_provinces(self, dropped):
        """Sum all rows that share the same ``Country/Region`` value.

        Args:
            dropped: DataFrame with a ``Country/Region`` column; every other
                column is summed within each country.

        Returns:
            A DataFrame indexed by ``Country/Region`` with one row per
            country, in order of first appearance.
        """
        # sort=False keeps the countries in the order they occur in the file.
        return dropped.groupby("Country/Region", sort=False).sum()
# Driver: load the CSV, strip the index and the Lat/Long columns, then hand
# the result to sum_provinces.
c = FileProcessCovid()
# The loaded frame is bound at module level under the name `dataframe`.
dataframe = c.dataLoad()
dropped = c.delete_columns(dataframe)
# The return value of sum_provinces is not captured here.
c.sum_provinces(dropped)
如能提供任何帮助,不胜感激。谢谢!
def sum_provinces(data):
    """Collapse province-level rows into one summed row per country.

    Args:
        data: DataFrame with a ``Country/Region`` column and, optionally, a
            ``Province/State`` column. Every other column is treated as a
            value column and summed within each country.

    Returns:
        A new DataFrame with the same columns, in the same order, as
        ``data`` and one row per country in order of first appearance.
        ``Province/State``, when present, is left empty (NaN) because a
        country total no longer belongs to a single province.

    Raises:
        KeyError: if ``data`` has no ``Country/Region`` column.
    """
    label_columns = ("Country/Region", "Province/State")
    # Membership test instead of `is not ... or ...`, which is always true
    # and would try to add the label strings into the totals.
    value_columns = [col for col in data.columns if col not in label_columns]

    # groupby adds up every row of a country in one pass, including rows
    # that are not adjacent in the input; sort=False preserves file order.
    totals = (
        data.groupby("Country/Region", sort=False)[value_columns]
        .sum()
        .reset_index()
    )

    # Restore the caller's column layout; columns that were not summed
    # (Province/State) come back filled with NaN.
    return totals.reindex(columns=list(data.columns))
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.