[英]How do I plot stacked bar plots side by side in Python? (preferably with seaborn)
我正在尋找一種方法來並排繪製堆疊條形圖,以比較數據框中每個國家/地區的陽性病例(Condition == True)與全部病例的宿主(Host)組成。
這是 DataFrame 的示例。
id Location Host genus_name #ofGenes Condition
1 Netherlands Homo sapiens Escherichia 4.0 True
2 Missing Missing Klebsiella 3.0 True
3 Missing Missing Aeromonas 2.0 True
4 Missing Missing Glaciecola 2.0 True
5 Antarctica Missing Alteromonas 2.0 True
6 Indian Ocean Missing Epibacterium 2.0 True
7 Missing Missing Klebsiella 2.0 True
8 China Homo sapiens Escherichia 0 False
9 Missing Missing Escherichia 2.0 True
10 China Plantae kingdom Pantoea 0 False
11 China Missing Escherichia 2.0 True
12 Pacific Ocean Missing Halomonas 0 False
我需要類似於下圖的東西,但我想以百分比形式繪制。
誰能幫我?
我想你想要的是一個堆疊的分組(分類)條形圖,seaborn 無法直接繪製這種圖。但是您可以像下面這樣自定義一個繪圖函數來實現它。
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.patches import Patch
def gen_fake_data(data, size=400):
    """Return *data* extended with ``size`` randomly generated rows.

    Every column of a synthetic row is drawn independently (with
    ``np.random.choice``) from the unique values that column takes in
    ``data``. The synthetic rows are appended below the original rows and
    the ``id`` column is rewritten as 1..N over the combined frame.

    Parameters
    ----------
    data : pandas.DataFrame
        Template frame; it is not modified.
    size : int, default 400
        Number of synthetic rows to append.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``len(data) + size`` rows and a fresh 0..N-1 index.
    """
    new_data = pd.DataFrame({
        c: np.random.choice(data[c].unique(), size=size)
        for c in data.columns
    })
    # ignore_index=True is required: without it both frames keep their own
    # 0-based index, so ``index + 1`` below would hand out duplicate ids.
    new_data = pd.concat([data, new_data], ignore_index=True)
    new_data['id'] = new_data.index + 1
    return new_data
# Load the sample table from disk, then append 400 randomly generated rows
# to it (400 is also gen_fake_data's default size).
data = pd.read_csv('data.csv')
new_data = gen_fake_data(data, size=400)
def stack_catplot(x, y, cat, stack, data, palette=sns.color_palette('Reds')):
    """Draw grouped, stacked bars on the current matplotlib axes and show them.

    For every value of ``x`` one bar per ``cat`` value is drawn side by
    side; each bar is split into segments, one per ``stack`` value, whose
    heights are the sum of ``y`` for that (cat, x, stack) combination.

    Parameters
    ----------
    x : str
        Column whose values become the tick positions on the x axis.
    y : str
        Numeric column that is summed to obtain segment heights.
    cat : str
        Column whose values are drawn as side-by-side bars within a tick.
    stack : str
        Column whose values are stacked inside each bar.
    data : pandas.DataFrame
        Long-form table containing the four columns above.
    palette : sequence of colors or str
        Colors indexed as ``i_cat * n_stack + j_stack``. If it holds fewer
        colors than there are (cat, stack) pairs the colors are cycled.
        A string is treated as a seaborn palette name and expanded to
        exactly the number of colors needed.

    Returns
    -------
    None
        The function draws on ``plt.gca()`` and calls ``plt.show()``.
    """
    ax = plt.gca()

    # Take each set of labels once, in order of appearance, and derive the
    # counts from them so positions, heights, colors and labels always agree
    # (``nunique`` skips NaN while ``unique`` keeps it).
    x_values = data[x].dropna().unique()
    cat_values = data[cat].dropna().unique()
    stack_values = data[stack].dropna().unique()
    nx, ncat, nstack = len(x_values), len(cat_values), len(stack_values)

    ncolors = ncat * nstack
    if isinstance(palette, str):
        palette = sns.color_palette(palette, n_colors=ncolors)
    # Cycle instead of raising IndexError when the palette is too short.
    colors = [palette[k % len(palette)] for k in range(ncolors)]

    # pivot the data based on categories and stacks
    df = data.pivot_table(values=y, index=[cat, x], columns=stack,
                          dropna=False, aggfunc='sum').fillna(0)

    range_x = np.arange(nx)
    width = 0.8 / ncat  # width of each bar

    for i, c in enumerate(cat_values):
        # horizontal position of this category's bar inside every tick group
        loc_x = (0.5 + i - ncat / 2) * width + range_x
        # pivot_table sorts its index, whereas the tick labels follow the
        # order of appearance; reindex so bar k really belongs to label k.
        per_cat = df.loc[c].reindex(index=x_values, columns=stack_values,
                                    fill_value=0)
        bottom = np.zeros(nx)
        for j, s in enumerate(stack_values):
            height = per_cat[s].to_numpy()
            ax.bar(x=loc_x, height=height, bottom=bottom, width=width,
                   color=colors[j + i * nstack], zorder=10)
            # the next segment starts where this one ends
            bottom = bottom + height

    # make xlabel
    ax.set_xticks(range_x)
    ax.set_xticklabels(x_values, rotation=45)
    ax.set_ylabel(y)

    # make legend: one entry per (category, stack) pair, same order as colors
    ax.legend([Patch(facecolor=color) for color in colors],
              [f"{c}: {s}" for c in cat_values for s in stack_values],
              bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)
    ax.grid()
    plt.show()
# Figure 1: summed '#ofGenes' per Location, one bar per Condition, stacked by Host.
plt.figure(figsize=(6, 3), dpi=300)
stack_catplot(x='Location', y='#ofGenes', cat='Condition', stack='Host', data=new_data)

# Total '#ofGenes' of every (Location, Condition) group, attached to each row
# so the row's share of its group can be expressed as a percentage.
total_genes = (
    new_data
    .groupby(['Location', 'Condition'], as_index=False)['#ofGenes']
    .sum()
    .rename(columns={'#ofGenes': 'TotalGenes'})
)
new_data = new_data.merge(total_genes, how='left', on=['Location', 'Condition'])
new_data['%ofGenes'] = new_data['#ofGenes'].div(new_data['TotalGenes']).mul(100)

# Figure 2: the same layout with percentages on the y axis.
plt.figure(figsize=(6, 3), dpi=300)
stack_catplot(x='Location', y='%ofGenes', cat='Condition', stack='Host', data=new_data)
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.