pandas 和 seaborn 的堆积密度图

Question

I am trying to obtain the following plot from a pandas data frame. 我正在尝试从 pandas 数据框中得到下面的图。

I am not sure how to combine seaborn with pandas for that task.我不确定如何将 seaborn 与 Pandas 结合起来完成这项任务。

This is the dataframe I want to use:这是我要使用的数据框：

import numpy as np
import pandas as pd

# Three columns with different locations and spreads: 'a' and 'b' are
# normal draws, 'c' is uniform on [10, 11).
data = pd.DataFrame(
    {
        'a': np.random.randn(1000) + 1,
        'b': np.random.randn(1000),
        'c': np.random.rand(1000) + 10,
    },
    columns=['a', 'b', 'c'],
)

# Blank out random rows so the columns end up with very different numbers
# of valid points.  Assign through .loc on the frame itself: chained
# assignment (data.a[...] = ...) may write to a temporary copy, and the
# np.NaN alias no longer exists in NumPy 2.0.
data.loc[data['a'].sample(100).index, 'a'] = np.nan
data.loc[data['b'].sample(800).index, 'b'] = np.nan

Notice that the frequency will need to be normalized (height of the histogram), as the number of data points and distributions differ significantly and the distributions will have different 'y scales'. 请注意，频率需要归一化（直方图的高度），因为各列的数据点数量和分布差异显著，各分布会有不同的“y 尺度”。

# Histogram of all columns of `data` via the pandas plotting API.
# NOTE(review): the trailing semicolon presumably only suppresses the
# returned object's repr in a notebook cell — confirm.
data.plot.hist();

This is the seaborn code that generates the figure I showed at the beginning. 这是生成我在开头展示的那张图的 seaborn 代码。

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# White style with fully transparent axes backgrounds, so stacked rows can
# overlap without hiding one another.
sns.set(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

# Create the data: 150 normal draws labelled A, B, C in rotation
rs = np.random.RandomState(1979)
x = rs.randn(150)
groups = np.tile(list("ABC"), 50)
df = pd.DataFrame(dict(x=x, g=groups))
m = df.g.map(ord)  # not used by anything below in this snippet

# Initialize the FacetGrid object: one short, wide row per group
pal = sns.cubehelix_palette(10, rot=-.25, light=.7)
g = sns.FacetGrid(df, row="g", hue="g", aspect=5, height=1, palette=pal)

# Draw the densities in a few steps: filled KDE, a white outline on top,
# then a baseline at y=0.
# `fill` / `bw_method` are the current names of kdeplot's deprecated
# `shade` / `bw` keyword arguments.
g.map(sns.kdeplot, "x", clip_on=False, fill=True, alpha=1, lw=1.5, bw_method=.2)
g.map(sns.kdeplot, "x", clip_on=False, color="w", lw=2, bw_method=.2)
g.map(plt.axhline, y=0, lw=2, clip_on=False)


# Define and use a simple function to label the plot in axes coordinates
def label(x, color, label):
    """Write ``label`` in bold near the left edge of the current axes.

    Used as a ``FacetGrid.map`` callback; ``x`` is accepted but not used.
    The text is positioned in axes coordinates at (0, 0.3) and drawn in
    ``color``.
    """
    text_style = dict(fontweight="bold", color=color, ha="left", va="center")
    current_axes = plt.gca()
    current_axes.text(0, .3, label, transform=current_axes.transAxes,
                      **text_style)


# Call label() on every facet so each row is annotated with its group name
g.map(label, "x")

# Set the subplots to overlap (a negative hspace pulls the rows together)
# NOTE(review): -.0025 yields almost no overlap — confirm the intended value.
g.fig.subplots_adjust(hspace=-.0025)

# Remove axes details that don't play well with overlap
g.set_titles("")
g.set(yticks=[])
g.despine(bottom=True, left=True)

Answer 1

Here is a function to create a grid of kde plots ("joyplot") with one plot per dataframe column.这是一个创建 kde 图（“joyplot”）网格的函数，每个数据框列有一个图。

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import gaussian_kde


def joyplot_from_dataframe(data, cmap=None):
    """Draw a vertical stack of KDE plots ("joyplot"), one per column.

    Every column of ``data`` gets its own row; all rows share one x axis.
    NaN entries are dropped per column before the density is estimated,
    so columns may hold different numbers of valid points.

    Parameters
    ----------
    data : pandas.DataFrame
        Numeric columns to plot.
    cmap : matplotlib colormap or str, optional
        Colormap (or registered colormap name) the row colours are sampled
        from.  Defaults to ``"Blues"``.

    Returns
    -------
    fig : matplotlib.figure.Figure
        The created figure.
    axes : numpy.ndarray
        1-D array of Axes, one per column, in column order.
    """
    # Common evaluation grid: the global data range padded by a fifth of
    # its width on each side.
    lo, hi = np.nanmin(data.values), np.nanmax(data.values)
    pad = (hi - lo) / 5
    x = np.linspace(lo - pad, hi + pad, 1000)

    n = len(data.columns)

    # plt.get_cmap accepts a name or a Colormap instance; the older
    # plt.cm.get_cmap was removed in Matplotlib 3.9.
    cmap = plt.get_cmap("Blues" if cmap is None else cmap)
    colors = cmap(np.linspace(.2, 1, n))

    # squeeze=False keeps `axes` an array even for a single column, where
    # plt.subplots would otherwise return a bare Axes that cannot be zipped.
    fig, axes = plt.subplots(nrows=n, sharex=True, squeeze=False)
    axes = axes.ravel()

    for c, ax, color in zip(data.columns, axes, colors):
        y = data[c].values
        y = y[~np.isnan(y)]
        kde = gaussian_kde(y)
        ax.fill_between(x, kde(x), color=color)

        # Strip everything but a coloured baseline from each row.
        ax.yaxis.set_visible(False)
        for spine in ["left", "right", "top"]:
            ax.spines[spine].set_visible(False)
        ax.spines["bottom"].set_linewidth(2)
        ax.spines["bottom"].set_color(color)
        ax.margins(y=0)
        ax.tick_params(bottom=False)

    return fig, axes

Use it as follows: 用法如下：

import pandas as pd

data = pd.DataFrame(
    {
        'a': np.random.randn(1000) + 1,
        'b': np.random.randn(1000),
        'c': np.random.rand(1000) + 10,
    },
    columns=['a', 'b', 'c'],
)

# Blank out random rows so the columns hold different numbers of valid
# points.  Assign through .loc on the frame itself: chained assignment
# (data.a[...] = ...) may write to a temporary copy, and the np.NaN alias
# no longer exists in NumPy 2.0.
data.loc[data['a'].sample(100).index, 'a'] = np.nan
data.loc[data['b'].sample(800).index, 'b'] = np.nan


joyplot_from_dataframe(data)
plt.show()

pandas 和 seaborn 的堆积密度图

问题描述

1 个解决方案

解决方案1
2 已采纳 2019-02-17 01:10:38

pandas 和 seaborn 的堆积密度图

问题描述

1 个解决方案

解决方案1 2 已采纳 2019-02-17 01:10:38

解决方案1
2 已采纳 2019-02-17 01:10:38