繁体   English   中英

Dataframe 基于索引和列名的条件填充

[英]Dataframe fillna conditional based on Index & Column Name

我希望能够在 Dataframe 上使用df.fillna() function,但根据该特定单元格的索引和列名对其应用条件。

我正在尝试根据以下数据集创建曲棍球队友数据的热图(为下面的大字典道歉)-

linemates_toi = {
    'Player 1': {'Player 2': 0.25, 'Player 3': 7.95, 'Player 4': 0.6333, 'Player 5': 9.95, 'Player 6': 0.6333, 'Player 7': 0.8, 'Player 8': 4.2667, 'Player 9': 7.8833, 'Player 10': 0.3, 'Player 11': 11.2333, 'Player 12': 3.35, 'Player 13': 0.2167},
    'Player 10': {'Player 14': 2.3, 'Player 18': 1.2667, 'Player 2': 6.8333, 'Player 4': 5.5833, 'Player 5': 0.9, 'Player 16': 6.9167, 'Player 6': 4.9667, 'Player 7': 4.15, 'Player 15': 1.0, 'Player 8': 0.3167, 'Player 17': 5.3167, 'Player 1': 0.3, 'Player 11': 1.6167, 'Player 12': 0.6833, 'Player 13': 12.7167}, 
    'Player 12': {'Player 14': 4.5333, 'Player 18': 4.3333, 'Player 2': 3.1167, 'Player 3': 1.2333, 'Player 4': 5.7333, 'Player 5': 3.5167, 'Player 16': 3.0, 'Player 6': 3.0167, 'Player 7': 2.4, 'Player 15': 2.0167, 'Player 8': 11.6667, 'Player 17': 2.2667, 'Player 9': 0.1167, 'Player 1': 3.35, 'Player 10': 0.6833, 'Player 11': 3.35}, 
    'Player 17': {'Player 14': 4.55, 'Player 18': 1.65, 'Player 2': 0.8833, 'Player 3': 2.85, 'Player 5': 0.0333, 'Player 16': 2.9167, 'Player 6': 7.8167, 'Player 7': 6.0833, 'Player 8': 3.8, 'Player 9': 2.25, 'Player 10': 5.3167, 'Player 12': 2.2667, 'Player 13': 5.7833},
    'Player 7': {'Player 18': 0.3667, 'Player 2': 0.6667, 'Player 3': 1.55, 'Player 4': 0.3333, 'Player 5': 0.15, 'Player 16': 1.2167, 'Player 6': 6.8333, 'Player 15': 0.3333, 'Player 8': 3.0667, 'Player 17': 6.0833, 'Player 9': 1.8833, 'Player 1': 0.8, 'Player 10': 4.15, 'Player 11': 1.0, 'Player 12': 2.4, 'Player 13': 4.4333}, 
    'Player 16': {'Player 14': 2.2833, 'Player 2': 8.5333, 'Player 3': 2.7, 'Player 4': 8.0167, 'Player 5': 0.45, 'Player 6': 0.4, 'Player 7': 1.2167, 'Player 8': 2.3, 'Player 17': 2.9167, 'Player 9': 2.15, 'Player 10': 6.9167, 'Player 11': 0.1333, 'Player 12': 3.0, 'Player 13': 6.5833},
    'Player 18': {'Player 14': 10.05, 'Player 2': 0.75, 'Player 3': 5.0, 'Player 4': 3.45, 'Player 5': 0.3333, 'Player 6': 0.8333, 'Player 7': 0.3667, 'Player 15': 5.2, 'Player 8': 5.8167, 'Player 17': 1.65, 'Player 9': 4.3833, 'Player 10': 1.2667, 'Player 11': 1.5, 'Player 12': 4.3333, 'Player 13': 1.5333},
    'Player 13': {'Player 14': 3.0333, 'Player 18': 1.5333, 'Player 2': 5.9167, 'Player 3': 0.7333, 'Player 4': 4.95, 'Player 5': 0.8167, 'Player 16': 6.5833, 'Player 6': 5.1333, 'Player 7': 4.4333, 'Player 15': 1.2667, 'Player 8': 0.2833, 'Player 17': 5.7833, 'Player 1': 0.2167, 'Player 10': 12.7167, 'Player 11': 1.5333},
    'Player 5': {'Player 18': 0.3333, 'Player 2': 0.8333, 'Player 3': 8.0333, 'Player 16': 0.45, 'Player 6': 0.3333, 'Player 7': 0.15, 'Player 8': 3.0167, 'Player 17': 0.0333, 'Player 9': 6.7333, 'Player 1': 9.95, 'Player 10': 0.9, 'Player 11': 11.2333, 'Player 12': 3.5167, 'Player 13': 0.8167},
    'Player 15': {'Player 14': 4.5667, 'Player 18': 5.2, 'Player 2': 0.4667, 'Player 3': 2.35, 'Player 6': 0.1667, 'Player 7': 0.3333, 'Player 8': 2.0167, 'Player 9': 2.0833, 'Player 10': 1.0, 'Player 12': 2.0167, 'Player 13': 1.2667},
    'Player 2': {'Player 18': 0.75, 'Player 3': 2.65, 'Player 4': 8.6, 'Player 5': 0.8333, 'Player 16': 8.5333, 'Player 6': 0.8333, 'Player 7': 0.6667, 'Player 15': 0.4667, 'Player 8': 2.3333, 'Player 17': 0.8833, 'Player 9': 1.9167, 'Player 1': 0.25, 'Player 10': 6.8333, 'Player 11': 1.6167, 'Player 12': 3.1167, 'Player 13': 5.9167},
    'Player 8': {'Player 14': 5.8333, 'Player 18': 5.8167, 'Player 2': 2.3333, 'Player 3': 1.1167, 'Player 4': 5.6833, 'Player 5': 3.0167, 'Player 16': 2.3, 'Player 6': 4.2667, 'Player 7': 3.0667, 'Player 15': 2.0167, 'Player 17': 3.8, 'Player 9': 1.1333, 'Player 1': 4.2667, 'Player 10': 0.3167, 'Player 11': 3.8167, 'Player 12': 11.6667, 'Player 13': 0.2833},
    'Player 4': {'Player 14': 3.2833, 'Player 18': 3.45, 'Player 2': 8.6, 'Player 3': 2.0667, 'Player 16': 8.0167, 'Player 6': 0.8333, 'Player 7': 0.3333, 'Player 8': 5.6833, 'Player 9': 1.85, 'Player 1': 0.6333, 'Player 10': 5.5833, 'Player 11': 0.85, 'Player 12': 5.7333, 'Player 13': 4.95},
    'Player 9': {'Player 14': 4.5167, 'Player 18': 4.3833, 'Player 2': 1.9167, 'Player 3': 14.35, 'Player 4': 1.85, 'Player 5': 6.7333, 'Player 16': 2.15, 'Player 6': 0.8833, 'Player 7': 1.8833, 'Player 15': 2.0833, 'Player 8': 1.1333, 'Player 17': 2.25, 'Player 1': 7.8833, 'Player 11': 9.0667, 'Player 12': 0.1167},
    'Player 14': {'Player 18': 10.05, 'Player 3': 5.7167, 'Player 4': 3.2833, 'Player 16': 2.2833, 'Player 6': 1.8833, 'Player 15': 4.5667, 'Player 8': 5.8333, 'Player 17': 4.55, 'Player 9': 4.5167, 'Player 10': 2.3, 'Player 11': 0.9833, 'Player 12': 4.5333, 'Player 13': 3.0333},
    'Player 11': {'Player 14': 0.9833, 'Player 18': 1.5, 'Player 2': 1.6167, 'Player 3': 9.7667, 'Player 4': 0.85, 'Player 5': 11.2333, 'Player 16': 0.1333, 'Player 6': 0.5, 'Player 7': 1.0, 'Player 8': 3.8167, 'Player 9': 9.0667, 'Player 1': 11.2333, 'Player 10': 1.6167, 'Player 12': 3.35, 'Player 13': 1.5333},
    'Player 6': {'Player 14': 1.8833, 'Player 18': 0.8333, 'Player 2': 0.8333, 'Player 3': 1.1333, 'Player 4': 0.8333, 'Player 5': 0.3333, 'Player 16': 0.4, 'Player 7': 6.8333, 'Player 15': 0.1667, 'Player 8': 4.2667, 'Player 17': 7.8167, 'Player 9': 0.8833, 'Player 1': 0.6333, 'Player 10': 4.9667, 'Player 11': 0.5, 'Player 12': 3.0167, 'Player 13': 5.1333},
    'Player 3': {'Player 14': 5.7167, 'Player 18': 5.0, 'Player 2': 2.65, 'Player 4': 2.0667, 'Player 5': 8.0333, 'Player 16': 2.7, 'Player 6': 1.1333, 'Player 7': 1.55, 'Player 15': 2.35, 'Player 8': 1.1167, 'Player 17': 2.85, 'Player 9': 14.35, 'Player 1': 7.95, 'Player 11': 9.7667, 'Player 12': 1.2333, 'Player 13': 0.7333}
}

df = pd.DataFrame(linemates_toi)

我现在要实现的是使用df.fillna(0)并应用条件,因此唯一被替换的NaN是索引和列名称不匹配时,因为我希望这些单元格保持NaN以便当我plot 将它们放入热图中,它们在从 Matplotlib 应用的cmap中没有任何颜色。

如果我正在编写伪代码,它看起来像这样 -

df.fillna(0, df.cell.Index.Name != df.cell.Column.Name)

提前致谢!

使用一些广播和NaN -masking

mask = df.index.to_numpy() == df.columns.to_numpy()[:, None]

df.fillna(0).mask(mask)

>>> df.head()

           Player 1  Player 10  Player 12  Player 17  Player 7  Player 16  \
Player 1        NaN     0.3000     3.3500     0.0000    0.8000     0.0000   
Player 10    0.3000        NaN     0.6833     5.3167    4.1500     6.9167   
Player 11   11.2333     1.6167     3.3500     0.0000    1.0000     0.1333   
Player 12    3.3500     0.6833        NaN     2.2667    2.4000     3.0000   
Player 13    0.2167    12.7167     0.0000     5.7833    4.4333     6.5833   

           Player 18  Player 13  Player 5  Player 15  Player 2  Player 8  \
Player 1      0.0000     0.2167    9.9500     0.0000    0.2500    4.2667   
Player 10     1.2667    12.7167    0.9000     1.0000    6.8333    0.3167   
Player 11     1.5000     1.5333   11.2333     0.0000    1.6167    3.8167   
Player 12     4.3333     0.0000    3.5167     2.0167    3.1167   11.6667   
Player 13     1.5333        NaN    0.8167     1.2667    5.9167    0.2833   

           Player 4  Player 9  Player 14  Player 11  Player 6  Player 3  
Player 1     0.6333    7.8833     0.0000    11.2333    0.6333    7.9500  
Player 10    5.5833    0.0000     2.3000     1.6167    4.9667    0.0000  
Player 11    0.8500    9.0667     0.9833        NaN    0.5000    9.7667  
Player 12    5.7333    0.1167     4.5333     3.3500    3.0167    1.2333  
Player 13    4.9500    0.0000     3.0333     1.5333    5.1333    0.7333 

您也可以执行以下操作:

df.fillna(0, inplace=True)
for col in df:
    df.loc[col, col] = np.nan

解释:

  1. 填充 NaN
  2. 设置 NaN,其中行名等于列名

在每列上使用df.apply到 map 和 lambda :

df = df.apply(lambda col: col.where((col.name == col.index) | col.notnull(), 0))

col.where(condition, value_if_false)如果condition为真,则返回col中的原始值。 否则返回value_if_false

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM