[英]How to add a series specific line y=x to a faceted (or similar) dual-axis chart in Altair?
關於如何使用 Altair 對雙軸圖表進行分面(facet),然後為每個分面圖表添加一條 y=x 的直線,有什麼建議嗎?
挑戰在於:y=x 這條線需要與每個分面圖表中所顯示的該系列(Series)數據的比例尺相匹配。
鏈接:
下面是重現該問題的代碼。
# Reproduction setup: load the Anscombe data, add a per-Series rate of change,
# and define the scatter, rate and y=x layers that the snippets below combine.
import altair as alt
import pandas as pd  # needed by pd.concat below; the original snippet omitted it
from vega_datasets import data

source = data.anscombe().copy()
source['line-label'] = 'x=y'
# Append the per-Series differences of X and Y as x_diff / y_diff columns.
source = pd.concat(
    [
        source,
        source.groupby('Series').agg(x_diff=('X', 'diff'), y_diff=('Y', 'diff')),
    ],
    axis=1,
)
source['rate'] = source.y_diff / source.x_diff
source['rate-label'] = 'rate-of-change'

# Shared x encoding; no data is bound here, it is supplied when layering.
base = alt.Chart().encode(
    x='X:O',
)

scatter = base.mark_circle(size=60, opacity=0.30).encode(
    y='Y:Q',
    color=alt.Color('Series:O', scale=alt.Scale(scheme='category10')),
    tooltip=['Series', 'X', 'Y'],
)

line_x_equals_y = alt.Chart().mark_line(color='black', strokeDash=[3, 8]).encode(
    x=alt.X('max(X)', axis=None),
    y=alt.Y('max(X)', axis=None),  # note: it's intentional to set max(X) here so that X and Y are equal.
    color=alt.Color('line-label'),  # note: the intent here is for the line label to show up in the legend
)

rate = base.mark_line(strokeDash=[5, 3]).encode(
    y=alt.Y('rate:Q'),
    color=alt.Color('rate-label'),
    tooltip=['rate', 'X', 'Y'],
)

# Scatter and rate layers bound to the prepared data.
scatter_rate = alt.layer(scatter, rate, data=source)
# NOTE(review): in the extracted text a stray ")" sat directly in front of
# `scatter_rate.facet(` and made this snippet unparseable. It looks like
# residue of prose dropped during extraction -- confirm against the original
# post. The bare `line_x_equals_y` line below is kept as found.
line_x_equals_y

# Facet the scatter + rate layers by Series, with independent axes.
scatter_rate.facet('Series', columns=2).resolve_axis(
    x='independent',
    y='independent',
)

# Layer the y=x line on top of scatter_rate, then facet by Series.
alt.layer(scatter_rate, line_x_equals_y, data=source).facet('Series', columns=2).resolve_axis(
    x='independent',
    y='independent',
)

# Build one layered chart per Series value (filtered to that Series, with
# independent x/y scales) and concatenate them in a two-column grid.
chart_generator = (
    alt.layer(line_x_equals_y, scatter_rate, data=source, title=f"Series {val}")
    .transform_filter(alt.datum.Series == val)
    .resolve_scale(y='independent', x='independent')
    for val in source.Series.unique()
)
alt.concat(*chart_generator, columns=2)
scatter_rate 是一個按系列(Series)分面的雙軸圖表,各軸帶有適合其數值範圍的獨立刻度。y=x 這條線應從每個圖表的 (0,0) 延伸到 y=max(X) 的值。
您可以照常創建圖層,然後在圖層圖表上調用 facet() 方法來完成此操作。唯一的要求是所有圖層共享相同的源數據;在當前版本的 Altair 中,無需手動構建分面,也無需對分面進行後期數據綁定:
# Layer scatter, rate and a per-Series y=x line on one shared DataFrame,
# then facet the layered chart by Series.
import altair as alt
from vega_datasets import data
import pandas as pd

source = data.anscombe().copy()
source['line-label'] = 'x=y'
source = pd.concat(
    [
        source,
        source.groupby('Series').agg(x_diff=('X', 'diff'), y_diff=('Y', 'diff')),
    ],
    axis=1,
)
source['rate'] = source['y_diff'] / source['x_diff']
source['rate-label'] = 'line y=x'

# End points of the y=x line for each Series: (max X, max X) and (0, 0).
source_linear = (
    source.groupby(by=['Series'])
    .agg(x_linear=('X', 'max'), y_linear=('X', 'max'))
    .reset_index()
    .sort_values(by=['Series'])
)
source_origin = source_linear.assign(y_linear=0, x_linear=0)
source_linear = pd.concat([source_origin, source_linear]).sort_values(by=['Series'])

# Merge the line end points into the point data so every layer can share
# the same source.
source = source.merge(source_linear, on='Series').drop_duplicates()

scatter = alt.Chart(source).mark_circle(size=60, opacity=0.60).encode(
    x='X:Q',
    y='Y:Q',
    color='Series:N',
    tooltip=['X', 'Y', 'rate'],
)

rate = alt.Chart(source).mark_line(strokeDash=[5, 3]).encode(
    x='X:Q',
    y='rate:Q',
    color='rate-label:N',
)

line_plot = alt.Chart(source).mark_line(color='black', strokeDash=[3, 8]).encode(
    x=alt.X('x_linear', title=''),
    y=alt.Y('y_linear', title=''),
    shape=alt.Shape('rate-label', title='Break Even'),
    color=alt.value('black'),
)

layered = alt.layer(scatter, rate, line_plot)
layered.facet(
    'Series:N'
).properties(
    columns=2
).resolve_scale(
    x='independent',
    y='independent'
)
此解決方案為每個圖表上的數據構建了所需的 y=x 直線;但是,數據點在合併(merge)步驟中被重複了,而且我不確定如何添加雙軸的 rate(變化率)。
# Scatter plus y=x line, drawn as one filtered chart per Series and
# concatenated. `alt`, `data` and `pd` are not imported in this snippet and
# must already be in scope.
source = data.anscombe().copy()
source['line-label'] = 'x=y'
source = pd.concat(
    [
        source,
        source.groupby('Series').agg(x_diff=('X', 'diff'), y_diff=('Y', 'diff')),
    ],
    axis=1,
)
source['rate'] = source['y_diff'] / source['x_diff']
source['rate-label'] = 'line y=x'

# End points of the y=x line for each Series: (max X, max X) and (0, 0).
source_linear = (
    source.groupby(by=['Series'])
    .agg(x_linear=('X', 'max'), y_linear=('X', 'max'))
    .reset_index()
    .sort_values(by=['Series'])
)
source_origin = source_linear.assign(y_linear=0, x_linear=0)
source_linear = pd.concat([source_origin, source_linear]).sort_values(by=['Series'])
source = source.merge(source_linear, on='Series').drop_duplicates()

# Both layers are created without data; it is bound in alt.layer below.
scatter = alt.Chart().mark_circle(size=60, opacity=0.60).encode(
    x=alt.X('X', title='X'),
    y=alt.Y('Y', title='Y'),
    #color='year:N',
    tooltip=['X', 'Y', 'rate'],
)

line_plot = alt.Chart().mark_line(color='black', strokeDash=[3, 8]).encode(
    x=alt.X('x_linear', title=''),
    y=alt.Y('y_linear', title=''),
    shape=alt.Shape('rate-label', title='Break Even'),
    color=alt.value('black'),
)

# One layered chart per Series value, filtered to that Series.
chart_generator = (
    alt.layer(
        scatter,
        line_plot,
        data=source,
        title=f"{val}: Duplicated Points w/ Line at Y=X",
    ).transform_filter(alt.datum.Series == val)
    for val in source.Series.unique()
)
chart = alt.concat(*chart_generator, columns=3)
chart.display()
該解決方案包含了 rate(變化率),但它並不是一個軸為 Y、另一個軸為 rate 的雙軸圖表。
# Faceted chart layering scatter, the per-Series y=x line and the rate line,
# all bound to the same merged DataFrame.
import altair as alt
from vega_datasets import data
import pandas as pd

source = data.anscombe().copy()
source['line-label'] = 'x=y'  # overwritten with 'line y=x' a few lines below
source = pd.concat(
    [
        source,
        source.groupby('Series').agg(x_diff=('X', 'diff'), y_diff=('Y', 'diff')),
    ],
    axis=1,
)
source['rate'] = source['y_diff'] / source['x_diff']
source['rate-label'] = 'rate of change'
source['line-label'] = 'line y=x'

# End points of the y=x line for each Series: (max X, max X) and (0, 0).
source_linear = (
    source.groupby(by=['Series'])
    .agg(x_linear=('X', 'max'), y_linear=('X', 'max'))
    .reset_index()
    .sort_values(by=['Series'])
)
source_origin = source_linear.assign(y_linear=0, x_linear=0)
source_linear = pd.concat([source_origin, source_linear]).sort_values(by=['Series'])
source = source.merge(source_linear, on='Series').drop_duplicates()

scatter = alt.Chart(source).mark_circle(size=60, opacity=0.60).encode(
    x=alt.X('X', title='X'),
    y=alt.Y('Y', title='Y'),
    color='Series:N',
    tooltip=['X', 'Y', 'rate'],
)

line_plot = alt.Chart(source).mark_line(color='black', strokeDash=[3, 8]).encode(
    x=alt.X('x_linear', title=''),
    y=alt.Y('y_linear', title=''),
    shape=alt.Shape('line-label', title='Break Even'),
    color=alt.value('black'),
)

rate = alt.Chart(source).mark_line(strokeDash=[5, 3]).encode(
    x=alt.X('X', axis=None, title='X'),
    y=alt.Y('rate:Q'),
    color=alt.Color('rate-label'),
    tooltip=['rate', 'X', 'Y'],
)

layered = alt.layer(scatter, line_plot, rate)
layered.facet(
    'Series:N'
).properties(
    columns=2
).resolve_scale(
    x='independent',
    y='independent'
).display()
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.