Any suggestions for how to facet a dual-axis chart and then add a line at `y=x` to each chart using Altair? The challenge is that the line `y=x` needs to match the scale of the series-specific data displayed in each faceted chart.
Links:
Below is the code that reproduces the issue.
# Imports for the reproduction script. pandas is needed because the data
# preparation below calls pd.concat.
import altair as alt
import pandas as pd
from vega_datasets import data

# Work on a copy of the Anscombe dataset (the columns used are Series, X, Y).
source = data.anscombe().copy()
# Constant label column; the y=x line layer encodes it as colour.
source['line-label'] = 'x=y'
# Append the per-Series row-to-row differences of X and Y as x_diff / y_diff.
source = pd.concat(
    [
        source,
        source.groupby('Series').agg(x_diff=('X', 'diff'), y_diff=('Y', 'diff')),
    ],
    axis=1,
)
# Ratio of the Y difference to the X difference for each row.
source['rate'] = source.y_diff / source.x_diff
# Constant label column; the rate line layer encodes it as colour.
source['rate-label'] = 'rate-of-change'
# Shared base chart: no data is bound here (it is passed when layering) and X
# is encoded as an ordinal field on the horizontal axis.
base = alt.Chart().encode(x=alt.X('X:O'))

# Scatter of Y against X, coloured per Series with the category10 scheme.
scatter = base.mark_circle(size=60, opacity=0.30).encode(
    y=alt.Y('Y:Q'),
    color=alt.Color('Series:O', scale=alt.Scale(scheme='category10')),
    tooltip=['Series', 'X', 'Y'],
)

# Dashed black line meant to represent y=x.
# max(X) is used for BOTH channels on purpose so that x and y are equal, and
# the label column is encoded as colour so that it shows up in the legend.
line_x_equals_y = alt.Chart().mark_line(color='black', strokeDash=[3, 8]).encode(
    x=alt.X('max(X)', axis=None),
    y=alt.Y('max(X)', axis=None),
    color='line-label',
)

# Dashed line of the rate column against X, labelled through rate-label.
rate = base.mark_line(strokeDash=[5, 3]).encode(
    y='rate:Q',
    color='rate-label',
    tooltip=['rate', 'X', 'Y'],
)

# Layer the scatter and the rate line on the shared data frame.
scatter_rate = alt.layer(scatter, rate, data=source)
# Facet the scatter + rate layer by Series (two columns) and give every facet
# its own x and y axes. This chart does not include line_x_equals_y.
# The stray bare `line_x_equals_y` and leading `)` that preceded this call
# were not valid Python and have been removed.
scatter_rate.facet('Series', columns=2).resolve_axis(
    x='independent',
    y='independent',
)
# Add the y=x line as an extra layer on top of scatter_rate, then facet by
# Series with independent axes per facet.
alt.layer(scatter_rate, line_x_equals_y, data=source).facet(
    'Series', columns=2
).resolve_axis(x='independent', y='independent')

# Alternative without facet(): build one layered chart per Series value,
# filtered to that Series and with independent x/y scales between its layers.
chart_generator = (
    alt.layer(line_x_equals_y, scatter_rate, data=source, title=f"Series {val}")
    .transform_filter(alt.datum.Series == val)
    .resolve_scale(y='independent', x='independent')
    for val in source.Series.unique()
)
# Lay the per-Series charts out in a two-column grid.
alt.concat(*chart_generator, columns=2)
`scatter_rate` is a faceted (by series) dual-axis chart with separate scales appropriate for the range of values. The line `y=x` goes from (0,0) to the `y=max(X)` value of the individual chart.

You can do this by creating your layers as normal and calling the `facet()` method on the layer chart. The only requirement is that all layers share the same source data; there is no need to construct the facet manually, and there is no need for late data binding for facets in the current version of Altair:
import altair as alt
from vega_datasets import data
import pandas as pd
# --- Data preparation -------------------------------------------------------
source = data.anscombe().copy()
source['line-label'] = 'x=y'
# Per-Series row-to-row differences of X and Y, appended as x_diff / y_diff.
source = pd.concat(
    [
        source,
        source.groupby('Series').agg(x_diff=('X', 'diff'), y_diff=('Y', 'diff')),
    ],
    axis=1,
)
source['rate'] = source.y_diff / source.x_diff
source['rate-label'] = 'line y=x'

# Two end points per Series for the y=x line: (0, 0) and (max X, max X).
source_linear = (
    source.groupby(by=['Series'])
    .agg(x_linear=('X', 'max'), y_linear=('X', 'max'))
    .reset_index()
    .sort_values(by=['Series'])
)
source_origin = source_linear.assign(y_linear=0, x_linear=0)
source_linear = pd.concat([source_origin, source_linear]).sort_values(by=['Series'])
# Joining on Series pairs every data row with both end points of its Series,
# so all layers can share this single frame (each data row appears once per
# end point).
source = source.merge(source_linear, on='Series').drop_duplicates()

# --- Layers -----------------------------------------------------------------
# Scatter of Y against X, coloured by Series.
scatter = alt.Chart(source).mark_circle(size=60, opacity=0.60).encode(
    x=alt.X('X:Q'),
    y=alt.Y('Y:Q'),
    color=alt.Color('Series:N'),
    tooltip=['X', 'Y', 'rate'],
)

# Dashed line of the rate column against X.
rate = alt.Chart(source).mark_line(strokeDash=[5, 3]).encode(
    x=alt.X('X:Q'),
    y=alt.Y('rate:Q'),
    color=alt.Color('rate-label:N'),
)

# Dashed black line through the two end points; the shape channel gives it a
# legend entry titled 'Break Even'.
line_plot = alt.Chart(source).mark_line(color='black', strokeDash=[3, 8]).encode(
    x=alt.X('x_linear', title=''),
    y=alt.Y('y_linear', title=''),
    shape=alt.Shape('rate-label', title='Break Even'),
    color=alt.value('black'),
)

# --- Facet ------------------------------------------------------------------
# Layer first, then facet by Series in two columns with independent x and y
# scales per facet.
alt.layer(scatter, rate, line_plot).facet('Series:N', columns=2).resolve_scale(
    x='independent',
    y='independent',
)
This solution builds the desired line at `y=x`, scaled for the data on each chart; however, points are duplicated in the merge step, and I'm unsure how to add the dual-axis rate.
# --- Data preparation -------------------------------------------------------
source = data.anscombe().copy()
source['line-label'] = 'x=y'
# Per-Series row-to-row differences of X and Y, appended as x_diff / y_diff.
source = pd.concat(
    [
        source,
        source.groupby('Series').agg(x_diff=('X', 'diff'), y_diff=('Y', 'diff')),
    ],
    axis=1,
)
source['rate'] = source.y_diff / source.x_diff
source['rate-label'] = 'line y=x'

# Two end points per Series for the y=x line: (0, 0) and (max X, max X).
source_linear = (
    source.groupby(by=['Series'])
    .agg(x_linear=('X', 'max'), y_linear=('X', 'max'))
    .reset_index()
    .sort_values(by=['Series'])
)
source_origin = source_linear.assign(y_linear=0, x_linear=0)
source_linear = pd.concat([source_origin, source_linear]).sort_values(by=['Series'])
# Joining on Series pairs every data row with both end points of its Series
# (each data row appears once per end point).
source = source.merge(source_linear, on='Series').drop_duplicates()

# --- Layers (data is bound later, when layering) ----------------------------
scatter = alt.Chart().mark_circle(size=60, opacity=0.60).encode(
    x=alt.X('X', title='X'),
    y=alt.Y('Y', title='Y'),
    tooltip=['X', 'Y', 'rate'],
)

# Dashed black line through the two end points; the shape channel gives it a
# legend entry titled 'Break Even'.
line_plot = alt.Chart().mark_line(color='black', strokeDash=[3, 8]).encode(
    x=alt.X('x_linear', title=''),
    y=alt.Y('y_linear', title=''),
    shape=alt.Shape('rate-label', title='Break Even'),
    color=alt.value('black'),
)

# --- One chart per Series, concatenated in three columns --------------------
chart_generator = (
    alt.layer(
        scatter,
        line_plot,
        data=source,
        title=f"{val}: Duplicated Points w/ Line at Y=X",
    ).transform_filter(alt.datum.Series == val)
    for val in source.Series.unique()
)
chart = alt.concat(*chart_generator, columns=3)
chart.display()
This solution includes the rate, but isn't a dual-axis chart with `Y` on one axis and `rate` on the other.
import altair as alt
from vega_datasets import data
import pandas as pd
# --- Data preparation -------------------------------------------------------
source = data.anscombe().copy()
# Legend label for the y=x line. The original assigned 'x=y' here and then
# overwrote it with 'line y=x' before any use; only the final value is kept.
source['line-label'] = 'line y=x'
# Per-Series row-to-row differences of X and Y, appended as x_diff / y_diff.
source = pd.concat(
    [
        source,
        source.groupby('Series').agg(x_diff=('X', 'diff'), y_diff=('Y', 'diff')),
    ],
    axis=1,
)
source['rate'] = source.y_diff / source.x_diff
# Legend label for the rate line.
source['rate-label'] = 'rate of change'

# Two end points per Series for the y=x line: (0, 0) and (max X, max X).
source_linear = (
    source.groupby(by=['Series'])
    .agg(x_linear=('X', 'max'), y_linear=('X', 'max'))
    .reset_index()
    .sort_values(by=['Series'])
)
source_origin = source_linear.copy()
source_origin['y_linear'] = 0
source_origin['x_linear'] = 0
source_linear = pd.concat([source_origin, source_linear]).sort_values(by=['Series'])
# Joining on Series pairs every data row with both end points of its Series,
# so all layers can share this single frame (each data row appears once per
# end point).
source = source.merge(source_linear, on='Series').drop_duplicates()

# --- Layers -----------------------------------------------------------------
# Scatter of Y against X, coloured by Series.
scatter = alt.Chart(source).mark_circle(size=60, opacity=0.60).encode(
    x=alt.X('X', title='X'),
    y=alt.Y('Y', title='Y'),
    color='Series:N',
    tooltip=['X', 'Y', 'rate'],
)

# Dashed black line through the two end points; the shape channel gives it a
# legend entry titled 'Break Even'.
line_plot = alt.Chart(source).mark_line(color='black', strokeDash=[3, 8]).encode(
    x=alt.X('x_linear', title=''),
    y=alt.Y('y_linear', title=''),
    shape=alt.Shape('line-label', title='Break Even'),
    color=alt.value('black'),
)

# Dashed line of the rate column against X, labelled through rate-label.
rate = alt.Chart(source).mark_line(strokeDash=[5, 3]).encode(
    x=alt.X('X', axis=None, title='X'),
    y=alt.Y('rate:Q'),
    color=alt.Color('rate-label'),
    tooltip=['rate', 'X', 'Y'],
)

# --- Facet ------------------------------------------------------------------
# Layer first, then facet by Series in two columns with independent x and y
# scales per facet, and render the result.
alt.layer(scatter, line_plot, rate).facet(
    'Series:N'
).properties(
    columns=2
).resolve_scale(
    x='independent',
    y='independent'
).display()
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.