[英]Problem with missing values in Altair or Plotly choropleth map
我有美國幾個州的數據,其他州的值為 Null。 在創建地圖時,我想把有缺失值的州也著色並加上文字標註,但一直找不到正確的方法。 使用我當前的代碼,我無法得到包含 Null 值州在內的完整美國地圖,只有被賦予具體數值的州才會顯示出來。 我還查看了之前發布的問題並嘗試對地圖進行分層,但這會報錯。 這是 cc_df 的樣子:
這是我的代碼:
# import the required library
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt# import seaborn library
%matplotlib inline
import altair as alt
from vega_datasets import data
# --- State reference table -------------------------------------------------
# Pipe-delimited Census file. Every column is read as a string (FIPS codes
# included), the file's own header row is replaced by the names below, and
# the StateENS column is left out.
state_table_url = 'https://www2.census.gov/geo/docs/reference/state.txt'
states_df = pd.read_csv(
    state_table_url,
    sep="|",
    dtype='str',
    header=0,
    names=['state_fips', 'state', 'state_name', 'StateENS'],
    usecols=['state_fips', 'state_name', 'state'],
)
states_df = states_df.set_index('state')
# Integer copy of the FIPS code; this is the key the chart's lookup uses.
states_df['id'] = states_df['state_fips'].astype(int)

# --- Data to map -----------------------------------------------------------
# One mean CWAD value per state abbreviation.
cwad_rows = pd.read_csv('hv_cwad.csv', usecols=['state', 'CWAD'])
cc_df = cwad_rows.groupby('state').mean()

# --- Combine ---------------------------------------------------------------
# DataFrame.join defaults to a left join on the index, so every row of
# states_df is kept and states absent from cc_df get NaN for CWAD.
cc_state_df = states_df.join(cc_df)
cc_state_df = cc_state_df.reset_index()
# %%
# --- Map -------------------------------------------------------------------
states = alt.topo_feature(data.us_10m.url, 'states')
variable_list = ['CWAD']

# Pieces of the chart, named so each step can be read on its own.
state_values = alt.LookupData(cc_state_df, 'id', variable_list)
fill = alt.Color(alt.repeat('row'), type='quantitative')

state_map = alt.Chart(states).mark_geoshape(stroke='lightgrey', strokeWidth=.5)
state_map = state_map.encode(fill)
state_map = state_map.transform_lookup(lookup='id', from_=state_values)
state_map = state_map.properties(width=300, height=300)
state_map = state_map.project(type='albersUsa')

# One row per variable, each with its own colour scale.
repeated = state_map.repeat(row=variable_list).resolve_scale(color='independent')
# Bare expression so the notebook renders the chart.
repeated
輸出看起來像這樣:
沒有數據的州是 NA,所以我們需要用 fillna(0, inplace=True)
更新它們。 如果要保留 NA,則需要再做一些研究,為它們設置一種特殊的顏色。
import io

import pandas as pd
import matplotlib.pyplot as plt
import altair as alt
from vega_datasets import data

# State reference table from the Census Bureau: pipe-delimited, every column
# read as a string, the file's header row replaced by our own column names,
# and the StateENS column left out.
states_df = pd.read_csv(
    'https://www2.census.gov/geo/docs/reference/state.txt',
    sep="|",
    dtype='str',
    header=0,
    names=['state_fips', 'state', 'state_name', 'StateENS'],
    usecols=['state_fips', 'state_name', 'state'],
).set_index('state')
# Integer copy of the FIPS code; this is the key the chart's lookup uses.
states_df['id'] = states_df['state_fips'].astype(int)

# Inline sample standing in for the real CSV file.
# It must NOT be called `data`: that name is the vega_datasets loader used
# below for `data.us_10m.url`, and rebinding it to a string makes that
# attribute access fail.
csv_text = '''
state CWAD
AR 377.715148
FL 6560.929494
GA 1958.122132
IA 0.409179
KS 63.706671
'''
# sep=r'\s+' is the documented replacement for the deprecated
# delim_whitespace=True.
cc_df = pd.read_csv(io.StringIO(csv_text), sep=r'\s+')
# With the real file instead:
#   cc_df = pd.read_csv('hv_cwad.csv', usecols=['state', 'CWAD'])

# One mean CWAD value per state abbreviation.
cc_df = cc_df.groupby('state').mean()

# Left join on the state abbreviation keeps every state; states without a
# value get NaN, which is then replaced by 0 so they are drawn on the map.
cc_state_df = states_df.join(cc_df).reset_index()
cc_state_df.fillna(0, inplace=True)

# Create the map
states = alt.topo_feature(data.us_10m.url, 'states')
variable_list = ['CWAD']
alt.Chart(states).mark_geoshape(
    stroke='lightgrey',
    strokeWidth=.5
).encode(
    alt.Color(
        alt.repeat('row'), type='quantitative')
).transform_lookup(
    lookup='id',
    from_=alt.LookupData(
        cc_state_df,
        'id',
        variable_list)
).properties(
    width=300,
    height=300
).project(
    type='albersUsa'
).repeat(
    row=variable_list
).resolve_scale(
    color='independent'
)
我能夠使用 Plotly(而不是 Altair)把缺失數據的州也繪製出來。 如果您並非一定要使用 Altair,這可能會有所幫助。
導入所需的庫:
import plotly.graph_objects as go
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# %matplotlib inline   <- IPython/Jupyter magic; uncomment only inside a notebook
import altair as alt
from vega_datasets import data

# State reference table from the Census Bureau: pipe-delimited, every column
# read as a string, the file's header row replaced by our own column names,
# and the StateENS column left out.
states_df = pd.read_csv(
    'https://www2.census.gov/geo/docs/reference/state.txt',
    sep="|",
    dtype='str',
    header=0,
    names=['state_fips', 'state', 'state_name', 'StateENS'],
    usecols=['state_fips', 'state_name', 'state'],
).set_index('state')
states_df['id'] = states_df['state_fips'].astype(int)

# The data to map. The column must be 'CWAD' because that is the column the
# figure reads below.
# NOTE(review): the question reads 'hv_cwad.csv'; adjust the path to your file.
cc_df = pd.read_csv('cwad_hv.csv',
                    usecols=['state', 'CWAD'])
# One mean CWAD value per state abbreviation.
cc_df = cc_df.groupby('state').mean()

# Left join on the state abbreviation so every state is kept; states without
# data get NaN. Do not use pd.merge(cc_df, states_df) here.
cc_state_df = states_df.join(cc_df).reset_index()
# Deliberately no fillna(0): that would turn "no data" into a real zero, which
# is misleading when the data can range from negative to positive.

fig = go.Figure(data=go.Choropleth(
    locations=cc_state_df['state'],        # two-letter state abbreviations
    z=cc_state_df['CWAD'].astype(float),   # value that drives the colour
    locationmode='USA-states',
    autocolorscale=True,
    marker_line_color='black',             # border lines between states
    colorbar_title="CWAD kg/ha"
))
fig.update_layout(
    title_text='CWAD',
    geo=dict(
        scope='usa',
        projection=go.layout.geo.Projection(type='albers usa'),
        showlakes=False,                   # do not draw lakes
        lakecolor='rgb(255, 255, 255)'),
)
fig.show()
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.