[英]MatPlotLib plot does not have clear boundaries, weird data points
我正在嘗試映射數據,但地圖輸出沒有明確的邊界,並且顯示的數據不應該是連續的。 下面的第一張地圖使用的數據與第二張地圖的確切代碼相似,所以我不知道出了什么問題。 我想知道是否有辦法格式化代碼,使情節在風格上與第一個相似。
import matplotlib.pyplot as plt
f, ax = plt.subplots(1, figsize=(12,6))
ax = states_00_14.plot(column='num_fires', cmap='OrRd',
legend=True, ax=ax)
lims = plt.axis('equal')
f.suptitle('US Wildfire count per state in 2000-2014')
ax.set_axis_off()
我對 python 和 matplotlib 很陌生,所以我基本上不知道我做錯了什么。 如果相關,我正在使用 Jupyter Notebook。 提前致謝!
第一個是等值線圖,它可以通過空間連接(合並)狀態幾何數據與野火位置數據(例如https://geopandas.org/docs/reference/api/geopandas.sjoin.html )並使用 geopandas 繪制來生成( https://geopandas.org/docs/user_guide/mapping.html )。
第二張地圖是帶有圓形標記的散點圖,因此不清晰。
# spatial join of fire locations to states
state_fire = gdf_fire.loc[fire_mask, fire_cols].sjoin(
gdf2.loc[boundary_mask, boundary_cols]
)
edgecolor="black"
以便清楚地標記邊緣。import geopandas as gpd
import pandas as pd
import plotly.express as px
import requests
from pathlib import Path
from zipfile import ZipFile
import urllib
import requests
# get wild fire data..
# https://data-nifc.opendata.arcgis.com/datasets/wfigs-wildland-fire-locations-full-history/explore?showTable=true
gdf_fire = gpd.GeoDataFrame.from_features(
requests.get(
"https://opendata.arcgis.com/datasets/d8fdb82b3931403db3ef47744c825fbf_0.geojson"
).json()
)
# fmt: off
# download boundaries
url = "https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/110m/cultural/ne_110m_admin_1_states_provinces.zip"
f = Path.cwd().joinpath(urllib.parse.urlparse(url).path.split("/")[-1])
# fmt: on
if not f.exists():
r = requests.get(url, stream=True, headers={"User-Agent": "XY"})
with open(f, "wb") as fd:
for chunk in r.iter_content(chunk_size=128):
fd.write(chunk)
zfile = ZipFile(f)
zfile.extractall(f.stem)
# load downloaded boundaries
gdf2 = gpd.read_file(str(f.parent.joinpath(f.stem).joinpath(f"{f.stem}.shp")))
# a bit of cleanup, data types and CRS
gdf_fire["FireDiscoveryDateTime"] = pd.to_datetime(gdf_fire["FireDiscoveryDateTime"])
gdf_fire = gdf_fire.set_crs("EPSG:4326")
# filters, US states and fires with a date...
boundary_cols = ["adm1_code", "iso_3166_2", "iso_a2", "name", "geometry"]
boundary_mask = gdf2["iso_a2"].eq("US")
fire_cols = ["OBJECTID", "FireDiscoveryDateTime", "geometry"]
# fire_mask = gdf_fire["FireDiscoveryDateTime"].dt.year.between(2010,2012)
fire_mask = ~gdf_fire["FireDiscoveryDateTime"].isna()
# spatial join of fire locations to states
state_fire = gdf_fire.loc[fire_mask, fire_cols].sjoin(
gdf2.loc[boundary_mask, boundary_cols]
)
# summarize data by year and state
df_fires_by_year = (
state_fire.groupby(
boundary_cols[0:-1]
+ ["index_right", state_fire["FireDiscoveryDateTime"].dt.year],
as_index=False,
)
.size()
.sort_values(["FireDiscoveryDateTime", "index_right"])
)
# and finally visualize...
px.choropleth_mapbox(
df_fires_by_year,
geojson=gdf2.loc[boundary_mask, "geometry"].__geo_interface__,
locations="index_right",
color="size",
animation_frame="FireDiscoveryDateTime",
hover_name="name",
hover_data={"index_right": False},
color_continuous_scale="OrRd",
).update_layout(
mapbox={
"style": "carto-positron",
"zoom": 2,
"center": {"lat": 39.50, "lon": -98.35},
},
margin={"l": 0, "r": 0, "t": 0, "b": 0},
)
import matplotlib.pyplot as plt
# recreate dataframe in question. Exclude Alaska and Hawaii as they mess up boundaries...
# further aggregate the defined years....
states_00_14 = gpd.GeoDataFrame(
df_fires_by_year.loc[df_fires_by_year["FireDiscoveryDateTime"].between(2000, 2014)]
.groupby("index_right", as_index=False)
.agg({"size": "sum"})
.merge(
gdf2.loc[boundary_mask & ~gdf2["iso_3166_2"].isin(["US-AK","US-HI"])],
left_on="index_right",
right_index=True,
how="inner",
)
)
f, ax = plt.subplots(1, figsize=(12, 6))
ax = states_00_14.plot(column="size", cmap="OrRd", legend=True, ax=ax, edgecolor="black")
lims = plt.axis("equal")
f.suptitle("US Wildfire count per state in 2000-2014")
ax.set_axis_off()
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.