I've got some satellite data from which I am attempting to extract some features. make_gridspace (make_gs in the code below)
& make_features
are just used to construct some sample data. The satellite data is just a brightness intensity on a 0-255 scale.
The feature data has the bounds
minx, maxx, miny, maxy
and some other values that aren't generated here. The feature data is non-gridded, so the min_diff
function finds the nearest grid point for each bound.
Extract values from the gridspace (satellite data) within the features bounds.
I was unable to come up with a vectorized solution to extract the features from the gridspace.
How can I vectorize the list comprehension result at the end of the main
function?
I removed the code to randomly generate feature data, per request.
import numpy as np
import pandas as pd
# Helper object for building multi-level slicers to pass to ``.loc``.
idx = pd.IndexSlice
# Shorthand for NumPy's integer sampler; its upper bound is exclusive.
random = np.random.randint
def make_gs(x1, y1, x2, y2, x_size, y_size):
    """Build a random sample grid indexed by ``(lat, lon)``.

    Longitudes are ``x_size`` evenly spaced float32 values running from
    ``x1`` to ``x2``; latitudes are ``y_size`` evenly spaced float32 values
    running from ``y1`` to ``y2``.  Every (lat, lon) pair receives a random
    ``wv`` and ``ir`` brightness value in the inclusive range 0-255.

    Returns:
        A DataFrame with the columns ``wv`` and ``ir`` and a ``(lat, lon)``
        MultiIndex sorted on both levels.
    """
    lon = pd.Index(np.linspace(x1, x2, x_size, dtype=np.float32), name="lon")
    lat = pd.Index(np.linspace(y1, y2, y_size, dtype=np.float32), name="lat")
    # Enumerate every (lon, lat) pair directly rather than unstacking an
    # all-NaN frame just to recover its index.
    grd = pd.MultiIndex.from_product(
        [lon, lat], names=["lon", "lat"]
    ).to_frame(index=False)
    # randint's upper bound is exclusive, so 256 is required to reach 255.
    grd["wv"] = np.random.randint(0, 256, size=len(grd))
    grd["ir"] = np.random.randint(0, 256, size=len(grd))
    return grd.set_index(["lat", "lon"]).sort_index(level=["lat", "lon"])
def min_diff(t, v):
    """Snap every value in ``v`` to its closest entry of ``t``.

    Args:
        t: 1-D array of candidate values (e.g. grid coordinates).
        v: Values to snap; broadcast against ``t`` along a new trailing axis.

    Returns:
        The entries of ``t`` with the smallest absolute difference to each
        value of ``v``.  On a tie the entry appearing first in ``t`` wins.
    """
    # For 1-D ``v`` this is a len(t) x len(v) distance table.
    distances = np.abs(t[:, None] - v)
    nearest = distances.argmin(axis=0)
    return t[nearest]
def main():
    """Build sample data and extract the grid cells inside each feature box.

    A random grid is generated with ``make_gs``, a repeated set of three
    bounding boxes stands in for the feature data, and every box edge is
    snapped to the nearest grid coordinate with ``min_diff``.

    Returns:
        A list with one DataFrame per feature (first 100 features only),
        each holding the grid rows whose (lat, lon) fall inside that
        feature's snapped bounds.
    """
    gs = make_gs(-129, 54, -60, 20, x_size=972, y_size=635)
    lat, lon = (gs.index.unique(crd).to_numpy() for crd in ("lat", "lon"))
    n_features = 76_020
    f = pd.DataFrame(
        [
            {"minx": -126, "maxx": -68, "miny": 37, "maxy": 39},
            {"minx": -91, "maxx": -70, "miny": 31, "maxy": 37},
            {"minx": -124, "maxx": -64, "miny": 24, "maxy": 26},
        ]
        * (n_features // 3)
    )
    # x bounds are snapped against longitudes, y bounds against latitudes, so
    # each row of ``arr`` is (lon_min, lon_max, lat_min, lat_max).
    params = [(lon, "minx"), (lon, "maxx"), (lat, "miny"), (lat, "maxy")]
    arr = np.array([min_diff(a, f[col].to_numpy()) for a, col in params]).T
    # How can this loop be vectorized
    # ``gs`` is indexed (lat, lon): the latitude (y) slice must come first and
    # the longitude (x) slice second.  The trailing ``:`` selects all columns.
    return [gs.loc[idx[y1:y2, x1:x2], :] for x1, x2, y1, y2 in arr[:100]]


if __name__ == "__main__":
    main()
I changed the min_diff
function to return the index rather than the indexed values, and ended up writing a Cython
implementation which runs in a couple of seconds.
app/_api.pyx
import numpy as np
cimport numpy as np
def extract_features_from_grid(
    np.ndarray[np.float32_t, ndim=2] arr,
    np.ndarray[np.long_t, ndim=1] min_lon,
    np.ndarray[np.long_t, ndim=1] max_lon,
    np.ndarray[np.long_t, ndim=1] min_lat,
    np.ndarray[np.long_t, ndim=1] max_lat
):
    """Cut one rectangular window out of ``arr`` per feature.

    The first axis of ``arr`` is sliced with the latitude index pair and the
    second axis with the longitude index pair; stop indices are exclusive,
    as with any Python slice.

    Returns a list with one 2-D slice of ``arr`` per row of bounds.
    """
    # NOTE(review): np.long_t is C ``long``; confirm it matches the dtype of
    # the index arrays on every target platform.
    bounds = np.column_stack((min_lat, max_lat, min_lon, max_lon))
    windows = []
    for row_start, row_stop, col_start, col_stop in bounds:
        windows.append(arr[row_start:row_stop, col_start:col_stop])
    return windows
and in main
...
from app import _api
# Shape tags used in the array annotations below: D1 wraps a 1-tuple of ints
# and D2 a 2-tuple of ints.
# NOTE(review): NewType is not imported in the visible snippet; presumably it
# comes from ``typing`` in the elided part -- confirm.
D1 = NewType("D1", tuple[int])
D2 = NewType("D2", tuple[int, int])
def extract_features(
    arr: np.ndarray[D2, np.dtype[np.float32]],
    min_lat: np.ndarray[D1, np.dtype[np.signedinteger]],
    max_lat: np.ndarray[D1, np.dtype[np.signedinteger]],
    min_lon: np.ndarray[D1, np.dtype[np.signedinteger]],
    max_lon: np.ndarray[D1, np.dtype[np.signedinteger]],
) -> list[np.ndarray[D2, np.dtype[np.float32]]]:
    """Return one 2-D window of ``arr`` per feature.

    Thin typed wrapper around ``_api.extract_features_from_grid``.  The
    annotations mirror what that compiled function declares: a 2-D float32
    grid and 1-D signed integer index arrays.

    Args:
        arr: 2-D grid of values to cut windows from.
        min_lat: Start index of each window, latitude bound.
        max_lat: Stop index of each window, latitude bound.
        min_lon: Start index of each window, longitude bound.
        max_lon: Stop index of each window, longitude bound.

    Returns:
        A list with one 2-D array per feature.
    """
    # The compiled function takes the longitude pair before the latitude
    # pair, so the arguments are reordered here.
    return _api.extract_features_from_grid(arr, min_lon, max_lon, min_lat, max_lat)
# extract_features declares the latitude bounds before the longitude bounds.
# Passing the arrays by keyword keeps each one bound to the parameter of the
# same name instead of relying on positional order.
features["water_vapor"] = extract_features(
    # Select the column first so only that one variable is pivoted to a
    # lat x lon table.
    gs["water_vapor"].unstack("lon").to_numpy(dtype=np.float32),
    min_lat=min_lat,
    max_lat=max_lat,
    min_lon=min_lon,
    max_lon=max_lon,
)
features["long_wave_ir"] = extract_features(
    gs["long_wave_ir"].unstack("lon").to_numpy(dtype=np.float32),
    min_lat=min_lat,
    max_lat=max_lat,
    min_lon=min_lon,
    max_lon=max_lon,
)
minx maxx miny maxy water_vapor long_wave_ir
0 -88 -64 30 31 [[83.0, 100.0, 194.0, 150.0, 104.0, 130.0, 121... [[79.0, 146.0, 97.0, 7.0, 227.0, 233.0, 145.0,...
1 -112 -76 21 53 [[60.0, 90.0, 71.0, 31.0, 209.0, 97.0, 27.0, 9... [[131.0, 74.0, 141.0, 49.0, 194.0, 148.0, 238....
2 -127 -89 28 35 [[151.0, 135.0, 145.0, 181.0, 103.0, 44.0, 192... [[20.0, 137.0, 35.0, 153.0, 217.0, 177.0, 234....
3 -98 -64 30 45 [[188.0, 36.0, 174.0, 192.0, 164.0, 150.0, 124... [[226.0, 247.0, 17.0, 164.0, 134.0, 150.0, 117...
4 -111 -102 30 42 [[247.0, 215.0, 45.0, 82.0, 71.0, 90.0, 147.0,... [[225.0, 67.0, 191.0, 25.0, 180.0, 42.0, 109.0...
... ... ... ... ... ... ...
76015 -90 -88 23 39 [[73.0, 61.0, 81.0, 5.0, 25.0, 106.0, 22.0, 82... [[109.0, 214.0, 97.0, 199.0, 111.0, 26.0, 25.0...
76016 -127 -99 35 37 [[14.0, 58.0, 55.0, 109.0, 98.0, 24.0, 105.0, ... [[174.0, 60.0, 126.0, 42.0, 21.0, 0.0, 235.0, ...
76017 -125 -115 40 42 [[29.0, 2.0, 213.0, 176.0, 207.0, 22.0, 26.0, ... [[86.0, 134.0, 161.0, 65.0, 185.0, 99.0, 0.0, ...
76018 -117 -81 50 53 [[197.0, 47.0, 195.0, 176.0, 23.0, 77.0, 138.0... [[84.0, 50.0, 35.0, 13.0, 151.0, 180.0, 165.0,...
76019 -124 -79 25 45 [[114.0, 203.0, 183.0, 135.0, 140.0, 94.0, 203... [[172.0, 91.0, 180.0, 116.0, 21.0, 138.0, 184....
76020 rows × 6 columns
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.