[英]Export 3D climate data to a netCDF file in R
我在將網格化的每日氣候數據導出到 netCDF 時遇到問題。 我首先創建一些隨機數據,將其導出為 netCDF,然后重新打開它和 plot 導出前后的時間序列。 兩條線都應該匹配,但一條線始終是不變的。 我編寫 netCDF 文件的方式一定有一個錯誤,但我就是找不到解決方案,盡管我已經分析了大多數在線可用的示例。
library(ncdf4)
library(lubridate)
library(reshape2)
library(dplyr)
library(ggplot2)
# create some example data
dates = seq(as.Date("1950-01-01"), length.out = 365, by="1 day")
lon = seq(-180.00, 180.00, by = 10)
lat = seq(0,90, by = 10)
tmp_df <- expand.grid(date = dates, lon = lon, lat = lat)
tmp_df$t2m <- rnorm(15, 15, n = nrow(tmp_df)) + 272.15 # some random Kelvins
# export data as netCDF
xvals <- unique(tmp_df$lon)
nx <- length(xvals)
yvals <- unique(tmp_df$lat)
ny <- length(yvals)
lon1 <- ncdim_def("longitude", "degrees_east", xvals)
lat2 <- ncdim_def("latitude", "degrees_north", yvals)
time <-unique(tmp_df$date)
n_time <- length(time)
dimtime <- ncdim_def("time", "days since 1970-01-01 00:00:00", as.numeric(time),
unlim=T, calendar="gregorian")
# different sorting options, none of them works properly
vals <- dplyr::arrange(tmp_df, lon, lat, date)
# vals <- dplyr::arrange(tmp_df, date, lon, lat)
# vals <- dplyr::arrange(tmp_df, lat, lon, date)
vals <- vals$t2m
mv <- -999 #missing value to use
var_temp <- ncvar_def("t2m", "K", list(lon1, lat2, dimtime), longname = "test.nc", mv)
ncnew <- nc_create("test.nc", list(var_temp))
ncvar_put(ncnew, var_temp, as.array(vals), start=c(1,1,1), count=c(nx,ny,n_time))
nc_close(ncnew)
###############################################################################
# save data from one random grid point for the comparisson
ts1 <- dplyr::filter(tmp_df, lon < 1, lon > -1, lat < 1)
ts1$source <- 'initial'
##############################################################################
# Open the nc file
nc2 <- nc_open("test.nc")
# get longitude and latitude
lon <- ncvar_get(nc2,"longitude")
nlon <- dim(lon)
lat <- ncvar_get(nc2,"latitude")
nlat <- dim(lat)
time <- ncvar_get(nc2,"time")
tunits <- ncatt_get(nc2,"time","units")
nt <- dim(time)
tmp_array <- ncvar_get( nc2, "t2m", start= c(1, 1, 1),
count = c(nlon, nlat, nt))
tmp_vec_long <- as.vector(tmp_array)
tmp_mat <- matrix(tmp_vec_long, nrow=nlon*nlat, ncol=nt)
lonlat <- as.matrix(expand.grid(lon,lat))
tmp_df02 <- data.frame(cbind(lonlat,tmp_mat))
names(tmp_df02)[1:2] <- c("lon","lat")
colnames(tmp_df02)[3:ncol(tmp_df02)] <- time
tmp_df02 <- reshape2::melt(tmp_df02, id.vars = c("lon", "lat"))
# convert back to Date
tmp_df02$date <- as.Date(as.POSIXct(time*24*60*60, origin = "1970-01-01"))
tmp_df02$variable <- NULL
nc_close(nc2)
##############################################################################
# extract the same point data as before
ts2 <- dplyr::filter(tmp_df02, lon < 1, lon > -1, lat < 1)
ts2$source <- 'after'
# rbind now and before
ts <- rbind(ts1,rename(ts2, "t2m" = "value"))
# plot initial and after values: both lines should overlap
ggplot(ts, aes(x = date, y = t2m, col = source)) + geom_line()
我究竟做錯了什么?
我發現了問題:存在與 lon、lat 和 time 變量的排列有關的問題。 工作腳本發布在下面。
library(ncdf4)
library(lubridate)
library(reshape2)
library(dplyr)
library(ggplot2)
# create some example data
dates = seq(as.Date("1950-01-01"), length.out = 365, by="1 day")
lon = seq(-180.00, 180.00, by = 10)
lat = seq(0,90, by = 10)
tmp_df <- expand.grid(date = dates, lon = lon, lat = lat)
tmp_df$t2m <- rnorm(15, 15, n = nrow(tmp_df)) + 272.15 # some random Kelvins
# export data as netCDF
xvals <- unique(tmp_df$lon)
nx <- length(xvals)
yvals <- unique(tmp_df$lat)
ny <- length(yvals)
lon1 <- ncdim_def("longitude", "degrees_east", xvals)
lat2 <- ncdim_def("latitude", "degrees_north", yvals)
time <-unique(tmp_df$date)
n_time <- length(time)
dimtime <- ncdim_def("time", "days since 1970-01-01 00:00:00", as.numeric(time),
unlim=T, calendar="gregorian")
vals <- dplyr::arrange(tmp_df, date, lat, lon)
vals <- vals$t2m
mat1 <- array(vals, c(nx, ny, n_time));
mv <- -999 #missing value to use
var_temp <- ncvar_def("t2m", "K", list(lon1, lat2, dimtime), longname = "test.nc", mv)
ncnew <- nc_create("test.nc", list(var_temp))
ncvar_put(ncnew, var_temp, mat1, start=c(1,1,1), count=c(nx,ny,n_time))
nc_close(ncnew)
###############################################################################
# save data from one random grid point for the comparisson
ts1 <- dplyr::filter(tmp_df, lon < 1, lon > -1, lat < 1)
ts1$source <- 'initial'
##############################################################################
# Open the nc file
nc2 <- nc_open("test.nc")
# get longitude and latitude
lon <- ncvar_get(nc2,"longitude")
nlon <- dim(lon)
lat <- ncvar_get(nc2,"latitude")
nlat <- dim(lat)
time <- ncvar_get(nc2,"time")
tunits <- ncatt_get(nc2,"time","units")
nt <- dim(time)
tmp_array <- ncvar_get( nc2, "t2m", start= c(1, 1, 1),
count = c(nlon, nlat, nt))
tmp_vec_long <- as.vector(tmp_array)
tmp_df02 <- melt(mat1)
mat1 <- array(tmp_vec_long, c(nlon, nlat, nt));
tmp_mat <- matrix(tmp_vec_long, nrow=nlon*nlat, ncol=nt)
lonlat <- as.matrix(expand.grid(lon,lat))
tmp_df02 <- data.frame(cbind(lonlat,tmp_mat))
names(tmp_df02)[1:2] <- c("lon","lat")
colnames(tmp_df02)[3:ncol(tmp_df02)] <- time
tmp_df02 <- reshape2::melt(tmp_df02, id.vars = c("lon", "lat"))
colnames(tmp_df02)[1:3] <- c("lon", "lat", "date")
# convert back to Date
tmp_df02$date <- as.Date(as.POSIXct(as.numeric(as.character(tmp_df02$date))*24*60*60, origin = "1970-01-01"))
# tmp_df02$variable <- NULL
nc_close(nc2)
##############################################################################
# extract the same point data as before
ts2 <- dplyr::filter(tmp_df02, lon < 1, lon > -1, lat < 1)
ts2$source <- 'after'
# rbind now and before
ts <- rbind(ts1,rename(ts2, "t2m" = "value"))
# plot initial and after values: both lines should overlap
ggplot(ts, aes(x = date, y = t2m, col = source)) + geom_line()
# another test
dcast(ts, formula = date ~ source, value.var = "t2m")
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.