繁体   English   中英

在具有不同站点的数据集上运行 Mann Kendall

[英]Running Mann Kendall on data set with different sites

我使用一种格式的水质数据

Station_Name, Sample_Date, Value, Parameter, Units
F1001         2/04/2020   5.6    NO3        mg/L
F1001         4/05/2020   3.9    NO3        mg/L
F1001         8/06/2020   2.7    NO3        mg/L
F1002         6/03/2020   1.7    NO3        mg/L
F1002         3/04/2020   2.5    NO3        mg/L
F1003         1/03/2020   4.9    NO3        mg/L
F1003         5/04/2020   1.5    NO3        mg/L
F1003         6/05/2020   3.1    NO3        mg/L
F1004         4/05/2020   9.3    NO3        mg/L
F1004         5/06/2020   3.6    NO3        mg/L

excel 文件中还有 1000 多行数据。

我目前使用一个将数据收集到数据库中的程序。 在这个程序中,他们包括了 R-Console。 我需要做的是获取所有数据,在每个站(不是整个数据库)上运行 Mann-Kendall 测试并给我结果。 第 7,8 和 9 行是通过双击程序中的必填字段获得的,我很高兴它从我给数据库的数据中创建它。

package_list <- c("ggplot2", "Kendall", "dplyr")
lapply(package_list, require, character.only = TRUE)
library(Kendall)
library(dplyr)
library(purrr)

Station_Name = c(STATION[Name,StationGroup(-1)])
SampleDate = c(SAMPLE[SampleDate,StationGroup(-1)])
NO3 = c(PARAMETER[NO3,Unit(mg/L),StationGroup(-1)])

#Add it to a Data Frame
df = data.frame(Station_Name, NO3, SampleDate)


cv <- function(val) {sd(val)/mean(val)}

MK <- function(x){

MKfun <- possibly(Kendall::MannKendall, 
                list(tau  = NA_real_,
                     sl   = NA_real_,
                     S    = NA_real_,
                     D    = NA_real_,
                     varS = NA_real_))
out <- MKfun(x)
class(out) <- "list"
data.frame(out)

}

df %>%
group_by(Station_Name) %>% 
summarise(N     = n(),
        MEAN  = mean    (Value),
        STDEV = sd      (Value),
        CoV   = cv      (Value), 
        MAX   = max     (Value),
        Q75   = quantile(Value, 0.75), 
        Q50   = quantile(Value, 0.50), 
        Q25   = quantile(Value, 0.25), 
        MIN   = min     (Value),
        MK(Value))



print("")
print(df)

当我运行脚本时,它会显示 sl = NA_real_, 的状态错误:意外','(第 24 行)

> sessionInfo()
R version 3.6.3 (2020-02-29)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1

Matrix products: default

locale:
[1] LC_COLLATE=English_United Kingdom.1252  LC_CTYPE=English_United Kingdom.1252   
[3] LC_MONETARY=English_United Kingdom.1252 LC_NUMERIC=C                           
[5] LC_TIME=English_United Kingdom.1252    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] reshape2_1.4.4 purrr_0.3.3    dplyr_0.8.5    Kendall_2.2    readxl_1.3.1  

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.4       rstudioapi_0.11  magrittr_1.5     tidyselect_1.0.0 R6_2.4.1        
 [6] rlang_0.4.5      fansi_0.4.1      stringr_1.4.0    plyr_1.8.6       tools_3.6.3     
[11] utf8_1.1.4       cli_2.0.2        assertthat_0.2.1 tibble_2.1.3     crayon_1.3.4    
[16] vctrs_0.2.4      glue_1.3.2       stringi_1.4.6    compiler_3.6.3   pillar_1.4.3    
[21] cellranger_1.1.0 boot_1.3-24      pkgconfig_2.0.3 

您需要外部 function 吗? 否则你可以留下一个非常易读的 dplyr 代码..

数据:

df <- tibble::tribble(~Station_Name, ~Sample_Date, ~Value, ~Parameter, ~Units,
                      "F1001", "2/04/2020", 5.6, "NO3", "mg/L",
                      "F1001", "4/05/2020", 3.9, "NO3", "mg/L",
                      "F1001", "8/06/2020", 2.7, "NO3", "mg/L",
                      "F1002", "6/03/2020", 1.7, "NO3", "mg/L",
                      "F1002", "3/04/2020", 2.5, "NO3", "mg/L",
                      "F1003", "1/03/2020", 4.9, "NO3", "mg/L",
                      "F1003", "5/04/2020", 1.5, "NO3", "mg/L",
                      "F1003", "6/05/2020", 3.1, "NO3", "mg/L",
                      "F1004", "4/05/2020", 9.3, "NO3", "mg/L",
                      "F1004", "5/06/2020", 3.6, "NO3", "mg/L")

解决方案:


library(dplyr)

cv <- function(val) {sd(val)/mean(val)}

df %>%
  group_by(Station_Name) %>% 
  summarise(MEAN  = mean    (Value),
            STDEV = sd      (Value),
            CoV   = cv      (Value), 
            MAX   = max     (Value),
            Q75   = quantile(Value, 0.75), 
            Q50   = quantile(Value, 0.50), 
            Q25   = quantile(Value, 0.25), 
            MIN   = min     (Value))

# # A tibble: 4 x 9
#   Station_Name  MEAN STDEV   CoV   MAX   Q75   Q50   Q25   MIN
#   <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 F1001         4.07 1.46  0.358   5.6  4.75  3.9   3.3    2.7
# 2 F1002         2.1  0.566 0.269   2.5  2.3   2.1   1.9    1.7
# 3 F1003         3.17 1.70  0.537   4.9  4     3.1   2.3    1.5
# 4 F1004         6.45 4.03  0.625   9.3  7.88  6.45  5.03   3.6

*********** 编辑 ***************

对于曼肯德尔:

library(Kendall)
library(dplyr)
library(purrr)


MK <- function(x){
  
  # this provides a fallback in case of error
  MKfun <- possibly(Kendall::MannKendall, 
                    list(tau  = NA_real_,
                         sl   = NA_real_,
                         S    = NA_real_,
                         D    = NA_real_,
                         varS = NA_real_))
  out <- MKfun(x)

  # force to list and then to df to integrate it with the final tibble
  class(out) <- "list"
  data.frame(out)
  
}


df %>%
  group_by(Station_Name) %>% 
  summarise(MK(Value))

# # A tibble: 4 x 6
#   Station_Name    tau    sl     S     D  varS
#   <chr>         <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 F1001        -1         1    -3     3  3.67
# 2 F1002        NA        NA    NA    NA NA   
# 3 F1003        -0.333     1    -1     3  3.67
# 4 F1004        NA        NA    NA    NA NA   

您会看到一些 NA,因为只有 2 个观察值,无法计算 MK。 在您的数据中,您不应该有这个问题。 万一有退路。

如您所见,我保留了函数自动计算的所有统计数据,因为我不知道您是否只需要结果或 rest。


全部一起

在这里,我将 MannKendall 与您的统计数据放在一起。

library(Kendall)
library(dplyr)
library(purrr)

cv <- function(val) {sd(val)/mean(val)}

MK <- function(x){
  
  MKfun <- possibly(Kendall::MannKendall, 
                    list(tau  = NA_real_,
                         sl   = NA_real_,
                         S    = NA_real_,
                         D    = NA_real_,
                         varS = NA_real_))
  out <- MKfun(x)
  class(out) <- "list"
  data.frame(out)
  
}

df %>%
  group_by(Station_Name) %>% 
  summarise(N     = n(),
            MEAN  = mean    (Value),
            STDEV = sd      (Value),
            CoV   = cv      (Value), 
            MAX   = max     (Value),
            Q75   = quantile(Value, 0.75), 
            Q50   = quantile(Value, 0.50), 
            Q25   = quantile(Value, 0.25), 
            MIN   = min     (Value),
            MK(Value))


# # A tibble: 4 x 15
#   Station_Name     N  MEAN STDEV   CoV   MAX   Q75   Q50   Q25   MIN    tau    sl     S     D  varS
#   <chr>        <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 F1001            3  4.07 1.46  0.358   5.6  4.75  3.9   3.3    2.7 -1         1    -3     3  3.67
# 2 F1002            2  2.1  0.566 0.269   2.5  2.3   2.1   1.9    1.7 NA        NA    NA    NA NA   
# 3 F1003            3  3.17 1.70  0.537   4.9  4     3.1   2.3    1.5 -0.333     1    -1     3  3.67
# 4 F1004            2  6.45 4.03  0.625   9.3  7.88  6.45  5.03   3.6 NA        NA    NA    NA NA   

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM