在具有不同站点的数据集上运行 Mann Kendall

Question

I work with water quality data in a format我使用一种格式的水质数据

Station_Name, Sample_Date, Value, Parameter, Units
F1001         2/04/2020   5.6    NO3        mg/L
F1001         4/05/2020   3.9    NO3        mg/L
F1001         8/06/2020   2.7    NO3        mg/L
F1002         6/03/2020   1.7    NO3        mg/L
F1002         3/04/2020   2.5    NO3        mg/L
F1003         1/03/2020   4.9    NO3        mg/L
F1003         5/04/2020   1.5    NO3        mg/L
F1003         6/05/2020   3.1    NO3        mg/L
F1004         4/05/2020   9.3    NO3        mg/L
F1004         5/06/2020   3.6    NO3        mg/L

There are 1000 more rows of data in an excel file. excel 文件中还有 1000 多行数据。

I currently use a program that gathers data into a database.我目前使用一个将数据收集到数据库中的程序。 Within this program they have included R-Console.在这个程序中，他们包括了 R-Console。 What I need to do is get all the data, run Mann-Kendall test on each Station (not whole database) and give me the results.我需要做的是获取所有数据，在每个站（不是整个数据库）上运行 Mann-Kendall 测试并给我结果。 Lines 7,8 and 9 are obtained by double clicking on the required fields within the program and im amusing it creates it from the data i have given the database.第 7,8 和 9 行是通过双击程序中的必填字段获得的，我很高兴它从我给数据库的数据中创建它。

package_list <- c("ggplot2", "Kendall", "dplyr")
lapply(package_list, require, character.only = TRUE)
library(Kendall)
library(dplyr)
library(purrr)

Station_Name = c(STATION[Name,StationGroup(-1)])
SampleDate = c(SAMPLE[SampleDate,StationGroup(-1)])
NO3 = c(PARAMETER[NO3,Unit(mg/L),StationGroup(-1)])

#Add it to a Data Frame
df = data.frame(Station_Name, NO3, SampleDate)


cv <- function(val) {sd(val)/mean(val)}

MK <- function(x){

MKfun <- possibly(Kendall::MannKendall, 
                list(tau  = NA_real_,
                     sl   = NA_real_,
                     S    = NA_real_,
                     D    = NA_real_,
                     varS = NA_real_))
out <- MKfun(x)
class(out) <- "list"
data.frame(out)

}

df %>%
group_by(Station_Name) %>% 
summarise(N     = n(),
        MEAN  = mean    (Value),
        STDEV = sd      (Value),
        CoV   = cv      (Value), 
        MAX   = max     (Value),
        Q75   = quantile(Value, 0.75), 
        Q50   = quantile(Value, 0.50), 
        Q25   = quantile(Value, 0.25), 
        MIN   = min     (Value),
        MK(Value))



print("")
print(df)

When i run the script it brings up Status Error for sl = NA_real_, : unexpected ',' (Line 24)当我运行脚本时，它会显示 sl = NA_real_, 的状态错误：意外'，'（第 24 行）

> sessionInfo()
R version 3.6.3 (2020-02-29)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1

Matrix products: default

locale:
[1] LC_COLLATE=English_United Kingdom.1252  LC_CTYPE=English_United Kingdom.1252   
[3] LC_MONETARY=English_United Kingdom.1252 LC_NUMERIC=C                           
[5] LC_TIME=English_United Kingdom.1252    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] reshape2_1.4.4 purrr_0.3.3    dplyr_0.8.5    Kendall_2.2    readxl_1.3.1  

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.4       rstudioapi_0.11  magrittr_1.5     tidyselect_1.0.0 R6_2.4.1        
 [6] rlang_0.4.5      fansi_0.4.1      stringr_1.4.0    plyr_1.8.6       tools_3.6.3     
[11] utf8_1.1.4       cli_2.0.2        assertthat_0.2.1 tibble_2.1.3     crayon_1.3.4    
[16] vctrs_0.2.4      glue_1.3.2       stringi_1.4.6    compiler_3.6.3   pillar_1.4.3    
[21] cellranger_1.1.0 boot_1.3-24      pkgconfig_2.0.3

Answer 1

Do you need an external function?您需要外部 function 吗？ Otherwise you can just leave a very readable dplyr code..否则你可以留下一个非常易读的 dplyr 代码..

Data:数据：

df <- tibble::tribble(~Station_Name, ~Sample_Date, ~Value, ~Parameter, ~Units,
                      "F1001", "2/04/2020", 5.6, "NO3", "mg/L",
                      "F1001", "4/05/2020", 3.9, "NO3", "mg/L",
                      "F1001", "8/06/2020", 2.7, "NO3", "mg/L",
                      "F1002", "6/03/2020", 1.7, "NO3", "mg/L",
                      "F1002", "3/04/2020", 2.5, "NO3", "mg/L",
                      "F1003", "1/03/2020", 4.9, "NO3", "mg/L",
                      "F1003", "5/04/2020", 1.5, "NO3", "mg/L",
                      "F1003", "6/05/2020", 3.1, "NO3", "mg/L",
                      "F1004", "4/05/2020", 9.3, "NO3", "mg/L",
                      "F1004", "5/06/2020", 3.6, "NO3", "mg/L")

Solution:解决方案：


library(dplyr)

cv <- function(val) {sd(val)/mean(val)}

df %>%
  group_by(Station_Name) %>% 
  summarise(MEAN  = mean    (Value),
            STDEV = sd      (Value),
            CoV   = cv      (Value), 
            MAX   = max     (Value),
            Q75   = quantile(Value, 0.75), 
            Q50   = quantile(Value, 0.50), 
            Q25   = quantile(Value, 0.25), 
            MIN   = min     (Value))

# # A tibble: 4 x 9
#   Station_Name  MEAN STDEV   CoV   MAX   Q75   Q50   Q25   MIN
#   <chr>        <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 F1001         4.07 1.46  0.358   5.6  4.75  3.9   3.3    2.7
# 2 F1002         2.1  0.566 0.269   2.5  2.3   2.1   1.9    1.7
# 3 F1003         3.17 1.70  0.537   4.9  4     3.1   2.3    1.5
# 4 F1004         6.45 4.03  0.625   9.3  7.88  6.45  5.03   3.6

*********** EDIT *************** *********** 编辑 ***************

For MannKendall:对于曼肯德尔：

library(Kendall)
library(dplyr)
library(purrr)


MK <- function(x){
  
  # this provides a fallback in case of error
  MKfun <- possibly(Kendall::MannKendall, 
                    list(tau  = NA_real_,
                         sl   = NA_real_,
                         S    = NA_real_,
                         D    = NA_real_,
                         varS = NA_real_))
  out <- MKfun(x)

  # force to list and then to df to integrate it with the final tibble
  class(out) <- "list"
  data.frame(out)
  
}


df %>%
  group_by(Station_Name) %>% 
  summarise(MK(Value))

# # A tibble: 4 x 6
#   Station_Name    tau    sl     S     D  varS
#   <chr>         <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 F1001        -1         1    -3     3  3.67
# 2 F1002        NA        NA    NA    NA NA   
# 3 F1003        -0.333     1    -1     3  3.67
# 4 F1004        NA        NA    NA    NA NA

You see some NAs because with only 2 observations, MK can't be calculated.您会看到一些 NA，因为只有 2 个观察值，无法计算 MK。 In your data you shouldn't have this problem.在您的数据中，您不应该有这个问题。 In case there is a fallback.万一有退路。

As you can see I kept all the stats that the functions automatically calculates because I didn't know whether you need just the result or the rest too.如您所见，我保留了函数自动计算的所有统计数据，因为我不知道您是否只需要结果或 rest。

ALL TOGETHER全部一起

Here I put MannKendall with your stats.在这里，我将 MannKendall 与您的统计数据放在一起。

library(Kendall)
library(dplyr)
library(purrr)

cv <- function(val) {sd(val)/mean(val)}

MK <- function(x){
  
  MKfun <- possibly(Kendall::MannKendall, 
                    list(tau  = NA_real_,
                         sl   = NA_real_,
                         S    = NA_real_,
                         D    = NA_real_,
                         varS = NA_real_))
  out <- MKfun(x)
  class(out) <- "list"
  data.frame(out)
  
}

df %>%
  group_by(Station_Name) %>% 
  summarise(N     = n(),
            MEAN  = mean    (Value),
            STDEV = sd      (Value),
            CoV   = cv      (Value), 
            MAX   = max     (Value),
            Q75   = quantile(Value, 0.75), 
            Q50   = quantile(Value, 0.50), 
            Q25   = quantile(Value, 0.25), 
            MIN   = min     (Value),
            MK(Value))


# # A tibble: 4 x 15
#   Station_Name     N  MEAN STDEV   CoV   MAX   Q75   Q50   Q25   MIN    tau    sl     S     D  varS
#   <chr>        <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 F1001            3  4.07 1.46  0.358   5.6  4.75  3.9   3.3    2.7 -1         1    -3     3  3.67
# 2 F1002            2  2.1  0.566 0.269   2.5  2.3   2.1   1.9    1.7 NA        NA    NA    NA NA   
# 3 F1003            3  3.17 1.70  0.537   4.9  4     3.1   2.3    1.5 -0.333     1    -1     3  3.67
# 4 F1004            2  6.45 4.03  0.625   9.3  7.88  6.45  5.03   3.6 NA        NA    NA    NA NA

在具有不同站点的数据集上运行 Mann Kendall

问题描述

1 个解决方案

解决方案1
0 已采纳 2020-07-31 13:03:39

在具有不同站点的数据集上运行 Mann Kendall

问题描述

1 个解决方案

解决方案1 0 已采纳 2020-07-31 13:03:39

解决方案1
0 已采纳 2020-07-31 13:03:39