簡體   English   中英

如何在 R 中創建計數表?

[英]How to create a count table in R?

我有一個如下所示的數據框,其中包含每個站點的體長和觀察到的物種 A 和 B 的數量

> set.seed(10)
> df <- data.frame(
+   species = c(rep("A",4), rep("B",4)),
+   station = rep(1:2, 4),
+   length = round(rnorm(8, 15, 2)),
+   count = round(rnorm(8, 5, 2))
+ )
> df
  species station length count
1       A       1     15     2
2       A       2     15     4
3       A       1     12     7
4       A       2     14     7
5       B       1     16     5
6       B       2     16     7
7       B       1     13     6
8       B       2     14     5

我想做的就是把它轉換成像下面這樣、以 2cm 為組距(bin)分組的表格。 但我該怎么做呢?

> cnt_table <- data.frame(
+   species = c("A","A","B","B"),
+   station = c(1,2,1,2),
+   L11_12 = c(0,0,0,0),
+   L13_14 = c(7,7,6,5),
+   L15_16 = c(2,4,5,7),
+   L17_18 = c(0,0,0,0),
+   L19_20 = c(0,0,0,0)
+ )
> cnt_table
  species station L11_12 L13_14 L15_16 L17_18 L19_20
1       A       1      0      7      2      0      0
2       A       2      0      7      4      0      0
3       B       1      0      6      5      0      0
4       B       2      0      5      7      0      0

首先,您需要使用cut對長度進行分類。 然后您可以使用complete將缺失的計數填充為 0。然后, group_by物種、站和 bin 並使用summarize添加每個組的計數。 最后,使用pivot_wider制作 bins 列標簽。

注意 1:結果與您的預期輸出不同,但我認為您有錯字。

注2:我不知道是否需要分組和求和。 在您的示例中不是,但從邏輯上講,我會包括它。

library(tidyverse)

# Reproducible example data: length and count observations for species A and B
# at stations 1 and 2.
set.seed(10)
df <- data.frame(
  species = rep(c("A", "B"), each = 4),
  station = rep(1:2, times = 4),
  length = round(rnorm(8, mean = 15, sd = 2)),
  count = round(rnorm(8, mean = 5, sd = 2))
)
df


#---------------------
# 1. Bin the lengths into 2 cm classes (10.5, 12.5], (12.5, 14.5], ...
# 2. Add the species/station/bin combinations that do not occur, with count 0.
# 3. Sum the counts within each species/station/bin group.
# 4. Spread the bins into one column each.
df |>
  mutate(
    length = cut(
      length,
      breaks = seq(10.5, 20.5, by = 2),
      labels = paste0("L", seq(11, 19, by = 2), "_", seq(12, 20, by = 2))
    )
  ) |>
  complete(species, station, length, fill = list(count = 0)) |>
  group_by(species, station, length) |>
  summarise(count = sum(count)) |>
  pivot_wider(names_from = length, values_from = count)


#---------------------
# A tibble: 4 x 7
# Groups:   species, station [4]
  species station L11_12 L13_14 L15_16 L17_18 L19_20
  <chr>     <int>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
1 A             1      7      0      2      0      0
2 A             2      0      7      4      0      0
3 B             1      0      6      5      0      0
4 B             2      0      5      7      0      0

應該可以很好地擴展的data.table方法

library(data.table)
# Lookup table: one row per 2 cm bin, holding the bin label and its inclusive
# lower (`from`) and upper (`to`) length bounds.
lookup <- data.table(
  name = paste0("L", seq(11, 19, by = 2), "_", seq(12, 20, by = 2)),
  from = seq(11, 19, by = 2),
  to = seq(12, 20, by = 2)
)
lookup
# Convert df to a data.table by reference, then use a non-equi update join to
# tag every row with the label of the bin whose [from, to] range holds its length.
setDT(df)
df[lookup, on = .(length >= from, length <= to), name := i.name]
# Spread to wide format, summing the counts per species/station/bin.
dcast(df, species + station ~ name, value.var = "count", fun.aggregate = sum)
#    species station L11_12 L13_14 L15_16
# 1:       A       1      7      0      2
# 2:       A       2      0      7      4
# 3:       B       1      0      6      5
# 4:       B       2      0      5      7

您可以執行以下操作:


library(dplyr)
library(tidyr)

# Create the bins
# Use left-closed 2 cm classes [11, 13), [13, 15), ... so that the bin labelled
# "L11_12" really holds the lengths 11 and 12 (the default right-closed
# intervals would put 13 into "L11_12" and 15 into "L13_14").
# The upper limit is max + 2 so the largest length always falls inside the
# last class.
breaks <- seq(from = min(df$length) - 1, to = max(df$length) + 2, by = 2)
lower <- head(breaks, -1)
df$bin <- cut(
  df$length,
  breaks = breaks,
  right = FALSE,
  labels = paste0("L", lower, "_", lower + 1)
)

# Get the data frame into shape
# Sum the observed counts (not the number of rows) per species/station/bin,
# then spread the bins into columns, filling absent combinations with 0.
df |>
  group_by(species, station, bin) |>
  summarise(n = sum(count), .groups = "drop") |>
  pivot_wider(
    names_from = bin,
    values_from = n,
    values_fill = list(n = 0),
    names_sort = TRUE
  )

# A tibble: 4 x 5
#   species station L11_12 L13_14 L15_16
#   <chr>     <int>  <dbl>  <dbl>  <dbl>
# 1 A             1      7      0      2
# 2 A             2      0      7      4
# 3 B             1      0      6      5
# 4 B             2      0      5      7

定義您想要的范圍r ,即11:20 ,如 OP 所示(長度應該是偶數!)。 然后將df$length與輔助矩陣m匹配以獲取 bin,用aggregate按組計算sum,並重塑為寬格式,例如使用reshape2::dcast

# Range of lengths to tabulate; each row of `m` holds the two integer lengths
# of one 2 cm bin (11-12, 13-14, ..., 19-20), so `r` needs an even length.
r <- 11:20
m <- matrix(r, ncol = 2, byrow = TRUE)

# For every length, find the row of `m` that contains it; that row index is
# the bin. vapply() enforces exactly one match per length, so a length outside
# `r` fails loudly. The factor levels are the row indices of `m` (the original
# referenced an undefined `x` here), which keeps empty bins as levels so that
# dcast(drop = FALSE) emits them as zero columns.
transform(df, bin = factor(vapply(df$length, \(z) which(rowSums(z == m) > 0), integer(1)),
                           levels = seq_len(nrow(m)),
                           labels = apply(m, 1, paste, collapse = "_"))) |>
  aggregate(count ~ bin + species + station, sum) |>
  reshape2::dcast(species + station ~ bin, value.var = "count", drop = FALSE, fill = 0)
#   species station 11_12 13_14 15_16 17_18 19_20
# 1       A       1     7     0     2     0     0
# 2       A       2     0     7     4     0     0
# 3       B       1     0     6     5     0     0
# 4       B       2     0     5     7     0     0

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM