繁体   English   中英

如何重塑R中的复杂数据框?

[英]How to reshape a complicated data frame in R?

我有一个复杂的 dataframe,我试图重塑它。

这是我拥有的数据框类型的示例:

names <- c("var1", 'var2', "split")
values <- rnorm(8)
from <- data.frame(a = rep(1, 10),
           b = c(rep(1,3), rep(2, 7)),
           c = c(names, names, rep("split", 4)),
           d = c(rep("NA", 5), names, rep("split", 2)),
           e = c(rep("NA", 7), names),
           f = c(values[1:2], "NA", values[3:8], "NA"))

这会产生如下所示的内容:

> from
   a b     c     d     e                   f
1  1 1  var1    NA    NA  -0.271930473373158
2  1 1  var2    NA    NA -0.0968100775823158
3  1 1 split    NA    NA                  NA
4  1 2  var1    NA    NA   -1.73919094720254
5  1 2  var2    NA    NA   -0.52398152119997
6  1 2 split  var1    NA   0.856367467674763
7  1 2 split  var2    NA  -0.729762707907525
8  1 2 split split  var1   0.561460771889416
9  1 2 split split  var2  0.0432022687633195
10 1 2 split split split                  NA

在我的数据框中from我想将var1var2转换为列。 然后使用from列中的值作为对应于fvar2var1 (按行读取)。

换句话说,我正在尝试将这个数据框重塑为如下所示:

   > out
  a b       var1        var2
1 1 1 -0.2719305 -0.09681008
2 1 2 -1.7391909 -0.52398152
3 1 2  0.8563675 -0.72976271
4 1 2  0.5614608  0.04320227

关于我如何做到这一点的任何建议?

这可以通过耦合一系列逻辑操作来获取from$f中的值来实现

data.frame( a=from$a[rowSums(from == "var1", na.rm=T) == 1], 
            b=from$b[rowSums(from == "var1", na.rm=T) == 1], 
            var1=from$f[rowSums(from == "var1", na.rm=T) == 1], 
            var2=from$f[rowSums(from == "var2", na.rm=T) == 1] )
  a b       var1        var2
1 1 1 -0.2719305 -0.09681008
2 1 2 -1.7391909 -0.52398152
3 1 2  0.8563675 -0.72976271
4 1 2  0.5614608  0.04320227

这是一次旋转的解决方案:

library(dplyr)
library(tidyr)
library(stringr)

from %>%
  type.convert(as.is = TRUE) %>% 
  filter(!is.na(f)) %>% 
  mutate(name = str_extract_all(paste(c,d,e), 'var(.)')) %>% 
  select(a, b, f, name) %>% 
  pivot_wider(
    names_from = name,
    values_from = f,
    values_fn = list
    ) %>% 
  unnest(cols = c(var1, var2))
      a     b   var1    var2
  <int> <int>  <dbl>   <dbl>
1     1     1 -0.272 -0.0968
2     1     2 -1.74  -0.524 
3     1     2  0.856 -0.730 
4     1     2  0.561  0.0432

我们可以使用pivot_longer重塑为“long”,删除NA元素并通过保留“var”元素进行filter ,然后使用pivot_wider返回“wide”

library(dplyr)
library(tidyr)
library(stringr)
library(data.table)
from %>%
   type.convert(as.is = TRUE) %>%
   pivot_longer(cols = c:e, values_drop_na = TRUE) %>% 
   filter(str_detect(value, 'var')) %>% 
   select(-name) %>%
   mutate(rn = rowid(a, b, value)) %>% 
   pivot_wider(names_from = value, values_from = f) %>% 
   select(-rn)

-输出

# A tibble: 4 × 4
      a     b   var1    var2
  <int> <int>  <dbl>   <dbl>
1     1     1 -0.272 -0.0968
2     1     2 -1.74  -0.524 
3     1     2  0.856 -0.730 
4     1     2  0.561  0.0432

数据

from <- structure(list(a = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), 
    b = c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), c = c("var1", 
    "var2", "split", "var1", "var2", "split", "split", "split", 
    "split", "split"), d = c("NA", "NA", "NA", "NA", "NA", "var1", 
    "var2", "split", "split", "split"), e = c("NA", "NA", "NA", 
    "NA", "NA", "NA", "NA", "var1", "var2", "split"), f = c("-0.271930473373158", 
    "-0.0968100775823158", "NA", "-1.73919094720254", "-0.52398152119997", 
    "0.856367467674763", "-0.729762707907525", "0.561460771889416", 
    "0.0432022687633195", "NA")), row.names = c("1", "2", "3", 
"4", "5", "6", "7", "8", "9", "10"), class = "data.frame")

这个概念是有一个row_number突变:

library(dplyr)
library(tidyr)

from %>%
  type.convert(as.is = TRUE) %>%
  filter(!is.na(f)) %>%
  group_by(name = invoke(coalesce, across(c:e, na_if, 'split')))%>%
  mutate(id = row_number()) %>%
  pivot_wider(c(a, b, id), values_from = f) %>%
  select(-id)

# A tibble: 4 x 4
      a     b   var1    var2
  <int> <int>  <dbl>   <dbl>
1     1     1 -0.272 -0.0968
2     1     2 -1.74  -0.524 
3     1     2  0.856 -0.730 
4     1     2  0.561  0.0432

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM