简体   繁体   中英

Apply a function to the columns of a dataframe every two columns, store the results of that function in a list, and then insert that list as a column

So I have a dataframe of 6 columns, all of them numeric and of the same length. The dataframe looks like this:

# Example data: six integer columns with 11 rows each. Every column must have
# the same length, otherwise data.frame() stops with
# "arguments imply differing number of rows".
df <- data.frame(
  var1 = 1:11,
  var2 = 5:15,
  var3 = 7:17,
  var4 = 3:13,
  var5 = 20:30,
  var6 = 15:25
)
print(df)

What I want to do is to take var1 and var2 and subtract them (var1 - var2) and get the result from that in a new column that will insert itself in the same dataframe just after var2. Then I want to take var3 and var4 and subtract them (var3 - var4) and get the result from that in a new column that I will insert just after var4, and so on.

Any suggestions?

The .after argument of mutate from dplyr can be used to control where the new column is inserted:

library(dplyr)

# Add each difference right after the second column of its pair. The inner
# call inserts var12 (var1 - var2) after var2; the outer call then inserts
# var34 (var3 - var4) after var4. The result is printed, not assigned.
mutate(
  mutate(df, var12 = var1 - var2, .after = var2),
  var34 = var3 - var4,
  .after = var4
)

-output

#    var1 var2 var12 var3 var4 var34 var5 var6
#1     1    5    -4    7    3     4   20   15
#2     2    6    -4    8    4     4   21   16
#3     3    7    -4    9    5     4   22   17
#4     4    8    -4   10    6     4   23   18
#5     5    9    -4   11    7     4   24   19
#6     6   10    -4   12    8     4   25   20
#7     7   11    -4   13    9     4   26   21
#8     8   12    -4   14   10     4   27   22
#9     9   13    -4   15   11     4   28   23
#10   10   14    -4   16   12     4   29   24
#11   11   15    -4   17   13     4   30   25

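If we need to create a new column after every 2 columns, we can loop over the column pairs (note that this loop computes the second column minus the first, i.e. the opposite sign to the snippet above):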

library(stringr)
out <- df
# Position of the second column of each consecutive pair (2, 4, 6, ...).
# Built from seq_len() so that a data frame with fewer than two columns gives
# an empty loop instead of the error raised by seq(2, 1, by = 2). A trailing
# unpaired column is left untouched.
pair_ends <- seq_len(ncol(df) %/% 2L) * 2L
for (i in pair_ends) {
  first_col <- names(df)[i - 1L]
  second_col <- names(df)[i]
  # New column name is "var<i-1><i>", e.g. "var12" for columns 1 and 2.
  new_col <- str_c('var', i - 1L, i)
  # Direction: second column of the pair minus the first column.
  # `.data[[name]]` looks a column up by its string name, and `.after` places
  # the new column immediately after the second column of the pair.
  out <- out %>%
    mutate(
      !!new_col := .data[[second_col]] - .data[[first_col]],
      .after = all_of(second_col)
    )
}
        

-output

out
#   var1 var2 var12 var3 var4 var34 var5 var6 var56
#1     1    5     4    7    3    -4   20   15    -5
#2     2    6     4    8    4    -4   21   16    -5
#3     3    7     4    9    5    -4   22   17    -5
#4     4    8     4   10    6    -4   23   18    -5
#5     5    9     4   11    7    -4   24   19    -5
#6     6   10     4   12    8    -4   25   20    -5
#7     7   11     4   13    9    -4   26   21    -5
#8     8   12     4   14   10    -4   27   22    -5
#9     9   13     4   15   11    -4   28   23    -5
#10   10   14     4   16   12    -4   29   24    -5
#11   11   15     4   17   13    -4   30   25    -5
 

In base R, we can also do

# The logical masks are recycled across the columns: c(FALSE, TRUE) keeps
# columns 2, 4, 6, ... and c(TRUE, FALSE) keeps columns 1, 3, 5, ...
# Each result column is the second column of a pair minus the first, and is
# named "<second>_<first>" (e.g. "var2_var1").
out1 <- setNames(
  df[c(FALSE, TRUE)] - df[c(TRUE, FALSE)],
  paste(names(df)[c(FALSE, TRUE)], names(df)[c(TRUE, FALSE)], sep = "_")
)

and then we cbind the datasets and order based on the column name

# Append the difference columns to the right of the original columns.
out2 <- cbind(df, out1)
# Reorder the columns with gtools::mixedorder() applied to the column names;
# the printed result below shows each difference column landing right after
# its pair.
out3 <- out2[, gtools::mixedorder(names(out2)), drop = FALSE]
out3
# var1 var2 var2_var1 var3 var4 var4_var3 var5 var6 var6_var5
#1     1    5         4    7    3        -4   20   15        -5
#2     2    6         4    8    4        -4   21   16        -5
#3     3    7         4    9    5        -4   22   17        -5
#4     4    8         4   10    6        -4   23   18        -5
#5     5    9         4   11    7        -4   24   19        -5
#6     6   10         4   12    8        -4   25   20        -5
#7     7   11         4   13    9        -4   26   21        -5
#8     8   12         4   14   10        -4   27   22        -5
#9     9   13         4   15   11        -4   28   23        -5
#10   10   14         4   16   12        -4   29   24        -5
#11   11   15         4   17   13        -4   30   25        -5

data

# Reproducible example data: six integer columns of 11 rows each, with the
# default automatic row names.
df <- data.frame(
  var1 = 1:11,
  var2 = 5:15,
  var3 = 7:17,
  var4 = 3:13,
  var5 = 20:30,
  var6 = 15:25
)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM