简体   繁体   中英

Rearranging a data frame in R

I have an Excel file that looks like this:

ID strength_score_week_1 agility_score_week_1 strength_score_week_2 agility_score_week_2
1 3 6 4 6
2 5 6 6 6
3 8 8 9 8
4 6 7 6 4

I want to rearrange the data above into a data frame with this format:

Week training type mean score
1 agility
1 strength
2 agility
2 strength

Essentially, I want to group the final table by training type and plot two line graphs showing the mean score for agility and for strength over a period of 40 weeks.

Any help would be very much appreciated!

# Example data in wide format: one row per participant (ID) and one integer
# score column per training type and week.
df <- data.frame(
  ID = 1:4,
  strength_score_week_1 = c(3L, 5L, 8L, 6L),
  agility_score_week_1 = c(6L, 6L, 8L, 7L),
  strength_score_week_2 = c(4L, 6L, 9L, 6L),
  agility_score_week_2 = c(6L, 6L, 8L, 4L)
)
# Print the data frame.
df
#>   ID strength_score_week_1 agility_score_week_1 strength_score_week_2
#> 1  1                     3                    6                     4
#> 2  2                     5                    6                     6
#> 3  3                     8                    8                     9
#> 4  4                     6                    7                     6
#>   agility_score_week_2
#> 1                    6
#> 2                    6
#> 3                    8
#> 4                    4
library(tidyverse)

# Reshape to long format (one row per ID, training type and week), average the
# scores per week and training type, then draw one line per training type.
df %>%
  pivot_longer(
    cols = -ID,
    names_to = c("training_type", "week"),
    names_pattern = "([^_]*)_score_week_(.*)"
  ) %>%
  group_by(week, training_type) %>%
  summarise(mean_score = mean(value), .groups = "drop") %>%
  # The week is captured from the column name as text; convert it to a number
  # before it is mapped to the x axis.
  mutate(week = as.numeric(week)) %>%
  ggplot(
    aes(
      x = week,
      y = mean_score,
      group = training_type,
      colour = training_type
    )
  ) +
  geom_line()

Created on 2021-07-22 by the reprex package (v2.0.0)

Try this

library(readxl) # import Excel sheets
library(ggplot2)

# Read the sheet, drop the first (ID) column and transpose, so that each row
# is one "<type>_score_week_<n>" variable and each column is one participant.
scores <- t(read_excel('Book1.xlsx')[, -1])

# Mean score of every variable, i.e. per training type and week.
df_mean <- rowMeans(scores)

# Derive the training type and the week number from the variable names.
var_names <- rownames(scores)

# Build the data frame column by column instead of cbind()-ing the means with
# a character matrix: cbind() would coerce everything to character, so the
# means and weeks would be plotted as unordered text labels, not numbers.
df <- data.frame(
  df_mean = unname(df_mean),
  feature = sub('_score_week_.*$', '', var_names),
  week = as.numeric(sub('^.*_week_', '', var_names))
)

# Plot the mean score per week, one line (with markers) per training type.
ggplot(df, aes(x = week, y = df_mean, colour = feature)) +
  geom_line() +
  geom_point()
library(dplyr)
library(tibble)
library(stringr)

# Transpose the wide table so each row is one "<type>_score_week_<n>" variable,
# then drop the first row, which holds the transposed ID column.
dt <- as.data.frame(t(dt))[-1, ]

dt %>%
  rownames_to_column() %>%
  as_tibble() %>%
  # Split the variable name on "_": piece 1 is the training type and piece 4
  # is the week. str_split() is vectorised, so no rowwise() step is needed.
  mutate(
    `training type` = str_split(rowname, "_", simplify = TRUE)[, 1],
    week = str_split(rowname, "_", simplify = TRUE)[, 4]
  ) %>%
  # Average over every numeric (participant) column rather than the fixed
  # columns 2:5, so the code works for any number of participants.
  mutate(`mean score` = rowMeans(across(where(is.numeric)))) %>%
  select(week, `training type`, `mean score`)

Which results in:

# A tibble: 4 x 3
  week  `training type` `mean score`
  <chr> <chr>                  <dbl>
1 1     strength                5.5 
2 1     agility                 6.75
3 2     strength                6.25
4 2     agility                 6   

If you have training types that contain multiple words, you would need to use a different function instead of str_split. If that is the case, I can rewrite that part of the code.

A base R option

# Mean score per week and training type in long format, using base R only.
# Wrapped in local() so the intermediate objects do not leak into the
# workspace; the resulting data frame is still returned (and printed).
local({
  # Rename "<type>_score_week_<n>" to "<type>.week_<n>" so that reshape() can
  # split every name at "." into a value column (<type>) and a time value
  # ("week_<n>"), then reshape all columns except the first (ID) to long format.
  long_df <- reshape(
    setNames(df, gsub("_score_", ".", names(df))),
    direction = "long",
    idvar = "ID",
    varying = -1
  )

  # Average both score columns within each week.
  week_means <- aggregate(cbind(strength, agility) ~ time, long_df, mean)

  # Stack the score columns of each week into (ind, values) rows and bind the
  # weeks together. Rows are visited by index instead of with apply(), because
  # apply() would coerce the mixed-type data frame to a character matrix and
  # return the means as formatted strings instead of numbers.
  do.call(
    rbind,
    lapply(
      seq_len(nrow(week_means)),
      function(i) {
        cbind(
          week = week_means$time[i],
          rev(stack(week_means[i, -1, drop = FALSE]))
        )
      }
    )
  )
})

gives

    week      ind values
1 week_1 strength   5.50
2 week_1  agility   6.75
3 week_2 strength   6.25
4 week_2  agility   6.00

I would use a mix of pivot_longer, separate and mutate in this fashion:

# Reshape to long format, average the scores of every "<type>_week_<n>"
# variable, then split that label into the training type and a numeric week.
data %>%
  pivot_longer(cols = -ID, names_to = "training_type") %>%
  # Drop the "_score" infix so the label reads "<type>_week_<n>".
  mutate(training_type = str_replace(training_type, "_score", "")) %>%
  group_by(training_type) %>%
  summarise(mean_score = mean(value, na.rm = TRUE)) %>%
  separate(
    col = "training_type",
    into = c("training_type", "week"),
    sep = "_week_"
  ) %>%
  mutate(week = as.numeric(week))

Which gives you the following output:

# A tibble: 4 x 3
  training_type  week mean_score
  <chr>         <dbl>      <dbl>
1 agility           1       6.75
2 agility           2       6   
3 strength          1       5.5 
4 strength          2       6.25

These results are ready to be plotted with:

# Line plot of the mean score per week, one coloured line per training type.
# NOTE(review): the aesthetics need the columns week, mean_score and
# training_type, i.e. the summarised table; presumably `data` must first be
# replaced by (or reassigned to) the summary result - confirm.
ggplot(
  data = data,
  mapping = aes(x = week, y = mean_score, color = training_type)
) +
  geom_line()

在此处输入图片说明

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM