I have an excel file that looks like this:
ID | strength_score_week_1 | agility_score_week_1 | strength_score_week_2 | agility_score_week_2 |
---|---|---|---|---|
1 | 3 | 6 | 4 | 6 |
2 | 5 | 6 | 6 | 6 |
3 | 8 | 8 | 9 | 8 |
4 | 6 | 7 | 6 | 4 |
I want to rearrange/rewrite the data above into a data frame arranged in this format:
Week | training type | mean score |
---|---|---|
1 | agility | |
1 | strength | |
2 | agility | |
2 | strength | |
Essentially, what I want to do with the final table is group it by training type and plot two line graphs showing the mean score for agility and strength over a period of 40 weeks.
any help would be very much appreciated!
# Example data in wide format: one row per participant (ID) and one integer
# column per training type and week.
df <- data.frame(
  ID = 1:4,
  strength_score_week_1 = as.integer(c(3, 5, 8, 6)),
  agility_score_week_1 = as.integer(c(6, 6, 8, 7)),
  strength_score_week_2 = as.integer(c(4, 6, 9, 6)),
  agility_score_week_2 = as.integer(c(6, 6, 8, 4))
)
df
#> ID strength_score_week_1 agility_score_week_1 strength_score_week_2
#> 1 1 3 6 4
#> 2 2 5 6 6
#> 3 3 8 8 9
#> 4 4 6 7 6
#> agility_score_week_2
#> 1 6
#> 2 6
#> 3 8
#> 4 4
library(tidyverse)
# Reshape to long form, splitting every score column name into its training
# type and its week number (ID is kept as an identifier column).
long_scores <- pivot_longer(
  df,
  !ID,
  names_pattern = "([^_]*)_score_week_(.*)",
  names_to = c("training_type", "week")
)
# Mean score over all participants for each week / training type pair.
weekly_means <- summarise(
  group_by(long_scores, week, training_type),
  mean_score = mean(value),
  .groups = "drop"
)
# The week was extracted from the column names as text; convert it to a number
# before plotting.
weekly_means <- mutate(weekly_means, week = as.numeric(week))
# One line per training type.
ggplot(
  weekly_means,
  aes(x = week, y = mean_score, color = training_type, group = training_type)
) +
  geom_line()
Created on 2021-07-22 by the reprex package (v2.0.0)
Try this
library(readxl) #library to import excel sheets
# Import the sheet, drop the first (ID) column and transpose, so that every row
# is one "<type>_score_week_<n>" measurement and every column one participant.
df <- t(read_excel("Book1.xlsx")[, -1])
# Mean score of each measurement across participants.
df_mean <- rowMeans(df)
# Auxiliary matrix built from the row names: split on "_" and keep the 1st
# piece (training type) and the 4th piece (week number).
# drop = FALSE keeps a matrix even when there is only a single measurement.
aux <- matrix(
  unlist(strsplit(rownames(df), "_")),
  nrow = nrow(df),
  byrow = TRUE
)[, c(1, 4), drop = FALSE]
colnames(aux) <- c("feature", "week")
# Join everything in a data frame, column by column. cbind()-ing the numeric
# means onto the character matrix would coerce the means to character, and the
# plot would then treat the scores as discrete labels instead of numbers.
df <- data.frame(
  df_mean = df_mean,
  feature = aux[, "feature"],
  week = as.numeric(aux[, "week"])
)
# Plot the mean score per week, coloured by training type.
library(ggplot2)
ggplot(df) +
  geom_point(aes(x = week, y = df_mean, colour = factor(feature)))
library(dplyr)
library(tibble)
library(stringr)
# NOTE(review): `dt` is not defined in this snippet; presumably it is the wide
# table from the question with the ID column first -- confirm.
# Transposing turns every measurement into a row and every participant into a
# column; `[-1, ]` then removes the row that came from the first column.
dt <- as.data.frame(t(dt))[-1, ]
dt %>%
  rownames_to_column() %>%
  as_tibble() %>%
  mutate(
    # Average over every participant column, however many there are: inside
    # this mutate() `.` is the incoming table, whose only other column is
    # `rowname`. (A fixed 2:5 column range would silently ignore any
    # participant after the fourth.)
    `mean score` = rowMeans(select(., -rowname)),
    # Row names look like "<type>_score_week_<n>": the 1st piece is the
    # training type and the 4th piece the week.
    `training type` = str_split_fixed(rowname, "_", 4)[, 1],
    week = str_split_fixed(rowname, "_", 4)[, 4]
  ) %>%
  select(week, `training type`, `mean score`)
Which results in:
# A tibble: 4 x 3
week `training type` `mean score`
<chr> <chr> <dbl>
1 1 strength 5.5
2 1 agility 6.75
3 2 strength 6.25
4 2 agility 6
If you have training types that contain multiple words, you would need to use a different function instead of str_split. If that is the case, I can rewrite that part of the code.
A base R option
# Base R pipeline; read it from the innermost call outwards. It expects the
# wide data frame `df` with the ID column first. The per-row pieces produced
# by apply() are finally row-bound into one data frame by do.call(rbind, ...).
do.call(
rbind,
apply(
aggregate(
# Mean of each training type for every value of `time` (the time column that
# reshape() adds in long format).
cbind(strength, agility) ~ time,
reshape(
# Rename e.g. "strength_score_week_1" to "strength.week_1" so that reshape()
# can split each name at "." into a variable name and a time label.
setNames(df, gsub("_score_", ".", names(df))),
direction = "long",
idvar = "ID",
# Every column except the first one is a repeated measurement.
varying = -1
), mean
# For each aggregated row: stack the per-type means into long form, reverse the
# column order so the type comes before the value, and prepend the week label.
# NOTE(review): apply() converts the data frame to a matrix first, so the means
# probably come back as character strings -- confirm before plotting.
), 1, function(x) cbind(week = x[[1]], rev(stack(x[-1])))
)
)
gives
week ind values
1 week_1 strength 5.50
2 week_1 agility 6.75
3 week_2 strength 6.25
4 week_2 agility 6.00
I would use a mix of pivot_longer, separate and mutate in this fashion:
# Reshape to long form: one row per participant and score column, with the
# column name stored in `training_type`.
long_scores <- pivot_longer(data, cols = -"ID", names_to = "training_type")
# Strip the "_score" infix, leaving labels such as "agility_week_1".
long_scores <- mutate(
  long_scores,
  training_type = str_remove(training_type, "_score")
)
# Mean over participants for every type/week label, ignoring missing values.
mean_scores <- summarise(
  group_by(long_scores, training_type),
  mean_score = mean(value, na.rm = TRUE)
)
# Split the label into the training type and the week.
mean_scores <- separate(
  mean_scores,
  col = "training_type",
  sep = "_week_",
  into = c("training_type", "week")
)
# The week comes out of separate() as text; convert it to a number.
mutate(mean_scores, week = as.numeric(week))
Which gives you the following output:
# A tibble: 4 x 3
training_type week mean_score
<chr> <dbl> <dbl>
1 agility 1 6.75
2 agility 2 6
3 strength 1 5.5
4 strength 2 6.25
Which are ready to be plotted by,
# The plot needs the summarised long table (training_type, week, mean_score).
# The original wide `data` has none of these columns, so the summary is
# computed here and piped straight into ggplot().
data %>%
  pivot_longer(cols = -"ID", names_to = "training_type") %>%
  mutate(training_type = str_remove(training_type, "_score")) %>%
  group_by(training_type) %>%
  summarise(mean_score = mean(value, na.rm = TRUE)) %>%
  separate(
    col = "training_type",
    sep = "_week_",
    into = c("training_type", "week")
  ) %>%
  # Numeric weeks keep the x axis in week order (1, 2, ..., 40).
  mutate(week = as.numeric(week)) %>%
  ggplot(
    mapping = aes(
      x = week,
      y = mean_score,
      color = training_type
    )
  ) +
  geom_line()
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.