Looking at similar questions, I could not find one that matched my need. If one does contain a solution, please share its link.
I have this dput-produced data:
structure(list(Player = c("Seth Lugo", "Jacob deGrom", "Rick Porcello",
"David Peterson", "Michael Wacha", "Seth Lugo", "Jacob deGrom",
"Rick Porcello", "David Peterson", "Steven Matz", "Seth Lugo",
"Jacob deGrom", "Rick Porcello", "David Peterson", "Seth Lugo",
"Jacob deGrom", "Rick Porcello", "Michael Wacha", "David Peterson",
"Jacob deGrom", "Seth Lugo", "Rick Porcello", "Robert Gsellman",
"Michael Wacha", "Ariel Jurado", "Jacob deGrom", "Rick Porcello",
"Seth Lugo", "Robert Gsellman", "David Peterson"), Date = structure(c(1601164800,
1601078400, 1601078400, 1600905600, 1600819200, 1600732800, 1600646400,
1600560000, 1600473600, 1600387200, 1600300800, 1600214400, 1600128000,
1599955200, 1599868800, 1599782400, 1599609600, 1599523200, 1599436800,
1599350400, 1599264000, 1599177600, 1599091200, 1599004800, 1598918400,
1598832000, 1598745600, 1598745600, 1598659200, 1598572800), tzone = "UTC", class = c("POSIXct",
"POSIXt")), DblHdr = c(0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, 2), DateStr = c("09/27/2020",
"09/26/2020", "09/26/2020", "09/24/2020", "09/23/2020", "09/22/2020",
"09/21/2020", "09/20/2020", "09/19/2020", "09/18/2020", "09/17/2020",
"09/16/2020", "09/15/2020", "09/13/2020", "09/12/2020", "09/11/2020",
"09/09/2020", "09/08/2020", "09/07/2020", "09/06/2020", "09/05/2020",
"09/04/2020", "09/03/2020", "09/02/2020", "09/01/2020", "08/31/2020",
"08/30/2020", "08/30/2020", "08/29/2020", "08/28/2020"), Month = c("09",
"09", "09", "09", "09", "09", "09", "09", "09", "09", "09", "09",
"09", "09", "09", "09", "09", "09", "09", "09", "09", "09", "09",
"09", "09", "08", "08", "08", "08", "08"), Tm = c("NYM", "NYM",
"NYM", "NYM", "NYM", "NYM", "NYM", "NYM", "NYM", "NYM", "NYM",
"NYM", "NYM", "NYM", "NYM", "NYM", "NYM", "NYM", "NYM", "NYM",
"NYM", "NYM", "NYM", "NYM", "NYM", "NYM", "NYM", "NYM", "NYM",
"NYM"), Opp = c("WSN", "WSN", "WSN", "WSN", "TBR", "TBR", "TBR",
"ATL", "ATL", "ATL", "PHI", "PHI", "PHI", "TOR", "TOR", "TOR",
"BAL", "BAL", "PHI", "PHI", "PHI", "PHI", "NYY", "BAL", "BAL",
"MIA", "NYY", "NYY", "NYY", "NYY"), Rslt = c("L 5-15", "L 3-4",
"L 3-5", "W 3-2", "L 5-8", "W 5-2", "L 1-2", "L 0-7", "W 7-2",
"L 2-15", "W 10-6", "W 5-4", "L 1-4", "L 3-7", "L 2-3", "W 18-1",
"W 7-6", "L 2-11", "L 8-9", "W 14-1", "W 5-1", "L 3-5", "W 9-7",
"W 9-4", "L 5-9", "L 3-5", "L 7-8", "L 2-5", "L 1-2", "W 4-3"
), W_L = c("L", "L", "L", "W", "L", "W", "L", "L", "W", "L",
"W", "W", "L", "L", "L", "W", "W", "L", "L", "W", "W", "L", "W",
"W", "L", "L", "L", "L", "L", "W"), temp = c("L 5", "L 3", "L 3",
"W 3", "L 5", "W 5", "L 1", "L 0", "W 7", "L 2", "W 10", "W 5",
"L 1", "L 3", "L 2", "W 18", "W 7", "L 2", "L 8", "W 14", "W 5",
"L 3", "W 9", "W 9", "L 5", "L 3", "L 7", "L 2", "L 1", "W 4"
), RS = c(5, 3, 3, 3, 5, 5, 1, 0, 7, 2, 10, 5, 1, 3, 2, 18, 7,
2, 8, 14, 5, 3, 9, 9, 5, 3, 7, 2, 1, 4), RA = c(15, 4, 5, 2,
8, 2, 2, 7, 2, 15, 6, 4, 4, 7, 3, 1, 6, 11, 9, 1, 1, 5, 7, 4,
9, 5, 8, 5, 2, 3), Rdiff = c(-10, -1, -2, 1, -3, 3, -1, -7, 5,
-13, 4, 1, -3, -4, -1, 17, 1, -9, -1, 13, 4, -2, 2, 5, -4, -2,
-1, -3, -1, 1), absV = c(10, 1, 2, 1, 3, 3, 1, 7, 5, 13, 4, 1,
3, 4, 1, 17, 1, 9, 1, 13, 4, 2, 2, 5, 4, 2, 1, 3, 1, 1), App_Dec = c("GS-2, L",
"GS-5", "GS-3, L", "GS-7, W", "GS-6, L", "GS-7, W", "GS-7, L",
"GS-7, L", "GS-6, W", "GS-3, L", "GS-2", "GS-2", "GS-6, L", "GS-5, L",
"GS-6, L", "GS-6, W", "GS-4", "GS-4, L", "GS-2", "GS-7, W", "GS-5, W",
"GS-6", "GS-2", "GS-3", "GS-4", "GS-6, L", "GS-5", "GS-4", "GS-4",
"GS-4"), IP = c(1.1, 5, 3, 7, 6, 6.1, 7, 7, 6, 2.2, 1.2, 2, 6,
5, 5.1, 6, 4, 4, 2, 7, 5, 6, 1.2, 3, 4, 6, 5, 3.2, 4, 4), H = c(5,
5, 8, 4, 6, 4, 4, 3, 3, 8, 8, 4, 6, 3, 7, 3, 10, 7, 3, 3, 4,
3, 4, 4, 9, 6, 4, 4, 4, 4), R = c(6, 3, 5, 1, 4, 2, 2, 1, 1,
6, 6, 3, 4, 2, 3, 1, 5, 5, 5, 1, 1, 2, 4, 2, 5, 4, 2, 1, 1, 3
), ER = c(6, 3, 3, 1, 4, 1, 2, 1, 1, 6, 6, 3, 4, 2, 3, 1, 5,
4, 5, 1, 1, 2, 4, 2, 5, 1, 2, 1, 1, 3), BB = c(2, 2, 1, 1, 0,
1, 2, 2, 4, 3, 0, 1, 2, 2, 1, 2, 0, 0, 4, 2, 2, 2, 4, 1, 0, 2,
2, 2, 0, 3), SO = c(1, 10, 3, 4, 4, 7, 14, 10, 10, 5, 3, 1, 5,
2, 5, 9, 3, 3, 3, 12, 8, 6, 0, 2, 2, 9, 2, 7, 4, 3), HR = c(0,
2, 1, 0, 2, 1, 1, 1, 1, 2, 4, 0, 1, 1, 0, 0, 0, 2, 1, 1, 1, 0,
0, 0, 1, 1, 0, 1, 1, 0), UER = c(0, 0, 2, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0),
Pit = c(38, 113, 67, 107, 66, 95, 112, 100, 102, 76, 52,
40, 94, 81, 91, 102, 66, 71, 70, 108, 81, 100, 52, 69, 84,
103, 86, 60, 57, 70), Str = c(24, 78, 42, 68, 45, 66, 70,
70, 62, 45, 30, 25, 66, 52, 60, 68, 45, 49, 37, 74, 50, 65,
22, 41, 53, 72, 55, 39, 33, 37), GSc = c(19, 53, 29, 68,
48, 65, 73, 75, 68, 20, 18, 36, 47, 53, 46, 69, 25, 33, 29,
77, 61, 62, 27, 44, 26, 57, 51, 54, 54, 42), BF = c(12, 22,
19, 26, 23, 24, 26, 26, 24, 18, 14, 11, 26, 20, 24, 23, 21,
20, 14, 26, 21, 23, 13, 15, 21, 27, 20, 16, 15, 18), AB = c(8,
20, 18, 24, 23, 23, 23, 23, 20, 15, 13, 9, 24, 18, 22, 21,
21, 20, 9, 24, 19, 21, 8, 13, 20, 25, 18, 14, 15, 15), H2B = c(2,
0, 1, 1, 1, 0, 2, 0, 2, 2, 1, 2, 1, 0, 2, 1, 1, 1, 1, 1,
0, 0, 1, 0, 2, 2, 2, 0, 1, 0), H3B = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
0, 0, 0, 1, 0), IBB = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0),
HBP = c(1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0), SH = c(0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 1, 0, 0, 0, 0, 0), SF = c(1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
0), GDP = c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1), SB = c(0, 1,
1, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 2, 0,
1, 0, 0, 0, 3, 0, 0, 0, 0), CS = c(0, 0, 0, 0, 1, 0, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0), PO = c(0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), BK = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0), WP = c(0, 1, 1, 1, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
0, 1, 0, 0), ERA = c("40.5", "5.4", "9", "1.29", "6", "1.42",
"2.57", "1.29", "1.5", "20.25", "32.4", "13.5", "6", "3.6",
"5.0599999999999996", "1.5", "11.25", "9", "22.5", "1.29",
"1.8", "3", "21.6", "6", "11.25", "1.5", "3.6", "2.4500000000000002",
"2.25", "6.75"), WPA = c(-0.471, -0.087, -0.256, 0.34, -0.22,
0.18, 0.107, 0.219, 0.229, -0.358, -0.487, -0.186, -0.156,
0.036, -0.047, 0.049, -0.329, -0.321, -0.34, 0.193, 0.156,
0.07, -0.312, -0.042, -0.278, -0.271, 0.029, 0.02, 0.092,
-0.174), RE24 = c(-5.122, -0.193, -3.316, 2.931, -1.08, 1.509,
1.406, 2.406, 1.92, -4.641, -5.444, -1.919, -0.758, 0.679,
0.245, 2.215, -3.054, -3.054, -4.027, 2.406, 1.433, 0.92,
-3.788, -0.359, -2.812, -1.08, 0.707, 0.364, 1.166, -0.834
), aLI = c(1.45, 1.244, 0.974, 1.271, 0.965, 0.921, 0.955,
0.888, 1.066, 0.962, 0.767, 1.073, 0.941, 0.852, 1.353, 0.392,
0.857, 0.805, 0.904, 0.75, 1.037, 0.861, 1.232, 1.355, 0.914,
1.239, 1.213, 1.28, 0.748, 1.407)), row.names = c(NA, -30L
), class = c("tbl_df", "tbl", "data.frame"))
Desired output:
The numbers starting in the second column are the total absV values for each player for each column. The last column contains the sum of all the absV values for each player where absV > 5. Only a sample of the first 3 rows are shown, and the absV values are just filler numbers.
| Player | 1 | 2 | 3 | 4 | 5 | >5 |
| deGrom | 2 | 3 | 5 | 0 | 1 | 3 |
| Matz | 2 | 3 | 5 | 0 | 1 | 3 |
Code tried (I need help getting beyond the point shown). I would prefer if the code uses dplyr:
# Count how many games each player has at every absV value.
# summarize() keeps only the grouping columns plus numG, so no explicit
# column selection is needed before grouping.
starter %>%
  group_by(Player, absV) %>%
  summarize(numG = n()) %>%
  arrange(Player, absV)
To do this, you need to split each player's rows into two parts, those with row number `<= 5` and those with row number `> 5`, then `rbind` them together and thereafter `pivot_wider`. Follow this code:
library(dplyr)
library(tidyr)
# Number each player's rows in their original order, keeping only the columns
# needed for the reshape. The result stays grouped by Player.
df <- starter %>%
  select(Player, absV) %>%
  group_by(Player) %>%
  mutate(row = row_number()) %>%
  arrange(Player)

# Keep each player's first five rows as they are, collapse the remaining rows
# into a single ">5" row holding their summed absV, then spread the result to
# one column per row label.
df %>%
  filter(row <= 5) %>%
  mutate(row = as.character(row)) %>%
  rbind(
    df %>%
      filter(row > 5) %>%
      summarise(absV = sum(absV)) %>%
      mutate(row = ">5")
  ) %>%
  pivot_wider(id_cols = Player, names_from = row, values_from = absV)
# A tibble: 8 x 7
# Groups: Player [8]
Player `1` `2` `3` `4` `5` `>5`
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Ariel Jurado 4 NA NA NA NA NA
2 David Peterson 1 5 4 1 1 NA
3 Jacob deGrom 1 1 1 17 13 2
4 Michael Wacha 3 9 5 NA NA NA
5 Rick Porcello 2 7 3 1 2 1
6 Robert Gsellman 2 1 NA NA NA NA
7 Seth Lugo 10 3 4 1 4 3
8 Steven Matz 13 NA NA NA NA NA
Note: Loading the `tidyverse` package directly, which attaches dplyr and tidyr at once, is advised.
Note 2: If you want to sort by absV before changing the data format, add absV to the `arrange()` call before numbering the rows and binding the two parts together.
# Sort by player and then by absV before numbering, so that row 1 is each
# player's smallest absV. The result stays grouped by Player.
df <- starter %>%
  select(Player, absV) %>%
  arrange(Player, absV) %>%
  group_by(Player) %>%
  mutate(row = row_number())

# Keep each player's five smallest values as they are, collapse the remaining
# rows into a single ">5" row holding their summed absV, then spread the
# result to one column per row label.
df %>%
  filter(row <= 5) %>%
  mutate(row = as.character(row)) %>%
  rbind(
    df %>%
      filter(row > 5) %>%
      summarise(absV = sum(absV)) %>%
      mutate(row = ">5")
  ) %>%
  pivot_wider(id_cols = Player, names_from = row, values_from = absV)
# This will give the following, different output:
# A tibble: 8 x 7
# Groups: Player [8]
Player `1` `2` `3` `4` `5` `>5`
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 Ariel Jurado 4 NA NA NA NA NA
2 David Peterson 1 1 1 4 5 NA
3 Jacob deGrom 1 1 1 2 13 17
4 Michael Wacha 3 5 9 NA NA NA
5 Rick Porcello 1 1 2 2 3 7
6 Robert Gsellman 1 2 NA NA NA NA
7 Seth Lugo 1 3 3 4 4 10
8 Steven Matz 13 NA NA NA NA NA
Follow this code to work out the frequency of each absV value for each player:
# Attach to every row the number of times its (Player, absV) pair occurs,
# then drop the grouping.
df %>%
  group_by(Player, absV) %>%
  mutate(freq = n()) %>%
  ungroup()

# Check it: the same computation, showing only the relevant columns.
df %>%
  group_by(Player, absV) %>%
  mutate(freq = n()) %>%
  ungroup() %>%
  select(Player, absV, freq)
Player absV freq
<chr> <dbl> <int>
1 Seth Lugo 10 1
2 Jacob deGrom 1 3
3 Rick Porcello 2 2
4 David Peterson 1 3
5 Michael Wacha 3 1
6 Seth Lugo 3 2
7 Jacob deGrom 1 3
8 Rick Porcello 7 1
9 David Peterson 5 1
10 Steven Matz 13 1
# ... with 20 more rows
Using data.table
library(data.table)
# Spread absV to one column per game, numbering each player's games with
# rowid(). Note that setDT() converts `starter` to a data.table by reference.
dcast(
  setDT(starter),
  Player ~ rowid(Player),
  value.var = "absV"
)
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.