[英]Creating a dataframe from vectors for mean, min, and max in r
我正在嘗試創建一個新的 dataframe,其中包含現有 dataframe(top_SIM_weekly)和另一個 dataframe(df_obs_weekly)的平均值、最小值和最大值。
我已經包含了可重現的數據集: dput(top_SIM_weekly)
structure(list(X1 = c(18.3039606, 18.3039606, 18.3039606, 18.3039606,
18.3039606, 18.3039606), X2 = c(30.73478281, 30.73478281, 30.73478281,
30.73478281, 30.73478281, 30.73478281), X3 = c(50.88565679, 50.88565679,
50.88565679, 50.88565679, 50.88565679, 50.88565679), X4 = c(35.40195165,
35.40195165, 35.40195165, 35.40195165, 35.40195165, 35.40195165 ), X5 = c(23.12681287, 23.12681287, 23.12681287, 23.12681287,
23.12681287, 23.12681287), X6 = c(41.35229054, 41.35229054, 41.35229054,
41.35229054, 41.35229054, 41.35229054), X7 = c(67.48505048, 67.48505048,
67.48505048, 67.48505048, 67.48505048, 67.48505048), X8 = c(53.04008984,
53.04008984, 53.04008984, 53.04008984, 53.04008984, 53.04008984 ), X9 = c(58.26971959, 58.26971959, 58.26971959, 58.26971959,
58.26971959, 58.26971959), X10 = c(111.1841342, 111.1841342,
111.1841342, 111.1841342, 111.1841342, 111.1841342), X11 = c(100.7146155,
100.7146155, 100.7146155, 100.7146155, 100.7146155, 100.7146155 ), X12 = c(224.9074216, 224.9074216, 224.9074216, 224.9074216,
224.9074216, 224.9074216), X13 = c(255.2490203, 255.2490203,
255.2490203, 255.2490203, 255.2490203, 255.2490203), X14 = c(404.1588161,
404.1588161, 404.1588161, 404.1588161, 404.1588161, 404.1588161 ), X15 = c(443.9414483, 443.9414483, 443.9414483, 443.9414483,
443.9414483, 443.9414483), X16 = c(886.3440158, 886.3440158,
886.3440158, 886.3440158, 886.3440158, 886.3440158), X17 = c(1120.305186,
1120.305186, 1120.305186, 1120.305186, 1120.305186, 1120.305186 ), X18 = c(1295.250279, 1295.250279, 1295.250279, 1295.250279,
1295.250279, 1295.250279), X19 = c(1227.257398, 1227.257398,
1227.257398, 1227.257398, 1227.257398, 1227.257398), X20 = c(462.4336653,
462.4336653, 462.4336653, 462.4336653, 462.4336653, 462.4336653 ), X21 = c(533.812084, 533.812084, 533.812084, 533.812084, 533.812084,
533.812084), X22 = c(-0.895926113, -0.895926113, -0.895926113,
-0.895926113, -0.895926113, -0.895926113), X23 = c(0, 0, 0, 0, 0, 0), X24 = c(0.00587596, 0.00587596, 0.00587596, 0.00587596,
0.00587596, 0.00587596), X25 = c(0, 0, 0, 0, 0, 0), X26 = c(0, 0, 0, 0, 0, 0), X27 = c(0, 0, 0, 0, 0, 0), X28 = c(0, 0, 0, 0, 0, 0), X29 = c(0, 0, 0, 0, 0, 0), X30 = c(0, 0, 0, 0, 0, 0),
X31 = c(0, 0, 0, 0, 0, 0), X32 = c(0, 0, 0, 0, 0, 0), X33 = c(0,
0, 0, 0, 0, 0), X34 = c(0, 0, 0, 0, 0, 0), X35 = c(0, 0,
0, 0, 0, 0), X36 = c(0, 0, 0, 0, 0, 0), X37 = c(0, 0, 0,
0, 0, 0), X38 = c(0, 0, 0, 0, 0, 0), X39 = c(0, 0, 0, 0,
0, 0), X40 = c(0, 0, 0, 0, 0, 0), X41 = c(0.076302837, 0.076302837,
0.076302837, 0.076302837, 0.076302837, 0.076302837), X42 = c(0.04497379,
0.04497379, 0.04497379, 0.04497379, 0.04497379, 0.04497379
), X43 = c(0.47941265, 0.47941265, 0.47941265, 0.47941265,
0.47941265, 0.47941265), X44 = c(1.70405395, 1.70405395,
1.70405395, 1.70405395, 1.70405395, 1.70405395), X45 = c(1.069342585,
1.069342585, 1.069342585, 1.069342585, 1.069342585, 1.069342585
), X46 = c(0.642559657, 0.642559657, 0.642559657, 0.642559657,
0.642559657, 0.642559657), X47 = c(1.423669878, 1.423669878,
1.423669878, 1.423669878, 1.423669878, 1.423669878), X48 = c(2.947726076,
2.947726076, 2.947726076, 2.947726076, 2.947726076, 2.947726076
), X49 = c(2.083167482, 2.083167482, 2.083167482, 2.083167482,
2.083167482, 2.083167482), X50 = c(3.001766438, 3.001766438,
3.001766438, 3.001766438, 3.001766438, 3.001766438), X51 = c(4.02263428,
4.02263428, 4.02263428, 4.02263428, 4.02263428, 4.02263428
), X52 = c(5.236041751, 5.236041751, 5.236041751, 5.236041751,
5.236041751, 5.236041751), year = c(2018, 2018, 2018, 2018,
2018, 2018), corr = c(0.598566601816163, 0.598566601816163,
0.598566601816163, 0.598566601816163, 0.598566601816163,
0.598566601816163)), row.names = 272:277, class = "data.frame")
和dput(df_obs_weekly)
structure(list(Epiweek = structure(1L, .Label = "n", class = "factor"),
X1 = 119L, X2 = 103L, X3 = 96L, X4 = 99L, X5 = 53L, X6 = 91L,
X7 = 94L, X8 = 101L, X9 = 106L, X10 = 132L, X11 = 134L, X12 = 164L,
X13 = 189L, X14 = 275L, X15 = 302L, X16 = 416L, X17 = 493L,
X18 = 201L, X19 = NaN, X20 = NaN, X21 = 410L, X22 = 370L,
X23 = 426L, X24 = 311L, X25 = 253L, X26 = 264L, X27 = 175L,
X28 = 158L, X29 = 128L, X30 = 124L, X31 = 96L, X32 = 83L,
X33 = 69L, X34 = 47L, X35 = 72L, X36 = 64L, X37 = 62L, X38 = 61L,
X39 = 53L, X40 = 70L, X41 = 75L, X42 = 62L, X43 = 75L, X44 = 44L,
X45 = 66L, X46 = 86L, X47 = 91L, X48 = 88L, X49 = 86L, X50 = 135L,
X51 = 95L, X52 = 99L, year = 2018, corr = 1), row.names = 1L, class = "data.frame")
到目前為止,我已經嘗試使用以下代碼從均值、最小值和最大值的向量創建矩陣:
# Column-wise mean / max / min over every numeric column of top_SIM_weekly,
# stacked into a single three-row data frame whose rows are labelled by statistic.
# NOTE(review): the results are stored in variables named `mean`, `max` and
# `min`. After this has run once, the bare name `mean` passed as an argument
# resolves to the data frame created here instead of base::mean, so re-running
# the snippet in the same session is the likely source of the
# "expecting a one sided formula, a function, or a function name" error.
mean <- top_SIM_weekly %>% summarise_if(is.numeric, mean)
max <- top_SIM_weekly %>% summarise_if(is.numeric, max)
min <- top_SIM_weekly %>% summarise_if(is.numeric, min)
y <- rbind(mean,max,min)
row.names(y) <- c("mean","max","min")
但我收到以下錯誤: Error: expecting a one sided formula, a function, or a function name.
這個錯誤之前沒有出現,所以我不確定發生了什麼。 我還想將最終的 dataframe 轉置(行列互換),並以第一行作為列名。
或許,我們需要
# x1, x2, x3 are placeholders not defined here -- presumably the three summary
# data frames, stored under names that do not overwrite mean/max/min. The
# argument names are intended to label the resulting rows (TODO confirm).
rbind(mean = x1, max = x2, min = x3)
因為 `mean`、`max`、`min` 對象不是為 `rbind` 創建的
由於 'year' 和 'corr' 變量也是 `numeric`,因此使用 `starts_with` 可能會更好
library(dplyr)
library(tidyr)
# For each column whose name starts with "X", pack its mean, max and min into
# a single list cell, then expand the list columns so the result has one row
# per statistic (in the order mean, max, min).
top_SIM_weekly %>%
  summarise(
    across(
      starts_with('X'),
      function(values) {
        list(c(mean = mean(values), max = max(values), min = min(values)))
      }
    )
  ) %>%
  unnest(everything())
另一種方法是用 `select` 選出 `numeric` 變量,用 `pivot_longer` 轉換為「long」格式,然後按列名分組並計算 `mean`、`min`、`max`
library(dplyr) #1.0.0
library(tidyr)
library(tibble)
# Summarise every numeric column (mean, max, min) through a long-format
# reshape, then transpose so the statistics become rows and the original
# column names become columns.
top_SIM_weekly %>%
  select(where(is.numeric)) %>%
  pivot_longer(cols = everything()) %>%
  # Group on a factor whose levels follow the original column order.
  # Grouping on the raw character names would order the output as text
  # (X1, X10, X11, ..., X2), scrambling the week order in the final result.
  group_by(name = factor(name, levels = unique(name))) %>%
  summarise(Mean = mean(value), Max = max(value), Min = min(value)) %>%
  column_to_rownames('name') %>%
  t()
# Apply each summary function in turn to the columns starting with "X",
# computing one result per (year, corr) group, and stack the per-function
# results row-wise into a single data frame.
list(mean, sum, min) %>%
  map_df(function(stat_fn) {
    by_year_corr <- group_by(top_SIM_weekly, year, corr)
    summarise_at(by_year_corr, vars(starts_with('X')), stat_fn)
  }) %>%
  bind_rows()
您可以將 map_df 函數與一組函數名一起使用。 此代碼並不完全符合您的要求(例如它計算的是 sum 而不是 max), 但我相信它會有所幫助
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.