[英]Replacing NULL value in Dataframe in R with Median of Column
我有一個 data frame，其中有多個 NULL 值。這些列的類型（class）是 list 而不是 numeric。是否可以用該列的中位數替換所有 NULL
值？我嘗試了一種手動方法，即使用 as.numeric() 函數將第 1 列的 NULL 值更改為 1，然後應用 median() 函數。是否有更有效的方法來執行此操作？
# Flag the entries of the `Start Working` list-column that are NULL.
# vapply() is used instead of sapply() so the result is always a logical
# vector: sapply() returns an empty list for a zero-length column, which is
# not a valid subscript for the assignment below.
i1 <- vapply(pivot_table_1$`Start Working`, is.null, logical(1))
# Manual workaround from the question: overwrite those NULL entries with 0.
pivot_table_1$`Start Working`[i1] <- 0
以下資料來自 dput()：
# Sample data as printed by dput(): a 31-row tibble with an integer `Day`
# column and six list-columns. A missing observation is stored as a NULL
# element inside the list-column (`Stop Working`, `Breakfast`, `Dinner`).
# NOTE(review): `Start Working` holds 0 exactly where `Stop Working` holds
# NULL -- presumably the manual NULL -> 0 replacement had already been run
# on that column before dput() was called; confirm.
structure(list(Day = 1:31, `Start Sleeping` = list(0, 20, 35,
40, 50, 0, 40, 0, 0, 40, 50, 0, 0, 40, 0, 40, 35, 45, 0,
0, 65, 35, 40, 40, 0, 50, 40, 0, 0, 0, 0), `Stop Sleeping` = list(
440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440,
440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440, 440,
440, 440, 440, 440, 440, 440, 440), `Start Working` = list(
490, 490, 490, 490, 0, 0, 490, 490, 490, 490, 490, 0, 0,
490, 490, 490, 490, 490, 0, 0, 490, 490, 490, 490, 490, 0,
0, 490, 490, 490, 490), `Stop Working` = list(1005, 1005,
1005, 1005, NULL, NULL, 965, 965, 965, 965, 965, NULL, NULL,
965, 965, 965, 965, 965, NULL, NULL, 965, 965, 965, 965,
965, NULL, NULL, 965, 965, 965, 965), Breakfast = list(690,
645, 615, 540, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
475, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 475, NULL,
NULL, NULL, NULL, NULL, 475, NULL, NULL, NULL, NULL, NULL),
Dinner = list(1390, 1360, 1285, 1270, 1390, NULL, 1140, 1140,
1130, 1135, 1130, NULL, 1165, 1140, 1130, 1135, 1130,
1140, 1140, 1180, NULL, 1145, 1135, 1140, 1135, 1160,
1140, 1140, NULL, 1140, NULL)), row.names = c(NA, -31L
), class = c("tbl_df", "tbl", "data.frame"))
如果您希望將條目保留為長度為一的列表,您可以執行以下操作:
# Rebuild every column in place, keeping list entries wrapped as length-one
# elements: any entry whose length is not 1 (i.e. a NULL) is replaced by the
# median of the column's non-NULL values.
pivot_table_1[] <- lapply(pivot_table_1, function(column) {
  is_single <- lengths(column) == 1
  ifelse(is_single, column, list(median(unlist(column))))
})
# Print the result.
pivot_table_1
#> # A tibble: 31 x 7
#> Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working`
#> <int> <list> <list> <list> <list>
#> 1 1 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 2 2 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 3 3 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 4 4 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 5 5 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 6 6 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 7 7 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 8 8 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 9 9 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 10 10 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> # ... with 21 more rows, and 2 more variables: Breakfast <list>, Dinner <list>
或者,如果您希望它們作為數字列,請執行以下操作:
# Same median fill as the list-preserving variant, but each column is then
# flattened with unlist() so it is stored as a plain atomic vector.
pivot_table_1[] <- lapply(pivot_table_1, function(column) {
  is_single <- lengths(column) == 1
  filled <- ifelse(is_single, column, list(median(unlist(column))))
  unlist(filled)
})
# Print the result.
pivot_table_1
#> # A tibble: 31 x 7
#> Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working`
#> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0 440 490 1005
#> 2 2 20 440 490 1005
#> 3 3 35 440 490 1005
#> 4 4 40 440 490 1005
#> 5 5 50 440 0 965
#> 6 6 0 440 0 965
#> 7 7 40 440 490 965
#> 8 8 0 440 490 965
#> 9 9 0 440 490 965
#> 10 10 40 440 490 965
#> # ... with 21 more rows, and 2 more variables: Breakfast <dbl>, Dinner <dbl>
由reprex package (v2.0.1) 創建於 2022-05-22
tidyr 的 replace_na() 可用於替換列表列中的 NULL。（NULL 是 NA 在列表列中的等效項。）
library(tidyverse)
# For every list-column, build a length-one list holding that column's median
# (NULL entries are dropped by unlist()), then hand the named replacements to
# replace_na() so each NULL entry is filled with its own column's median.
df %>%
  replace_na(
    map(keep(df, is.list), function(column) list(median(unlist(column))))
  )
# # A tibble: 31 × 7
# Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working` Breakfast Dinner
# <int> <list> <list> <list> <list> <list> <list>
# 1 1 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 2 2 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 3 3 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 4 4 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 5 5 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 6 6 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 7 7 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 8 8 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 9 9 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# 10 10 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
# # … with 21 more rows
如果您希望將這些列表列展平，請嘗試 unnest()：
# Fill the NULL entries of each list-column with that column's median, then
# flatten every list-column with unnest().
df %>%
  replace_na(
    map(keep(df, is.list), function(column) list(median(unlist(column))))
  ) %>%
  unnest(where(is.list))
# # A tibble: 31 × 7
# Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working` Breakfast Dinner
# <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 1 0 440 490 1005 690 1390
# 2 2 20 440 490 1005 645 1360
# 3 3 35 440 490 1005 615 1285
# 4 4 40 440 490 1005 540 1270
# 5 5 50 440 0 965 540 1390
# 6 6 0 440 0 965 540 1140
# 7 7 40 440 490 965 540 1140
# 8 8 0 440 490 965 540 1140
# 9 9 0 440 490 965 540 1130
# 10 10 40 440 490 965 540 1135
另一種可能的解決方案:
library(tidyverse)
# For every column except `Day`, replace zero-length (NULL) entries with the
# median of the column's remaining values; other entries are kept as they are.
df %>%
  mutate(across(-Day, function(column) {
    is_missing <- lengths(column) == 0
    ifelse(is_missing, median(unlist(column)), column)
  }))
#> # A tibble: 31 × 7
#> Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working`
#> <int> <list> <list> <list> <list>
#> 1 1 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 2 2 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 3 3 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 4 4 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 5 5 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 6 6 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 7 7 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 8 8 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 9 9 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> 10 10 <dbl [1]> <dbl [1]> <dbl [1]> <dbl [1]>
#> # … with 21 more rows, and 2 more variables: Breakfast <list>, Dinner <list>
取消嵌套:
library(tidyverse)
# Median-fill the zero-length (NULL) entries of every column except `Day`,
# then flatten the list-columns with unnest().
df %>%
  mutate(across(-Day, function(column) {
    is_missing <- lengths(column) == 0
    ifelse(is_missing, median(unlist(column)), column)
  })) %>%
  unnest(everything())
#> # A tibble: 31 × 7
#> Day `Start Sleeping` `Stop Sleeping` `Start Working` `Stop Working`
#> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0 440 490 1005
#> 2 2 20 440 490 1005
#> 3 3 35 440 490 1005
#> 4 4 40 440 490 1005
#> 5 5 50 440 0 965
#> 6 6 0 440 0 965
#> 7 7 40 440 490 965
#> 8 8 0 440 490 965
#> 9 9 0 440 490 965
#> 10 10 40 440 490 965
#> # … with 21 more rows, and 2 more variables: Breakfast <dbl>, Dinner <dbl>
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.