繁体   English   中英

对分组后的 dataframe 每次取 n 行应用自定义 function

[英]apply a custom function on grouped dataframe n rows at a time

用户定义 function

  #' Build an annotated collage from a set of images and write it to disk
  #'
  #' Reads the images at `path`, trims them, arranges them in a montage of
  #' 500x500 cells, adds a white border, prints `country` and `strain` at the
  #' top, and writes the result as a JPEG file under the project root
  #' resolved by `here::here()`.
  #'
  #' @param path Character vector of image file paths to read.
  #' @param country Country label; used in the caption and in the file name.
  #' @param strain Strain label; used in the caption and in the file name.
  #' @param assay Assay label; used in the file name.
  #' @param subgroup Subgroup label; used in the file name.
  #' @param tile Montage layout passed to `magick::image_montage()`. Defaults
  #'   to `NULL`, which leaves the layout to `image_montage()`. Previously this
  #'   value was read from an undeclared variable `tile`, so the function only
  #'   worked when such a variable happened to exist in an enclosing
  #'   environment.
  #' @return The result of `magick::image_info()` for the collage.
  CollageImage <- function(path, country, strain, assay, subgroup,
                           tile = NULL) {
    img_out <- magick::image_read(path) %>%
      magick::image_trim() %>%
      magick::image_convert(format = "jpeg") %>%
      magick::image_montage(
        tile = tile,
        geometry = paste0(500, "x", 500, "+5+5")
      ) %>%
      magick::image_border(geometry = "10x80", color = "#FFFFFF") %>%
      magick::image_annotate(
        paste(country, "\n", strain, sep = " "),
        weight = 700,
        size = 30,
        location = "+0+0",
        gravity = "north"
      ) %>%
      magick::image_convert("jpg")

    # Write the collage to file; the file name is built from the labels.
    out_path <- here::here(
      paste(country, strain, assay, subgroup, "collage.jpg", sep = "_")
    )
    magick::image_write(
      img_out,
      path = out_path,
      format = "jpeg",
      quality = 100,
      density = 300
    )

    # Return the collage info so the caller can check the result.
    magick::image_info(img_out)
  }
    

分组数据框

# Group the input rows by country and strain.
out_df <- dplyr::group_by(df, country, strain)

使用 group_map 在分组后的 dataframe 上应用 function

# Apply the collage function once per group of `out_df`.
# Inside the lambda, `.x` is the data of the current group and `.y` is its
# grouping key.
# The call previously targeted `CollageEachGroup`, which is not defined in the
# visible code; `CollageImage` is the function with this exact argument set.
# NOTE(review): `subgroup` is not defined in the visible code — it must exist
# in the calling environment; confirm.
# NOTE(review): `.x$assay` holds one value per row of the group, while
# `CollageImage` pastes `assay` into a single file name — confirm a scalar is
# not intended here.
out_df %>%
  dplyr::group_map(~ CollageImage(
    path = .x$path,
    country = .y$country,
    strain = .y$strain,
    assay = .x$assay,
    subgroup = subgroup
  ))
  

我想在每个组内以每次 10 行的 window 滑动来应用该 function。 感谢任何关于如何实现的建议。 例如,如果一个组中有 19 个图像,我想写出 2 个文件:一个是 10 个文件的拼贴画,另一个是 9 个文件的拼贴画。 而且,文件名必须是 A_UK_19_1.csv 和 A_UK_19_2.csv。

这是我想到的一种解决方法(来自 SO 上的回答),但它并不优雅。

- Filter out each group
- create a block for each group as follows

    # Label rows with consecutive block ids (1, 1, ..., 2, 2, ...): each id is
    # repeated `bloc_len` times and the result is cut to the number of rows.
    df_subset$bloc <- rep(
      seq_len(1 + nrow(df_subset) %/% bloc_len),
      each = bloc_len,
      length.out = nrow(df_subset)
    )


dput(df)
structure(list(png_file = c("A_UK_1_lp21_pmn1__1.png", "A_UK_1_xno9_pmn1__1.png", 
"A_UK_2.14.3_lp21_pmn1__1.png", "A_UK_2.14.3_xno9_pmn1__1.png", 
"A_UK_2.2_lp21_zn78__1.png", "A_UK_2.2_xno9_zn78__1.png", "A_UK_2.3_lp21_pmn1__1.png", 
"A_UK_2.3_xno9_pmn1__1.png", "A_UK_2.4_lp21_yun7__1.png", "A_UK_2.8.1_lp21_pmn1__1.png", 
"A_UK_2.8.1_xno9_pmn1__1.png", "A_UK_2.8.2_lp21_pmn1__1.png", 
"A_UK_2.8.2_xno9_pmn1__1.png", "B_UK_2.1_lp21_pmn1__1.png", "B_UK_2.1_xno9_pmn1__1.png", 
"B_UK_2.14.1_lp21_pmn1__1.png", "B_UK_2.14.1_xno9_pmn1__1.png", 
"B_UK_2.14.2_lp21_pmn1__1.png", "B_UK_2.14.2_xno9_pmn1__1.png", 
"A_UK_2.14.3_lp21_pmn1__1.png", "A_UK_2.14.3_xno9_pmn1__1.png", 
"A_UK_2.2_lp21_zn78__1.png", "A_UK_2.2_xno9_zn78__1.png", "A_UK_2.3_lp21_pmn1__1.png", 
"A_UK_2.3_xno9_pmn1__1.png", "A_UK_2.4_lp21_yun7__1.png", "A_UK_2.8.1_lp21_pmn1__1.png", 
"A_UK_2.8.1_xno9_pmn1__1.png", "A_UK_2.8.2_lp21_pmn1__1.png", 
"A_UK_2.8.2_xno9_pmn1__1.png", "B_UK_2.14.1_lp21_pmn1__1.png", 
"B_UK_2.14.1_xno9_pmn1__1.png", "B_UK_2.14.2_lp21_pmn1__1.png", 
"B_UK_2.14.2_xno9_pmn1__1.png", "A_UK_2.2_lp21_zn78__1.png", 
"A_UK_2.2_xno9_zn78__1.png", "A_UK_2.3_lp21_pmn1__1.png", "A_UK_2.3_xno9_pmn1__1.png", 
"A_UK_2.4_lp21_yun7__1.png", "A_UK_2.9.1_lp21_yun7__1.png", "B_UK_2.12.1_lp21_yun7__1.png", 
"B_UK_2.12.2_lp21_yun7__1.png", "B_UK_2.7.1_lp21_pmn1__1.png", 
"B_UK_2.7.1_xno9_pmn1__1.png", "B_UK_2.7.4_lp21_yun7__1.png", 
"B_UK_2.9.2_lp21_yun7__1.png", "A_UK_2.4_lp21_yun7__1.png", "A_UK_2.5.4_lp21_pmn1__1.png", 
"A_UK_2.5.4_xno9_pmn1__1.png", "A_UK_2.6.4_lp21_yun7__1.png", 
"B_UK_2.5.3_lp21_yun7__1.png", "A_UK_2.4_lp21_yun7__1.png"), 
    path = c("C:/path/A_UK_1_lp21_pmn1__1.png", "C:/path/A_UK_1_xno9_pmn1__1.png", 
    "C:/path/A_UK_2.14.3_lp21_pmn1__1.png", "C:/path/A_UK_2.14.3_xno9_pmn1__1.png", 
    "C:/path/A_UK_2.2_lp21_zn78__1.png", "C:/path/A_UK_2.2_xno9_zn78__1.png", 
    "C:/path/A_UK_2.3_lp21_pmn1__1.png", "C:/path/A_UK_2.3_xno9_pmn1__1.png", 
    "C:/path/A_UK_2.4_lp21_yun7__1.png", "C:/path/A_UK_2.8.1_lp21_pmn1__1.png", 
    "C:/path/A_UK_2.8.1_xno9_pmn1__1.png", "C:/path/A_UK_2.8.2_lp21_pmn1__1.png", 
    "C:/path/A_UK_2.8.2_xno9_pmn1__1.png", "C:/path/B_UK_2.1_lp21_pmn1__1.png", 
    "C:/path/B_UK_2.1_xno9_pmn1__1.png", "C:/path/B_UK_2.14.1_lp21_pmn1__1.png", 
    "C:/path/B_UK_2.14.1_xno9_pmn1__1.png", "C:/path/B_UK_2.14.2_lp21_pmn1__1.png", 
    "C:/path/B_UK_2.14.2_xno9_pmn1__1.png", "C:/path/A_UK_2.14.3_lp21_pmn1__1.png", 
    "C:/path/A_UK_2.14.3_xno9_pmn1__1.png", "C:/path/A_UK_2.2_lp21_zn78__1.png", 
    "C:/path/A_UK_2.2_xno9_zn78__1.png", "C:/path/A_UK_2.3_lp21_pmn1__1.png", 
    "C:/path/A_UK_2.3_xno9_pmn1__1.png", "C:/path/A_UK_2.4_lp21_yun7__1.png", 
    "C:/path/A_UK_2.8.1_lp21_pmn1__1.png", "C:/path/A_UK_2.8.1_xno9_pmn1__1.png", 
    "C:/path/A_UK_2.8.2_lp21_pmn1__1.png", "C:/path/A_UK_2.8.2_xno9_pmn1__1.png", 
    "C:/path/B_UK_2.14.1_lp21_pmn1__1.png", "C:/path/B_UK_2.14.1_xno9_pmn1__1.png", 
    "C:/path/B_UK_2.14.2_lp21_pmn1__1.png", "C:/path/B_UK_2.14.2_xno9_pmn1__1.png", 
    "C:/path/A_UK_2.2_lp21_zn78__1.png", "C:/path/A_UK_2.2_xno9_zn78__1.png", 
    "C:/path/A_UK_2.3_lp21_pmn1__1.png", "C:/path/A_UK_2.3_xno9_pmn1__1.png", 
    "C:/path/A_UK_2.4_lp21_yun7__1.png", "C:/path/A_UK_2.9.1_lp21_yun7__1.png", 
    "C:/path/B_UK_2.12.1_lp21_yun7__1.png", "C:/path/B_UK_2.12.2_lp21_yun7__1.png", 
    "C:/path/B_UK_2.7.1_lp21_pmn1__1.png", "C:/path/B_UK_2.7.1_xno9_pmn1__1.png", 
    "C:/path/B_UK_2.7.4_lp21_yun7__1.png", "C:/path/B_UK_2.9.2_lp21_yun7__1.png", 
    "C:/path/A_UK_2.4_lp21_yun7__1.png", "C:/path/A_UK_2.5.4_lp21_pmn1__1.png", 
    "C:/path/A_UK_2.5.4_xno9_pmn1__1.png", "C:/path/A_UK_2.6.4_lp21_yun7__1.png", 
    "C:/path/B_UK_2.5.3_lp21_yun7__1.png", "C:/path/A_UK_2.4_lp21_yun7__1.png"
    ), assay = c("A", "A", "A", "A", "A", "A", "A", "A", "A", 
    "A", "A", "A", "A", "B", "B", "B", "B", "B", "B", "A", "A", 
    "A", "A", "A", "A", "A", "A", "A", "A", "A", "B", "B", "B", 
    "B", "A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B", 
    "B", "A", "A", "A", "A", "B", "A"), country = c("UK", "UK", 
    "UK", "UK", "UK", "UK", "UK", "UK", "UK", "UK", "UK", "UK", 
    "UK", "UK", "UK", "UK", "UK", "UK", "UK", "UK", "UK", "UK", 
    "UK", "UK", "UK", "UK", "UK", "UK", "UK", "UK", "UK", "UK", 
    "UK", "UK", "UK", "UK", "UK", "UK", "UK", "UK", "UK", "UK", 
    "UK", "UK", "UK", "UK", "UK", "UK", "UK", "UK", "UK", "UK"
    ), strain = c("Covid_123", "Covid_123", "Covid_123", "Covid_123", 
    "Covid_123", "Covid_123", "Covid_123", "Covid_123", "Covid_123", 
    "Covid_123", "Covid_123", "Covid_123", "Covid_123", "Covid_123", 
    "Covid_123", "Covid_123", "Covid_123", "Covid_123", "Covid_123", 
    "Covid_125", "Covid_125", "Covid_125", "Covid_125", "Covid_125", 
    "Covid_125", "Covid_125", "Covid_125", "Covid_125", "Covid_125", 
    "Covid_125", "Covid_125", "Covid_125", "Covid_125", "Covid_125", 
    "Covid_127", "Covid_127", "Covid_127", "Covid_127", "Covid_127", 
    "Covid_127", "Covid_127", "Covid_127", "Covid_127", "Covid_127", 
    "Covid_127", "Covid_127", "Covid_127", "Covid_127", "Covid_127", 
    "Covid_127", "Covid_127", "Covid_128")), spec = structure(list(
    cols = list(png_file = structure(list(), class = c("collector_character", 
    "collector")), path = structure(list(), class = c("collector_character", 
    "collector")), assay = structure(list(), class = c("collector_character", 
    "collector")), country = structure(list(), class = c("collector_character", 
    "collector")), strain = structure(list(), class = c("collector_character", 
    "collector"))), default = structure(list(), class = c("collector_guess", 
    "collector")), delim = ","), class = "col_spec"), row.names = c(NA, 
-52L), class = c("tbl_df", "tbl", "data.frame"))
 
 

您可以使用 slider::slide 来创建子组

library(dplyr)
library(purrr)
library(slider)

# Window size: used below as the slide step and, as N - 1, the number of
# following rows included in each window.
N <- 10

#' Print a summary of one subgroup of files
#'
#' Stand-in for the real collage function: it only reports which group and
#' subgroup is being processed, how many rows `data` has, and the file names
#' in `data$png_file`.
#'
#' @param country Country label of the group.
#' @param strain Strain label of the group.
#' @param subgroupnumber Index of the subgroup within the group.
#' @param data Data frame holding the rows of the subgroup; its `png_file`
#'   column is printed.
#' @return `NULL`, invisibly (the value of the final `cat()` call).
Collage <- function(country, strain, subgroupnumber, data) {
  header <- paste("Processing:", country, "-", strain, "/", subgroupnumber)
  cat(header, "\n")

  count_line <- paste(nrow(data), " files to read \n")
  cat(count_line)

  file_list <- paste(data$png_file, collapse = " ; ")
  cat(file_list, "\n")

  # Blank line between subgroups.
  cat("\n")
}

# For every (country, strain) group, cut the rows into windows of up to N
# rows and call Collage() once per window.
res <- df %>%
  group_by(country, strain) %>%
  group_walk(function(group_rows, group) {
    # One window starting at every N-th row, each covering that row plus the
    # N - 1 rows after it.
    windows <- slider::slide(
      group_rows,
      .f = function(chunk) chunk,
      .step = N,
      .after = N - 1
    )

    # Drop the empty elements.
    windows <- windows[lengths(windows) != 0]

    # Run the wished function on each subgroup, passing its position as the
    # subgroup number.
    iwalk(windows, function(chunk, idx) {
      Collage(group[1], group[2], idx, chunk)
    })
  })

Processing: UK - Covid_123 / 1 
10  files to read 
A_UK_1_lp21_pmn1__1.png ; A_UK_1_xno9_pmn1__1.png ; A_UK_2.14.3_lp21_pmn1__1.png ; A_UK_2.14.3_xno9_pmn1__1.png ; A_UK_2.2_lp21_zn78__1.png ; A_UK_2.2_xno9_zn78__1.png ; A_UK_2.3_lp21_pmn1__1.png ; A_UK_2.3_xno9_pmn1__1.png ; A_UK_2.4_lp21_yun7__1.png ; A_UK_2.8.1_lp21_pmn1__1.png 

Processing: UK - Covid_123 / 2 
9  files to read 
A_UK_2.8.1_xno9_pmn1__1.png ; A_UK_2.8.2_lp21_pmn1__1.png ; A_UK_2.8.2_xno9_pmn1__1.png ; B_UK_2.1_lp21_pmn1__1.png ; B_UK_2.1_xno9_pmn1__1.png ; B_UK_2.14.1_lp21_pmn1__1.png ; B_UK_2.14.1_xno9_pmn1__1.png ; B_UK_2.14.2_lp21_pmn1__1.png ; B_UK_2.14.2_xno9_pmn1__1.png 

Processing: UK - Covid_125 / 1 
10  files to read 
A_UK_2.14.3_lp21_pmn1__1.png ; A_UK_2.14.3_xno9_pmn1__1.png ; A_UK_2.2_lp21_zn78__1.png ; A_UK_2.2_xno9_zn78__1.png ; A_UK_2.3_lp21_pmn1__1.png ; A_UK_2.3_xno9_pmn1__1.png ; A_UK_2.4_lp21_yun7__1.png ; A_UK_2.8.1_lp21_pmn1__1.png ; A_UK_2.8.1_xno9_pmn1__1.png ; A_UK_2.8.2_lp21_pmn1__1.png 

Processing: UK - Covid_125 / 2 
5  files to read 
A_UK_2.8.2_xno9_pmn1__1.png ; B_UK_2.14.1_lp21_pmn1__1.png ; B_UK_2.14.1_xno9_pmn1__1.png ; B_UK_2.14.2_lp21_pmn1__1.png ; B_UK_2.14.2_xno9_pmn1__1.png 

Processing: UK - Covid_127 / 1 
10  files to read 
A_UK_2.2_lp21_zn78__1.png ; A_UK_2.2_xno9_zn78__1.png ; A_UK_2.3_lp21_pmn1__1.png ; A_UK_2.3_xno9_pmn1__1.png ; A_UK_2.4_lp21_yun7__1.png ; A_UK_2.9.1_lp21_yun7__1.png ; B_UK_2.12.1_lp21_yun7__1.png ; B_UK_2.12.2_lp21_yun7__1.png ; B_UK_2.7.1_lp21_pmn1__1.png ; B_UK_2.7.1_xno9_pmn1__1.png 

Processing: UK - Covid_127 / 2 
7  files to read 
B_UK_2.7.4_lp21_yun7__1.png ; B_UK_2.9.2_lp21_yun7__1.png ; A_UK_2.4_lp21_yun7__1.png ; A_UK_2.5.4_lp21_pmn1__1.png ; A_UK_2.5.4_xno9_pmn1__1.png ; A_UK_2.6.4_lp21_yun7__1.png ; B_UK_2.5.3_lp21_yun7__1.png 

Processing: UK - Covid_128 / 1 
1  files to read 
A_UK_2.4_lp21_yun7__1.png 
              1

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM