[英]R: update rownames in data.frame using conversion table
我有一个以下形式的data.frame:
> set.seed(1)
> myp <- paste0('P', sort(sample(1:15, 10)))
> mydf <- data.frame(param=c(paste0(myp, 'B'), paste0(myp, 'R'), paste0(myp, 'max'), paste0(myp, 'min')),
+ value1=NA, value2=NA, value3=NA)
> rownames(mydf) <- mydf$param
> mydf$param <- NULL
> mydf
value1 value2 value3
P1B NA NA NA
P3B NA NA NA
P4B NA NA NA
P5B NA NA NA
P6B NA NA NA
P8B NA NA NA
P9B NA NA NA
P10B NA NA NA
P11B NA NA NA
P14B NA NA NA
P1R NA NA NA
P3R NA NA NA
P4R NA NA NA
P5R NA NA NA
P6R NA NA NA
P8R NA NA NA
P9R NA NA NA
P10R NA NA NA
P11R NA NA NA
P14R NA NA NA
P1max NA NA NA
P3max NA NA NA
P4max NA NA NA
P5max NA NA NA
P6max NA NA NA
P8max NA NA NA
P9max NA NA NA
P10max NA NA NA
P11max NA NA NA
P14max NA NA NA
P1min NA NA NA
P3min NA NA NA
P4min NA NA NA
P5min NA NA NA
P6min NA NA NA
P8min NA NA NA
P9min NA NA NA
P10min NA NA NA
P11min NA NA NA
P14min NA NA NA
我想更新行的名称,为此我有一个类似于下面的转换表:
> conv.df <- data.frame(name=myp, new.name=paste0('P', 1:10))
> conv.df
name new.name
1 P1 P1
2 P3 P2
3 P4 P3
4 P5 P4
5 P6 P5
6 P8 P6
7 P9 P7
8 P10 P8
9 P11 P9
10 P14 P10
我唯一想做的事情就是更新 mydf 的 rownames,使其反映转换表 conv.df 中的 new.name。
这看起来很简单,但我就是想不明白……我想尝试用正则表达式(regular expression)的方法。唯一的问题是,我不能100%确定行名只会是 PnB、PnR、Pnmax、Pnmin 这几种形式……所以我想要一个适用于任何 PnX 形式的解决方案(它始终是 Pn 后跟 [:alpha:] 字符)。
您可以先把行名(rowname)转换为一列,将根部分“Pn”与字母后缀“[:alpha:]”拆开,然后按如下方式重命名:
# Reproducible example data: 10 parameter ids sampled from P1..P15.
set.seed(1)
myp <- paste0("P", sort(sample(1:15, 10)))

# One row per id/suffix combination, ordered suffix by suffix (all "B" rows
# first, then "R", "max", "min"); the value columns are NA placeholders.
mydf <- data.frame(
  param = paste0(myp, rep(c("B", "R", "max", "min"), each = length(myp))),
  value1 = NA,
  value2 = NA,
  value3 = NA
)

# Move the identifier column into the row names, then drop it.
row.names(mydf) <- mydf[["param"]]
mydf[["param"]] <- NULL
mydf
library(tidyverse)
# Rename the rows of `mydf`: split every row name into its "Pn" root and the
# trailing suffix, recode the root, then glue the two parts back together.
mydf %>%
  rownames_to_column() %>%
  mutate(
    # "\\d+" (instead of "\\d\\d?") keeps ids with three or more digits
    # intact, e.g. "P123B" -> root "P123", suffix "B".
    root = sub("^(P\\d+).*$", "\\1", rowname),
    letter = sub("^P\\d+(.*)$", "\\1", rowname)
  ) %>%
  # Old root on the left, new root on the right.
  mutate(
    root = recode(
      root,
      P1 = "P1",
      P3 = "P2",
      P4 = "P3",
      P5 = "P4",
      P6 = "P5",
      P8 = "P6",
      P9 = "P7",
      P10 = "P8",
      P11 = "P9",
      P14 = "P10"
    )
  ) %>%
  mutate(rowname = paste0(root, letter)) %>%
  column_to_rownames() %>%
  # Drop the helper columns so only the original value columns remain.
  select(-root, -letter)
@teofil 的答案很好,而且很有效。下面是另一种实现方法,仍然使用 recode,并借助一些元编程:
library(tidyverse)
rename_col_df <- function(data, colname, df_rename) {
  # Recode the values of one column of a data frame using a conversion table.
  #
  # data:      the input data frame.
  # colname:   the column to be modified, given as an unquoted name.
  # df_rename: conversion table; must have the columns `name` (old values)
  #            and `new.name` (their replacements).
  #
  # Returns `data` with `colname` recoded by recode(); values that do not
  # appear in `df_rename$name` are handled by recode()'s defaults. If the
  # table holds no usable entries, `data` is returned unchanged.
  colname <- enexpr(colname) # capture the user-supplied column name as a symbol

  stopifnot(all(c("name", "new.name") %in% names(df_rename)))

  # Factor columns (the data.frame() default before R 4.0) must be turned into
  # plain strings: old names become argument names, new names become values.
  old_name <- as.character(df_rename[["name"]])
  new_name <- df_rename[["new.name"]]
  if (is.factor(new_name)) {
    new_name <- as.character(new_name)
  }

  # Skip missing old names; if an old name is listed more than once, the last
  # entry wins.
  keep <- !is.na(old_name) & !duplicated(old_name, fromLast = TRUE)
  if (!any(keep)) {
    # recode() rejects an empty set of replacements, so there is nothing to do.
    return(data)
  }
  mapping <- setNames(new_name[keep], old_name[keep])

  # Splice the named vector into recode() as `old = new` arguments.
  data %>% mutate(!!colname := recode(!!colname, !!!mapping))
}
# Conversion table: old root names and their consecutive replacements, kept as
# character columns (not factors).
conv.df <- data.frame(
  name = myp,
  new.name = paste0("P", seq_along(myp)),
  stringsAsFactors = FALSE
)

# Split every row name into its "Pn" root and the trailing suffix, recode the
# root through the conversion table, then rebuild the row names.
mydf %>%
  rownames_to_column() %>%
  mutate(
    # "\\d+" (instead of "\\d\\d?") keeps ids with three or more digits intact.
    root = sub("^(P\\d+).*$", "\\1", rowname),
    letter = sub("^P\\d+(.*)$", "\\1", rowname)
  ) %>%
  rename_col_df(root, conv.df) %>%
  mutate(rowname = paste0(root, letter)) %>%
  column_to_rownames() %>%
  # Drop the helper columns so only the original value columns remain.
  select(-root, -letter)
如果我们想将重命名应用于向量而不是数据框:
rename_vec_df <- function(vec, df_rename) {
  # Recode the values of a vector using a conversion table.
  #
  # vec:       the vector to be modified.
  # df_rename: conversion table; must have the columns `name` (old values)
  #            and `new.name` (their replacements).
  #
  # Returns the result of recode() applied to `vec`; values that do not appear
  # in `df_rename$name` are handled by recode()'s defaults. If the table holds
  # no usable entries, `vec` is returned unchanged.
  stopifnot(all(c("name", "new.name") %in% names(df_rename)))

  # Factor columns (the data.frame() default before R 4.0) must be turned into
  # plain strings: old names become argument names, new names become values.
  old_name <- as.character(df_rename[["name"]])
  new_name <- df_rename[["new.name"]]
  if (is.factor(new_name)) {
    new_name <- as.character(new_name)
  }

  # Skip missing old names; if an old name is listed more than once, the last
  # entry wins.
  keep <- !is.na(old_name) & !duplicated(old_name, fromLast = TRUE)
  if (!any(keep)) {
    # recode() rejects an empty set of replacements, so there is nothing to do.
    return(vec)
  }
  mapping <- setNames(new_name[keep], old_name[keep])

  # Splice the named vector into recode() as `old = new` arguments.
  recode(vec, !!!mapping)
}
# Example: draw 10 ids from P1..P15 and map them onto P1..P10.
myp <- sprintf("P%d", sort(sample(1:15, 10)))
conv.df <- data.frame(
  name = myp,
  new.name = sprintf("P%d", 1:10),
  stringsAsFactors = FALSE
)

# Recode a few names through the conversion table.
vec <- c("P1", "P14", "P10")
rename_vec_df(vec, conv.df)
要进一步了解此处使用的技术,请参阅:
https://rlang.r-lib.org/reference/quotation.html https://adv-r.hadley.nz/metaprogramming.html
接着 @Sada93 的代码:要避免 recode 步骤,可以改用 join:
library(tidyverse)
# Reproducible example data: 10 parameter ids sampled from P1..P15.
set.seed(1)
myp <- paste0("P", sort(sample(1:15, 10)))

# One row per id/suffix combination, ordered suffix by suffix (all "B" rows
# first, then "R", "max", "min"); the value columns are NA placeholders.
mydf <- data.frame(
  param = paste0(myp, rep(c("B", "R", "max", "min"), each = length(myp))),
  value1 = NA,
  value2 = NA,
  value3 = NA
)

# Move the identifier column into the row names, then drop it.
rownames(mydf) <- mydf$param
mydf$param <- NULL
mydf

# Conversion table: old root names and their consecutive replacements.
# stringsAsFactors = FALSE keeps both columns as character on every R version,
# so a join on `name` compares strings with strings.
conv.df <- data.frame(
  name = myp,
  new.name = paste0("P", seq_along(myp)),
  stringsAsFactors = FALSE
)
# Rename the rows of `mydf` by joining against the conversion table: split
# every row name into its "Pn" root and the trailing suffix, look the root up
# in `conv.df`, then rebuild the row names from the new root.
mydf %>%
  rownames_to_column() %>%
  mutate(
    # "\\d+" (instead of "\\d\\d?") keeps ids with three or more digits intact.
    name = sub("^(P\\d+).*$", "\\1", rowname),
    letter = sub("^P\\d+(.*)$", "\\1", rowname)
  ) %>%
  left_join(conv.df, by = "name") %>%
  mutate(rowname = paste0(new.name, letter)) %>%
  column_to_rownames() %>%
  # Only the helper columns are dropped; `new.name` stays as an extra column.
  select(-name, -letter)
value1 value2 value3 new.name
P1B NA NA NA P1
P2B NA NA NA P2
P3B NA NA NA P3
P4B NA NA NA P4
P5B NA NA NA P5
P6B NA NA NA P6
P7B NA NA NA P7
P8B NA NA NA P8
P9B NA NA NA P9
P10B NA NA NA P10
P1R NA NA NA P1
数据框中不能有重复的行名。下面是用基础 R 将新名称作为一列添加进去的一种方法。这里我们提取原始 rownames 中的公共部分,即“P”后跟数字,用 match 将其与 conv.df$name 进行匹配,并取得对应的 conv.df$new.name。
# Look up the new name for every row: strip each row name down to its
# "P<digits>" root, find that root in the conversion table, and store the
# matching replacement in a new column.
mydf[["new_name"]] <- conv.df[["new.name"]][
  match(
    sub("(P\\d+).*", "\\1", row.names(mydf)),
    conv.df[["name"]]
  )
]
mydf
# value1 value2 value3 new_name
#P1B NA NA NA P1
#P2B NA NA NA P2
#P3B NA NA NA P3
#P4B NA NA NA P4
#P7B NA NA NA P5
#P8B NA NA NA P6
#P9B NA NA NA P7
#P11B NA NA NA P8
#P12B NA NA NA P9
#P13B NA NA NA P10
#P1R NA NA NA P1
#P2R NA NA NA P2
#...
其中
sub("(P\\d+).*", "\\1", rownames(mydf)) #returns
#[1] "P1" "P2" "P3" "P4" "P7" "P8" "P9" "P11" "P12" "P13" "P1" "P2" "P3"
# "P4" "P7" "P8" "P9" "P11" "P12" "P13" "P1" "P2" "P3" "P4" "P7" "P8"
# "P9" "P11" "P12" "P13" "P1" "P2" "P3" "P4" "P7" "P8" "P9" "P11" "P12"
# "P13"
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.