简体   繁体   中英

Determine which data frames in a list are unique in R

I have a large list of data frames. I want to identify which are unique and assign them unique names. Here is a small example, but I need to automate this for the full list.

# Reference data frame: three integer columns, ten rows.
df1 <- data.frame(a = 1:10, b = 2:11, c = 3:12)
# Exact copy of df1.
df2 <- df1
# Same values as df1, but with columns `a` and `b` in swapped order.
df3 <- data.frame(b = 2:11, a = 1:10, c = 3:12)

# df1 and df2 are identical, so this list holds 2 unique data frames.
# The element names are attached directly while building the list.
dfs <- list(df1 = df1, df2 = df2, df3 = df3)

I want to identify and name the unique data frames, but the code below gives an error:

# Attempt to name the de-duplicated list in place; this is the line that fails.
names(unique(dfs)) = paste0('unique', 1:length(unique(dfs))) 
# this gives an error, reported as: could not find function "name<-"
# NOTE(review): for this exact line R would be expected to complain about
# "unique<-" instead, because the result of unique(dfs) is not an assignable
# target -- confirm against the original session.

Then, I want to assign the unique id to each item in my dfs list, so that df1 and df2 each have a column identifying them as "unique1" and df3 has a column identifying it as "unique2". How can I do this in an automated manner?

*EDITED to clarify. This is what I want to accomplish in the end, associating each original df with a unique identifier, as in:

# Desired end result, written out by hand for the three-element example:
# data frames with identical content share the same identifier.
dfs[[1]]$unique_df <- "unique_1"
dfs[[2]]$unique_df <- "unique_1"
dfs[[3]]$unique_df <- "unique_2"

# Print the list itself; a bare `df` would refer to stats::df, not this list.
dfs

$df1
    a  b  c unique_df
1   1  2  3  unique_1
2   2  3  4  unique_1
3   3  4  5  unique_1
4   4  5  6  unique_1
5   5  6  7  unique_1
6   6  7  8  unique_1
7   7  8  9  unique_1
8   8  9 10  unique_1
9   9 10 11  unique_1
10 10 11 12  unique_1

$df2
    a  b  c unique_df
1   1  2  3  unique_1
2   2  3  4  unique_1
3   3  4  5  unique_1
4   4  5  6  unique_1
5   5  6  7  unique_1
6   6  7  8  unique_1
7   7  8  9  unique_1
8   8  9 10  unique_1
9   9 10 11  unique_1
10 10 11 12  unique_1

$df3
    b  a  c unique_df
1   2  1  3  unique_2
2   3  2  4  unique_2
3   4  3  5  unique_2
4   5  4  6  unique_2
5   6  5  7  unique_2
6   7  6  8  unique_2
7   8  7  9  unique_2
8   9  8 10  unique_2
9  10  9 11  unique_2
10 11 10 12  unique_2
# Collapse the list to its distinct data frames. Note that this overwrites
# `dfs`, so the original list (with duplicates) is no longer available.
dfs <- unique(dfs)
# `dfs` is already de-duplicated here, so it is indexed directly instead of
# calling unique() again. sprintf() over seq_along() yields zero names for an
# empty list, whereas 1:length() would count down from 1 to 0.
names(dfs) <- sprintf("unique_%d", seq_along(dfs))
dfs

$unique_1
    a  b  c
1   1  2  3
2   2  3  4
3   3  4  5
4   4  5  6
5   5  6  7
6   6  7  8
7   7  8  9
8   8  9 10
9   9 10 11
10 10 11 12

$unique_2
    b  a  c
1   2  1  3
2   3  2  4
3   4  3  5
4   5  4  6
5   6  5  7
6   7  6  8
7   8  7  9
8   9  8 10
9  10  9 11
10 11 10 12

We can use lapply to iterate over unique_df and transform to add a new column:

# Reduce the list to its distinct data frames first.
unique_df <- unique(dfs)

# Walk the distinct data frames together with their positions and append a
# constant `unique_id` column ("unique1", "unique2", ...) to each one.
Map(
  function(frame, i) transform(frame, unique_id = paste0("unique", i)),
  unique_df,
  seq_along(unique_df)
)


#[[1]]
#    a  b  c unique_id
#1   1  2  3   unique1
#2   2  3  4   unique1
#3   3  4  5   unique1
#4   4  5  6   unique1
#5   5  6  7   unique1
#6   6  7  8   unique1
#7   7  8  9   unique1
#8   8  9 10   unique1
#9   9 10 11   unique1
#10 10 11 12   unique1

#[[2]]
#    b  a  c unique_id
#1   2  1  3   unique2
#2   3  2  4   unique2
#3   4  3  5   unique2
#4   5  4  6   unique2
#5   6  5  7   unique2
#6   7  6  8   unique2
#7   8  7  9   unique2
#8   9  8 10   unique2
#9  10  9 11   unique2
#10 11 10 12   unique2

Or in a tidyverse way.

library(dplyr)
# Add a `unique_id` column to each distinct data frame. The label is built as
# "unique<i>" so the result matches the base R lapply()/transform() version,
# rather than storing the bare integer index.
purrr::map(
  seq_along(unique_df),
  function(i) mutate(unique_df[[i]], unique_id = paste0("unique", i))
)

We can use match to get the index of each data frame among the unique elements, and then use Map to create the column in base R:

# Position of each data frame within the de-duplicated list; duplicates map to
# the same position.
unique_pos <- match(dfs, unique(dfs))
# One label per original list element, e.g. "unique_1".
unique_label <- paste0("unique_", unique_pos)
# Bind the matching label onto every data frame as column `unique_df`.
Map(cbind, dfs, unique_df = unique_label)
#$df1
#    a  b  c unique_df
#1   1  2  3  unique_1
#2   2  3  4  unique_1
#3   3  4  5  unique_1
#4   4  5  6  unique_1
#5   5  6  7  unique_1
#6   6  7  8  unique_1
#7   7  8  9  unique_1
#8   8  9 10  unique_1
#9   9 10 11  unique_1
#10 10 11 12  unique_1

#$df2
#    a  b  c unique_df
#1   1  2  3  unique_1
#2   2  3  4  unique_1
#3   3  4  5  unique_1
#4   4  5  6  unique_1
#5   5  6  7  unique_1
#6   6  7  8  unique_1
#7   7  8  9  unique_1
#8   8  9 10  unique_1
#9   9 10 11  unique_1
#10 10 11 12  unique_1

#$df3
#    b  a  c unique_df
#1   2  1  3  unique_2
#2   3  2  4  unique_2
#3   4  3  5  unique_2
#4   5  4  6  unique_2
#5   6  5  7  unique_2
#6   7  6  8  unique_2
#7   8  7  9  unique_2
#8   9  8 10  unique_2
#9  10  9 11  unique_2
#10 11 10 12  unique_2

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM