简体   繁体   中英

Assign specific value based on two column in R

I have a dataframe which looks like:

   Student_ID    Number  Position
    VB-123        10      2
    VB-456        15      5
    VB-789        25      25
    VB-889        12      2
    VB-965        15      7
    VB-758        45      9
    VB-245        25      25

I want to add new column and assign a value based on below conditions:

  1. If only Number is duplicate in entire dataframe then Assign A
  2. If only Position is duplicate in entire dataframe then assign B
  3. If both Number and Position are duplicate then assign C
  4. If none of the duplicate then assign D .

Output would looks like:

Student_ID    Number  Position   Assign
    VB-123        10      2      B
    VB-456        15      5      A
    VB-789        25      25     C
    VB-889        12      2      B
    VB-965        15      7      A
    VB-758        45      9      D
    VB-245        25      25     C

With dplyr,

library(dplyr)

students <- data.frame(Student_ID = c("VB-123", "VB-456", "VB-789", "VB-889", "VB-965", "VB-758", "VB-245"), 
                       Number = c(10L, 15L, 25L, 12L, 15L, 45L, 25L), 
                       Position = c(2L, 5L, 25L, 2L, 7L, 9L, 25L))

students2 <- students %>% 
    mutate_at(vars(Number, Position), funs(n = table(.)[as.character(.)])) %>% 
    mutate(Assign = case_when(Number_n > 1 & Position_n > 1 ~ 'C', 
                              Number_n > 1 ~ 'A', 
                              Position_n > 1 ~ 'B', 
                              TRUE ~ 'D'))

students2
#>   Student_ID Number Position Number_n Position_n Assign
#> 1     VB-123     10        2        1          2      B
#> 2     VB-456     15        5        2          1      A
#> 3     VB-789     25       25        2          2      C
#> 4     VB-889     12        2        1          2      B
#> 5     VB-965     15        7        2          1      A
#> 6     VB-758     45        9        1          1      D
#> 7     VB-245     25       25        2          2      C

As an alternative to the mutate_at line, you could use add_count twice, renaming as necessary. To remove the intermediary columns, tack on select(-matches('_n$')) .

You can more or less replicate the logic in base by assigning to subsets:

students2 <- cbind(students, lapply(students[2:3], function(x) table(x)[as.character(x)]))
students2$Assign <- 'D'
students2$Assign[students2$Number.Freq > 1 & students2$Position.Freq > 1] <- 'C'
students2$Assign[students2$Number.Freq > 1 & students2$Position.Freq == 1] <- 'A'
students2$Assign[students2$Number.Freq == 1 & students2$Position.Freq > 1] <- 'B'
students2[4:7] <- NULL

students2
#>   Student_ID Number Position Assign
#> 1     VB-123     10        2      B
#> 2     VB-456     15        5      A
#> 3     VB-789     25       25      C
#> 4     VB-889     12        2      B
#> 5     VB-965     15        7      A
#> 6     VB-758     45        9      D
#> 7     VB-245     25       25      C

Here is an option using base R . Create a list of column names as in the order of evaluatin ('l1'), pre assign 'D' to create the 'Assign' column in 'dat', loop through the sequence of 'l1', subset the columns of data based on the column names in 'l1', use duplicated to find the duplicate elements and reassign the 'Assign' column to the corresponding LETTER

l1 <- list("Number", "Position", c("Number", "Position"))
dat$Assign <- rep("D", nrow(dat))
for(i in seq_along(l1)){
    df <- dat[l1[[i]]]  
    i1 <- duplicated(df)|duplicated(df, fromLast = TRUE)
    dat$Assign <- replace(dat$Assign, i1, LETTERS[i])
}   

-output

dat
#  Student_ID Number Position Assign
#1     VB-123     10        2      B
#2     VB-456     15        5      A
#3     VB-789     25       25      C
#4     VB-889     12        2      B
#5     VB-965     15        7      A
#6     VB-758     45        9      D
#7     VB-245     25       25      C

A solution using .

library(dplyr)

dat2 <- dat %>% count(Number)
dat3 <- dat %>% count(Position)
dat4 <- dat %>% count(Number, Position)

dat5 <- dat %>%
  left_join(dat2, by = "Number") %>%
  left_join(dat3, by = "Position") %>%
  left_join(dat4, by = c("Number", "Position")) %>%
  mutate(Assign = case_when(
    n > 1               ~ "C",
    n.x > 1 & n.y == 1  ~ "A",
    n.y > 1 & n.x == 1  ~ "B",
    TRUE                ~ "D"
  )) %>%
  select(-n.x, -n.y, -n)
dat5
#   Student_ID Number Position Assign
# 1     VB-123     10        2      B
# 2     VB-456     15        5      A
# 3     VB-789     25       25      C
# 4     VB-889     12        2      B
# 5     VB-965     15        7      A
# 6     VB-758     45        9      D
# 7     VB-245     25       25      C

DATA

dat <- read.table(text = "Student_ID    Number  Position
    'VB-123'        10      2
    'VB-456'        15      5
    'VB-789'        25      25
    'VB-889'        12      2
    'VB-965'        15      7
    'VB-758'        45      9
    'VB-245'        25      25",
                  header = TRUE, stringsAsFactors = FALSE)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM