Create a new column in a dataframe with values based on another column

Question

I'd like to create a new column in an existing data frame that is filled based on the values in another column in the same data frame and based on specific criteria.

# Attempt to fill a new column, subtype_ANCA_type_abr, with an abbreviation
# derived from the Subtype_ANCA_type column.
# NOTE: the first argument to apply() below is a single column extracted with
# `$` (a plain vector, which has no dim attribute), yet MARGIN = 1 asks apply()
# to iterate over rows. This is the call that raises
# "dim(X) must have a positive length".
RAVE_ITN_BVAS_ADVIS3$subtype_ANCA_type_abr <-
  apply(
    RAVE_ITN_BVAS_ADVIS3$Subtype_ANCA_type,
    1,
    # Maps one full subtype label to its abbreviation via exact string
    # comparison; each matching branch returns immediately.
    FUN = function(x) {
      if (x == "Wegener's Granulomatosis (WG)-PR3") {
        return("GPA_PR3")
      }

      if (x == "Wegener's Granulomatosis (WG)-MPO") {
        return("GPA_MPO")
      }

      if (x == "Microscopic Polyangiitis (MPA)-PR3") {
        return("MPA_PR3")
      }

      if (x == "Microscopic Polyangiitis (MPA)-MPO") {
        return("MPA_MPO")
      }

      # No fallback branch: a label matching none of the four strings above
      # falls through and the function returns NULL.
    }
  )

# NOTE(review): this views `Subtype_ANCA_type_abr` (capital "S"), but the
# column assigned above is `subtype_ANCA_type_abr` (lower-case "s"); R names
# are case-sensitive, so these are different columns.
View(RAVE_ITN_BVAS_ADVIS3$Subtype_ANCA_type_abr)

I've tried the code above (apologies for the poor formatting; Stack Overflow seems to have changed things up recently).

I keep getting the error:

Error in apply(RAVE_ITN_BVAS_ADVIS3$Subtype_ANCA_type, 1, FUN = function(x) { : 
  dim(X) must have a positive length

Any help here would be immensely appreciated, many thanks.

Answer 1

This is a strange place to use apply. R has a lot of other functions to help. I would look up some tutorials on this subject. There are a ton of similar questions on Stack Overflow.

The problem with your apply call is that apply expects an object with dimensions (a matrix or array; a data frame is coerced to a matrix) and, with MARGIN = 1, calls your function once for each row. You are passing a single column, which is a plain vector, and a vector has no rows (no dim attribute), hence the "dim(X) must have a positive length" error. If you pass the whole data frame instead, each x in your function is a "named vector" holding one row. You can extract the appropriate item from that named vector using x["Subtype_ANCA_type"]. But I wouldn't solve the problem this way; I just wanted to explain how to use apply, since that is the question you asked.

#************************************************************#
# The original question, with apply() given the whole data frame
# Example data: the four full subtype labels, repeated twice (8 rows).
RAVE_ITN_BVAS_ADVIS3 <- data.frame(
  Subtype_ANCA_type = rep(
    c("Wegener's Granulomatosis (WG)-PR3",
      "Wegener's Granulomatosis (WG)-MPO",
      "Microscopic Polyangiitis (MPA)-PR3",
      "Microscopic Polyangiitis (MPA)-MPO"),
    2
  ),
  stringsAsFactors = FALSE)

# Map one row of the data frame to its abbreviated subtype.
#
# x: a named character vector holding one row, as passed by apply(X, 1, FUN);
#    the label is read from the element named "Subtype_ANCA_type".
# Returns a length-one, unnamed character vector: the abbreviation, or
# NA_character_ when the label is NA or is not one of the four known labels.
#
# Why not a chain of `if (...) return(...)`: with no fallback branch an
# unmatched label returns NULL, which makes apply() return a list instead of a
# character vector, and an NA label makes `if` fail outright. A named lookup
# always yields exactly one character value per row.
abbreviate_anca_subtype <- function(x) {
  abbreviations <- c(
    "Wegener's Granulomatosis (WG)-PR3" = "GPA_PR3",
    "Wegener's Granulomatosis (WG)-MPO" = "GPA_MPO",
    "Microscopic Polyangiitis (MPA)-PR3" = "MPA_PR3",
    "Microscopic Polyangiitis (MPA)-MPO" = "MPA_MPO"
  )
  # Indexing by name gives NA for unknown names; unname() drops the label so
  # the result is a bare abbreviation.
  unname(abbreviations[x["Subtype_ANCA_type"]])
}

# MARGIN = 1: call the helper once per row of the data frame.
RAVE_ITN_BVAS_ADVIS3$subtype_ANCA_type_abr <-
  apply(RAVE_ITN_BVAS_ADVIS3, 1, FUN = abbreviate_anca_subtype)

If you want to do things manually (like you are doing above) you can simply use the [] notation to identify where to put the new column data.

#************************************************************#
# Manually add new column for alternative variables
# Full label -> abbreviation, in the order the assignments are applied.
abbreviations <- c(
  "Wegener's Granulomatosis (WG)-PR3" = "GPA_PR3",
  "Wegener's Granulomatosis (WG)-MPO" = "GPA_MPO",
  "Microscopic Polyangiitis (MPA)-PR3" = "MPA_PR3",
  "Microscopic Polyangiitis (MPA)-MPO" = "MPA_MPO"
)

# Example data: the four full labels, repeated twice (8 rows).
RAVE_ITN_BVAS_ADVIS3 <- data.frame(
  Subtype_ANCA_type = rep(names(abbreviations), times = 2),
  stringsAsFactors = FALSE
)

# For the rows in the dataframe where Subtype_ANCA_type == "something", fill
# the new column. The first assignment creates the column (NA in the rows not
# yet filled); later assignments fill in the remaining rows.
for (full_label in names(abbreviations)) {
  is_match <- RAVE_ITN_BVAS_ADVIS3$Subtype_ANCA_type == full_label
  RAVE_ITN_BVAS_ADVIS3[is_match, "subtype_ANCA_type_abr"] <-
    abbreviations[[full_label]]
}

RAVE_ITN_BVAS_ADVIS3
#                    Subtype_ANCA_type subtype_ANCA_type_abr
# 1  Wegener's Granulomatosis (WG)-PR3               GPA_PR3
# 2  Wegener's Granulomatosis (WG)-MPO               GPA_MPO
# 3 Microscopic Polyangiitis (MPA)-PR3               MPA_PR3
# 4 Microscopic Polyangiitis (MPA)-MPO               MPA_MPO
# 5  Wegener's Granulomatosis (WG)-PR3               GPA_PR3
# 6  Wegener's Granulomatosis (WG)-MPO               GPA_MPO
# 7 Microscopic Polyangiitis (MPA)-PR3               MPA_PR3
# 8 Microscopic Polyangiitis (MPA)-MPO               MPA_MPO

If you're going to have a lot of these, you may want to make a lookup table. You can even build the lookup table as a CSV file in Excel (or any other tool) and read it in with read.csv.

#************************************************************#
# Add new column from a lookup table
# Lookup table: one row per full subtype label with its abbreviation.
abv_lookup <- data.frame(
  Subtype_ANCA_type = c(
    "Wegener's Granulomatosis (WG)-PR3",
    "Wegener's Granulomatosis (WG)-MPO",
    "Microscopic Polyangiitis (MPA)-PR3",
    "Microscopic Polyangiitis (MPA)-MPO"
  ),
  subtype_ANCA_type_abr = c(
    "GPA_PR3",
    "GPA_MPO",
    "MPA_PR3",
    "MPA_MPO"
  ),
  stringsAsFactors = FALSE
)

# Example data: the four full labels, repeated twice (8 rows).
RAVE_ITN_BVAS_ADVIS3 <- data.frame(
  Subtype_ANCA_type = rep(
    c("Wegener's Granulomatosis (WG)-PR3",
      "Wegener's Granulomatosis (WG)-MPO",
      "Microscopic Polyangiitis (MPA)-PR3",
      "Microscopic Polyangiitis (MPA)-MPO"),
    2
  ),
  stringsAsFactors = FALSE)

# Merge the two dataframes together on Subtype_ANCA_type.
# - by = ...     : name the key explicitly instead of relying on "whatever
#                  columns the two data frames happen to share".
# - all.x = TRUE : keep every row of the data (left join); a label missing
#                  from the lookup gets NA instead of being silently dropped.
# merge() sorts the result on the key by default, which is why the row order
# printed below differs from the input order.
RAVE_ITN_BVAS_ADVIS3 <- merge(
  RAVE_ITN_BVAS_ADVIS3,
  abv_lookup,
  by = "Subtype_ANCA_type",
  all.x = TRUE
)

RAVE_ITN_BVAS_ADVIS3
#                    Subtype_ANCA_type subtype_ANCA_type_abr
# 1 Microscopic Polyangiitis (MPA)-MPO               MPA_MPO
# 2 Microscopic Polyangiitis (MPA)-MPO               MPA_MPO
# 3 Microscopic Polyangiitis (MPA)-PR3               MPA_PR3
# 4 Microscopic Polyangiitis (MPA)-PR3               MPA_PR3
# 5  Wegener's Granulomatosis (WG)-MPO               GPA_MPO
# 6  Wegener's Granulomatosis (WG)-MPO               GPA_MPO
# 7  Wegener's Granulomatosis (WG)-PR3               GPA_PR3
# 8  Wegener's Granulomatosis (WG)-PR3               GPA_PR3

Create a new column in a dataframe with values based on another column

Question

1 answer

solution1
1 ACCEPTED 2019-01-24 21:42:32

Create a new column in a dataframe with values based on another column

Question

1 answer

solution1 1 ACCEPTED 2019-01-24 21:42:32

solution1
1 ACCEPTED 2019-01-24 21:42:32