passing strings as arguments in ddply

Question

# Reproducible example data: 100 rows of categorical attributes
# (category, superregion, country, market), a hospitalID drawn from 1..50
# and a 0/1 IndicatorFlag. The seed is fixed so every run yields the same rows.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change the sampling behaviour.
set.seed(1)
df <- data.frame(
  # 50 labels (A-E, ten each); data.frame() recycles them to 100 rows.
  category = rep(LETTERS[1:5], each = 10),
  superregion = sample(
    c("EMEA", "LATAM", "AMER", "APAC"),
    100,
    replace = TRUE
  ),
  country = sample(paste0("Country", 1:8), 100, replace = TRUE),
  market = sample(paste0("Market", 1:12), 100, replace = TRUE),
  hospitalID = sample(seq(1, 50, 1), 100, replace = TRUE),
  IndicatorFlag = sample(seq(0, 1, 1), 100, replace = TRUE)
)

I'm trying to create a summary table (SummaryTab) grouped by a level and a binary indicator (IndicatorFlag), where the level is an argument that can be either country or market. I therefore want to combine the following two examples into one by making the level an argument.

 # Two hard-coded variants of the same summary: the number of distinct
 # hospitalID values per group, grouped first by (market, IndicatorFlag) and
 # then by (country, IndicatorFlag). Both results are assigned to SummaryTab1,
 # so the second assignment replaces the first.
 SummaryTab1 <- ddply(
   df,
   .(market, IndicatorFlag),
   summarize,
   counts = length(unique(hospitalID))
 )
 SummaryTab1 <- ddply(
   df,
   .(country, IndicatorFlag),
   summarize,
   counts = length(unique(hospitalID))
 )

With level as an argument, I tried the following:

 # Attempt 1: build the grouping specification as a single character string.
 level<-c("market")
 # `string` is the one-element value "market , IndicatorFlag", not two names.
 string<-paste(level,"IndicatorFlag",sep=" , ")
 # NOTE(review): `.(string)` appears to be taken as one grouping expression
 # called `string` rather than as the columns spelled inside the text, which
 # matches the reported result of "just a string" -- confirm against plyr's
 # documentation for `.()`.
 SummaryTab1 = ddply(df,.(string) , summarize, counts=length(unique(hospitalID)))

which just gives a string

I also tried this

   # Attempt 2: paste the whole ddply() call together as text and hand it to
   # as.formula().
   # NOTE(review): the pasted text is a function call with no `~`, so it is
   # presumably not a formula and as.formula() is not the right tool -- confirm.
   SummaryTab1 =as.formula(paste0("ddply(df,.(",level,",IndicatorFlag),summarize, counts=length(unique(hospitalID)))"))

Any suggestions on how to do this?

What I'm trying to do is group by the level and the IndicatorFlag

Following what Miha suggested, I tried the following (neither works):

library(dplyr)

# Attempt 3a (dplyr): `my_level` and `IndicatorFlag` are both passed unquoted
# to group_by_().
# NOTE(review): group_by_() is the standard-evaluation variant, so the bare
# `IndicatorFlag` symbol is presumably the problem here -- confirm against
# the dplyr documentation.
SumTab<-df %>% group_by_(my_level,IndicatorFlag) %>% summarise(counts = length(unique(hospitalID)))

# Attempt 3b (plyr): `my_level[2]` is placed inside `.()`.
# NOTE(review): `.()` quotes its arguments, so this likely groups by the
# expression `my_level[2]` rather than by the column it names -- confirm.
SumTab<-ddply(df, .(my_level[2],IndicatorFlag), summarize, counts=length(unique(hospitalID)))

Answer 1

Is this what you are after...

library(plyr)
library(dplyr)

Data

# Reproducible example data: 100 rows of categorical attributes
# (category, superregion, country, market), a hospitalID drawn from 1..50
# and a 0/1 IndicatorFlag. The seed is fixed so every run yields the same rows.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change the sampling behaviour.
set.seed(1)

df <- data.frame(
  # 50 labels (A-E, ten each); data.frame() recycles them to 100 rows.
  category = rep(LETTERS[1:5], each = 10),
  superregion = sample(
    c("EMEA", "LATAM", "AMER", "APAC"),
    100,
    replace = TRUE
  ),
  country = sample(paste0("Country", 1:8), 100, replace = TRUE),
  market = sample(paste0("Market", 1:12), 100, replace = TRUE),
  hospitalID = sample(seq(1, 50, 1), 100, replace = TRUE),
  IndicatorFlag = sample(seq(0, 1, 1), 100, replace = TRUE)
)

Solution

dplyr

Solution updated with suggestions given by docendo discimus in comment section below.

# your level of choice (a column name held as a string)
my_level <- "market"

# Group by the column named in `my_level` and count the distinct hospitalID
# values in each group.
# group_by_() is deprecated; across(all_of(...)) (dplyr >= 1.0.0) is the
# supported way to pick grouping columns from a character vector, and it
# raises an error if a requested column does not exist.
df %>%
  group_by(across(all_of(my_level))) %>%
  summarise(counts = n_distinct(hospitalID))

     market counts
1   Market1      5
2  Market10      4
3  Market11      3
4  Market12      9
5   Market2     12
6   Market3     10
7   Market4     12
8   Market5      8
9   Market6      9
10  Market7      7
11  Market8      4
12  Market9      5

# multiple levels: candidate grouping columns, held as strings
my_level <- c("market", "country", "IndicatorFlag")

# Group by the first and third entries only (market and IndicatorFlag) and
# count the distinct hospitalID values in each group.
# group_by_(.dots = ) is deprecated; across(all_of(...)) (dplyr >= 1.0.0)
# takes the same character vector of column names.
df %>%
  group_by(across(all_of(my_level[c(1, 3)]))) %>%
  summarise(counts = n_distinct(hospitalID))

     market IndicatorFlag counts
1   Market1             0      1
2   Market1             1      5
3  Market10             0      2
4  Market10             1      3
5  Market11             1      3
6  Market12             0      7
7  Market12             1      3
8   Market2             0      4
9   Market2             1     10
10  Market3             0      7
..      ...           ...    ...

 # using all levels

  # Group by every column named in `my_level` and count the distinct
  # hospitalID values in each group.
  # group_by_(.dots = ) is deprecated; across(all_of(...)) (dplyr >= 1.0.0)
  # takes the same character vector of column names.
  df %>%
    group_by(across(all_of(my_level))) %>%
    summarise(counts = n_distinct(hospitalID))

     market  country IndicatorFlag counts
1   Market1 Country1             1      1
2   Market1 Country2             1      1
3   Market1 Country3             1      2
4   Market1 Country6             0      1
5   Market1 Country8             1      1
6  Market10 Country2             1      1
7  Market10 Country3             0      1
8  Market10 Country3             1      1
9  Market10 Country5             1      1
10 Market10 Country7             0      1
..      ...      ...           ...    ...

plyr

# plyr variant of the (market, IndicatorFlag) summary: the grouping columns
# are handed to ddply() as a character vector (entries 1 and 3 of `my_level`),
# and only the leading rows of the result are shown.
head(
  ddply(
    df,
    my_level[c(1, 3)],
    summarize,
    counts = n_distinct(hospitalID)
  )
)

    market IndicatorFlag counts
1  Market1             0      1
2  Market1             1      5
3 Market10             0      2
4 Market10             1      3
5 Market11             1      3
6 Market12             0      7

passing strings as arguments in ddply

Question

1 answer

solution1
1 ACCEPTED 2015-03-12 08:46:41

Data

Solution

dplyr

plyr

passing strings as arguments in ddply

Question

1 answer

solution1 1 ACCEPTED 2015-03-12 08:46:41

Data

Solution

dplyr

plyr

solution1
1 ACCEPTED 2015-03-12 08:46:41