簡體   English   中英

R-匯總兩列

[英]R - aggregate two columns

我有一個看起來像這樣的數據框

 id1    id2    attr   
 ------------------
 11              a     
 11              a    
         11      a   
         11      b   
         11      c   
 22              a   
 22              a
         22      a
         22      a
 33              d
 44              e

我希望它看起來像這樣。 id1,id2是計數(頻率)。

id1    id2    attr   
 ------------------
 2              a     
        1       a  
        1       b
        1       c
 2              a
        2       a
 1              d
 1              e

空白處沒有值,如有需要,我可以用 NA 填充。 我嘗試過使用 aggregate 函數,但無法得到想要的輸出。 感謝您的幫助。

這是你的數據

# Example data from the question: each row carries an id in exactly one of
# id1/id2 (the other is NA) plus a categorical attribute.
dat <- data.frame(
  id1 = c(11L, 11L, NA, NA, NA, 22L, 22L, NA, NA, 33L, 44L),
  id2 = c(NA, NA, 11L, 11L, 11L, NA, NA, 22L, 22L, NA, NA),
  attr = factor(
    c("a", "a", "a", "b", "c", "a", "a", "a", "a", "d", "e"),
    levels = c("a", "b", "c", "d", "e")
  )
)

所需的輸出格式並不典型,但使用 'plyr' 似乎可以做到:

library(plyr)

# Within each (id1, id2) group, give every row the number of rows in that
# group sharing its attr value. ave() returns exactly one count per row, so
# the counts stay aligned with the rows for any mix of attr values in a group
# (count(attr) returns one row per distinct attr and only lines up when a
# group's attr values are all identical or all distinct).
temp <- ddply(dat, .(id1, id2), transform,
  freq = ave(seq_along(attr), attr, FUN = length))

# Keep one row per distinct (id1, id2, attr, freq) combination
temp <- unique(temp)

# Single key column holding whichever of id1/id2 is present on the row
# (11, 22, 33 or 44 in the example data)
temp$id <- pmax(temp$id1, temp$id2, na.rm = TRUE)

# Sort by key, then by attr; rows tied on both keep their existing order
temp <- temp[order(temp$id, temp$attr), ]

# Overwrite each non-missing id with the frequency of its row; the missing
# side stays NA
temp$id1 <- ifelse(is.na(temp$id1), NA, temp$freq)
temp$id2 <- ifelse(is.na(temp$id2), NA, temp$freq)


# Keep only the columns of interest, selected by name rather than position
temp <- temp[c("id1", "id2", "attr")]

temp



   id1 id2 attr
1    2  NA    a
7   NA   1    a
8   NA   1    b
9   NA   1    c
3    2  NA    a
10  NA   2    a
5    1  NA    d
6    1  NA    e

沿用 @jfreels 的做法,使用 dplyr 對 dat 進行 tally:

library(dplyr)

# Frequency (column `n`) of every distinct (id1, id2, attr) combination
dat1 <- tally(group_by(dat, id1, id2, attr))

# Distinct rows of the input, in order of first appearance
dat2 <- unique(dat)

# Attach the counts to the distinct rows via a natural join on the shared
# columns, overwrite each non-missing id with its count, then drop `n`
dat2 %>%
  left_join(dat1) %>%
  mutate(
    id1 = ifelse(is.na(id1), NA, n),
    id2 = ifelse(is.na(id2), NA, n)
  ) %>%
  select(-n)
#Joining by: c("id1", "id2", "attr")
 #  id1 id2 attr
#1   2  NA    a
#2  NA   1    a
#3  NA   1    b
#4  NA   1    c
#5   2  NA    a
#6  NA   2    a
#7   1  NA    d
#8   1  NA    e

此方法的結果未按照您想要的方式精確格式化,但可能更易於理解。

# load library
library(dplyr)

# your data
# Example data: each row has an id in exactly one of id1/id2 (NA in the
# other) and a categorical attribute.
dat <- data.frame(
  id1 = c(11L, 11L, NA, NA, NA, 22L, 22L, NA, NA, 33L, 44L),
  id2 = c(NA, NA, 11L, 11L, 11L, NA, NA, 22L, 22L, NA, NA),
  attr = factor(
    c("a", "a", "a", "b", "c", "a", "a", "a", "a", "d", "e"),
    levels = c("a", "b", "c", "d", "e")
  )
)

# Group by all three columns; tally() then reports the number of rows in each
# (id1, id2, attr) group in a column named `n`
tally(group_by(dat, id1, id2, attr))

# output
Source: local data frame [8 x 4]
Groups: id1, id2

  id1 id2 attr n
1  11  NA    a 2
2  22  NA    a 2
3  33  NA    d 1
4  44  NA    e 1
5  NA  11    a 1
6  NA  11    b 1
7  NA  11    c 1
8  NA  22    a 2

請原諒我拙劣的 R 代碼,但為了得到您想要的結果,我不得不用一些非常規的做法。 遺憾的是,這段代碼的可擴展性不高。 它當然還可以改進,但確實能得到示例中的輸出。 唯一的區別是假定您的輸入數據在空白處為 NA。

# NOTE(review): `rawdata` is not defined in this snippet; it is presumably the
# input data frame with columns id1, id2 and attr and NA in the blank cells
# (e.g. rawdata <- dat) -- confirm before running.

# Distinct rows, in order of first appearance (computed once and reused)
unique.rows <- rawdata[!duplicated(rawdata), ]

# Concatenate each row into a single key string. do.call(paste, ...) pastes
# the columns element-wise, so the data frame is never coerced to a character
# matrix the way apply() would; unname() keeps column names from being
# mistaken for paste() arguments.
pasted.rows <- do.call(paste, c(unname(as.list(rawdata)), sep = "-"))

# Keys of the distinct rows, in the same order as unique.rows
unique.pasted <- pasted.rows[!duplicated(rawdata)]

# Get frequencies and maintain row order. as.vector() drops the "table" class
# and the names so that data.frame() below builds plain count columns.
frequencies <- as.vector(table(pasted.rows)[unique.pasted])

# Separate id1 and id2: a count is shown only in the column whose id is present
id1.freq <- frequencies
id1.freq[is.na(unique.rows[["id1"]])] <- NA
id2.freq <- frequencies
id2.freq[is.na(unique.rows[["id2"]])] <- NA

# Obtain the final table
final.table <- data.frame(
  id1 = id1.freq,
  id2 = id2.freq,
  attr = unique.rows[["attr"]]
)

# Remove row names
row.names(final.table) <- NULL

# Replace NA with empty values (this turns the count columns into character)
final.table[is.na(final.table)] <- ""
final.table

 id1 id2 attr
1   2        a
2       1    a
3       1    b
4       1    c
5   2        a
6       2    a
7   1        d
8   1        e

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM