繁体   English   中英

如何将一个 data.frame 中的变量循环放入另一个 data.frame 的单个列中

[英]How to loop variables from a data.frame into another into a single column

我试图从 data.frame dat 中仅提取 32 个特定物种,并创建另一个 data.frame,将所有 species 放入单个列 (col) 中,同时我还提取各年份 (year) 的温度值 (temperature values) 并将它们放入单个列中。 我还加入了每个温度值所属的月份。

data.frame一个例子:

structure(list(Year = c(1994L, 1995L, 1996L, 1997L, 1998L, 1999L, 
2000L, 2002L, 2003L, 2004L, 2005L, 2006L, 2007L, 2008L, 2009L, 
2010L, 2011L, 2012L, 2013L), Species = structure(c(1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), .Label = "Blackbird", class = "factor"), Farmland = c(96.0309523809524, 
96.8520833333333, 96.781746031746, 96.8597222222222, 97.4410299003322, 
96.6654846335697, 96.858803986711, 97.0811403508772, 96.9259974259974, 
97.2803571428571, 96.6017598343685, 96.3777777777778, 96.3227670288895, 
96.8100546279118, 96.431746031746, 96.6232323232323, 96.2537878787879, 
96.1431827431827, 96.0778288740245), X.Jan. = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L), .Label = "Jan", class = "factor"), atwo.TempJanuary = c(5.06916107894286, 
4.390669300225, 3.88357903166667, 1.80642228995455, 5.16489863448837, 
5.54367179174468, 4.83031500397674, 5.40830211455263, 4.26790743608108, 
4.927588606725, 5.841963431, 4.3303368412, 7.08188921457143, 
6.75067792993878, 2.83417096753488, 1.36880495640909, 4.35569636247727, 
5.82305364068889, 3.52697043756522)), row.names = c(NA, -19L), class = "data.frame")

一个额外的例子(这是原始的data.frame dat ):

structure(list(Year = c(2006L, 2007L, 1999L, 2004L, 1995L, 2011L, 
2011L), Species = structure(c(2L, 4L, 3L, 6L, 2L, 5L, 1L), .Label = c("Buzzard", 
"Collared Dove", "Greenfinch", "Linnet", "Meadow Pipit", "Willow Warbler"
), class = "factor"), TempJanuary = c(2.128387049, 4.233225712, 
5.270967624, 4.826451505, 4.390322483, 3.841290237, 3.981290234
), TempFebruary = c(0.927499979, 3.098928502, 4.67428561, 5.05103437, 
6.343214144, 6.414285571, 6.625356995), TempMarch = c(1.637741899, 
3.22096767, 7.312257901, 6.444515985, 5.337096655, 6.787741784, 
7.052903068), TempApril = c(4.877333224, 5.888999868, 9.510666454, 
9.386333124, 9.005333132, 12.40966639, 12.50166639), TempMay = c(8.729999805, 
7.748064343, 13.09096745, 12.1638707, 11.68935458, 12.83032229, 
13.07967713), TempJune = c(11.48033308, 11.20633308, 13.91166636, 
15.77399965, 14.05266635, 14.30733301, 14.56133301), TempJuly = c(14.86354805, 
11.9338707, 17.85612863, 16.44451576, 18.92935442, 15.53612868, 
15.75161255), TempAugust = c(12.45225779, 11.48419329, 16.54935447, 
18.31516088, 19.22483828, 15.80225771, 16.08387061), TempSeptember = c(13.45633303, 
10.09333311, 15.94333298, 15.27299966, 13.52733303, 15.41933299, 
15.68566632), TempOctober = c(10.24387074, 7.462903059, 10.5161288, 
10.84709653, 13.05967713, 12.67774165, 12.83967713), TempNovember = c(4.650999896, 
3.614999919, 7.246333171, 7.388666502, 7.455999833, 9.371333124, 
9.511333121), TempDecember = c(3.764516045, 2.116774146, 4.268064421, 
4.825161182, 2.01741931, 5.582903101, 5.701290195), Farmland = c(100L, 
100L, 40L, 90L, 80L, 10L, 80L)), row.names = c(1L, 100L, 1000L, 
2000L, 3000L, 5000L, 10000L), class = "data.frame")

再看看data.frame

'data.frame':   19 obs. of  5 variables:
 $ Year            : int  1994 1995 1996 1997 1998 1999 2000 2002 2003 2004 ...
 $ Species         : Factor w/ 1 level "Blackbird": 1 1 1 1 1 1 1 1 1 1 ...
 $ Farmland        : num  96 96.9 96.8 96.9 97.4 ...
 $ X.Jan.          : Factor w/ 1 level "Jan": 1 1 1 1 1 1 1 1 1 1 ...
 $ atwo.TempJanuary: num  5.07 4.39 3.88 1.81 5.16 ...

深入了解dat

      Year        Species TempJanuary TempFebruary TempMarch TempApril
1     2006  Collared Dove    2.128387     0.927500  1.637742  4.877333
100   2007         Linnet    4.233226     3.098929  3.220968  5.889000
1000  1999     Greenfinch    5.270968     4.674286  7.312258  9.510666
2000  2004 Willow Warbler    4.826452     5.051034  6.444516  9.386333
3000  1995  Collared Dove    4.390322     6.343214  5.337097  9.005333
5000  2011   Meadow Pipit    3.841290     6.414286  6.787742 12.409666
10000 2011        Buzzard    3.981290     6.625357  7.052903 12.501666
        TempMay TempJune TempJuly TempAugust TempSeptember TempOctober
1      8.730000 11.48033 14.86355   12.45226      13.45633   10.243871
100    7.748064 11.20633 11.93387   11.48419      10.09333    7.462903
1000  13.090967 13.91167 17.85613   16.54935      15.94333   10.516129
2000  12.163871 15.77400 16.44452   18.31516      15.27300   10.847097
3000  11.689355 14.05267 18.92935   19.22484      13.52733   13.059677
5000  12.830322 14.30733 15.53613   15.80226      15.41933   12.677742
10000 13.079677 14.56133 15.75161   16.08387      15.68567   12.839677
      TempNovember TempDecember Farmland
1         4.651000     3.764516      100
100       3.615000     2.116774      100
1000      7.246333     4.268064       40
2000      7.388667     4.825161       90
3000      7.456000     2.017419       80
5000      9.371333     5.582903       10
10000     9.511333     5.701290       80

以下是我用来到达这里的一些代码示例:

#Blackbird population-------------------------------------------------------------
# Mean Farmland per Year / Species / TempJanuary, restricted to Blackbird rows
# of dat whose Farmland value is above 80.
Black_Bird<-aggregate(Farmland ~ Year + Species + TempJanuary, dat[dat$Species=="Blackbird" & dat$Farmland >80,],mean)
# NOTE(review): no summarising function (e.g. summarise) is passed to ddply,
# so the rounded mean given as Farmland= is probably never applied -- confirm.
Black_bird <- ddply(Black_Bird, .(Year, Species, TempJanuary), Farmland=round(mean(Farmland), 2))
# Per Year / Species means of Farmland (aone) and of TempJanuary (atwo).
aone<-aggregate(Farmland ~ Year + Species, Black_bird, mean)
atwo<-aggregate(TempJanuary ~ Year + Species, Black_bird, mean)
# Drop the second column (Species) so aone keeps only Year and Farmland.
aone<-aone[, -2]
#Buzzard Population-----------
# Mean Farmland per Year / Species / TempJanuary, restricted to Buzzard rows
# of dat whose Farmland value is above 80.
Buzzard_Bird <-aggregate(Farmland ~ Year + Species + TempJanuary, dat[dat$Species=="Buzzard" & dat$Farmland >80,],mean)
# NOTE(review): no summarising function (e.g. summarise) is passed to ddply,
# so the rounded mean given as Farmland= is probably never applied -- confirm.
Buzzard_bird <- ddply(Buzzard_Bird, .(Year, Species, TempJanuary), Farmland=round(mean(Farmland), 2))
# Per Year / Species means of Farmland (athree) and of TempJanuary (afour).
athree<-aggregate(Farmland ~ Year + Species, Buzzard_bird, mean)
afour<-aggregate(TempJanuary ~ Year + Species, Buzzard_bird, mean)
# Drop the second column (Species) so athree keeps only Year and Farmland.
athree<-athree[, -2]
#Combine and melt into single columns-----------------------------------------------------
# NOTE(review): cbind() joins the four frames side by side (column-wise), so
# the Buzzard values become extra columns with duplicated names instead of
# extra rows under the Blackbird values. Stacking per-species frames with
# rbind() is presumably what was intended -- confirm.
mod1<-cbind(atwo, afour, aone, athree)
# Reshape to long form: TempJanuary becomes a Month / Temperature pair.
melt(mod1, id.vars = c("Year", "Farmland", "Species"), measure.vars = c("TempJanuary"), variable.name = "Month", value.name = "Temperature" )

melt 没有正常工作,它似乎没有将 Buzzard 与 Blackbird 放在同一列中。 结果停在第 19 行就被截断了。 这种做法似乎效率低下且耗时。 有没有更快更有效的解决方案?

它应该是这样的:

    Year    Species Farmland Month Temperature
    2008  Blackbird     83.0   Jan    9.011174
    2009  Blackbird     83.0   Jan   10.155201
    2012 Greenfinch     83.0   Feb    9.578269
    2009    Swallow     83.0   Mar   10.361573
    2010      Robin     84.5   Oct    9.191641

我有 32 种可供选择:

 [1] Dunnock          Blackbird        Song Thrush      Bullfinch       
 [5] Corn Bunting     Turtle Dove      Grey Partridge   Yellow Wagtail  
 [9] Starling         Linnet           Yellowhammer     Skylark         
[13] Kestrel          Reed Bunting     Whitethroat      Greenfinch      
[17] Rook             Stock dove       Goldfinch        Woodpigeon      
[21] Jackdaw          House martin     Swallow          Lapwing         
[25] Wren             Robin            Blue Tit         Great tit       
[29] Long-tailed Tit  Chaffinch        Buzzard          Sparrowhawk     
32 Levels: Blackbird Blue Tit  Bullfinch Buzzard ... Yellowhammer

1 月至 12 月有 12 个月的温度。


这些是以前的一些代码,使我走向错误的方向:

library(psych)
# Mean Farmland for every Species / Year / monthly-temperature combination,
# keeping only rows of dat for the listed species with Farmland above 80.
# NOTE(review): the species listing printed elsewhere on this page shows
# "House martin", "Stock dove" and "Great tit"; if those are the actual factor
# levels, the capitalised spellings used below will not match them -- verify.
dat_two <- aggregate(Farmland ~ Species + Year + TempJanuary + TempFebruary + TempMarch + TempApril + TempMay + TempJune + TempJuly + TempAugust + TempSeptember + TempOctober + TempNovember + TempDecember, dat[dat$Species %in% c('Starling', 'Skylark', 'Yellow Wagtail', 'Kestrel', 'Yellowhammer', 'Greenfinch', 'Swallow', 'Lapwing', 'House Martin', 'Long-tailed Tit', 'Linnet', 'Grey Partridge', 'Turtle Dove', 'Corn Bunting', 'Bullfinch', 'Song Thrush', 'Blackbird', 'Dunnock', 'Whitethroat', 'Rook', 'Woodpigeon', 'Reed Bunting', 'Stock Dove', 'Goldfinch', 'Jackdaw', 'Wren', 'Robin', 'Blue Tit', 'Great Tit', 'Chaffinch', 'Buzzard', 'Sparrowhawk') & dat$Farmland >80,], mean)
# Aggregates dat_two again over the same grouping columns.
# NOTE(review): this presumably leaves the values unchanged -- confirm it is needed.
dat_three <- aggregate(Farmland ~ Species + Year + TempJanuary + TempFebruary + TempMarch + TempApril + TempMay + TempJune + TempJuly + TempAugust + TempSeptember + TempOctober + TempNovember + TempDecember , dat_two, mean)
# Rename dat_two's columns to short month names. dat_three was built before
# this rename, so it keeps the original Temp* column names.
colnames(dat_two) <- c("Species", "Year", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "Farmland")
library(plyr)
# Per-group Farmland mean rounded to 2 decimals, stored in a column named `mean`.
dat_one <- ddply(dat_three, .(Species, Year, TempJanuary, TempFebruary, TempMarch, TempApril, TempMay, TempJune, TempJuly, TempAugust, TempSeptember, TempOctober, TempNovember, TempDecember), summarise, mean = round(mean(Farmland), 2))
#-----------------------------------------------------------------
# Yearly geometric mean (rounded to 2 decimals) of each monthly temperature
# column of dat_one, one data frame per month with columns Year + that month.
Jan_Year <- ddply(dat_one, .(Year), summarise, TempJanuary=round(geometric.mean(TempJanuary, na.rm=TRUE), 2))
Feb_Year <- ddply(dat_one, .(Year), summarise, TempFebruary=round(geometric.mean(TempFebruary, na.rm=TRUE), 2))
Mar_Year <- ddply(dat_one, .(Year), summarise, TempMarch=round(geometric.mean(TempMarch, na.rm=TRUE), 2))
Apr_Year <- ddply(dat_one, .(Year), summarise, TempApril=round(geometric.mean(TempApril, na.rm=TRUE), 2))
May_Year <- ddply(dat_one, .(Year), summarise, TempMay=round(geometric.mean(TempMay, na.rm=TRUE), 2))
# The original script assigned Jun_Year twice with the identical expression;
# the redundant second assignment has been removed.
Jun_Year <- ddply(dat_one, .(Year), summarise, TempJune=round(geometric.mean(TempJune, na.rm=TRUE), 2))
Jul_Year <- ddply(dat_one, .(Year), summarise, TempJuly=round(geometric.mean(TempJuly, na.rm=TRUE), 2))
Aug_Year <- ddply(dat_one, .(Year), summarise, TempAugust=round(geometric.mean(TempAugust, na.rm=TRUE), 2))
Sep_Year <- ddply(dat_one, .(Year), summarise, TempSeptember=round(geometric.mean(TempSeptember, na.rm=TRUE), 2))
Oct_Year <- ddply(dat_one, .(Year), summarise, TempOctober=round(geometric.mean(TempOctober, na.rm=TRUE), 2))
Nov_Year <- ddply(dat_one, .(Year), summarise, TempNovember=round(geometric.mean(TempNovember, na.rm=TRUE), 2))
Dec_Year <- ddply(dat_one, .(Year), summarise, TempDecember=round(geometric.mean(TempDecember, na.rm=TRUE), 2))
# `mean` here is the dat_one column holding the rounded Farmland mean,
# not the base function.
Farm_Year <- ddply(dat_one, .(Year), summarise, Farmland=round(geometric.mean(mean, na.rm=TRUE), 2))
# Bind all yearly frames side by side, then keep only the first occurrence of
# each column name (every frame contributes its own Year column).
Farm_Temp <- cbind(Farm_Year, Jan_Year, Feb_Year, Mar_Year, Apr_Year,May_Year, Jun_Year, Jul_Year, Aug_Year, Sep_Year, Oct_Year, Nov_Year, Dec_Year)
Farm_Temp <- Farm_Temp[, !duplicated(colnames(Farm_Temp))]
colnames(Farm_Temp) <- c("Year", "Farmland", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
# Drop the second column (Farmland), leaving Year plus the twelve months.
Farm_Temp <- Farm_Temp[, -2]
#-----------------------------
# Seasonal mean temperature per year, computed from the monthly columns of
# Farm_Temp. Farm_Temp's columns were renamed to Jan..Dec before this point,
# so the formulas use those names; the TempMarch-style names no longer exist
# in Farm_Temp and could not be resolved by aggregate().
# NOTE(review): Spring is aggregated with mean while the other seasons use
# geometric.mean -- confirm which one is intended.
Spring <- aggregate((Mar + Apr + May)/3 ~ Year, Farm_Temp, mean)
Summer <- aggregate((Jun + Jul + Aug)/3 ~ Year, Farm_Temp, geometric.mean)
Autumn <- aggregate((Sep + Oct + Nov)/3 ~ Year, Farm_Temp, geometric.mean)
Winter <- aggregate((Dec + Jan + Feb)/3 ~ Year, Farm_Temp, geometric.mean)
# Bind the yearly Farmland frame with the four seasonal frames, then keep only
# the first occurrence of each column name (drops the repeated Year columns).
Season_Temp <- cbind(Farm_Year, Spring,Summer, Autumn, Winter)
Season_Temp <- Season_Temp[, !duplicated(colnames(Season_Temp))]
colnames(Season_Temp) <- c("Year", "Farmland", "spring", "Summer", "Autumn", "Winter")
#--------------------------------------------------------------------------------------------------------------

library(reshape2)

# Seasonal mean temperature for every Year / Species / Farmland combination.
# Each aggregate() call returns the three grouping columns plus one value
# column. Spring keeps the grouping columns; the other seasons keep only the
# value column (column 4) so they can be bound on as extra columns.
Season_practice <- aggregate((Mar + Apr + May) / 3 ~ Year + Species + Farmland,
                             data = dat_two, FUN = geometric.mean)
prac1 <- aggregate((Jun + Jul + Aug) / 3 ~ Year + Species + Farmland,
                   data = dat_two, FUN = geometric.mean)[, 4]
prac2 <- aggregate((Sep + Oct + Nov) / 3 ~ Year + Species + Farmland,
                   data = dat_two, FUN = geometric.mean)[, 4]
prac3 <- aggregate((Dec + Jan + Feb) / 3 ~ Year + Species + Farmland,
                   data = dat_two, FUN = geometric.mean)[, 4]
Season_practice <- cbind(Season_practice, prac1, prac2, prac3)
colnames(Season_practice) <- c("Year", "Species", "Farmland",
                               "Spring", "Summer", "Autumn", "Winter")

# Long format: one row per Year / Species / Farmland / season.
Seasonal_Temp <- melt(Season_practice,
                      id.vars = c("Year", "Species", "Farmland"),
                      measure.vars = c("Spring", "Summer", "Autumn", "Winter"),
                      variable.name = "Month", value.name = "Temperature")

# Long format of the monthly columns: one row per Year / Species / month.
Practicing_Temp <- melt(dat_two,
                        id.vars = c("Year", "Species"),
                        measure.vars = month.abb,
                        variable.name = "Month", value.name = "Temperature")

这是来自更大的 data.frame 的输出,我试图用它完成上述操作。 正如你所看到的,各个季节对应的数值在重复,而这不应该发生,因为各个月份的数值并不相同,所以我一定是在某个步骤出了错:

 Year   Species  Month Farmland
1  1994 Blackbird Spring 95.96875
2  1995 Blackbird Spring 95.46875
3  1996 Blackbird Spring 95.64815
4  1997 Blackbird Spring 95.62071
5  1998 Blackbird Spring 95.71925
6  1999 Blackbird Spring 95.74444
7  2000 Blackbird Spring 95.82440
8  2002 Blackbird Spring 95.78333
9  2003 Blackbird Spring 95.61640
10 2004 Blackbird Spring 95.86797
11 2005 Blackbird Spring 95.08452
12 2006 Blackbird Spring 94.66667
13 2007 Blackbird Spring 95.60745
14 2008 Blackbird Spring 93.98383
15 2009 Blackbird Spring 95.08167
16 2010 Blackbird Spring 95.23426
17 2011 Blackbird Spring 95.25000
18 2012 Blackbird Spring 94.75204
19 2013 Blackbird Spring 94.28821
20 1994 Blackbird Summer 95.96875
21 1995 Blackbird Summer 95.46875
22 1996 Blackbird Summer 95.64815
23 1997 Blackbird Summer 95.62071
24 1998 Blackbird Summer 95.71925
25 1999 Blackbird Summer 95.74444
26 2000 Blackbird Summer 95.82440
27 2002 Blackbird Summer 95.78333
28 2003 Blackbird Summer 95.61640
29 2004 Blackbird Summer 95.86797
30 2005 Blackbird Summer 95.08452
31 2006 Blackbird Summer 94.66667
32 2007 Blackbird Summer 95.60745
33 2008 Blackbird Summer 93.98383
34 2009 Blackbird Summer 95.08167
35 2010 Blackbird Summer 95.23426
36 2011 Blackbird Summer 95.25000
37 2012 Blackbird Summer 94.75204
38 2013 Blackbird Summer 94.28821
39 1994 Blackbird Autumn 95.96875
40 1995 Blackbird Autumn 95.46875
41 1996 Blackbird Autumn 95.64815
42 1997 Blackbird Autumn 95.62071
43 1998 Blackbird Autumn 95.71925
44 1999 Blackbird Autumn 95.74444
45 2000 Blackbird Autumn 95.82440
46 2002 Blackbird Autumn 95.78333
47 2003 Blackbird Autumn 95.61640
48 2004 Blackbird Autumn 95.86797
49 2005 Blackbird Autumn 95.08452
50 2006 Blackbird Autumn 94.66667
51 2007 Blackbird Autumn 95.60745
52 2008 Blackbird Autumn 93.98383
53 2009 Blackbird Autumn 95.08167
54 2010 Blackbird Autumn 95.23426
55 2011 Blackbird Autumn 95.25000
56 2012 Blackbird Autumn 94.75204
57 2013 Blackbird Autumn 94.28821
58 1994 Blackbird Winter 95.96875
59 1995 Blackbird Winter 95.46875
60 1996 Blackbird Winter 95.64815
61 1997 Blackbird Winter 95.62071
62 1998 Blackbird Winter 95.71925
63 1999 Blackbird Winter 95.74444
64 2000 Blackbird Winter 95.82440
65 2002 Blackbird Winter 95.78333
66 2003 Blackbird Winter 95.61640
67 2004 Blackbird Winter 95.86797
68 2005 Blackbird Winter 95.08452
69 2006 Blackbird Winter 94.66667
70 2007 Blackbird Winter 95.60745
71 2008 Blackbird Winter 93.98383
72 2009 Blackbird Winter 95.08167
73 2010 Blackbird Winter 95.23426
74 2011 Blackbird Winter 95.25000
75 2012 Blackbird Winter 94.75204
76 2013 Blackbird Winter 94.28821

考虑使用reshape将数据从宽格式重新构建为长格式,然后按年、月或指定的季节aggregate

输入

Year,Species,TempJanuary,TempFebruary,TempMarch,TempApril,TempMay,TempJune,TempJuly,TempAugust,TempSeptember,TempOctober,TempNovember,TempDecember,Farmland
2006,Collared Dove,2.128387,0.9275,1.637742,4.877333,8.73,11.48033,14.86355,12.45226,13.45633,10.243871,4.651,3.764516,100
2007,Linnet,4.233226,3.098929,3.220968,5.889,7.748064,11.20633,11.93387,11.48419,10.09333,7.462903,3.615,2.116774,100
1999,Greenfinch,5.270968,4.674286,7.312258,9.510666,13.090967,13.91167,17.85613,16.54935,15.94333,10.516129,7.246333,4.268064,40
2004,Willow Warbler,4.826452,5.051034,6.444516,9.386333,12.163871,15.774,16.44452,18.31516,15.273,10.847097,7.388667,4.825161,90
1995,Collared Dove,4.390322,6.343214,5.337097,9.005333,11.689355,14.05267,18.92935,19.22484,13.52733,13.059677,7.456,2.017419,80
2011,Meadow Pipit,3.84129,6.414286,6.787742,12.409666,12.830322,14.30733,15.53613,15.80226,15.41933,12.677742,9.371333,5.582903,10
2011,Buzzard,3.98129,6.625357,7.052903,12.501666,13.079677,14.56133,15.75161,16.08387,15.68567,12.839677,9.511333,5.70129,80

R

bird_df <- read.csv(...)

# RESHAPE WIDE TO LONG
# Pick the monthly temperature columns by name instead of by position (3:14)
# so the reshape keeps working if columns are added or reordered.
temp_cols <- grep("^Temp", colnames(bird_df), value = TRUE)

# One output row per input row per month. Row names are sized from the data;
# a fixed 1:1E5 vector runs out once the result exceeds 100,000 rows.
r_df <- reshape(bird_df, varying = temp_cols, times = temp_cols,
                v.names = "Temperature", timevar = "Month",
                new.row.names = seq_len(nrow(bird_df) * length(temp_cols)),
                direction = "long")

# ASSIGN COLUMNS
# "TempJanuary" -> "Jan", stored as a factor in calendar order so that
# sorting by Month is chronological rather than alphabetical.
r_df$Month <- factor(substr(sub("^Temp", "", r_df$Month), 1, 3),
                     levels = month.abb)

# Month -> season lookup; a month missing from the table yields NA.
season_by_month <- c(
  Mar = "Spring", Apr = "Spring", May = "Spring",
  Jun = "Summer", Jul = "Summer", Aug = "Summer",
  Sep = "Autumn", Oct = "Autumn", Nov = "Autumn",
  Dec = "Winter", Jan = "Winter", Feb = "Winter"
)
r_df$Season <- unname(season_by_month[as.character(r_df$Month)])

# RE-ORDER ROWS
r_df <- r_df[order(r_df$Year, r_df$Month, r_df$Species), ]
rownames(r_df) <- NULL

输出

# Preview the first rows of the reshaped long-format data frame r_df.
head(r_df)
#   Year       Species Farmland Month Temperature id Season
# 1 1995 Collared Dove       80   Jan    4.390322  5 Winter
# 2 1995 Collared Dove       80   Feb    6.343214  5 Winter
# 3 1995 Collared Dove       80   Mar    5.337097  5 Spring
# 4 1995 Collared Dove       80   Apr    9.005333  5 Spring
# 5 1995 Collared Dove       80   May   11.689355  5 Spring
# 6 1995 Collared Dove       80   Jun   14.052670  5 Summer
# ...

# Mean Temperature and Farmland per Year / Species. The grouping terms are
# listed as Year + Species so that the column order and row ordering agree
# with the output shown below (aggregate() emits grouping columns in formula
# order, with the first term varying fastest).
aggregate(cbind(Temperature, Farmland) ~ Year + Species, r_df, mean)
#   Year        Species Temperature Farmland
# 1 2011        Buzzard   11.114639       80
# 2 1995  Collared Dove   10.419384       80
# 3 2006  Collared Dove    7.434402      100
# 4 1999     Greenfinch   10.512513       40
# 5 2007         Linnet    6.841882      100
# ...

# Mean Temperature and Farmland per Year / Month / Species. The grouping terms
# are listed as Year + Month + Species so that the column order and row
# ordering agree with the output shown below.
aggregate(cbind(Temperature, Farmland) ~ Year + Month + Species, r_df, mean)
#    Year Month   Species Temperature Farmland
# 1  2011   Jan   Buzzard    3.981290       80
# 2  2011   Feb   Buzzard    6.625357       80
# 3  2011   Mar   Buzzard    7.052903       80
# 4  2011   Apr   Buzzard   12.501666       80
# 5  2011   May   Buzzard   13.079677       80
# ...

# Mean Temperature and Farmland per Species / Year / Season.
aggregate(cbind(Temperature, Farmland) ~ Species + Year + Season, r_df, mean)
#           Species Year Season Temperature Farmland
# 1   Collared Dove 1995 Autumn   11.347669       80
# 2      Greenfinch 1999 Autumn   11.235264       40
# 3  Willow Warbler 2004 Autumn   11.169588       90
# 4   Collared Dove 2006 Autumn    9.450400      100
# 5          Linnet 2007 Autumn    7.057078      100
# ...

我想这就是你要的? 你必须安装 tidyverse

library(tidyverse)

# Reshape every column of dat whose name matches "Temp" into long format:
# the column name (with the "Temp" prefix stripped) goes to Month and the
# value goes to Temp.
pivot_longer(
  dat,
  cols = matches("Temp"),
  names_to = "Month",
  names_prefix = "Temp",
  values_to = "Temp"
)

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM