簡體   English   中英

無論如何,有什么方法可以使R代碼更有效?

[英]Is there anyway to make this R code more efficient?

我正在應用生育力模型,要執行此操作,需要為每個生育強度保存矩陣,我將其稱為mati ,根據其順序。 在這種情況下,i =(1、2、3,... n)下面的數據框是如何顯示我的數據的一個示例。 我的實際數據幀有525行和10列( "AGE" "year" "mat1" "mat2" "mat3" "mat4" "mat5" "mat6" "mat7" "mat8" )。

year <- c(rep(1998:2001, 4))
Age <- c(rep(15:18, 4))
mat1 <- c(rep(0.01, 16))
mat2 <- c(rep(0.012, 16))
mat3 <- c(rep(0.015, 16))
mat <- data.frame(year, Age, mat1, mat2, mat3)

mat

   year Age mat1  mat2  mat3
1  1998  15 0.01 0.012 0.015
2  1999  16 0.01 0.012 0.015
3  2000  17 0.01 0.012 0.015
4  2001  18 0.01 0.012 0.015
5  1998  15 0.01 0.012 0.015
6  1999  16 0.01 0.012 0.015
7  2000  17 0.01 0.012 0.015
8  2001  18 0.01 0.012 0.015
9  1998  15 0.01 0.012 0.015
10 1999  16 0.01 0.012 0.015
11 2000  17 0.01 0.012 0.015
12 2001  18 0.01 0.012 0.015
13 1998  15 0.01 0.012 0.015
14 1999  16 0.01 0.012 0.015
15 2000  17 0.01 0.012 0.015
16 2001  18 0.01 0.012 0.015

要執行以獲得最終的數值矩陣,我已經執行了以下代碼,但是需要很長時間。

##mat1###

library(dlyr)
library(tidyr)

mat1 <- #selecting just intensities of order 1 and creating matrices
  select(mat, Age, year, mat1) %>% 
  spread(year, mat1) 

names(mat1)[c(2:6)] <- paste0("year ", names(mat1[2:6])) #alter colnames
mat1[ ,1] <- paste0("age ", mat1[,1]) #alter the row from column "age"

mat_oe1 <- data.matrix(mat1[2:6])
dimnames(mat_oe1) <- list(c(mat1[,1]),
                          c(names(mat1[2:6])))
#Saving as txt to read i the model
write.table(mat_oe2, file = "mat_oe1.txt", sep = "\t",
            row.names = T, col.names = T)

##mat2
mat2 <- #selecting just intensities of order 1 and creating matrices
  select(mat, Age, year, mat2) %>% 
  spread(year, mat2) 

names(mat2)[c(2:6)] <- paste0("year ", names(mat2[2:6])) #alter colnames
mat2[ ,1] <- paste0("age ", mat2[,1]) #alter the row from column "age"

mat_oe2 <- data.matrix(mat2[2:6])
dimnames(mat_oe2) <- list(c(mat1[,1]),
                          c(names(mat1[2:6])))
#Saving as txt to read i the model
write.table(mat_oe2, file = "mat_oe2.txt", sep = "\t",
            row.names = T, col.names = T)

##mat3
mat3 <- #selecting just intensities of order 1 and creating matrices
  select(mat, Age, year, mat3) %>% 
  spread(year, mat3) 

names(mat3)[c(2:6)] <- paste0("year ", names(mat3[2:6])) #alter colnames
mat3[ ,1] <- paste0("age ", mat3[,1]) #alter the row from column "age"

mat_oe3 <- data.matrix(mat3[2:6])
dimnames(mat_oe3) <- list(c(mat3[,1]),
                          c(names(mat3[2:6])))
#Saving as txt to read i the model
write.table(mat_oe3, file = "mat_oe3.txt", sep = "\t",
            row.names = T, col.names = T)  

我使用點spread是因為我需要以下格式的數據:

mat1 

     1998        1999       2000       2001
15   0.01        0.01       0.01       0.01
16   0.01        0.01       0.01       0.01
17   0.01        0.01       0.01       0.01
18   0.01        0.01       0.01       0.01

我也開始編寫一個循環,但是我已經陷入了第一行。

mat_list <- list()
for(i in names(mat[,3:7])) {
  mat_list[[i]] <- data.frame(
                      spread(
                        select(mat, AGE, year, mat[[paste0("mat",i)]]), year, mat[[paste0("mat", i)]])) 

應用上面的代碼后,我獲得了以下結果:

view(mat1)
        year 1998  year 1999  year 2000  year 2001
age 15   0.01        0.01       0.01       0.01
age 16   0.01        0.01       0.01       0.01
age 17   0.01        0.01       0.01       0.01
age 18   0.01        0.01       0.01       0.01


view(mat2)
        year 1998  year 1999    year 2000    year 2001
age 15   0.012        0.012       0.012       0.012
age 16   0.012        0.012       0.012       0.012
age 17   0.012        0.012       0.012       0.012
age 18   0.012        0.012       0.012       0.012


view(mat3)
        year 1998  year 1999    year 2000    year 2001
age 15   0.015        0.015       0.015       0.015
age 16   0.015        0.015       0.015       0.015
age 17   0.015        0.015       0.015       0.015
age 18   0.015        0.015       0.015       0.015

我相信您想gather然后spread數據。 這使您可以分兩步執行所有操作。

library(dplyr)
library(tidyr)

mat %>%
  gather(key, value, -year, -Age)%>%
  spread(year, value)%>%
  group_split(key)

[[1]]
# A tibble: 4 x 6
    Age key   `1998` `1999` `2000` `2001`
  <int> <chr>        <dbl>        <dbl>        <dbl>        <dbl>
1    15 mat1          0.01         0.01         0.01         0.01
2    16 mat1          0.01         0.01         0.01         0.01
3    17 mat1          0.01         0.01         0.01         0.01
4    18 mat1          0.01         0.01         0.01         0.01

[[2]]
# A tibble: 4 x 6
    Age key   `1998` `1999` `2000` `2001`
  <int> <chr>        <dbl>        <dbl>        <dbl>        <dbl>
1    15 mat2         0.012        0.012        0.012        0.012
2    16 mat2         0.012        0.012        0.012        0.012
3    17 mat2         0.012        0.012        0.012        0.012
4    18 mat2         0.012        0.012        0.012        0.012

[[3]]
# A tibble: 4 x 6
    Age key   `1998` `1999` `2000` `2001`
  <int> <chr>        <dbl>        <dbl>        <dbl>        <dbl>
1    15 mat3         0.015        0.015        0.015        0.015
2    16 mat3         0.015        0.015        0.015        0.015
3    17 mat3         0.015        0.015        0.015        0.015
4    18 mat3         0.015        0.015        0.015        0.015

或者,您可以在基礎中執行此操作:

mats <- reshape(data = data.frame(year = mat$year,Age = mat$Age,  stack(mat, select = c('mat1', 'mat2', 'mat3')))
        , idvar = c('Age', 'ind'), timevar = c('year'), direction = 'wide')

mat_list <- split(mats, mats$ind)

mat_list

$mat1
  Age  ind values.1998 values.1999 values.2000 values.2001
1  15 mat1        0.01        0.01        0.01        0.01
2  16 mat1        0.01        0.01        0.01        0.01
3  17 mat1        0.01        0.01        0.01        0.01
4  18 mat1        0.01        0.01        0.01        0.01

$mat2
   Age  ind values.1998 values.1999 values.2000 values.2001
17  15 mat2       0.012       0.012       0.012       0.012
18  16 mat2       0.012       0.012       0.012       0.012
19  17 mat2       0.012       0.012       0.012       0.012
20  18 mat2       0.012       0.012       0.012       0.012

$mat3
   Age  ind values.1998 values.1999 values.2000 values.2001
33  15 mat3       0.015       0.015       0.015       0.015
34  16 mat3       0.015       0.015       0.015       0.015
35  17 mat3       0.015       0.015       0.015       0.015
36  18 mat3       0.015       0.015       0.015       0.015

數據我略微更改了您的數據,以便每個ID組合都是唯一的。

year <- rep(1998:2001, each = 4) #each was the change.
Age <- rep(15:18, 4)
mat1 <- rep(0.01, 16)
mat2 <- rep(0.012, 16)
mat3 <- rep(0.015, 16)
mat <- data.frame(year, Age, mat1, mat2, mat3)

擴大科爾的答案。

mat %>%
    gather("mat", "val", -year, -Age) %>%
    mutate(Age=paste("age",Age), year=paste("year",year)) %>%
    group_by(mat) %>%
    group_map(~spread(., year, val))

purrr :: group_map將函數應用於每個組,並返回一個列表,其中每個列表元素是應用於每個組的函數的結果。

# A tibble: 4 x 5
  Age    `year 1998` `year 1999` `year 2000` `year 2001`
  <chr>        <dbl>       <dbl>       <dbl>       <dbl>
1 age 15        0.01        0.01        0.01        0.01
2 age 16        0.01        0.01        0.01        0.01
3 age 17        0.01        0.01        0.01        0.01
4 age 18        0.01        0.01        0.01        0.01

[[2]]
# A tibble: 4 x 5
  Age    `year 1998` `year 1999` `year 2000` `year 2001`
  <chr>        <dbl>       <dbl>       <dbl>       <dbl>
1 age 15       0.012       0.012       0.012       0.012
2 age 16       0.012       0.012       0.012       0.012
3 age 17       0.012       0.012       0.012       0.012
4 age 18       0.012       0.012       0.012       0.012

[[3]]
# A tibble: 4 x 5
  Age    `year 1998` `year 1999` `year 2000` `year 2001`
  <chr>        <dbl>       <dbl>       <dbl>       <dbl>
1 age 15       0.015       0.015       0.015       0.015
2 age 16       0.015       0.015       0.015       0.015
3 age 17       0.015       0.015       0.015       0.015
4 age 18       0.015       0.015       0.015       0.015

這是使用Cole稍作修改的數據。

year <- rep(1998:2001, each = 4) #each was the change.
Age <- rep(15:18, 4)
mat1 <- rep(0.01, 16)
mat2 <- rep(0.012, 16)
mat3 <- rep(0.015, 16)
mat <- data.frame(year, Age, mat1, mat2, mat3)

第一次重塑到長

#add unique id to your data
mat$id=1:nrow(mat)
#reshape to long by mat
long1 = reshape_toLong(data = mat,id = "id",j = "all123",value.var.prefix = "mat")
#delet id column
long2=long1[,-1]

第二次重塑

#reshape wide by year
wide=reshape_toWide(data = long2,id = "all123",j = "year",value.var.prefix = "mat")

最后獲取數據

墊1

wide[wide$all123==1,]
   Age all123 mat1998 mat1999 mat2000 mat2001
1   15      1    0.01    0.01    0.01    0.01
4   16      1    0.01    0.01    0.01    0.01
8   17      1    0.01    0.01    0.01    0.01
12  18      1    0.01    0.01    0.01    0.01

墊2

wide[wide$all123==2,]
   Age all123 mat1998 mat1999 mat2000 mat2001
3   15      2   0.012   0.012   0.012   0.012
5   16      2   0.012   0.012   0.012   0.012
7   17      2   0.012   0.012   0.012   0.012
11  18      2   0.012   0.012   0.012   0.012

墊3

wide[wide$all123==3,]
   Age all123 mat1998 mat1999 mat2000 mat2001
2   15      3   0.015   0.015   0.015   0.015
6   16      3   0.015   0.015   0.015   0.015
9   17      3   0.015   0.015   0.015   0.015
10  18      3   0.015   0.015   0.015   0.015

在使用reshape_toLongreshape_toWide函數之前,您需要使用以下命令從我的github yikeshu0611安裝onetree

devtools::install_github("yikeshu0611/onetree")
library(onetree)

注意 :您提供的數據有問題,所以我使用由科爾更改的數據

year <- rep(1998:2001, each = 4) #each was the change.
Age <- rep(15:18, 4)
mat1 <- rep(0.01, 16)
mat2 <- rep(0.012, 16)
mat3 <- rep(0.015, 16)
mat <- data.frame(year, Age, mat1, mat2, mat3)

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM