使用 R 中的索引提取數據幀值

Question

我有 100 多個文件，每個文件都有各自的開始和結束坐標。我想根據這些開始和結束坐標，從所有數據集中提取相應的區域，並把結果存儲到文件中。我使用了以下方法，但它沒有給出我預期的輸出。startco 依次保存第 1、2、3 個文件的起始索引，endco 依次保存第 1、2、3 個文件的結束索引。如果索引超出了文件的範圍，則希望在超出的位置填入 NA。

Example:
# Asker's attempt: extract a column range from each numbered file.
# startco / endco hold one start and one end index per file;
# ctc holds the file numbers used to build the file names.
startco<-c(3,4,1)
endco<-c(5,6,2)
ctc<-c(1,2,3)

# NOTE(review): the three nested loops visit every combination of
# (file, start, end) instead of pairing the k-th file with the k-th
# start and end index.
for (hm0 in 1:length(ctc)) {
  for (hm1 in 1:length(startco)) {
    for (hm2 in 1:length(endco)) {
  
  # Read file_<n>.txt; no header argument is passed to read.table().
  methd1<-read.table( paste0("path/to folder/","file_",ctc[hm0],".txt"))
  # Select columns start..end; methd2 is overwritten on every iteration
  # and is never written to a file or accumulated.
  methd2<- methd1[,startco[hm1]:endco[hm2]]
  
}
  }
}

File_1.txt

V1   V2   V3   V4   V5
41   42    43   45    46
0.31 0.21 0.87 0.65 0.54    
0.32 0.28 0.74 0.87 0.65 
0.19 0.12 0.99 0.99 0.89 

File_2.txt

V1   V2   V3   V4   V5
12    24   13    14   16
0.89 0.78 0.50 0.22 0.34
0.54 0.78 0.50 0.34 0.41
0.78 0.54 0.66 0.26 0.14

File_3.txt

V1   V2   V3   V4   V5
1    2     3    5    6
0.20 0.40 0.50 0.49 0.52 


Expected output :
43   45    46
0.87 0.65 0.54
0.74 0.87 0.65 
0.99 0.99 0.89 
0.22 0.34 NA
0.34 0.41 NA
0.99 0.89 NA
 1 2
 0.20 0.40

Answer 1

在 Base R 中，您可以執行以下操作：

 #' Extract a window of columns from one table file.
 #'
 #' Reads `path` with `read.table(header = TRUE)` and keeps columns
 #' `start` to `end`. A negative `start` prepends `-start` all-NA columns
 #' and the window then begins at column 1. An `end` beyond the last
 #' column is clamped to the last column (no padding on the right).
 #'
 #' @param path Path of the file to read.
 #' @param start First column index; negative values request left padding.
 #' @param end Last column index.
 #' @return A data frame with columns `id` (base name of `path`), `r`
 #'   (row number) and the selected columns renamed `X1`, `X2`, ...
 fun <- function(path, start, end) {
   id <- basename(path)
   dat <- read.table(path, header = TRUE)
   p <- ncol(dat)
   n <- nrow(dat)
   # Number of all-NA columns to put in front of the real data.
   neg <- if (start < 0) -start else 0
   add <- matrix(nrow = n, ncol = neg)
   if (start < 1) start <- 1
   if (end > p) end <- p
   # drop = FALSE keeps a one-column window as a data frame; without it
   # the selection collapses to a vector and the result is no longer a
   # data frame with proper column names.
   d <- cbind(add, dat[, start:end, drop = FALSE])
   names(d) <- paste0("X", seq_len(ncol(d)))
   cbind(id, r = seq_len(nrow(d)), d)
 }

# One start/end index per file; a negative start asks fun() for NA
# padding columns on the left.
startco <- c(3, 4, -2) # TAKES NEGATIVE INDICES
endco <- c(5, 6, 2)
ctc <- c(1, 2, 3)

# Build the full file names (file_1.txt, file_2.txt, ...) so that
# read.table() inside fun() opens the files described in the question.
files <- file.path("path/to/folder", paste0("file_", ctc, ".txt"))
A <- Map(fun, files, startco, endco)
# merge(all = TRUE) joins on the shared columns and fills columns missing
# from either side with NA; the first two columns (id, r) are then dropped.
Reduce(function(x, y) merge(x, y, all = TRUE), A)[, -(1:2)]

     X1    X2    X3  X4
1  43.00 45.00 46.00  NA
2   0.87  0.65  0.54  NA
3   0.74  0.87  0.65  NA
4   0.99  0.99  0.89  NA
5  14.00 16.00    NA  NA
6   0.22  0.34    NA  NA
7   0.34  0.41    NA  NA
8   0.26  0.14    NA  NA
9     NA    NA  1.00 2.0
10    NA    NA  0.20 0.4

不含負數索引的情況：

# Same extraction again, this time with positive start indices only.
startco <- c(3, 4, 1)
B <- Map(f = fun, files, startco, endco)
Reduce(function(left, right) merge(left, right, all = TRUE), B)[, -c(1, 2)]
     X1    X2    X3
1  43.00 45.00 46.00
2   0.87  0.65  0.54
3   0.74  0.87  0.65
4   0.99  0.99  0.89
5  14.00 16.00    NA
6   0.22  0.34    NA
7   0.34  0.41    NA
8   0.26  0.14    NA
9   1.00  2.00    NA
10  0.20  0.40    NA

Answer 2

我會先定義一個讀取函數 readfun：

#' Read file_<i>.txt and return columns s..e as an unnamed matrix.
#'
#' Columns requested beyond the right edge of the table are filled with
#' NA. A negative `s` prepends `-s` all-NA columns, after which the
#' window starts at the first (padded) column and `e` is shifted by the
#' same amount.
#'
#' @param i File number; the file read is `<dir>/file_<i>.txt`.
#' @param s Start column index; must not be 0. Negative values pad left.
#' @param e End column index.
#' @param dir Directory containing the files. Defaults to "foo1".
#' @return A matrix without dimnames holding the selected columns.
readfun <- \(i, s, e, dir = "foo1") {
  stopifnot(s != 0)
  r <- read.table(file.path(dir, paste0("file_", i, ".txt")), header = TRUE)
  if (e > ncol(r)) {  ## inserts cols to the right if e > ncol
    # The padding needs no column names: dimnames are stripped below, so
    # nothing is parsed from the header (which may not look like "V<n>").
    r <- cbind(r, matrix(NA_real_, nrow(r), e - ncol(r)))
  }
  if (s < 0) {  ## inserts cols to the left if s < 0
    r <- cbind(matrix(NA_real_, nrow(r), -s), r)
    e <- e + -s
    s <- 1
  }
  out <- r[, s:e, drop = FALSE]
  unname(as.matrix(out))
}

然後用 Map 對每個文件調用它：

# File numbers with one start and one end column index per file.
ctc <- c(1, 2, 3)
startco <- c(3, 4, -2)
endco <- c(5, 6, 2)

# Call readfun once per (file, start, end) triple; the result is a list.
Map(f = readfun, ctc, startco, endco)
# [[1]]
#       [,1]  [,2]  [,3]
# [1,] 43.00 45.00 46.00
# [2,]  0.87  0.65  0.54
# [3,]  0.74  0.87  0.65
# [4,]  0.99  0.99  0.89
# 
# [[2]]
#       [,1]  [,2] [,3]
# [1,] 14.00 16.00   NA
# [2,]  0.22  0.34   NA
# [3,]  0.34  0.41   NA
# [4,]  0.26  0.14   NA
# 
# [[3]]
#      [,1] [,2] [,3] [,4]
# [1,]   NA   NA  1.0  2.0
# [2,]   NA   NA  0.2  0.4

數據：

# Create the example folder only if it is missing, so re-running the
# script does not emit an "already exists" warning.
if (!dir.exists('foo1')) dir.create('foo1')

# Each block parses the inline text as a table with a header row and
# writes it to foo1/file_<n>.txt without row names.
write.table(read.table(header = TRUE, text = '
V1   V2   V3   V4   V5
41   42    43   45    46
0.31 0.21 0.87 0.65 0.54    
0.32 0.28 0.74 0.87 0.65 
0.19 0.12 0.99 0.99 0.89'), './foo1/file_1.txt', row.names = FALSE)

write.table(read.table(header = TRUE, text = '
V1   V2   V3   V4   V5
12    24   13    14   16
0.89 0.78 0.50 0.22 0.34
0.54 0.78 0.50 0.34 0.41
0.78 0.54 0.66 0.26 0.14'), './foo1/file_2.txt', row.names = FALSE)

write.table(read.table(header = TRUE, text = '
V1   V2   V3   V4   V5
1    2     3    5    6
0.20 0.40 0.50 0.49 0.52 '), './foo1/file_3.txt', row.names = FALSE)

使用 R 中的索引提取數據幀值

問題描述

2 個解決方案

解決方案1
1 已采納 2022-07-14 19:33:08

解決方案2
1 2022-07-14 19:50:40

使用 R 中的索引提取數據幀值

問題描述

2 個解決方案

解決方案1 1 已采納 2022-07-14 19:33:08

解決方案2 1 2022-07-14 19:50:40

解決方案1
1 已采納 2022-07-14 19:33:08

解決方案2
1 2022-07-14 19:50:40