简体   繁体   中英

Matrix error in code converted from Matlab to R

To cross validation for CCLE (Cancer Cell Line Encyclopedia) drug data I tried to convert the following codes from matlab to R. However, I was unsuccessful. Matlab codes work fine and can create both a *cross.mat that is a group of 10 fold CV data for each data set and a *data.mat that is the grouped data of 10 times of CV of each data set.

I will be appreciate if you can help me find my mistake.

#This function is about 10-fold cross-validation data grouping
getcrossMatrixs <- function(MM){
  library(pracma)
  N <- nnz(MM)
  zeroM <- matrix(0L, nrow = dim(MM)[1], ncol = dim(MM)[2])
  D <- randperm(N)
  first <- floor(N/10) 
  
  w = which(MM != 0, arr.ind=TRUE);
  nrows=w[,1]; ncols=w[,2]
  crossdata  <-  list() 
  
  for (i in 1:10) {
    crossdata[[i]] <- zeroM
  }
  
  for (i in 1:10){
    for (j in (1+(i-1)*first):(i*first)){
      crossdata[[i]][c(nrows[D[j]]),c(ncols[D[j]]) ] <- MM[c(nrows[D[j]]),c(ncols[D[j]])]
      
    }
  }
  k <- (N-(10*first))
  i <- 10*first+1
  for (j in 1:k){
    crossdata[[j]][c(nrows[D[i]]),c(ncols[D[i]]) ] <- MM[c(nrows[D[i]]),c(ncols[D[i]])]
    
    i <- i+1
  }
}

#The following lines is the main for calling above function. 
library(foreach)
n.cores <- parallel::detectCores()
my.cluster <- parallel::makeCluster(
  n.cores, 
  type = "PSOCK"
)
print(my.cluster)
#> socket cluster with 16 nodes on host 'localhost'
doParallel::registerDoParallel(cl = my.cluster)
foreach::getDoParRegistered()
#> [1] TRUE
CCLEdata <- list()
#MM<-matrix(read_csv("MM.csv", col_names = FALSE, show_col_types = FALSE), rownames.force = NA)
MM <- matrix(seq(0, 4.5, length.out = 11784), nrow = 491) #datamatrix like CCLE drug activity area sensitivity matrrix(491*24)
foreach(i = 1:10) %dopar% {
  CCLEcross <- getcrossMatrixs(MM)
  CCLEdata[[i]] <- CCLEcross
}
#> [[1]]
#> NULL
#> 
#> [[2]]
#> NULL
#> 
#> [[3]]
#> NULL
#> 
#> [[4]]
#> NULL
#> 
#> [[5]]
#> NULL
#> 
#> [[6]]
#> NULL
#> 
#> [[7]]
#> NULL
#> 
#> [[8]]
#> NULL
#> 
#> [[9]]
#> NULL
#> 
#> [[10]]
#> NULL

Created on 2022-08-29 with reprex v2.0.2

Actually when I use the original CCLE dataset the error is changing in the main.R:

Error in { : task 1 failed - "is.numeric(x) || is.complex(x) is not TRUE"
or
Error in { : 
  task 1 failed - "attempt to select less than one element in integerOneIndex"
%These are from Matlab
function [crossdata] = getcrossMatrixs(MM)
N = nnz(MM(:)); 
zeroM = zeros(size(MM));
D = randperm(N); 
first = floor(N/10); 
[nrows,ncols] = find(MM); 
crossdata = {};
for i = 1:10
    crossdata{i} = zeroM;
end
for i = 1:10
    for j = 1+(i-1)*first:i*first
        crossdata{i}(nrows(D(j)),ncols(D(j))) = MM(nrows(D(j)),ncols(D(j)));
    end
end
 k=N -10*first ;
 i=10*first+1;
  for j=1:k
 crossdata{j}(nrows(D(i)),ncols(D(i))) = MM(nrows(D(i)),ncols(D(i)));
  i=i+1;
  end
end

load('MM.mat')
parfor i=1:10
    [CCLEcross] = getcrossMatrixs(MM);
    CCLEdata{i}=CCLEcross;
end

I didn't look too closely to figure out what was wrong. I based this function on the Matlab function supplied. Note that for this particular example, going parallel is more expensive due to overhead. Parallel will provide performance with larger matrices and/or more samples.

library(parallel)

MM <- matrix(seq(0, 4.5, length.out = 11784), nrow = 491)

getcrossMatrixs <- function(MM, parts = 10L) {
  D <- sample(which(MM != 0))
  first <- length(D) %/% parts
  last <- length(D) %% parts
  idx <- c(0L, cumsum(c(rep(first + 1L, last), rep(first, parts - last))))
  mZero <- matrix(0, nrow(MM), ncol(MM))
  lapply(1:parts, function(i, m) {m[D[(idx[i] + 1L):idx[i + 1L]]] <- MM[D[(idx[i] + 1L):idx[i + 1L]]]; m}, mZero)
}

reps <- 10L
clust <- makeCluster(min(detectCores() - 1L, reps))
clusterExport(clust, c("getcrossMatrixs", "MM"))
CCLEdata <- parLapply(clust, 1:reps, function(x) getcrossMatrixs(MM))
stopCluster(clust)

# check that each set of matrices returned has all elements of MM
identical(rep(list(MM), reps), lapply(1:reps, function(i) Reduce("+", CCLEdata[[i]], matrix(0, nrow(MM), ncol(MM)))))
#> [1] TRUE

And here's a cleaned-up version of the Matlab function:

function [crossdata] = getcrossMatrixs(MM)
    idx = find(MM);
    N = length(nrows); 
    zeroM = zeros(size(MM));
    idx = idx(randperm(N)); 
    first = floor(N/10); 
    crossdata = cell(10, 1);
    for i = 1:10
        crossdata{i} = zeroM;
    end
    for i = 1:10
        j = 1 + (i - 1)*first:i*first;
        crossdata{i}(idx(j)) = MM(idx(j));
    end
    k = N - 10*first;
    j = 10*first + 1;
    for i = 1:k
        crossdata{i}(idx(j)) = MM(idx(j));
        j = j + 1;
    end
end

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM