简体   繁体   中英

Compiling many nested loops using the rcpp::cppFunction

R-Users,

I'm trying to solve a combinatorial problem by brute force in order to assess the quality of an approximate computation. For this purpose I've written a small R function which, for a variable r, outputs an incredibly ugly Rcpp function containing r nested loops plus some break conditions. However, once r reaches the range of 20+, compiling the function with Rcpp::cppFunction takes incredibly long.

Any explanation why Rcpp compilation breaks down for this large number of nested loops? When I compile the function as an ordinary C++ program using g++, it compiles straight away in less than a second and also runs flawlessly (using cout instead of Rcout).

I'm probably missing something obvious, because when I remove all but the innermost break condition, it compiles beautifully with Rcpp. However, when I also remove this last break condition, it doesn't finish compiling again... Any suggestions?

PS Here's an example program for r=20, which I'm still waiting to finish compiling. Warning: It's ugly, but automatically generated.

# Compiles the C++ function make_tList_rcpp() from the inline source string below.
# The C++ body fixes r = 20 and runs 20 nested loops over t20 (outermost) down to
# t1 (innermost). Whenever 1*t1 + 2*t2 + ... + 20*t20 == r it prints the tuple
# t1,...,t20 as one comma-separated line on Rcout. After each loop body, the
# weighted partial sum of the counters from t20 down to that loop's own counter is
# compared with r, and the loop is left via break once it exceeds r.
# The function writes one newline to std::cout before the loops and returns 0.
cppFunction('
int make_tList_rcpp() {
int r = 20;
std::cout << std::endl;
 for (int t20=0; t20 <= floor(r/20); t20++) {
  for (int t19=0; t19 <= floor(r/19); t19++) {
   for (int t18=0; t18 <= floor(r/18); t18++) {
    for (int t17=0; t17 <= floor(r/17); t17++) {
     for (int t16=0; t16 <= floor(r/16); t16++) {
      for (int t15=0; t15 <= floor(r/15); t15++) {
       for (int t14=0; t14 <= floor(r/14); t14++) {
        for (int t13=0; t13 <= floor(r/13); t13++) {
         for (int t12=0; t12 <= floor(r/12); t12++) {
          for (int t11=0; t11 <= floor(r/11); t11++) {
           for (int t10=0; t10 <= floor(r/10); t10++) {
            for (int t9=0; t9 <= floor(r/9); t9++) {
             for (int t8=0; t8 <= floor(r/8); t8++) {
              for (int t7=0; t7 <= floor(r/7); t7++) {
               for (int t6=0; t6 <= floor(r/6); t6++) {
                for (int t5=0; t5 <= floor(r/5); t5++) {
                 for (int t4=0; t4 <= floor(r/4); t4++) {
                  for (int t3=0; t3 <= floor(r/3); t3++) {
                   for (int t2=0; t2 <= floor(r/2); t2++) {
                    for (int t1=0; t1 <= floor(r/1); t1++) {
                     if ((1*t1+2*t2+3*t3+4*t4+5*t5+6*t6+7*t7+8*t8+9*t9+10*t10+11*t11+12*t12+13*t13+14*t14+15*t15+16*t16+17*t17+18*t18+19*t19+20*t20) == r) {
                      Rcout << t1 << "," << t2 << "," << t3 << "," << t4 << "," << t5 << "," << t6 << "," << t7 << "," << t8 << "," << t9 << "," << t10 << "," << t11 << "," << t12 << "," << t13 << "," << t14 << "," << t15 << "," << t16 << "," << t17 << "," << t18 << "," << t19 << "," << t20 << std::endl;
                     }
                     if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4+3*t3+2*t2+1*t1) > r) {
                      break;
                     }
                    }
                    if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4+3*t3+2*t2) > r) {
                     break;
                    }
                   }
                   if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4+3*t3) > r) {
                    break;
                   }
                  }
                  if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4) > r) {
                   break;
                  }
                 }
                 if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5) > r) {
                  break;
                 }
                }
                if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6) > r) {
                 break;
                }
               }
               if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7) > r) {
                break;
               }
              }
              if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8) > r) {
               break;
              }
             }
             if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9) > r) {
              break;
             }
            }
            if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10) > r) {
             break;
            }
           }
           if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11) > r) {
            break;
           }
          }
          if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12) > r) {
           break;
          }
         }
         if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13) > r) {
          break;
         }
        }
        if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14) > r) {
         break;
        }
       }
       if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15) > r) {
        break;
       }
      }
      if ((20*t20+19*t19+18*t18+17*t17+16*t16) > r) {
       break;
      }
     }
     if ((20*t20+19*t19+18*t18+17*t17) > r) {
      break;
     }
    }
    if ((20*t20+19*t19+18*t18) > r) {
     break;
    }
   }
   if ((20*t20+19*t19) > r) {
    break;
   }
  }
  if ((20*t20) > r) {
   break;
  }
 }
 return(0);
}')

As suggested by @spacedman, here is a little more debugging information, when using sourceCpp instead. As written in a further comment to the sourceCpp suggestion, it appears to work on Linux so probably a Mac related problem...:

> sourceCpp(file="foobar.cpp",verbose=TRUE, rebuild=TRUE)

Generated extern "C" functions 
--------------------------------------------------------


#include <Rcpp.h>
// make_tList_rcpp
void make_tList_rcpp();
RcppExport SEXP sourceCpp_1_make_tList_rcpp() {
BEGIN_RCPP
    Rcpp::RNGScope rcpp_rngScope_gen;
    make_tList_rcpp();
    return R_NilValue;
END_RCPP
}

Generated R functions 
-------------------------------------------------------

`.sourceCpp_1_DLLInfo` <- dyn.load('/private/var/folders/bj/k_b2brs5443bmm8699v5fvxw0000gn/T/RtmpDSa3m8/sourceCpp-x86_64-apple-darwin13.4.0-0.12.9/sourcecpp_a7c1e15e92a/sourceCpp_8.so')

make_tList_rcpp <- Rcpp:::sourceCppFunction(function() {}, TRUE, `.sourceCpp_1_DLLInfo`, 'sourceCpp_1_make_tList_rcpp')

rm(`.sourceCpp_1_DLLInfo`)

Building shared library
--------------------------------------------------------

DIR: /private/var/folders/bj/k_b2brs5443bmm8699v5fvxw0000gn/T/RtmpDSa3m8/sourceCpp-x86_64-apple-darwin13.4.0-0.12.9/sourcecpp_a7c1e15e92a

/Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB -o 'sourceCpp_8.so' --preclean  'foobar.cpp'  
clang++ -I/Library/Frameworks/R.framework/Resources/include -DNDEBUG  -I/usr/local/include -I/usr/local/include/freetype2 -I/opt/X11/include  -I"/Users/hoehle/Library/R/3.3/library/Rcpp/include" -I"/Users/hoehle/Sandbox/Blog/_source"   -fPIC  -Wall -mtune=core2 -g -O2  -c foobar.cpp -o foobar.o

(and this is where it hangs...)

PS and here is the sessionInfo()

R version 3.3.2 (2016-10-31)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: macOS Sierra 10.12.2

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] Rcpp_0.12.9

loaded via a namespace (and not attached):
[1] compiler_3.3.2 tools_3.3.2   

This compiles and runs on every platform but macOS.

After the edit, we have some useful debug info. In particular, the flags being used:

clang++ -I/Library/Frameworks/R.framework/Resources/include -DNDEBUG  -I/usr/local/include -I/usr/local/include/freetype2 -I/opt/X11/include  -I"/Users/hoehle/Library/R/3.3/library/Rcpp/include" -I"/Users/hoehle/Sandbox/Blog/_source"   -fPIC  -Wall -mtune=core2 -g -O2  -c foobar.cpp -o foobar.o

The reason for the problematic compile of nested for loops on macOS is related to how clang optimizes the nested loops under -O2 vs -Os. Specifically, there was a regression circa clang v3.0 that directly impacted the ability to optimize said loops. In particular, see:

https://llvm.org/bugs/show_bug.cgi?id=16196

This issue looks like it was fixed within 3.8. The downside is that you will have to manually update to this compiler as this regression ships with the clang version on all macOS machines. You may just want to switch over to gcc on macOS. Regardless, the following post should help with setting up the appropriate compiler via homebrew and ~/.R/Makevars

http://thecoatlessprofessor.com/programming/openmp-in-r-on-os-x/

You may wish to use sourceCpp() instead of cppFunction(), as the latter is meant for simpler functions. The sourceCpp() function is meant for more complicated cases. Also, I'd probably avoid the int return type and declare the function void, as done in the example below.

Example of use of sourceCpp()

# In R
# Builds the C++ file with sourceCpp(); "path_to/" presumably stands in for the
# real directory holding example_comb.cpp (shown below).
sourceCpp("path_to/example_comb.cpp")

File: example_comb.cpp

#include <vector>

#include <Rcpp.h>

namespace {

// Recursively enumerates how often each part size occurs in a partition.
//
// `part` is the part size currently being assigned (counting down to 1),
// `remaining` is the amount still to be distributed over sizes 1..part, and
// counts[k - 1] holds the multiplicity chosen for part size k.
// Larger part sizes vary slowest and every multiplicity is tried in ascending
// order, which reproduces the print order of the former hand-unrolled loops
// (t20 outermost, t1 innermost).
void enumerate_partitions(int part, int remaining, std::vector<int>& counts) {
    if (part == 1) {
        // Parts of size 1 must absorb exactly what is left, so every prefix
        // has one completion; print it as "t1,t2,...,tr".
        counts[0] = remaining;
        for (std::vector<int>::size_type i = 0; i < counts.size(); ++i) {
            if (i != 0) {
                Rcpp::Rcout << ",";
            }
            Rcpp::Rcout << counts[i];
        }
        Rcpp::Rcout << '\n';
        return;
    }
    // Stopping once multiplicity * part exceeds `remaining` replaces the old
    // "partial sum > r -> break" checks: branches that can never reach the
    // target are simply not entered.
    for (int multiplicity = 0; multiplicity * part <= remaining; ++multiplicity) {
        counts[part - 1] = multiplicity;
        enumerate_partitions(part - 1, remaining - multiplicity * part, counts);
    }
}

}  // namespace

// Prints every tuple (t1, ..., tr) of non-negative integers satisfying
// 1*t1 + 2*t2 + ... + r*tr == r, one comma-separated line per tuple, to the
// R console.
//
// @param r Target sum and number of counters. Defaults to 20, the value that
//          was previously hard-coded. For r < 1 only the leading blank line
//          is written.
//
// A blank line is emitted before the first tuple, as before. Output goes to
// Rcpp::Rcout rather than std::cout so that it is routed through R's console.
// The enumeration is recursive, so the source no longer contains r nested
// loops.
// [[Rcpp::export]]
void make_tList_rcpp(int r = 20) {
    Rcpp::Rcout << std::endl;
    if (r < 1) {
        return;
    }
    std::vector<int> counts(r, 0);
    enumerate_partitions(r, r, counts);
}


/*** R
# Runs automatically in R after compile.
# make_tList_rcpp() is declared above without required arguments, so it is
# called without any (passing one to a zero-argument function is an R error).
make_tList_rcpp()
*/

Output of function:

https://gist.github.com/coatless/aa51267dcda82b42622fdc8e6e566ab7

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM