[英]Compiling many nested loops using the rcpp::cppFunction
R用户
我正在尝试用蛮力(穷举)方法求解一个组合问题,以评估某个近似计算的质量。为此,我编写了一个小的 R 函数,它针对给定的变量 r 生成一个非常丑陋的 Rcpp 函数,其中包含 r 层嵌套循环以及若干 break 中断条件。但是,当 r 达到 20 以上时,用 Rcpp::cppFunction 编译该函数要花费极长的时间……
有没有人能解释一下,为什么 Rcpp 的编译会因为大量嵌套循环而卡住?当我用 g++ 把该函数作为普通的 C++ 程序编译时,不到一秒钟就能编译完成,并且运行起来也完全正常(用 cout 代替 Rcout)。
我可能忽略了某些显而易见的东西,因为当我删掉除最内层以外的所有中断条件时,它可以用 Rcpp 顺利编译。但是,当我把最后这个中断条件也删掉时,编译又无法完成了……有什么建议吗?
PS这是r = 20的示例程序,我仍在等待完成编译。 警告:这很丑陋,但会自动生成。
# Compiles and loads an inline C++ function, make_tList_rcpp(), via cppFunction.
# The C++ body hard-codes r = 20 and walks 20 nested loops (t20 outermost, t1
# innermost), where t_k runs from 0 to floor(r/k). Whenever
# 1*t1 + 2*t2 + ... + 20*t20 == r it prints "t1,t2,...,t20" to Rcout.
# After each loop level the partial weighted sum is compared with r, and the
# enclosing loop is left via break once that sum exceeds r. The function
# always returns 0.
# The C++ text is machine-generated (one loop level per value 1..r), which is
# why it is so repetitive.
# NOTE(review): cppFunction is called unqualified - presumably Rcpp is attached.
cppFunction('
int make_tList_rcpp() {
int r = 20;
std::cout << std::endl;
for (int t20=0; t20 <= floor(r/20); t20++) {
for (int t19=0; t19 <= floor(r/19); t19++) {
for (int t18=0; t18 <= floor(r/18); t18++) {
for (int t17=0; t17 <= floor(r/17); t17++) {
for (int t16=0; t16 <= floor(r/16); t16++) {
for (int t15=0; t15 <= floor(r/15); t15++) {
for (int t14=0; t14 <= floor(r/14); t14++) {
for (int t13=0; t13 <= floor(r/13); t13++) {
for (int t12=0; t12 <= floor(r/12); t12++) {
for (int t11=0; t11 <= floor(r/11); t11++) {
for (int t10=0; t10 <= floor(r/10); t10++) {
for (int t9=0; t9 <= floor(r/9); t9++) {
for (int t8=0; t8 <= floor(r/8); t8++) {
for (int t7=0; t7 <= floor(r/7); t7++) {
for (int t6=0; t6 <= floor(r/6); t6++) {
for (int t5=0; t5 <= floor(r/5); t5++) {
for (int t4=0; t4 <= floor(r/4); t4++) {
for (int t3=0; t3 <= floor(r/3); t3++) {
for (int t2=0; t2 <= floor(r/2); t2++) {
for (int t1=0; t1 <= floor(r/1); t1++) {
if ((1*t1+2*t2+3*t3+4*t4+5*t5+6*t6+7*t7+8*t8+9*t9+10*t10+11*t11+12*t12+13*t13+14*t14+15*t15+16*t16+17*t17+18*t18+19*t19+20*t20) == r) {
Rcout << t1 << "," << t2 << "," << t3 << "," << t4 << "," << t5 << "," << t6 << "," << t7 << "," << t8 << "," << t9 << "," << t10 << "," << t11 << "," << t12 << "," << t13 << "," << t14 << "," << t15 << "," << t16 << "," << t17 << "," << t18 << "," << t19 << "," << t20 << std::endl;
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4+3*t3+2*t2+1*t1) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4+3*t3+2*t2) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4+3*t3) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5+4*t4) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6+5*t5) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7+6*t6) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8+7*t7) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9+8*t8) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10+9*t9) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11+10*t10) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12+11*t11) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13+12*t12) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14+13*t13) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15+14*t14) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16+15*t15) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17+16*t16) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18+17*t17) > r) {
break;
}
}
if ((20*t20+19*t19+18*t18) > r) {
break;
}
}
if ((20*t20+19*t19) > r) {
break;
}
}
if ((20*t20) > r) {
break;
}
}
return(0);
}')
正如@spacedman所建议的,这里是使用sourceCpp时的更多调试信息。 正如对sourceCpp建议的进一步评论所写,它似乎可以在Linux上运行,因此可能是与Mac相关的问题...:
> sourceCpp(file="foobar.cpp",verbose=TRUE, rebuild=TRUE)
Generated extern "C" functions
--------------------------------------------------------
#include <Rcpp.h>
// make_tList_rcpp
void make_tList_rcpp();
RcppExport SEXP sourceCpp_1_make_tList_rcpp() {
BEGIN_RCPP
Rcpp::RNGScope rcpp_rngScope_gen;
make_tList_rcpp();
return R_NilValue;
END_RCPP
}
Generated R functions
-------------------------------------------------------
`.sourceCpp_1_DLLInfo` <- dyn.load('/private/var/folders/bj/k_b2brs5443bmm8699v5fvxw0000gn/T/RtmpDSa3m8/sourceCpp-x86_64-apple-darwin13.4.0-0.12.9/sourcecpp_a7c1e15e92a/sourceCpp_8.so')
make_tList_rcpp <- Rcpp:::sourceCppFunction(function() {}, TRUE, `.sourceCpp_1_DLLInfo`, 'sourceCpp_1_make_tList_rcpp')
rm(`.sourceCpp_1_DLLInfo`)
Building shared library
--------------------------------------------------------
DIR: /private/var/folders/bj/k_b2brs5443bmm8699v5fvxw0000gn/T/RtmpDSa3m8/sourceCpp-x86_64-apple-darwin13.4.0-0.12.9/sourcecpp_a7c1e15e92a
/Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB -o 'sourceCpp_8.so' --preclean 'foobar.cpp'
clang++ -I/Library/Frameworks/R.framework/Resources/include -DNDEBUG -I/usr/local/include -I/usr/local/include/freetype2 -I/opt/X11/include -I"/Users/hoehle/Library/R/3.3/library/Rcpp/include" -I"/Users/hoehle/Sandbox/Blog/_source" -fPIC -Wall -mtune=core2 -g -O2 -c foobar.cpp -o foobar.o
(编译就卡在这一步……)
PS,这是sessionInfo()
R version 3.3.2 (2016-10-31)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: macOS Sierra 10.12.2
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] Rcpp_0.12.9
loaded via a namespace (and not attached):
[1] compiler_3.3.2 tools_3.3.2
它可以编译并在除 macOS之外的所有平台上运行。
编辑后,我们将获得一些有用的调试信息。 特别是,使用的标志:
clang++ -I/Library/Frameworks/R.framework/Resources/include -DNDEBUG -I/usr/local/include -I/usr/local/include/freetype2 -I/opt/X11/include -I"/Users/hoehle/Library/R/3.3/library/Rcpp/include" -I"/Users/hoehle/Sandbox/Blog/_source" -fPIC -Wall -mtune=core2 -g -O2 -c foobar.cpp -o foobar.o
嵌套 for 循环在 macOS 上编译出现问题的原因,与 clang 在 -O2 和 -Os 下优化嵌套循环的方式有关。具体来说,clang 在 v3.0 前后出现了一个回归(regression),它直接影响了编译器优化这类循环的能力。特别是,请参阅:
https://llvm.org/bugs/show_bug.cgi?id=16196
该问题似乎已在 clang 3.8 中修复。缺点是您必须手动升级到该版本的编译器,因为所有 macOS 机器自带的 clang 版本都带有这一回归。您也可以干脆在 macOS 上改用 gcc。无论采用哪种方式,下面这篇文章都应该有助于您通过 Homebrew 和 ~/.R/Makevars 设置合适的编译器:
http://thecoatlessprofessor.com/programming/openmp-in-r-on-os-x/
您可能更适合使用 sourceCpp() 而不是 cppFunction(),因为后者是为较简单的函数准备的,而 sourceCpp() 函数适用于更复杂的情况。另外,我会选择不指定 int 返回类型(下面的示例改用 void)。
使用 sourceCpp() 的示例
# In R: compile the C++ source file below and load the function it marks
# with [[Rcpp::export]] into the current session.
# NOTE(review): sourceCpp is called unqualified - presumably Rcpp must be
# attached first (library(Rcpp)); adjust "path_to/" to the real location.
sourceCpp("path_to/example_comb.cpp")
文件: example_comb.cpp
#include <vector>

#include <Rcpp.h>
namespace {

// Prints one solution as "t1,t2,...,tr" followed by a newline on R's
// console stream.
void print_counts(const std::vector<int>& counts) {
  for (std::vector<int>::size_type i = 0; i < counts.size(); ++i) {
    if (i > 0) {
      Rcpp::Rcout << ",";
    }
    Rcpp::Rcout << counts[i];
  }
  Rcpp::Rcout << '\n';
}

// Chooses how often the value `part` is used (stored in counts[part - 1]),
// then recurses on part - 1 with whatever is left of the target sum.
// `remaining` is never negative because the loop bound rejects over-budget
// choices before descending.
void enumerate_counts(int part, int remaining, std::vector<int>& counts) {
  if (part == 1) {
    // Only one multiplicity of 1 completes the sum, so there is nothing to
    // search at the innermost level.
    counts[0] = remaining;
    print_counts(counts);
    return;
  }
  // Ascending multiplicities with the largest part outermost reproduces the
  // output order of the original hand-unrolled loops (t_r outermost, t1
  // innermost).
  for (int t = 0; t * part <= remaining; ++t) {
    counts[part - 1] = t;
    enumerate_counts(part - 1, remaining - t * part, counts);
  }
}

}  // namespace

// Enumerates every tuple of non-negative integers (t1, ..., tr) that
// satisfies 1*t1 + 2*t2 + ... + r*tr == r and prints each tuple as one
// comma-separated line, preceded by a single blank line.
//
// @param r Target sum and number of printed columns. Defaults to 20, the
//          value that was previously hard-coded, so calls without an
//          argument print the same lines in the same order as before.
//          Values below 1 print only the leading blank line.
//
// All output goes through Rcpp::Rcout (not std::cout) so that it reaches
// the R console. Using recursion instead of one generated loop per value
// keeps the function small regardless of r.
// [[Rcpp::export]]
void make_tList_rcpp(int r = 20) {
  Rcpp::Rcout << '\n';
  if (r < 1) {
    return;
  }
  std::vector<int> counts(r, 0);
  enumerate_counts(r, r, counts);
}
/*** R
# Runs automatically in R after compile.
# Called without arguments, so the function enumerates the r = 20 case.
make_tList_rcpp()
*/
函数输出:
https://gist.github.com/coatless/aa51267dcda82b42622fdc8e6e566ab7
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.