如何對Rcpp函數進行記憶化（memoise）？

Question

我在R中編寫了一個遞歸函數，並使用memoise來加速它。 我試圖通過在Rcpp中編寫它然后記憶Rcpp函數來進一步加速它，但R函數更快。 為什么會這樣，有什么方法可以加快我的使用速度？

require(microbenchmark)
require(Rcpp)
require(memoise)

Rcpp功能：

# Compile the recursive function FunCpp from inline C++ source via cppFunction.
# Base cases are i == 0, 1 and 2; any other i combines FunCpp(i-1) and
# FunCpp(i-2) in log2 space. The recursive calls inside the C++ source call
# the C++ function directly.
cppFunction('
double FunCpp (unsigned int i, double d1, double d2, 
                double p, double s, unsigned int imax, 
                double n, double k, double r, 
                double m, double t) {

  if (i == 0) return 0;
  if (i == 1) return log2(-1*d1);
  if (i == 2) return log2(d2*d1 - p*s);

  double x = log2(fabs(-(((imax - (n - i))/imax)*k*r + m + (n - i)*t)));
  x = x + FunCpp(i-1, d1, d2, p, s, imax, n, k, r, m, t);

  double y = log2((n - i + 1)*t*k*r*((imax - ((n - i + 1) - 1))/imax));
  y = y + FunCpp(i-2, d1, d2, p, s, imax, n, k, r, m, t);

  return x + log2(1 - pow(2,y-x));
}
')
# Rebind the name FunCpp to a memoised wrapper around the compiled function.
# Only calls that go through this R-level wrapper are cached; the recursion
# inside the C++ body never passes through it.
FunCpp = memoise(FunCpp)

R功能：

# Memoised R implementation of the recurrence.
#   i == 0 -> 0
#   i == 1 -> log2(-d1)
#   i == 2 -> log2(d2*d1 - p*s)
#   else   -> combination of FunR(i-1) and FunR(i-2), carried out in log2 space.
# The recursive calls go through the memoised FunR, so each i is evaluated
# once per argument set until forget(FunR) is called.
FunR = memoise(function(i, d1, d2, p, s, imax, n, k, r, m, t) {

  # Base cases, handled as guard clauses.
  if (i == 0) return(0)
  if (i == 1) return(log2(-1*d1))
  if (i == 2) return(log2(d2*d1 - p*s))

  gap  = n - i      # shared by both terms below
  step = gap + 1

  # Term built on the previous value.
  x = log2(abs(-(((imax - gap)/imax)*k*r + m + gap*t)))
  x = x + FunR(i-1, d1, d2, p, s, imax, n, k, r, m, t)

  # Term built on the value two steps back.
  y = log2(step*t*k*r*((imax - (step - 1))/imax))
  y = y + FunR(i-2, d1, d2, p, s, imax, n, k, r, m, t)

  # log2(2^x - 2^y), written so that only the difference y - x is exponentiated.
  x + log2(1 - 2^(y-x))
})

這種速度比較對我來說是切合實際的。 遞歸函數用於一系列整數，但之后，不會再使用相同的輸入調用它。 所以為了速度比較，這里我從其他函數中調用函數，在我調用遞歸函數之后，我使用forget（）來重置緩存。

# Benchmark driver for FunR: evaluates i = 30, 29, ..., 0 with a fixed
# parameter set, then empties the memoise cache so the next run starts cold.
TestFunR = function() {
  evalAt = function(step) {
    FunR(i = 31-step, d1 = -152, d2 = -147.33, p = 150, s = 0.03,
         imax = 30, n = 31, k = 1, r = 1, m = 2, t = 5)
  }
  x = sapply(1:31, evalAt)
  forget(FunR)
}

# Benchmark driver for the memoised FunCpp wrapper: evaluates i = 30, 29, ..., 0
# with the same fixed parameter set as TestFunR, then empties the memoise cache.
TestFunCpp = function() {
  evalAt = function(step) {
    FunCpp(i = 31-step, d1 = -152, d2 = -147.33, p = 150, s = 0.03,
           imax = 30, n = 31, k = 1, r = 1, m = 2, t = 5)
  }
  x = sapply(1:31, evalAt)
  forget(FunCpp)
}

# Time the two benchmark drivers against each other.
microbenchmark(TestFunR(), TestFunCpp())


Unit: milliseconds
         expr        min       lq      mean    median        uq       max neval cld
   TestFunR()   9.979738  10.4910  12.83228  10.91887  11.89264  61.61513   100  a 
 TestFunCpp() 520.955483 528.6965 547.31103 536.73058 547.66377 729.57631   100   b

編輯：在發布之前我從Dirk的書中得到了一個方法。

# C++ support code handed to cxxfunction() through its incl argument.
# It defines class F, which evaluates the recurrence with a per-object memo
# table. Compared with a NaN-as-sentinel table, a separate flag vector records
# which entries are already computed, so a base case that legitimately
# evaluates to NaN is returned as NaN instead of being recomputed through the
# recurrence. The table also always has room for the three base cases.
includeText = '
#include <algorithm>
#include <vector>
#include <stdexcept>
#include <cmath>
#include <iostream>

// Evaluates the recurrence for indices 0 .. size-1, caching every result.
class F {

  public:
    // n is the number of table slots; d1, d2, p and s define the base cases.
    F(unsigned int n = 200, double d1 = 0, double d2 = 0, double p = 0, double s = 0) {
      // Keep at least three slots so the base-case writes stay in bounds.
      const unsigned int slots = (n < 3u) ? 3u : n;
      memo.assign(slots, NAN);
      known.assign(slots, 0);

      memo[0] = 0;
      memo[1] = std::log2(-1*d1);
      memo[2] = std::log2(d2*d1 - p*s);
      known[0] = 1;
      known[1] = 1;
      known[2] = 1;
    }

    // Returns the value at index i. Negative i yields NaN; an index beyond
    // the table throws std::range_error.
    double FunIL(int i, double d1, double d2, double p, double s, double imax,
                 double n, double k, double r, double m, double t) {

      if (i < 0) return NAN;
      const std::vector< double >::size_type idx =
        static_cast< std::vector< double >::size_type >(i);
      if (idx >= memo.size()) throw std::range_error(\"i too large\");
      // The flag, not the stored value, decides whether the entry is cached.
      if (known[idx]) return memo[idx];

      double x = std::log2(std::fabs(-(((imax - (n - i))/imax)*k*r + m + (n - i)*t)));
      x = x + FunIL(i-1, d1, d2, p, s, imax, n, k, r, m, t);

      double y = std::log2((n - i + 1)*t*k*r*((imax - ((n - i + 1) - 1))/imax));
      y = y + FunIL(i-2, d1, d2, p, s, imax, n, k, r, m, t);

      memo[idx] = x + std::log2(1 - std::pow(2.0, y-x));
      known[idx] = 1;
      return memo[idx];
    }
  private:
    std::vector< double > memo;   // cached values
    std::vector< char > known;    // 1 when the matching memo entry is valid
};
'
# C++ body of the inline function, handed to cxxfunction() through its body
# argument. It converts each incoming argument with Rcpp::as, constructs an F
# whose table size is taken from n, and returns the result of f.FunIL(...)
# converted back with Rcpp::wrap. A new F (and therefore an empty memo table)
# is built on every call.
bodyText = '
  int is = Rcpp::as<int>(i);
  double d1s = Rcpp::as<double>(d1);
  double d2s = Rcpp::as<double>(d2);
  double ps = Rcpp::as<double>(p);
  double ss = Rcpp::as<double>(s);
  double imaxs = Rcpp::as<double>(imax);
  double ns = Rcpp::as<double>(n);
  double ks = Rcpp::as<double>(k);
  double rs = Rcpp::as<double>(r);
  double ms = Rcpp::as<double>(m);
  double ts = Rcpp::as<double>(t);
  F f(ns, d1s, d2s, ps, ss);
  return Rcpp::wrap( f.FunIL(is, d1s, d2s, ps, ss, imaxs, ns, ks, rs, ms, ts) );
'

# Compile the inline C++ function: includeText supplies class F and bodyText
# supplies the function body; the signature declares the eleven arguments.
# verbose is spelled TRUE rather than T because T is an ordinary variable that
# can be reassigned, whereas TRUE is a reserved word.
FunInline = cxxfunction(signature(i = "integer", d1 = "numeric", d2 = "numeric", p = "numeric",
                                  s = "numeric", imax = "numeric", n = "numeric", k = "numeric",
                                  r = "numeric", m = "numeric", t = "numeric"),
                        plugin = "Rcpp",
                        verbose = TRUE,
                        incl = includeText,
                        body = bodyText)

它同樣有效（參見TestFunInline）：

microbenchmark(TestFunR(), TestFunCpp(), TestFunCpp_Mem(), TestFunInline())
Unit: microseconds
             expr        min         lq        mean      median          uq        max neval cld
       TestFunR()   8871.251   9067.758  10301.8003   9287.5725   9593.1310  19270.081   100  b 
     TestFunCpp() 514415.356 517160.251 522431.2980 519321.6130 523811.7640 584812.731   100   c
 TestFunCpp_Mem()    245.474    264.291    284.8908    281.6105    292.0885    526.870   100 a  
  TestFunInline()    279.686    295.723    378.2134    306.8425    316.0370   6621.364   100 a

但是，我無法將它與doParallel一起使用。 我正在使用optim和optimx包在每個進程中優化目標函數；當我使用%do%時它可以工作，但是當我使用%dopar%時，得到的錯誤是目標函數無法在初始參數上進行評估。 我采納了Dirk在其他許多帖子中給出的建議，把Coatless的方法放進了一個包中，但我不確定如何把Dirk書中的方法也放進包中。 這只是因為我對C ++缺乏經驗。

編輯2：我終於弄明白了如何把Dirk的方法放進我的包的源文件中。 我知道還有其他關於將Rcpp與doParallel一起使用的討論，但我把這段代碼放在這里，因為它是解決我的問題的另一個好方法；而且把這段代碼加入包的源文件之后，對我來說，並行運行它恰好比使用內聯方式更容易。

/// Evaluates the recurrence for indices 0 .. size-1, caching every result in
/// a per-object table.
///
/// A separate flag vector records which entries are already computed. Using
/// NaN itself as the "not computed" marker would make a base case that
/// legitimately evaluates to NaN (for example d2*d1 - p*s < 0) look uncached,
/// and it would then be recomputed through the general recurrence.
class F {

  public:
    /// @param n   number of table slots (raised to 3 if smaller, so that the
    ///            three base-case writes below stay in bounds)
    /// @param d1,d2,p,s  parameters of the base cases for i == 1 and i == 2
    F(unsigned int n = 200, double d1 = 0, double d2 = 0, double p = 0, double s = 0) {
      const unsigned int slots = (n < 3u) ? 3u : n;
      memo.assign(slots, NAN);
      known.assign(slots, 0);

      memo[0] = 0;
      memo[1] = std::log2(-1*d1);
      memo[2] = std::log2(d2*d1 - p*s);
      known[0] = 1;
      known[1] = 1;
      known[2] = 1;
    }

    /// Returns the value of the recurrence at index i.
    ///
    /// @return NaN for negative i; otherwise the cached or freshly computed value.
    /// @throws std::range_error when i is not smaller than the table size.
    double FunIL(int i, double d1, double d2, double p, double s, double imax,
                 double n, double k, double r, double m, double t) {

      if (i < 0) return NAN;
      const std::vector< double >::size_type idx =
        static_cast< std::vector< double >::size_type >(i);
      if (idx >= memo.size()) throw std::range_error("i too large");
      // The flag, not the stored value, decides whether the entry is cached.
      if (known[idx]) return memo[idx];

      double x = std::log2(std::fabs(-(((imax - (n - i))/imax)*k*r + m + (n - i)*t)));
      x = x + FunIL(i-1, d1, d2, p, s, imax, n, k, r, m, t);

      double y = std::log2((n - i + 1)*t*k*r*((imax - ((n - i + 1) - 1))/imax));
      y = y + FunIL(i-2, d1, d2, p, s, imax, n, k, r, m, t);

      // log2(2^x - 2^y), written so that only the difference y - x is exponentiated.
      memo[idx] = x + std::log2(1 - std::pow(2.0, y-x));
      known[idx] = 1;
      return memo[idx];
    }
  private:
    std::vector< double > memo;   // cached values
    std::vector< char > known;    // 1 when the matching memo entry is valid
};

// [[Rcpp::export]]
double FunDirk(int i, double d1, double d2, double p, double s, 
                  double imax, double n, double k, double r, 
                  double m, double t) {
    F f(n, d1, d2, p, s);
    return f.FunIL(i, d1, d2, p, s, imax, n, k, r, m, t);

}

Answer 1

記憶我

好吧，首先讓我們考慮一下memoise的目的。 memoise的目標是緩存函數結果並重複使用它們。 因此，某個值計算過一次之后，就不必在后續的任何計算中重新計算它，只需從緩存中取回該值即可。 這對遞歸結構尤其重要。

關於R和C ++的`memoise`緩存訪問

memoise的設計是緩存R函數的返回值。 在這種情況下，它確實緩存了這些值。 但是，C ++代碼無法訪問緩存的值。 因此，C ++版本會重新計算每個值。 從本質上講，您實際上是在使用：

# Each FunCpp() call below runs the full C++ recursion for its i.
x = sapply(1:31, function(i) {
    FunCpp(i = 31-i, d1 = -152, d2 = -147.33, p = 150, s = 0.03, 
           imax = 30, n = 31, k = 1, r = 1, m = 2, t = 5)
  })

大O Algo

免責聲明：接下來應該有一個更正式的論點，但它已經有一段時間了。

為了理解算法，有時我們需要使用所謂的大O表示法，它讓我們可以觀察代碼的漸近表現。 在這種情況下，由於每次計算有兩個遞歸調用： FunR(i-1)和FunR(i-2) ，大O是O(2^N)。 但是， memoise使用哈希映射/查找表，其查找在最壞情況下可能是O(n) ，最好情況下是O(1) 。 本質上，我們是在比較常數時間與指數時間的漸近結果。

改進微基準測試 - 在C ++中不使用記憶化

但是，這並不一定意味着C ++函數很糟糕。 R與Rcpp之間來回橋接的缺點之一，是在兩個域之間傳遞值時的延遲。 因此，稍微降低計算時間的一種方法是把循環完全放在C ++中。

例如

// Runs the whole benchmark loop in C++: fills a vector of length e with
// FunCpp evaluated at indices n-1, n-2, ..., n-e, forwarding the caller's
// parameters to FunCpp instead of a fixed set of constants. With e == 31 and
// the benchmark's parameter values this yields the same numbers as calling
// FunCpp from R for i = 30, ..., 0.
//
// Stops with an R error when n is negative (or NaN) or when e exceeds n,
// because the unsigned index n - (pos + 1) would otherwise wrap around.
// [[Rcpp::export]]
Rcpp::NumericVector FunCpp_loop(unsigned int e, 
                                double d1, double d2, 
                                double p, double s, unsigned int imax, 
                                double n, double k, double r, 
                                double m, double t){

  if (!(n >= 0.0) || e > n) {
    Rcpp::stop("e must not exceed n, and n must be non-negative");
  }

  // FunCpp takes an unsigned index, so convert n once up front.
  const unsigned int top = static_cast<unsigned int>(n);

  Rcpp::NumericVector o(e);

  for(unsigned int pos = 0; pos < e; ++pos){
    o(pos) = FunCpp(top - (pos + 1), d1, d2, p, s, imax, n, k, r, m, t);
  }

  return o;
}

但是，這里的基准並沒有真正改善這種情況（即使預先創建了矢量1:31 ）

Unit: milliseconds
              expr        min         lq       mean     median        uq       max neval
      TestFunR(tv)   8.467568   9.077262   9.986837   9.449952  10.60555  14.91243   100
    TestFunCpp(tv) 476.678391 482.489094 487.687811 486.351087 490.25346 579.38161   100
 TestFunCpp_loop() 478.348070 482.588307 488.234200 486.211347 492.33965 521.10918   100

C ++中的Memoization

我們可以在C ++中應用memoise所用的同一種記憶化（memoization）技術。 實現不算漂亮，但它有助於說明同樣的原則是適用的。

首先，我們創建一個用於緩存的映射表（這里用的是std::map）。

// Holder for the memo table shared by the memoised function and its accessors.
struct mem_map{

  // Factory used to initialise the static (persistent) table; it returns an
  // empty map.
  static std::map<int, double> create_map()
  {
    return std::map<int, double>();
  }

  // The table itself: an ordered std::map from the index i to its cached value.
  static std::map<int, double> memo;

};

// Definition of the static member at namespace scope, initialised from the
// factory above.
std::map<int, double> mem_map::memo = mem_map::create_map();

現在，我們可能應該使用一些訪問器來處理map對象。

// Empties the shared memo table so that the next evaluation starts cold.
// [[Rcpp::export]]
void clear_mem(){
  std::map<int, double>& table = mem_map::memo;
  table.erase(table.begin(), table.end());
}

// Returns a copy of the shared memo table (index -> cached value).
// [[Rcpp::export]]
std::map<int, double> get_mem(){
  const std::map<int, double>& table = mem_map::memo;
  return table;
}

最后，讓我們改變你函數中的一些內部事物。

// Memoised version of the recurrence, backed by mem_map::memo.
//
// Returns 0 for i <= 0, log2(-d1) for i == 1, log2(d2*d1 - p*s) for i == 2,
// and otherwise combines the values at i-1 and i-2 in log2 space. Every
// result is stored in mem_map::memo under the key i.
//
// NOTE: the cache key is i alone. Values cached for one set of
// d1/d2/p/s/imax/n/k/r/m/t are returned unchanged for any other set, so the
// table must be emptied (mem_map::memo.clear()) before the parameters change.
// [[Rcpp::export]]
double FunCpp_Mem (int i, double d1, double d2, 
                   double p, double s, unsigned int imax, 
                   double n, double k, double r, 
                   double m, double t) {

  // Single lookup: reuse the cached value when this index was already computed.
  const std::map<int, double>::const_iterator hit = mem_map::memo.find(i);
  if (hit != mem_map::memo.end())
    return hit->second;

  double res = 0.0;

  if (i <= 0) {            // i <= 0
    res = 0.0;
  } else if (i == 1) {
    res = log2(-1.0*d1);
  } else if (i == 2) {
    res = log2(d2*d1 - p*s);
  } else {
    // General case: both recursive calls go through the cache.
    double x = log2(fabs(-(((imax - (n - i))/imax)*k*r + m + (n - i)*t)));
    x = x + FunCpp_Mem(i-1, d1, d2, p, s, imax, n, k, r, m, t);

    double y = log2((n - i + 1)*t*k*r*((imax - ((n - i + 1) - 1))/imax));
    y = y + FunCpp_Mem(i-2, d1, d2, p, s, imax, n, k, r, m, t);

    // log2(2^x - 2^y), written so that only the difference y - x is exponentiated.
    res = x + log2(1 - pow(2,y-x));
  }

  // Record the freshly computed value before returning it.
  mem_map::memo[i] = res;

  return res;
}

呼，工作量真不小。 讓我們測試一下，看看它是否值得。

# Benchmark driver for the C++-side memoisation: evaluates FunCpp_Mem at
# i = 31 - step for every step in tv, then empties the C++ memo table through
# clear_mem() so the next run starts cold.
TestFunCpp_mem = function(tv) {
  evalAt = function(step) {
    FunCpp_Mem(i = 31-step, d1 = -152, d2 = -147.33, p = 150, s = 0.03,
               imax = 30, n = 31, k = 1, r = 1, m = 2, t = 5)
  }
  x = sapply(tv, evalAt)
  clear_mem()
}

# Benchmark driver for the memoised R function: evaluates FunR at i = 31 - step
# for every step in tv, then empties the memoise cache with forget().
TestFunR = function(tv) {
  evalAt = function(step) {
    FunR(i = 31-step, d1 = -152, d2 = -147.33, p = 150, s = 0.03,
         imax = 30, n = 31, k = 1, r = 1, m = 2, t = 5)
  }
  x = sapply(tv, evalAt)
  forget(FunR)
}

# Build the index vector once, outside the timed expressions.
tv = seq_len(31)

# Compare the memoised R version with the C++-memoised version.
microbenchmark(TestFunR(tv), TestFunCpp_mem(tv))

結果......

microbenchmark(TestFunR(tv),TestFunCpp_mem(tv))
Unit: microseconds
               expr      min       lq      mean   median       uq       max neval
       TestFunR(tv) 8246.324 8662.694 9345.6947 9009.868 9797.126 13001.995   100
 TestFunCpp_mem(tv)  203.832  214.939  253.7931  228.898  240.906  1277.325   100

具有記憶功能的Cpp功能比R版本快40.5倍！ 記憶絕對是值得的！

記下一個Rcpp功能？

問題描述

1 個解決方案

解決方案1
6 已采納 2016-05-02 01:31:54

記憶我

關於R和C ++的`memoise`緩存訪問

大O Algo

改進微標記 - 用C ++進行W / O記憶

C ++中的Memoziation

記下一個Rcpp功能？

問題描述

1 個解決方案

解決方案1 6 已采納 2016-05-02 01:31:54

記憶我

關於R和C ++的memoise緩存訪問

大O Algo

改進微標記 - 用C ++進行W / O記憶

C ++中的Memoziation

解決方案1
6 已采納 2016-05-02 01:31:54

關於R和C ++的`memoise`緩存訪問