如何加速系列生成？

Question

該問題需要生成與Fibonacci序列類似的序列n-th元素。 然而，它有點棘手，因為n非常大（1 <= n <= 10 ^ 9）。 然后答案模1000000007.序列定義如下：
在此輸入圖像描述

使用生成函數，我得到以下公式： 在此輸入圖像描述

如果我使用序列方法，那么答案可以是模數，但它運行得非常慢。 事實上，我的time limit exceed很多次。 我還嘗試使用表來預生成一些初始值（緩存），但仍然不夠快。 另外，與10 ^ 9相比，我可以存儲在array/vector （C ++）中的最大元素數量太少，所以我猜這種方法也不起作用。
如果我使用直接公式，那么它運行得非常快，但僅適用於小的n 。 對於n大，double將被截斷，加上我將無法使用該數字修改我的答案，因為modulo僅適用於整數。
我沒有想法，我認為必須有一個非常好的技巧來解決這個問題，不幸的是我只是想不到一個。 任何想法將不勝感激。

這是我最初的方法：

#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
#include <cmath>
#include <cassert>
#include <bitset>
#include <fstream>
#include <iomanip>
#include <set>
#include <stack>
#include <sstream>
#include <cstdio>
#include <map>
#include <cmath>

using namespace std;

typedef unsigned long long ull;

ull count_fair_coins_by_generating_function(ull n) {
    n--;
    return 
        (sqrt(3.0) + 1)/((sqrt(3.0) - 1) * 2 * sqrt(3.0)) * pow(2 / (sqrt(3.0) - 1), n * 1.0) 
        +
        (1 - sqrt(3.0))/((sqrt(3.0) + 1) * 2 * sqrt(3.0)) * pow(-2 / (sqrt(3.0) + 1), n * 1.0);
}

ull count_fair_coins(ull n) {
    if (n == 1) {
        return 1;
    }
    else if (n == 2) {
        return 3;
    }
    else {
        ull a1 = 1;
        ull a2 = 3;
        ull result;
        for (ull i = 3; i <= n; ++i) {
            result = (2*a2 + 2*a1) % 1000000007;
            a1 = a2;
            a2 = result;
        }

        return result;
    }
}

void inout_my_fair_coins() {
    int test_cases;
    cin >> test_cases;

    map<ull, ull> cache;
    ull n;
    while (test_cases--) {
        cin >> n;
        cout << count_fair_coins_by_generating_function(n) << endl;
        cout << count_fair_coins(n) << endl;
    }
}

int main() {
    inout_my_fair_coins();
    return 0;
}

更新自比賽結束以來，我發布了基於tskuzzy想法的解決方案，感興趣的人。 再一次，謝謝tskuzzy 。 您可以在此處查看原始問題陳述： http ： //www.codechef.com/problems/CSUMD
首先，你需要弄清楚那些1 coin和2 coin的概率，然后得到一些初始值來獲得序列。 完整的解決方案如下：

#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
#include <cmath>
#include <cassert>
#include <bitset>
#include <fstream>
#include <iomanip>
#include <set>
#include <stack>
#include <sstream>
#include <cstdio>
#include <map>
#include <cmath>

using namespace std;

typedef unsigned long long ull;

const ull special_prime = 1000000007;

/*
    Using generating function for the recurrence:
           | 1                     if n = 1
    a_n =  | 3                     if n = 2
           | 2a_{n-1} + 2a_{n-2}     if n > 2

    This method is probably the fastest one but it won't work 
    because when n is large, double just can't afford it. Plus,
    using this formula, we can't apply mod for floating point number.
    1 <= n <= 21
*/
ull count_fair_coins_by_generating_function(ull n) {
    n--;
    return 
        (sqrt(3.0) + 1)/((sqrt(3.0) - 1) * 2 * sqrt(3.0)) * pow(2 / (sqrt(3.0) - 1), n * 1.0) 
        +
        (1 - sqrt(3.0))/((sqrt(3.0) + 1) * 2 * sqrt(3.0)) * pow(-2 / (sqrt(3.0) + 1), n * 1.0);
}

/*
    Naive approach, it works but very slow. 
    Useful for testing.
*/
ull count_fair_coins(ull n) {
    if (n == 1) {
        return 1;
    }
    else if (n == 2) {
        return 3;
    }
    else {
        ull a1 = 1;
        ull a2 = 3;
        ull result;
        for (ull i = 3; i <= n; ++i) {
            result = (2*a2 + 2*a1) % 1000000007;
            a1 = a2;
            a2 = result;
        }

        return result;
    }
}

struct matrix_2_by_2 {
    ull m[2][2];
    ull a[2][2];
    ull b[2][2];

    explicit matrix_2_by_2(ull a00, ull a01, ull a10, ull a11) {
        m[0][0] = a00;
        m[0][1] = a01;
        m[1][0] = a10;
        m[1][1] = a11;
    }

    matrix_2_by_2 operator *(const matrix_2_by_2& rhs) const {
        matrix_2_by_2 result(0, 0, 0, 0);
        result.m[0][0] = (m[0][0] * rhs.m[0][0]) + (m[0][1] * rhs.m[1][0]);
        result.m[0][1] = (m[0][0] * rhs.m[0][1]) + (m[0][1] * rhs.m[1][1]);
        result.m[1][0] = (m[1][0] * rhs.m[0][0]) + (m[1][1] * rhs.m[1][0]);
        result.m[1][1] = (m[1][0] * rhs.m[0][1]) + (m[1][1] * rhs.m[1][1]);
        return result;
    }

    void square() {
        a[0][0] = b[0][0] = m[0][0];
        a[0][1] = b[0][1] = m[0][1];
        a[1][0] = b[1][0] = m[1][0];
        a[1][1] = b[1][1] = m[1][1];

        m[0][0] = (a[0][0] * b[0][0]) + (a[0][1] * b[1][0]);
        m[0][1] = (a[0][0] * b[0][1]) + (a[0][1] * b[1][1]);
        m[1][0] = (a[1][0] * b[0][0]) + (a[1][1] * b[1][0]);
        m[1][1] = (a[1][0] * b[0][1]) + (a[1][1] * b[1][1]);
    }

    void mod(ull n) {
        m[0][0] %= n;
        m[0][1] %= n;
        m[1][0] %= n;
        m[1][1] %= n;
    }

    /*
        exponentiation by squaring algorithm
                | 1                    if n = 0 
                | (1/x)^n              if n < 0 
        x^n =   | x.x^({(n-1)/2})^2    if n is odd
                | (x^{n/2})^2          if n is even

        The following algorithm calculate a^p % m
        int modulo(int a, int p, int m){
            long long x = 1;
            long long y = a; 

            while (p > 0) {
                if (p % 2 == 1){
                    x = (x * y) % m;
                }

                // squaring the base
                y = (y * y) % m; 
                p /= 2;
            }

            return x % c;
        }

        To apply for matrix, we need an identity which is
        equivalent to 1, then perform multiplication for matrix 
        in similar manner. Thus the algorithm is defined 
        as follows:
    */
    void operator ^=(ull p) {
        matrix_2_by_2 identity(1, 0, 0, 1);

        while (p > 0) {
            if (p % 2) {
                identity = operator*(identity);
                identity.mod(special_prime);
            }

            this->square();
            this->mod(special_prime);
            p /= 2;
        }

        m[0][0] = identity.m[0][0];
        m[0][1] = identity.m[0][1];
        m[1][0] = identity.m[1][0];
        m[1][1] = identity.m[1][1];
    }

    friend
    ostream& operator <<(ostream& out, const matrix_2_by_2& rhs) {
        out << rhs.m[0][0] << ' ' << rhs.m[0][1] << '\n';
        out << rhs.m[1][0] << ' ' << rhs.m[1][1] << '\n';
        return out;
    }
};

/*
    |a_{n+2}| = |2 2|^n  x |3| 
    |a_{n+1}|   |1 0|      |1|
*/
ull count_fair_coins_by_matrix(ull n) {
    if (n == 1) {
        return 1;
    } else {
        matrix_2_by_2 m(2, 2, 1, 0);
        m ^= (n - 1);
        return (m.m[1][0] * 3 + m.m[1][1]) % 1000000007;
    }
}

void inout_my_fair_coins() {
    int test_cases;
    scanf("%d", &test_cases);

    ull n;
    while (test_cases--) {
        scanf("%llu", &n);
        printf("%d\n", count_fair_coins_by_matrix(n));
    }
}

int main() {
    inout_my_fair_coins();
    return 0;
}

Answer 1

您可以根據矩陣指數來編寫序列的術語：

在此輸入圖像描述

可以通過求平方式使用取冪來快速評估。 這導致了O(log n)解決方案，該解決方案應該在時間限制內很好地解決問題。

僅供將來參考，如果您需要使用大數字進行乘法（在這種情況下不適用，因為答案采用模塊1000000007），您應該查看Karatsuba算法。 這為您提供了次二次時間乘法。

Answer 2

只是想到這里，但看看Duff的count_fair_coins函數的設備，因為它將自動展開循環以加速該功能。

在生成函數中預先計算sqrt似乎是獲得任何加速的最簡單方法。 這將減少到只有一個pow調用和常量的乘法。 除了預先設定sqrt的另一種方法來加速它是刪除分區並使用反向乘法，雖然非常輕微的優化它可能有助於在n非常大時加速。

如何加速系列生成？

問題描述

2 個解決方案

解決方案1
16 已采納 2012-07-08 07:52:25

解決方案2
1 2012-07-08 07:18:25

如何加速系列生成？

問題描述

2 個解決方案

解決方案1 16 已采納 2012-07-08 07:52:25

解決方案2 1 2012-07-08 07:18:25

解決方案1
16 已采納 2012-07-08 07:52:25

解決方案2
1 2012-07-08 07:18:25