Python : Speeding up my Runge-Kutta integration code challenge

Question

I am using the attached code to integrate a version of Fitzhugh-Nagumo model :

from scipy.integrate import odeint
import numpy as np
import time

P = {'epsilon':0.1,
     'a1':1.0,
     'a2':1.0,
     'b':2.0,
     'c':0.2}

def fhn_rhs(V,t,P):
    u,v = V[0],V[1]
    u_t = u - u**3 - v
    v_t = P['epsilon']*(u - P['b']*v - P['c'])
    return np.stack((u_t,v_t))

def integrate(func,V0,t,args,step='RK4'):
    start = time.clock()
    P = args[0]
    result=[V0]
    for i,tmp in enumerate(t[1:]):
        result.append(RK4step(func,result[i],tmp,P,(t[i+1]-t[i])))
    print "Integration took ",time.clock() - start, " s"
    return np.array(result)


def RK4step(rhs,V,t,P,dt):
    k_1 = dt*rhs(V,t,P)
    k_2 = dt*rhs((V+(1.0/2.0)*k_1),t,P)
    k_3 = dt*rhs((V+(1.0/2.0)*k_2),t,P)
    k_4 = dt*rhs((V+k_3),t,P)
    return V+(1.0/6.0)*k_1+(1.0/3.0)*k_2+(1.0/3.0)*k_3+(1.0/6.0)*k_4

Comparing between my integrate and scipy.integrate.odeint gives the following:

In [8]: import cProfile

In [9]: %timeit integrate(fhn_rhs,np.stack((0.1,0.2)),np.linspace(0,100,1000),args=(P,))
10 loops, best of 3: 36.4 ms per loop

In [10]: %timeit odeint(fhn_rhs,np.stack((0.1,0.2)),np.linspace(0,100,1000),args=(P,))
100 loops, best of 3: 3.45 ms per loop

In [11]: cProfile.run('integrate(fhn_rhs,np.stack((0.1,0.2)),np.linspace(0,100,1000),args=(P,))')
         45972 function calls in 0.098 seconds

   Ordered by: standard name

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    0.000    0.000    0.098    0.098 <string>:1(<module>)
     3996    0.011    0.000    0.078    0.000 fhn.py:20(fhn_rhs)
        1    0.002    0.002    0.097    0.097 fhn.py:42(integrate)
      999    0.016    0.000    0.094    0.000 fhn.py:52(RK4step)
        1    0.000    0.000    0.000    0.000 function_base.py:9(linspace)
     7994    0.011    0.000    0.021    0.000 numeric.py:484(asanyarray)
     3997    0.029    0.000    0.067    0.000 shape_base.py:282(stack)
    11991    0.008    0.000    0.008    0.000 shape_base.py:337(<genexpr>)
     3997    0.002    0.000    0.002    0.000 {len}
      999    0.001    0.000    0.001    0.000 {method 'append' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {method 'astype' of 'numpy.ndarray' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        1    0.000    0.000    0.000    0.000 {numpy.core.multiarray.arange}
     7995    0.010    0.000    0.010    0.000 {numpy.core.multiarray.array}
     3997    0.006    0.000    0.006    0.000 {numpy.core.multiarray.concatenate}
        1    0.000    0.000    0.000    0.000 {numpy.core.multiarray.result_type}

Any suggestions on how I can make my code run faster? Can I use numba somehow to accelerate it?

Answer 1

You are not comparing the same things. To see at what points odeint actually evaluates the ODE function, put a print t statement in (of course while not timing it). odeint and generally methods with adaptive time steps produce a sparse list of integration samples and interpolate the desired output from them.

You would have to use an error estimator for the RK4 method and based on that replicate this adaptive scheme.

And of course, interpreted python code using vector objects will never be competitive with the compiled FORTRAN code of lsoda called from odeint using simple arrays during its execution.

An example for using RK4 in an adaptive step size scheme with interpolation:

def RK4Step(f, x, y, h, k1):
    k2=f(x+0.5*h, y+0.5*h*k1)
    k3=f(x+0.5*h, y+0.5*h*k2)
    k4=f(x+    h, y+    h*k3)
    return (k1+2*(k2+k3)+k4)/6.0

def RK4TwoStep(f, x, y, h, k1):
    step1 = RK4Step(f, x , y , 0.5*h, k1        )
    x1, y1 = x+0.5*h, y+0.5*h*step1;
    step2 = RK4Step(f, x1, y1, 0.5*h, f(x1, y1) )
    return (step1+step2)/2

def RK4odeint(fin,y0,times, tol=1e-6, args=None):
    # numpy-ify the inputs
    if args:
        f = lambda t,y : np.array(fin(y,t,*args));
    else:
        f = lambda t,y : np.array(fin(y,t))
    y0 = np.array(y0)
    # allocate output structure
    yout = np.array([y0]*len(times));
    # in consequence, yout[0] = y0;
    # initialize integrator variables
    h = times[1]-times[0];
    hmax = abs(times[-1]-times[0]);

    # last and current point of the numerical integration
    ycurr = ylast = qcurr = qlast = y0; 
    tcurr = tlast = times[0];
    fcurr = flast = f(tcurr, ycurr);
    totalerr = 0.0
    totalvar = 0.0
    for i, t in enumerate(times[1:]):
        # remember that t == t[i+1], result goes to yout[i+1]
        while (t-tcurr)*h>0:
            # advance the integration                
            k1, k2 = RK4Step(f,tcurr,ycurr,h, fcurr), RK4TwoStep(f,tcurr,ycurr,h, fcurr);
            # RK4 is of fourth order, that is,
            # k1 = (y(x+h)-y(x))/h + C*h^4
            # k2 = (y(x+h)-y(x))/h + C*h^4/16
            # Using the double step k2 gives  
            # C*h^4/16 = (k2-k1)/15 as local error density
            # change h to match the global relative error density tol
            # use |k2| as scale for the absolute error
            # |k1-k2|/15*hfac^4 = tol*|k2|, h <- h*hfac

            scale = max(abs(k2))
            steperr = max(abs(k1-k2))/2
            # compute the ideal step size factor and sanitize the result to prevent ridiculous changes
            hfac = (  tol*scale / ( 1e-16+steperr)  )**0.25
            hfac = min(10, max(0.01, hfac) )

            # repeat the step if there is a significant step size correction
            if ( abs(h*hfac)<hmax and (0.6 > hfac or hfac > 3 )):
                # recompute with new step size
                h *= hfac;
                k2 = RK4TwoStep(f, tcurr, ycurr, h, fcurr) ;
            # update and cycle the integration points
            ylast = ycurr; ycurr = ycurr + h*k2;
            tlast = tcurr; tcurr += h;
            flast = fcurr; fcurr = f(tcurr, ycurr);
            # cubic Bezier control points
            qlast = ylast + (tcurr-tlast)/3*flast;
            qcurr = ycurr - (tcurr-tlast)/3*fcurr;

            totalvar += h*scale;
            totalerr = (1+h*scale)*totalerr + h*steperr;
            reportstr = "internal step to t=%12.8f \t" % tcurr;

        #now tlast <= t <= tcurr, can interpolate the value for yout[i+1] using the cubic Bezier formula
        s = (t - tlast)/(tcurr - tlast);
        yout[i+1] = (1-s)**2*((1-s)*ylast + 3*s*qlast) + s**2*(3*(1-s)*qcurr + s*ycurr)

    return np.array(yout)

This can be called similar to scipy.integrate.odeint with the same argument order conventions for the derivatives function (the interface was constructed with that requirement, there is no inherent necessity, change as you like)

P = {'epsilon':0.1,
     'a1':1.0,
     'a2':1.0,
     'b':2.0,
     'c':0.2}

def fhn_rhs(V,t,P):
    u,v = V
    u_t = u - u**3 - v
    v_t = P['epsilon']*(u - P['b']*v - P['c'])
    return np.array([u_t,v_t])


V0 = np.array([0.1,0.2])
time = np.linspace(0,100,1000)

V = RK4odeint(fhn_rhs,V0,time,args=(P,))

u,v = V.T
plt.plot(time, v) # or plt.plot(u,v) or ...

Python : Speeding up my Runge-Kutta integration code challenge

Question

1 answers

solution1
4 2016-11-29 13:14:06

Python : Speeding up my Runge-Kutta integration code challenge

Question

1 answers

solution1 4 2016-11-29 13:14:06

solution1
4 2016-11-29 13:14:06