简体   繁体   中英

Python multiprocessing of ARIMA model

Is it possible execute the ARIMA model with multiprocessing in python? I've got an error in the following code I'm using:

import warnings
from pandas import read_csv
from pandas import datetime
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
import multiprocessing 
from multiprocessing import pool 



# evaluate an ARIMA model for a given order (p,d,q)
def evaluate_arima_model(X, arima_order):
        # prepare training dataset
        train_size = int(len(X) * 0.66)
        train, test = X[0:train_size], X[train_size:]
        history = [x for x in train]
        # make predictions
        predictions = list()
        for t in range(len(test)):
                model = ARIMA(history, order=arima_order)
                model_fit = model.fit(disp=0)
                yhat = model_fit.forecast()[0]
                predictions.append(yhat)
                history.append(test[t])
        # calculate out of sample error
        error = mean_squared_error(test, predictions)
        return error
# evaluate combinations of p, d and q values for an ARIMA model
def evaluate_models(dataset, p_values, d_values, q_values):
        dataset = dataset.astype('float32')
        best_score, best_cfg = float("inf"), None
        for p in p_values:
                for d in d_values:
                        for q in q_values:
                                order = (p,d,q)
                                start = time.time()
                                try:
                                        mse = evaluate_arima_model(dataset, order)
                                        if mse < best_score:
                                                best_score, best_cfg = mse, order
                                        print('ARIMA%s MSE=%.3f' % (order,mse))
                                except:
                                        continue
                                end = time.time()
                                cycletime = end - start
                                print('Time in secs to complete calcultion : %.2f' % (cycletime))
        print('Best ARIMA%s MSE=%.3f' % (best_cfg, best_score))
        return best_cfg

class Bar(object):
        def __init__(self,x):
         self.x = x

def parser(x):
        return datetime.strptime(''+x, '%Y-%m-%d %H:%M:%S')
series = read_csv('clean_cpu.csv', usecols=['SAMPLE_TIME','VALUE'],header=0, parse_dates=[0], index_col=0, squeeze=True, date_parser=parser)
# evaluate parameters
p_values = [0]
d_values = range(0, 3)
q_values = range(0, 1)
warnings.filterwarnings("ignore")
res = pool().map(evaluate_models,[Bar(series.value),Bar(p_values),Bar(d_values),Bar(q_values)])
#arima_lag = evaluate_models(series.values, p_values, d_values, q_values)

print('Best LAG is %s' % str(arima_lag))

The error I got is:

res = pool().map(evaluate_models,[Bar(series.value),Bar(p_values),Bar(d_values),Bar(q_values)]) TypeError: 'module' object is not callable

Any help would be greatly appreciated.

PS : I'm using Python v3.5.2

This is all related to one line :

In [1]: from multiprocessing import pool 

In [2]: pool
Out[2]: <module 'multiprocessing.pool' from '/usr/lib/python3.6/multiprocessing/pool.py'>

In [3]: pool()
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-3-c16521659604> in <module>()
----> 1 pool()

TypeError: 'module' object is not callable

You are trying to call a module as if it was a function.

You have to read the documentation of the multiprocessing module .

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM