
How to do Constrained Linear Regression - scikit learn?

I am trying to perform linear regression with some constraints to get a certain prediction. I want the model to make a linear prediction over the first half, and then (using constraints) keep the linear prediction for the second half within a very narrow range around the last value of the first half, similar to the green line in the figure.

Full code:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
pd.options.mode.chained_assignment = None  # default='warn'
data = [5.269, 5.346, 5.375, 5.482, 5.519, 5.57, 5.593999999999999, 5.627000000000001, 5.724, 5.818, 5.792999999999999, 5.817, 5.8389999999999995, 5.882000000000001, 5.92, 6.025, 6.064, 6.111000000000001, 6.1160000000000005, 6.138, 6.247000000000001, 6.279, 6.332000000000001, 6.3389999999999995, 6.3420000000000005, 6.412999999999999, 6.442, 6.519, 6.596, 6.603, 6.627999999999999, 6.76, 6.837000000000001, 6.781000000000001, 6.8260000000000005, 6.849, 6.875, 6.982, 7.018, 7.042000000000001, 7.068, 7.091, 7.204, 7.228, 7.261, 7.3420000000000005, 7.414, 7.44, 7.516, 7.542000000000001, 7.627000000000001, 7.667000000000001, 7.821000000000001, 7.792999999999999, 7.756, 7.871, 8.006, 8.078, 7.916, 7.974, 8.074, 8.119, 8.228, 7.976, 8.045, 8.312999999999999, 8.335, 8.388, 8.437999999999999, 8.456, 8.227, 8.266, 8.277999999999999, 8.289, 8.299, 8.318, 8.332, 8.34, 8.349, 8.36, 8.363999999999999, 8.368, 8.282, 8.283999999999999]
time = range(1,85,1)   
df = pd.DataFrame(list(zip(*[time, data])))
df.columns = ['time', 'data']
# print df
x=int(0.7*len(df))
train = df[:x]
valid = df[x:]
models = []
names = []
tr_x_ax = []
va_x_ax = []
pr_x_ax = []
tr_y_ax = []
va_y_ax = []
pr_y_ax = []
time_model = []
models.append(('LR', LinearRegression()))

for name, model in models:
    x_train=df.iloc[:, 0][:x].values
    y_train=df.iloc[:, 1][:x].values
    x_valid=df.iloc[:, 0][x:].values
    y_valid=df.iloc[:, 1][x:].values

    model = LinearRegression()
    # poly = PolynomialFeatures(5)
    x_train= x_train.reshape(-1, 1)
    y_train= y_train.reshape(-1, 1)
    x_valid = x_valid.reshape(-1, 1)
    y_valid = y_valid.reshape(-1, 1)
    # model.fit(x_train,y_train)
    model.fit(x_train,y_train.ravel())
    # score = model.score(x_train,y_train.ravel())
    # print 'score', score
    preds = model.predict(x_valid)
    tr_x_ax.extend(train['data'])
    va_x_ax.extend(valid['data'])
    pr_x_ax.extend(preds)

    valid['Predictions'] = preds
    valid.index = df[x:].index
    train.index = df[:x].index
    plt.figure(figsize=(5,5))
    # plt.plot(train['data'],label='data')
    # plt.plot(valid[['Close', 'Predictions']])
    x = valid['data']
    # print x
    # plt.plot(valid['data'],label='validation')
    plt.plot(valid['Predictions'],label='Predictions before',color='orange')



y =range(0,58)
y1 =range(58,84)
for index, item in enumerate(pr_x_ax):
    if index >13:
        pr_x_ax[index] = pr_x_ax[13]
pr_x_ax = list([float(i) for i in pr_x_ax])
va_x_ax = list([float(i) for i in va_x_ax])
tr_x_ax = list([float(i) for i in tr_x_ax])
plt.plot(y,tr_x_ax,  label='train' , color='red',  linewidth=2)
plt.plot(y1,va_x_ax,  label='validation1' , color='blue',  linewidth=2)
plt.plot(y1,pr_x_ax,  label='Predictions after' , color='green',  linewidth=2)
plt.xlabel("time")
plt.ylabel("data")
plt.xticks(rotation=45)
plt.legend()
plt.show()

If you look at this plot:

Label: Predictions before - the model predicts without any constraints (I do not want this result).

Label: Predictions after - the model predicts within the constraint, but only because the constraint is applied after the model's prediction, so all values are equal to the last value at index = 71, item = 8.56.

I used the for loop `for index, item in enumerate(pr_x_ax):` in line 64, and the curve is a straight line from time 71 to 85 seconds, as you can see, to show you how I need the model to work.
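As an aside, the clamping loop above can be written as a single vectorized slice assignment. A minimal sketch with a toy array standing in for `pr_x_ax`:

```python
import numpy as np

preds = np.array([8.0, 8.2, 8.4, 8.56, 8.7, 8.9])  # toy stand-in for pr_x_ax
cut = 3  # index after which values are clamped (13 in the question's loop)
preds[cut + 1:] = preds[cut]  # same effect as the per-element for loop
print(preds.tolist())  # → [8.0, 8.2, 8.4, 8.56, 8.56, 8.56]
```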

Can I build a model that gives the same result directly, instead of using the for loop?

[figure]

Any suggestions are appreciated.

I assume that by drawing the green line in your question you really want the trained model to predict a linear horizontal turn to the right. But the currently trained model draws only the straight orange line.

It is true for any algorithm and any kind of trained model that, in order to learn some unusual change in behavior, the model needs at least some samples containing that unusual change, or at least some hidden hint in the observed data pointing to it.

In other words, for your model to learn the green-line right turn, it should have right-turn points in its training dataset. But you take the first (leftmost) 70% of the data as training data with `train = df[:int(0.7 * len(df))]`, and that training data has no such right turn; it looks close to one straight line.

So you need to resample the data into train and validation differently: draw the training samples randomly from the whole X range (the code below keeps about 80% for training) and use the rest for validation. Then samples of the right turn are also included in your training data.
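A minimal sketch of such a shuffled split using sklearn's `train_test_split` (the arrays here are illustrative stand-ins for the time and data columns):

```python
import numpy as np
from sklearn.model_selection import train_test_split

x = np.arange(1, 85)           # stands in for the time column
y = np.linspace(5.3, 8.3, 84)  # stands in for the data column

# shuffle=True is the default, so both splits cover the whole x range,
# including the right end where the turn happens
x_tr, x_va, y_tr, y_va = train_test_split(x, y, train_size=0.7, random_state=0)
```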

The second thing is that a LinearRegression model always models its prediction with just one straight line, and that line cannot have a right turn. To get a right turn, you need a somewhat more complex model.
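This limitation is easy to demonstrate on toy data (not the question's series): fit LinearRegression to a series that rises and then goes flat, and the prediction still has a single global slope everywhere, overshooting the flat part.

```python
import numpy as np
from sklearn.linear_model import LinearRegression

x = np.arange(40, dtype=float)
y = np.where(x < 30, 5.0 + 0.1 * x, 5.0 + 0.1 * 30)  # rises, then flat at 8.0

m = LinearRegression().fit(x[:, None], y)
pred = m.predict(x[:, None])
# the single fitted line keeps rising where the data has flattened out
```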

One way for a model to have a right turn is to be piecewise linear, i.e. to consist of several connected straight lines. I did not find a ready-made piecewise-linear model inside sklearn, only in other pip packages. So I decided to implement my own simple class PieceWiseLinearRegression, which uses np.piecewise() and scipy.optimize.curve_fit() to model a piecewise-linear function.
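The core mechanism can be shown in a minimal two-segment form before the full class: np.piecewise() evaluates a different linear expression on each side of a breakpoint, and curve_fit() tunes the breakpoint and slopes. The function name and starting values here are illustrative, and the toy data is chosen to be exactly representable by the model:

```python
import numpy as np
from scipy.optimize import curve_fit

def two_segments(x, x0, y0, k1, k2):
    # continuous fit: slope k1 left of breakpoint x0, slope k2 to the right;
    # both expressions equal y0 at x = x0, so the segments connect
    return np.piecewise(x, [x < x0],
                        [lambda x: y0 + k1 * (x - x0),
                         lambda x: y0 + k2 * (x - x0)])

x = np.arange(84, dtype=float)
y = np.where(x < 58, 5.0 + 0.05 * x, 5.0 + 0.05 * 58)  # rise, then flat
p, _ = curve_fit(two_segments, x, y, p0=[40.0, 7.0, 0.1, 0.0])
# the fitted breakpoint p[0] lands near 58 and the right slope p[3] near 0
```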

The next picture shows the result of applying the two things mentioned above (the code follows): resampling the dataset differently and modeling a piecewise-linear function. Your current linear model LR still predicts with just one straight (blue) line, while my piecewise-linear PWLR2 (orange) line consists of two segments and correctly predicts the right turn:

[figure]

To see the PWLR2 graph clearly, I also made the next picture:

[figure]

My class PieceWiseLinearRegression accepts just one parameter, n, when an object is created - the number of linear segments used for prediction. n = 2 was used for the pictures above.

import sys, numpy as np, pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
np.random.seed(0)

class PieceWiseLinearRegression:
    @classmethod
    def nargs_func(cls, f, n):
        # Wrap f into a lambda with exactly n named positional arguments,
        # because scipy.optimize.curve_fit inspects the function signature
        # to count the fit parameters.
        return eval('lambda ' + ', '.join([f'a{i}' for i in range(n)]) + ': f(' + ', '.join([f'a{i}' for i in range(n)]) + ')', locals())

    @classmethod
    def piecewise_linear(cls, n):
        # Boolean masks choosing which of the n segments each x falls into,
        # based on the segment boundary points xs.
        condlist = lambda xs, xa: [(lambda x: (
            (xs[i] <= x if i > 0 else np.full_like(x, True, dtype = np.bool_)) &
            (x < xs[i + 1] if i < n - 1 else np.full_like(x, True, dtype = np.bool_))
        ))(xa) for i in range(n)]
        # Linear interpolation between the end points (xs[i], ys[i]) and
        # (xs[i + 1], ys[i + 1]) of each segment; the inner (lambda i: ...)(j)
        # binds the loop index, and the denominator is clamped away from zero.
        funclist = lambda xs, ys: [(lambda i: (
            lambda x: (
                (x - xs[i]) * (ys[i + 1] - ys[i]) / (
                    (xs[i + 1] - xs[i]) if abs(xs[i + 1] - xs[i]) > 10 ** -7 else 10 ** -7 * (-1, 1)[xs[i + 1] - xs[i] >= 0]
                ) + ys[i]
            )
        ))(j) for j in range(n)]
        def f(x, *pargs):
            # pargs interleaves the n + 1 boundary points: x0, y0, x1, y1, ...
            assert len(pargs) == (n + 1) * 2, (n, pargs)
            xs, ys = pargs[0::2], pargs[1::2]
            xa = x.ravel().astype(np.float64)
            ya = np.piecewise(x = xa, condlist = condlist(xs, xa), funclist = funclist(xs, ys)).ravel()
            return ya
        return cls.nargs_func(f, 1 + (n + 1) * 2)

    def __init__(self, n):
        self.n = n  # number of linear segments
        self.f = self.piecewise_linear(self.n)

    def fit(self, x, y):
        from scipy import optimize
        # Initial guess: breakpoints spread evenly over [min(x), max(x)],
        # with every boundary y value started at 1.
        self.p, self.e = optimize.curve_fit(self.f, x, y, p0 = [j for i in range(self.n + 1) for j in (np.amin(x) + i * (np.amax(x) - np.amin(x)) / self.n, 1)])

    def predict(self, x):
        return self.f(x, *self.p)

data = [5.269, 5.346, 5.375, 5.482, 5.519, 5.57, 5.593999999999999, 5.627000000000001, 5.724, 5.818, 5.792999999999999, 5.817, 5.8389999999999995, 5.882000000000001, 5.92, 6.025, 6.064, 6.111000000000001, 6.1160000000000005, 6.138, 6.247000000000001, 6.279, 6.332000000000001, 6.3389999999999995, 6.3420000000000005, 6.412999999999999, 6.442, 6.519, 6.596, 6.603, 6.627999999999999, 6.76, 6.837000000000001, 6.781000000000001, 6.8260000000000005, 6.849, 6.875, 6.982, 7.018, 7.042000000000001, 7.068, 7.091, 7.204, 7.228, 7.261, 7.3420000000000005, 7.414, 7.44, 7.516, 7.542000000000001, 7.627000000000001, 7.667000000000001, 7.821000000000001, 7.792999999999999, 7.756, 7.871, 8.006, 8.078, 7.916, 7.974, 8.074, 8.119, 8.228, 7.976, 8.045, 8.312999999999999, 8.335, 8.388, 8.437999999999999, 8.456, 8.227, 8.266, 8.277999999999999, 8.289, 8.299, 8.318, 8.332, 8.34, 8.349, 8.36, 8.363999999999999, 8.368, 8.282, 8.283999999999999]
time = list(range(1, 85))
df = pd.DataFrame(list(zip(time, data)), columns = ['time', 'data'])

choose_train = np.random.uniform(size = (len(df),)) < 0.8
choose_valid = ~choose_train

x_all = df.iloc[:, 0].values
y_all = df.iloc[:, 1].values
x_train = df.iloc[:, 0][choose_train].values
y_train = df.iloc[:, 1][choose_train].values
x_valid = df.iloc[:, 0][choose_valid].values
y_valid = df.iloc[:, 1][choose_valid].values
x_all_lin = np.linspace(np.amin(x_all), np.amax(x_all), 500)

models = []
models.append(('LR', LinearRegression()))
models.append(('PWLR2', PieceWiseLinearRegression(2)))
        
for imodel, (name, model) in enumerate(models):
    model.fit(x_train[:, None], y_train)
    x_all_lin_pred = model.predict(x_all_lin[:, None])
    plt.plot(x_all_lin, x_all_lin_pred, label = f'pred {name}')

plt.plot(x_train, y_train, label='train')
plt.plot(x_valid, y_valid, label='valid')
plt.xlabel('time')
plt.ylabel('data')
plt.legend()
plt.show()
