一元线性拟合

现有两组数据X和Y(如下),求一元线性方程y=a*x+b中的系数a和截距b

X =[12.46, 0.25, 5.22, 11.3, 6.81, 4.59, 0.66, 14.53, 15.49, 14.43,
    2.19, 1.35, 10.02, 12.93, 5.93, 2.92, 12.81, 4.88, 13.11, 5.8]
Y =[29.01, 4.7, 22.33, 24.99, 18.85, 14.89, 10.58, 36.84, 42.36, 39.73,
    11.92, 7.45, 22.9, 36.62, 16.04, 16.56, 31.55, 20.04, 35.26, 23.59]
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import numpy as np

if __name__ == '__main__':
    # Sample observations: X holds the inputs, Y the matching outputs.
    X = [
        12.46, 0.25, 5.22, 11.3, 6.81, 4.59, 0.66, 14.53, 15.49, 14.43,
        2.19, 1.35, 10.02, 12.93, 5.93, 2.92, 12.81, 4.88, 13.11, 5.8,
    ]
    Y = [
        29.01, 4.7, 22.33, 24.99, 18.85, 14.89, 10.58, 36.84, 42.36, 39.73,
        11.92, 7.45, 22.9, 36.62, 16.04, 16.56, 31.55, 20.04, 35.26, 23.59,
    ]

    # LinearRegression expects column-shaped data, so each flat list is
    # turned into an ndarray with one row per sample and a single column,
    # e.g. X_train = [[12.46], [0.25], [5.22], ...].
    X_train = np.array(X).reshape(-1, 1)
    Y_train = np.array(Y).reshape(-1, 1)

    # Create the linear regression model and train it on the samples.
    model = LinearRegression()
    model.fit(X_train, Y_train)

    # coef_ holds the slope and intercept_ the constant term; both are
    # indexed down to scalars because the target was given as a column.
    slope = model.coef_[0][0]
    intercept = model.intercept_[0]
    print("y=%.4f*x+%.4f" % (slope, intercept))

    # Score the model. The training set is reused here for simplicity;
    # in practice a separate test set is usually used for scoring.
    r_squared = model.score(X_train, Y_train)
    print("得分", r_squared)

    # Simple plot: raw samples as blue dots, the model's predictions for the
    # training inputs as a red line.
    Y_predict = model.predict(X_train)
    plt.scatter(X, Y, c="blue")
    plt.plot(X_train, Y_predict, c="red")
    plt.show()

结果:

y=2.0532*x+7.1234
得分 0.9149096589144883

 

多元线性回归

这里随机创建X1,X2,X3,Y四个数组,使Y=2*X1-3*X2+X3+8。然后加入一些干扰噪声,再尝试做线性回归。

from sklearn.linear_model import LinearRegression
import numpy as np
import random

def format_equation(coefficients, intercept):
    """Render a fitted linear model as ``y=a1*x1+a2*x2+...+b``.

    Every number is printed with four decimals and carries its own sign, so
    negative coefficients and a negative intercept are shown correctly
    (``y=-2.0000*x1+3.0000*x2-1.5000``). Only a leading ``+`` is dropped.

    Args:
        coefficients: Iterable of fitted coefficients, in feature order.
        intercept: Fitted constant term.

    Returns:
        The equation as a string.
    """
    # "%+.4f" always emits an explicit sign, which doubles as the separator
    # between consecutive terms.
    signed_terms = "".join(
        "%+.4f*x%d" % (coef, idx)
        for idx, coef in enumerate(coefficients, start=1)
    )
    rhs = signed_terms + "%+.4f" % intercept
    # Strip only a redundant leading plus; a leading minus must survive.
    if rhs.startswith("+"):
        rhs = rhs[1:]
    return "y=" + rhs


if __name__ == '__main__':
    sample_count = 50

    # Randomly create X1, X2, X3 and derive Y so that Y = 2*X1 - 3*X2 + X3 + 8.
    X1 = [random.randint(0, 100) for _ in range(sample_count)]
    X2 = [random.randint(0, 50) for _ in range(sample_count)]
    X3 = [random.randint(0, 25) for _ in range(sample_count)]
    Y = [2 * x1 - 3 * x2 + x3 + 8 for x1, x2, x3 in zip(X1, X2, X3)]

    # Combine X1, X2, X3 into an n-row, 3-column feature matrix
    # (one column per feature, one row per sample).
    X_train = np.column_stack((X1, X2, X3))
    Y_train = np.array(Y).reshape((len(Y), 1))

    # Add noise; subtracting the mean makes the noise sum to zero over
    # the sample.
    noise = np.random.randn(sample_count, 1)
    noise = noise - np.mean(noise)

    Y_train = Y_train + noise

    # Create the linear regression model and train it on the samples.
    lineModel = LinearRegression()
    lineModel.fit(X_train, Y_train)

    # Use the trained model to predict (kept for demonstration only).
    Y_predict = lineModel.predict(X_train)

    # coef_ holds the coefficients and intercept_ the constant term; the
    # target was given as a column, so the first row/entry is taken.
    a_arr = lineModel.coef_[0]
    b = lineModel.intercept_[0]
    f = format_equation(a_arr, b)

    print("拟合方程", f)
    # Score the model. The training set is reused here for simplicity;
    # in practice a separate test set is usually used for scoring.
    print("得分", lineModel.score(X_train, Y_train))

结果:

拟合方程 y=1.9972*x1-3.0115*x2+1.0597*x3+7.7271
得分 0.9997880910740103

结果与预先设定的Y=2*X1-3*X2+X3+8相近。由于样本和噪声都是随机产生的,每次运行的结果不尽相同。

由于数据是多维的,不方便画图展示。

 

 

author:蓝何忠

email:lanhezhong@163.com

Logo

旨在为数千万中国开发者提供一个无缝且高效的云端环境,以支持学习、使用和贡献开源项目。

更多推荐