*Machine Learning* (Zhou Zhihua), Exercise 3.3: Implementing Logistic Regression
This solution references han's answer; the watermelon dataset can also be downloaded from han's GitHub.
3.3 Implement logistic regression in code and report the results on the watermelon dataset 3.0α.
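For reference, the formula numbers cited in the comments below are those from Section 3.3 of the book, with $\hat{x} = (x; 1)$ and $p_1(\hat{x};\beta) = e^{\beta^{\top}\hat{x}} / (1 + e^{\beta^{\top}\hat{x}})$: the negative log-likelihood to be minimized (3.27), its gradient (3.30) and Hessian (3.31) with respect to $\beta$, and the Newton update (3.29).

$$\ell(\beta) = \sum_{i=1}^{m}\Bigl(-y_i\,\beta^{\top}\hat{x}_i + \ln\bigl(1 + e^{\beta^{\top}\hat{x}_i}\bigr)\Bigr) \tag{3.27}$$

$$\beta^{t+1} = \beta^{t} - \left(\frac{\partial^2 \ell(\beta)}{\partial\beta\,\partial\beta^{\top}}\right)^{-1}\frac{\partial \ell(\beta)}{\partial\beta} \tag{3.29}$$

$$\frac{\partial \ell(\beta)}{\partial\beta} = -\sum_{i=1}^{m}\hat{x}_i\bigl(y_i - p_1(\hat{x}_i;\beta)\bigr) \tag{3.30}$$

$$\frac{\partial^2 \ell(\beta)}{\partial\beta\,\partial\beta^{\top}} = \sum_{i=1}^{m}\hat{x}_i\hat{x}_i^{\top}\,p_1(\hat{x}_i;\beta)\bigl(1 - p_1(\hat{x}_i;\beta)\bigr) \tag{3.31}$$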
Code
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn import linear_model
# The next two lines make matplotlib render Chinese characters correctly
import matplotlib
matplotlib.rc("font", family='YouYuan')

# Sigmoid function
def sigmoid(s):
    s = 1 / (1 + np.exp(-s))
    return s

# Formula 3.27: the negative of the log-likelihood, to be minimized
def J_cost(X, y, beta):
    """
    :param X: array, shape (num_sample, num_features)
    :param y: array, shape (num_sample, 1)
    :param beta: array, shape (num_features + 1, 1)
    :return: value of formula 3.27, i.e. the negative log-likelihood
    """
    X_hat = np.c_[X, np.ones((X.shape[0], 1))]
    y = y.reshape(-1, 1)
    beta = beta.reshape(-1, 1)
    lbeta = -y * np.dot(X_hat, beta) + np.log(1 + np.exp(np.dot(X_hat, beta)))
    return lbeta.sum()

# Formula 3.30: first derivative of l(beta) with respect to beta
def gradient(X, y, beta):
    '''
    :param X: array, shape (num_sample, num_features)
    :param y: array, shape (num_sample, 1)
    :param beta: array, shape (num_features + 1, 1)
    :return: result of formula 3.30
    '''
    X_hat = np.c_[X, np.ones((X.shape[0], 1))]
    beta = beta.reshape(-1, 1)
    y = y.reshape(-1, 1)
    p1 = np.exp(np.dot(X_hat, beta)) / (1 + np.exp(np.dot(X_hat, beta)))
    grad = (-X_hat * (y - p1)).sum(0)
    return grad.reshape(-1, 1)

# Gradient descent: update beta
def update_parameters_gradDesc(X, y, beta, learning_rate, num_iterations, print_cost):
    '''
    Update beta with gradient descent and optionally print the value of
    the negative log-likelihood (formula 3.27).
    :param X: array, shape (num_sample, num_features)
    :param y: array, shape (num_sample, 1)
    :param beta: array, shape (num_features + 1, 1)
    :param learning_rate: step size of each update
    :param num_iterations: number of iterations
    :param print_cost: whether to print the cost every 100 iterations
    :return: the updated beta
    '''
    for i in range(num_iterations):
        grad = gradient(X, y, beta)
        beta -= learning_rate * grad
        if (i % 100 == 0) and print_cost:
            print('{}th iteration, cost is {}'.format(i, J_cost(X, y, beta)))
    return beta

# Newton's method: Hessian matrix
def hessian(X, y, beta):
    '''
    Second derivative of l(beta) with respect to beta, formula 3.31.
    :param X: array, shape (num_sample, num_features)
    :param y: array, shape (num_sample, 1)
    :param beta: array, shape (num_features + 1, 1)
    :return: the Hessian matrix, shape (num_features + 1, num_features + 1)
    '''
    X_hat = np.c_[X, np.ones((X.shape[0], 1))]
    y = y.reshape(-1, 1)
    beta = beta.reshape(-1, 1)
    p1 = np.exp(np.dot(X_hat, beta)) / (1 + np.exp(np.dot(X_hat, beta)))
    m, n = X.shape
    # np.eye(m) is an identity matrix; P is the square diagonal matrix diag(p1 * (1 - p1))
    P = np.eye(m) * p1 * (1 - p1)
    assert P.shape[0] == P.shape[1]
    hess = np.dot(np.dot(X_hat.T, P), X_hat)
    return hess

# Newton's method: update beta
def update_parameters_Newton(X, y, beta, num_iterations, print_cost):
    '''
    Update beta according to formula 3.29.
    :param X: array, shape (num_sample, num_features)
    :param y: array, shape (num_sample, 1)
    :param beta: array, shape (num_features + 1, 1)
    :param num_iterations: number of iterations
    :param print_cost: whether to print the cost every 100 iterations
    :return: the updated beta
    '''
    for i in range(num_iterations):
        grad = gradient(X, y, beta)
        hess = hessian(X, y, beta)
        beta -= np.dot(np.linalg.inv(hess), grad)
        if (i % 100 == 0) and print_cost:
            print('{}th iteration, cost is {}'.format(i, J_cost(X, y, beta)))
    return beta

# Initialize beta
def initialize_beta(n):
    # np.random.randn draws samples from a standard normal distribution
    beta = np.random.randn(n + 1, 1) * 0.5 + 1
    # print(beta)
    return beta

# Logistic regression model
def logistic_beta(X, y, num_iterations=100, learning_rate=1.2, print_cost=False, method='gradDesc'):
    '''
    :param X: array, shape (num_sample, num_feature)
    :param y: array, shape (num_sample, 1)
    :param num_iterations: number of iterations
    :param learning_rate: learning rate (used by gradient descent only)
    :param print_cost: whether to print the negative log-likelihood
    :param method: optimization method, 'gradDesc' or 'Newton'
    :return: beta, array, shape (num_feature + 1, 1)
    '''
    m, n = X.shape
    beta = initialize_beta(n)
    if method == 'gradDesc':
        return update_parameters_gradDesc(X, y, beta, learning_rate, num_iterations, print_cost)
    elif method == 'Newton':
        return update_parameters_Newton(X, y, beta, num_iterations, print_cost)
    else:
        raise ValueError('Unknown method: %s' % method)

# Prediction
def predict(X, beta):
    X_hat = np.c_[X, np.ones((X.shape[0], 1))]
    beta = beta.reshape(-1, 1)
    p1 = np.exp(np.dot(X_hat, beta)) / (1 + np.exp(np.dot(X_hat, beta)))
    p1[p1 > 0.5] = 1
    p1[p1 <= 0.5] = 0
    temp = np.c_[X, p1]
    print(temp)
    return p1

if __name__ == '__main__':
    data_path = r'C:\Users\***\3.3\watermelon3_0_Ch.csv'
    # Load the data
    data = pd.read_csv(data_path).values
    # print(data)
    is_good = data[:, 9] == '是'
    # print(is_good)
    is_bad = data[:, 9] == '否'
    # print(is_bad)
    # Columns 7 and 8 (0-based): density and sugar content
    X = data[:, 7:9].astype(float)
    # print(X)
    # Column 9: the label, '是' (good melon) or '否' (bad melon)
    y = data[:, 9]
    # print(y)
    y[y == '是'] = 1
    y[y == '否'] = 0
    y = y.astype(int)
    # Scatter plot: density on the x-axis, sugar content on the y-axis;
    # good melons as black circles, bad melons as red crosses
    plt.scatter(data[:, 7][is_good], data[:, 8][is_good], c='k', marker='o')
    plt.scatter(data[:, 7][is_bad], data[:, 8][is_bad], c='r', marker='x')
    plt.xlabel('密度')
    plt.ylabel('含糖量')
    # Visualize the results of the hand-written model
    beta = logistic_beta(X, y, num_iterations=1000, learning_rate=2.0, print_cost=True, method='gradDesc')
    w1, w2, intercept = beta
    # Prediction results
    predict(X, beta)
    x1 = np.linspace(0, 1)
    # Decision boundary: the set of points where w1*x1 + w2*y1 + intercept = 0,
    # i.e. where the predicted probability is exactly 0.5; y1 is the second
    # attribute (sugar content), which corresponds to w2, so solving for it gives:
    y1 = -(w1 * x1 + intercept) / w2
    # print(x1)
    # print(y1)
    plt.plot(x1, y1, label=r'my_logistic_gradDesc')
    beta = logistic_beta(X, y, num_iterations=1000, learning_rate=2.0, print_cost=True, method='Newton')
    w1, w2, intercept = beta
    # Prediction results
    predict(X, beta)
    x1 = np.linspace(0, 1)
    # Same decision boundary as above, now with the parameters fitted by Newton's method
    y1 = -(w1 * x1 + intercept) / w2
    plt.plot(x1, y1, label=r'my_logistic_Newton')
    # Visualize the result of sklearn's LogisticRegression for comparison
    # Note: in sklearn's logistic regression, a larger C means weaker regularization
    lr = linear_model.LogisticRegression(solver='lbfgs', C=100)
    lr.fit(X, y)
    lr_beta = np.c_[lr.coef_, lr.intercept_]
    print('cost of sklearn logistic: {}'.format(J_cost(X, y, lr_beta)))
    w1_sk, w2_sk = lr.coef_[0, :]
    x2 = np.linspace(0, 1)
    y2 = -(w1_sk * x2 + lr.intercept_) / w2_sk
    plt.plot(x2, y2, label=r'sklearn logistic')
    plt.legend(loc='upper right')
    plt.show()
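As a quick sanity check of the fitted parameters, the hard 0/1 predictions can be compared against the ground-truth labels. The short sketch below is not part of the original script; it assumes it runs inside the __main__ block after beta has been fitted, reusing the X, y, and predict() defined above, and simply reports the training accuracy.

    # Minimal sketch (assumption: placed inside the __main__ block after fitting beta)
    y_pred = predict(X, beta)                # hard 0/1 predictions, shape (num_sample, 1)
    accuracy = (y_pred.ravel() == y).mean()  # fraction of correctly classified samples
    print('training accuracy: {:.3f}'.format(accuracy))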
Visualization: the resulting figure shows the scatter plot of the dataset together with the three fitted decision boundaries (gradient descent, Newton's method, and sklearn).