Day 2 Programming Practice: Linear Regression and Regularization
Objectives
- Implement ordinary least squares and gradient descent by hand
- Fit linear regression, Ridge, and Lasso with sklearn
- Understand how regularization affects overfitting
- Apply the models to financial data to predict stock returns
1. Import the Required Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split, learning_curve
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import warnings
warnings.filterwarnings('ignore')
# Global plotting style
sns.set_style("whitegrid")  # options: whitegrid, darkgrid, white, dark, ticks
sns.set_palette("husl")  # color palette
plt.rcParams['font.sans-serif'] = ['SimHei', 'DejaVu Sans']  # fonts that cover CJK glyphs
plt.rcParams['axes.unicode_minus'] = False  # render minus signs correctly
# LaTeX rendering (only if LaTeX is installed on the system)
plt.rcParams['text.usetex'] = False  # keep False to avoid the LaTeX dependency
2. Ordinary Least Squares from Scratch
2.1 Generate Simulated Data
# Generate data: y = 2*x1 + 3*x2 + 5 + noise
np.random.seed(42)
n_samples = 100
X = np.random.randn(n_samples, 2)
true_beta = np.array([2, 3])
true_intercept = 5
y = true_intercept + X @ true_beta + np.random.randn(n_samples) * 0.5
print("数据形状:")
print(f"X: {X.shape}")
print(f"y: {y.shape}")
print(f"真实参数: 截距={true_intercept}, 系数={true_beta}")
数据形状:
X: (100, 2)
y: (100,)
真实参数: 截距=5, 系数=[2 3]
The @ operator is the matrix-multiplication operator introduced in Python 3.5; X @ true_beta is equivalent to np.dot(X, true_beta) or X.dot(true_beta).
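A quick check (a minimal sketch reusing X and true_beta from above) confirms the three spellings agree:
# All three matrix-product spellings give the same result
assert np.allclose(X @ true_beta, np.dot(X, true_beta))
assert np.allclose(X @ true_beta, X.dot(true_beta))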
2.2 Least Squares by Hand
class OrdinaryLeastSquares:
    """Ordinary least squares implemented by hand"""
    def __init__(self):
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X, y):
        # Add an intercept term (a column of ones)
        X_with_intercept = np.column_stack([np.ones(X.shape[0]), X])
        # Normal equation: β = (X^T X)^(-1) X^T y
        # Use the pseudo-inverse to handle singular matrices
        beta = np.linalg.pinv(X_with_intercept.T @ X_with_intercept) @ X_with_intercept.T @ y
        self.intercept_ = beta[0]
        self.coef_ = beta[1:]
        return self

    def predict(self, X):
        return self.intercept_ + X @ self.coef_

    def score(self, X, y):
        """Compute R²"""
        y_pred = self.predict(X)
        ss_res = np.sum((y - y_pred) ** 2)
        ss_tot = np.sum((y - np.mean(y)) ** 2)
        return 1 - (ss_res / ss_tot)

# Train the model
ols_manual = OrdinaryLeastSquares()
ols_manual.fit(X, y)
print("Least squares results:")
print(f"Intercept: {ols_manual.intercept_:.4f} (true: {true_intercept})")
print(f"Coefficients: {ols_manual.coef_} (true: {true_beta})")
print(f"R²: {ols_manual.score(X, y):.4f}")

# Cross-check against sklearn (LinearRegression was already imported above)
lr_sklearn = LinearRegression()
lr_sklearn.fit(X, y)
print("\nsklearn LinearRegression results:")
print(f"Intercept: {lr_sklearn.intercept_:.4f}")
print(f"Coefficients: {lr_sklearn.coef_}")
print(f"R²: {lr_sklearn.score(X, y):.4f}")

# Verify the two agree
print("\nDo the two agree?")
print(f"Intercept difference: {abs(ols_manual.intercept_ - lr_sklearn.intercept_):.10f}")
print(f"Coefficient difference: {np.abs(ols_manual.coef_ - lr_sklearn.coef_).sum():.10f}")
Least squares results:
Intercept: 5.0464 (true: 5)
Coefficients: [2.09536017 2.91392895] (true: [2 3])
R²: 0.9772
sklearn LinearRegression results:
Intercept: 5.0464
Coefficients: [2.09536017 2.91392895]
R²: 0.9772
Do the two agree?
Intercept difference: 0.0000000000
Coefficient difference: 0.0000000000
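As an aside, instead of forming the normal equations explicitly, NumPy's SVD-based least-squares solver is the numerically safer route for ill-conditioned design matrices. A minimal sketch reusing X and y from above:
# Solve min ||X_b @ beta - y||^2 directly; lstsq avoids forming X^T X
X_b = np.column_stack([np.ones(X.shape[0]), X])  # prepend an intercept column
beta_lstsq, _, _, _ = np.linalg.lstsq(X_b, y, rcond=None)
print(f"lstsq intercept: {beta_lstsq[0]:.4f}, coefficients: {beta_lstsq[1:]}")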
3. Gradient Descent Linear Regression from Scratch
3.1 Batch Gradient Descent
class GradientDescentLinearRegression:
    """
    Gradient-descent linear regression implemented by hand.

    Implements the full procedure for solving linear regression with
    gradient descent: parameter initialization, loss computation, and
    gradient updates.
    """
    def __init__(self, learning_rate=0.01, n_iterations=1000, tolerance=1e-6):
        """
        Parameters
        ----------
        learning_rate : step size of each parameter update (default 0.01).
            Too large can cause oscillation; too small converges slowly.
        n_iterations : maximum number of update rounds (default 1000).
        tolerance : convergence tolerance (default 1e-6).
            Iteration stops once the loss changes by less than this value.
        """
        self.learning_rate = learning_rate
        self.n_iterations = n_iterations
        self.tolerance = tolerance
        self.coef_ = None
        self.intercept_ = None
        self.loss_history = []

    def fit(self, X, y):
        n_samples, n_features = X.shape
        # Initialize parameters
        self.intercept_ = 0
        self.coef_ = np.zeros(n_features)
        # Gradient-descent loop
        for i in range(self.n_iterations):
            # Predictions
            y_pred = self.intercept_ + X @ self.coef_
            # Gradients
            # ∂J/∂intercept = (1/m) * Σ(y_pred - y)
            gradient_intercept = (1 / n_samples) * np.sum(y_pred - y)
            # ∂J/∂coef = (1/m) * X^T (y_pred - y)
            gradient_coef = (1 / n_samples) * X.T @ (y_pred - y)
            # Update parameters
            self.intercept_ -= self.learning_rate * gradient_intercept
            self.coef_ -= self.learning_rate * gradient_coef
            # Record the loss
            loss = np.mean((y_pred - y) ** 2) / 2
            self.loss_history.append(loss)
            # Early stopping
            if i > 0 and abs(self.loss_history[-1] - self.loss_history[-2]) < self.tolerance:
                print(f"Converged at iteration {i}")
                break
        return self

    def predict(self, X):
        return self.intercept_ + X @ self.coef_

# Train the gradient-descent model
gd_model = GradientDescentLinearRegression(learning_rate=0.1, n_iterations=500)
gd_model.fit(X, y)
print("Gradient descent results:")
print(f"Intercept: {gd_model.intercept_:.4f}")
print(f"Coefficients: {gd_model.coef_}")
print(f"R²: {r2_score(y, gd_model.predict(X)):.4f}")

# Plot the loss curve
plt.figure(figsize=(10, 5))
plt.plot(gd_model.loss_history)
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Gradient descent convergence')
plt.yscale('log')
plt.grid(True)
plt.show()
Converged at iteration 94
Gradient descent results:
Intercept: 5.0450
Coefficients: [2.09187444 2.9142329 ]
R²: 0.9772
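Batch gradient descent uses every sample in each update. On large datasets it is common to update on small random subsets instead; the sketch below is a minimal mini-batch variant (the learning rate, batch size, and epoch count are illustrative choices, not part of the original experiment):
def minibatch_sgd(X, y, lr=0.05, n_epochs=50, batch_size=16, seed=0):
    """Mini-batch SGD for linear regression; returns (intercept, coef)."""
    rng = np.random.default_rng(seed)
    n_samples, n_features = X.shape
    intercept, coef = 0.0, np.zeros(n_features)
    for _ in range(n_epochs):
        order = rng.permutation(n_samples)  # reshuffle every epoch
        for start in range(0, n_samples, batch_size):
            batch = order[start:start + batch_size]
            err = intercept + X[batch] @ coef - y[batch]  # residuals on the batch
            intercept -= lr * err.mean()
            coef -= lr * X[batch].T @ err / len(batch)
    return intercept, coef

b0, w = minibatch_sgd(X, y)
print(f"Mini-batch SGD intercept: {b0:.4f}, coefficients: {w}")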

3.2 Comparing Different Learning Rates
def compare_learning_rates(X, y, learning_rates):
    """Compare convergence across learning rates"""
    fig, axes = plt.subplots(2, 2, figsize=(12, 8))
    axes = axes.ravel()
    for idx, lr in enumerate(learning_rates):
        model = GradientDescentLinearRegression(learning_rate=lr, n_iterations=200)
        model.fit(X, y)
        axes[idx].plot(model.loss_history)
        axes[idx].set_title(f'Learning rate = {lr}')
        axes[idx].set_xlabel('Iteration')
        axes[idx].set_ylabel('Loss')
        axes[idx].set_yscale('log')
        axes[idx].grid(True)
        final_loss = model.loss_history[-1]
        axes[idx].text(0.7, 0.8, f'Final loss: {final_loss:.4f}',
                       transform=axes[idx].transAxes)
    plt.tight_layout()
    plt.show()

# Try several learning rates
learning_rates = [0.001, 0.1, 0.5, 2.0]
compare_learning_rates(X, y, learning_rates)
Converged at iteration 94
Converged at iteration 19

Observations:
- Too small a learning rate (0.001): slow convergence
- A moderate learning rate (0.1-0.5): fast, stable convergence
- Too large a learning rate (2.0): may oscillate or diverge
4. The Importance of Feature Scaling
4.1 Create Data on Very Different Scales
# Generate features on wildly different scales
np.random.seed(42)
X_unscaled = np.random.randn(100, 2)
X_unscaled[:, 0] = X_unscaled[:, 0] * 1000  # feature 1 range ~ [-3000, 3000]
X_unscaled[:, 1] = X_unscaled[:, 1] * 0.01  # feature 2 range ~ [-0.03, 0.03]
y_unscaled = 2 * X_unscaled[:, 0] + 3 * X_unscaled[:, 1] + np.random.randn(100) * 10
print("Unscaled feature statistics:")
print(f"Feature 1: mean={X_unscaled[:, 0].mean():.2f}, std={X_unscaled[:, 0].std():.2f}")
print(f"Feature 2: mean={X_unscaled[:, 1].mean():.4f}, std={X_unscaled[:, 1].std():.4f}")
Unscaled feature statistics:
Feature 1: mean=-115.56, std=852.02
Feature 2: mean=0.0003, std=0.0099
4.2 Convergence Before and After Scaling
# Standardize
# StandardScaler is scikit-learn's most common feature-standardization tool:
# it rescales each feature to zero mean and unit variance
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_unscaled)
print("Scaled feature statistics:")
print(f"Feature 1: mean={X_scaled[:, 0].mean():.4f}, std={X_scaled[:, 0].std():.4f}")
print(f"Feature 2: mean={X_scaled[:, 1].mean():.4f}, std={X_scaled[:, 1].std():.4f}")

# Side-by-side training
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Unscaled data
model_unscaled = GradientDescentLinearRegression(learning_rate=0.01, n_iterations=2000)
model_unscaled.fit(X_unscaled, y_unscaled)
axes[0].plot(model_unscaled.loss_history)
axes[0].set_title('Unscaled features')
axes[0].set_xlabel('Iteration')
axes[0].set_ylabel('Loss')
axes[0].set_yscale('log')
axes[0].grid(True)

# Scaled data
model_scaled = GradientDescentLinearRegression(learning_rate=0.01, n_iterations=2000)
model_scaled.fit(X_scaled, y_unscaled)
axes[1].plot(model_scaled.loss_history)
axes[1].set_title('Scaled features')
axes[1].set_xlabel('Iteration')
axes[1].set_ylabel('Loss')
axes[1].set_yscale('log')
axes[1].grid(True)

plt.tight_layout()
plt.show()
print("Conclusion: after feature scaling, gradient descent converges faster and more stably!")
Scaled feature statistics:
Feature 1: mean=0.0000, std=1.0000
Feature 2: mean=0.0000, std=1.0000
Converged at iteration 1213
Conclusion: after feature scaling, gradient descent converges faster and more stably!
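The slowdown has a precise cause: gradient descent's convergence rate is governed by the condition number of X^T X, which standardization shrinks enormously. A quick check (a minimal sketch reusing X_unscaled and X_scaled from above):
# Condition number of the Gram matrix, before and after standardization
print(f"Condition number, unscaled: {np.linalg.cond(X_unscaled.T @ X_unscaled):.2e}")
print(f"Condition number, scaled:   {np.linalg.cond(X_scaled.T @ X_scaled):.2e}")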

5. Regularization: Ridge vs Lasso
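For reference, with regularization strength α the three estimators minimize the following objectives (stated in scikit-learn's conventions, where n is the number of samples):
- Ridge: ||y - Xβ||² + α ||β||²
- Lasso: (1 / (2n)) ||y - Xβ||² + α ||β||₁
- Elastic Net: (1 / (2n)) ||y - Xβ||² + α (l1_ratio · ||β||₁ + ((1 - l1_ratio) / 2) · ||β||²)
The squared L2 penalty shrinks coefficients smoothly toward zero, while the L1 penalty can set them exactly to zero.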
5.1 Create an Overfitting Scenario
# Generate high-dimensional data (more features than samples -> easy to overfit)
np.random.seed(42)
n_samples_small = 30
n_features_many = 50
X_high_dim = np.random.randn(n_samples_small, n_features_many)
true_coef = np.zeros(n_features_many)
true_coef[:5] = [2, -1, 3, 0.5, -2]  # only 5 informative features
y_high_dim = X_high_dim @ true_coef + np.random.randn(n_samples_small) * 0.5

# Train/test split
X_train_h, X_test_h, y_train_h, y_test_h = train_test_split(
    X_high_dim, y_high_dim, test_size=0.3, random_state=42
)

# Plain linear regression (will overfit)
lr_overfit = LinearRegression()
lr_overfit.fit(X_train_h, y_train_h)
train_pred = lr_overfit.predict(X_train_h)
test_pred = lr_overfit.predict(X_test_h)
print("Plain linear regression (overfit):")
print(f"Train R²: {r2_score(y_train_h, train_pred):.4f}")
print(f"Test R²: {r2_score(y_test_h, test_pred):.4f}")
print(f"Non-zero coefficients: {np.sum(np.abs(lr_overfit.coef_) > 1e-6)}/{n_features_many}")
Plain linear regression (overfit):
Train R²: 1.0000
Test R²: 0.7287
Non-zero coefficients: 50/50
5.2 Ridge Regression (L2 Regularization)
# Ridge regression across several alpha values
alphas = [0.001, 0.01, 0.1, 1, 10, 100]
plt.figure(figsize=(12, 5))

# Coefficients as a function of alpha
plt.subplot(1, 2, 1)
for alpha in alphas:
    ridge = Ridge(alpha=alpha)
    ridge.fit(X_train_h, y_train_h)
    plt.plot(range(n_features_many), ridge.coef_, 'o-', alpha=0.7, label=f'α={alpha}')
plt.xlabel('Feature index')
plt.ylabel('Coefficient')
plt.title('Ridge coefficients vs α')
plt.legend(bbox_to_anchor=(1.05, 1))
plt.grid(True)

# Performance comparison
plt.subplot(1, 2, 2)
train_scores = []
test_scores = []
for alpha in alphas:
    ridge = Ridge(alpha=alpha)
    ridge.fit(X_train_h, y_train_h)
    train_scores.append(r2_score(y_train_h, ridge.predict(X_train_h)))
    test_scores.append(r2_score(y_test_h, ridge.predict(X_test_h)))
plt.semilogx(alphas, train_scores, 'o-', label='Train')
plt.semilogx(alphas, test_scores, 'o-', label='Test')
plt.xlabel('α (regularization strength)')
plt.ylabel('$R^2$')
plt.title('Ridge performance vs α')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

print("\nRidge observations:")
print("- Very small α: close to plain linear regression; fits the training set well but may overfit")
print("- Increasing α: test performance improves as overfitting is reduced")
print("- Too large α: underfitting; both train and test performance degrade")

Ridge observations:
- Very small α: close to plain linear regression; fits the training set well but may overfit
- Increasing α: test performance improves as overfitting is reduced
- Too large α: underfitting; both train and test performance degrade
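Like OLS, Ridge has a closed-form solution: β = (X^T X + αI)^(-1) X^T y. Below is a minimal sketch verifying this against sklearn; note that scikit-learn leaves the intercept unpenalized, so the data are centered first (treating the exact match with the default solver as an assumption):
# Closed-form ridge on centered data (intercept left unpenalized)
alpha = 1.0
Xc = X_train_h - X_train_h.mean(axis=0)
yc = y_train_h - y_train_h.mean()
beta_ridge = np.linalg.solve(Xc.T @ Xc + alpha * np.eye(Xc.shape[1]), Xc.T @ yc)
ridge_check = Ridge(alpha=alpha).fit(X_train_h, y_train_h)
print(f"Max coefficient difference vs sklearn: {np.abs(beta_ridge - ridge_check.coef_).max():.2e}")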
5.3 Lasso Regression (L1 Regularization)
# Lasso's feature-selection effect
alphas_lasso = [0.001, 0.01, 0.05, 0.1, 0.5, 1]
plt.figure(figsize=(12, 5))

# Coefficients as a function of alpha
plt.subplot(1, 2, 1)
for alpha in alphas_lasso:
    lasso = Lasso(alpha=alpha, max_iter=5000)
    lasso.fit(X_train_h, y_train_h)
    plt.plot(range(n_features_many), lasso.coef_, 'o-', alpha=0.7, label=f'α={alpha}')
plt.xlabel('Feature index')
plt.ylabel('Coefficient')
plt.title('Lasso coefficients vs α (sparsity)')
plt.legend(bbox_to_anchor=(1.05, 1))
plt.grid(True)

# Number of non-zero coefficients
plt.subplot(1, 2, 2)
nonzero_counts = []
for alpha in alphas_lasso:
    lasso = Lasso(alpha=alpha, max_iter=5000)
    lasso.fit(X_train_h, y_train_h)
    nonzero_counts.append(np.sum(np.abs(lasso.coef_) > 1e-6))
plt.plot(alphas_lasso, nonzero_counts, 'o-')
plt.xlabel('α')
plt.ylabel('Number of non-zero coefficients')
plt.title('Lasso feature selection')
plt.grid(True)
plt.tight_layout()
plt.show()

print("\nLasso observations:")
print("- Lasso performs automatic feature selection")
print("- The larger α is, the fewer features survive")
print("- Well suited to picking important variables out of high-dimensional feature sets")

Lasso observations:
- Lasso performs automatic feature selection
- The larger α is, the fewer features survive
- Well suited to picking important variables out of high-dimensional feature sets
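The exact zeros come from the soft-thresholding operator at the heart of Lasso's coordinate-descent updates, S(z, t) = sign(z) · max(|z| − t, 0). A minimal sketch with illustrative values:
def soft_threshold(z, t):
    """Shrink z toward 0 by t; anything within t of 0 becomes exactly 0."""
    return np.sign(z) * np.maximum(np.abs(z) - t, 0.0)

# Small inputs are snapped to exactly zero -- the source of the sparsity
print(soft_threshold(np.array([-1.5, -0.05, 0.3, 2.0]), 0.5))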
5.4 Ridge vs Lasso in Detail
# Compare on the same dataset
alpha_ridge = 1.0   # Ridge parameter chosen for this comparison
alpha_lasso = 0.01  # Lasso parameter chosen for this comparison
ridge = Ridge(alpha=alpha_ridge)
lasso = Lasso(alpha=alpha_lasso, max_iter=5000)
lr = LinearRegression()
models = {
    'Linear': lr,
    'Ridge': ridge,
    'Lasso': lasso
}

results = []
for name, model in models.items():
    model.fit(X_train_h, y_train_h)
    train_pred = model.predict(X_train_h)
    test_pred = model.predict(X_test_h)
    results.append({
        'Model': name,
        'Train R²': r2_score(y_train_h, train_pred),
        'Test R²': r2_score(y_test_h, test_pred),
        'Non-zero Coef': np.sum(np.abs(model.coef_) > 1e-6) if hasattr(model, 'coef_') else n_features_many
    })
results_df = pd.DataFrame(results)
print(results_df.to_string(index=False))

# Visualize the coefficients
plt.figure(figsize=(12, 4))
for idx, (name, model) in enumerate(models.items()):
    plt.subplot(1, 3, idx + 1)
    coefs = model.coef_ if hasattr(model, 'coef_') else np.zeros(n_features_many)
    plt.bar(range(n_features_many), coefs)
    plt.xlabel('Feature index')
    plt.ylabel('Coefficient')
    plt.title(f'{name}\nNon-zero: {np.sum(np.abs(coefs) > 1e-6)}/{n_features_many}')
    plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
Model Train R² Test R² Non-zero Coef
Linear 1.000000 0.728663 50
Ridge 0.999485 0.724517 50
Lasso 0.999918 0.842230 18
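Lasso comes out ahead here because the data-generating process is genuinely sparse: only 5 of the 50 features carry signal, which matches Lasso's built-in preference for sparse solutions. Ridge keeps all 50 coefficients and mainly tames their variance.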

6. Financial Application: Predicting Stock Returns
6.1 Generate Simulated Financial Data
# Simulated stock data
np.random.seed(42)
n_days = 1000

# Construct features (technical indicators)
returns = np.random.randn(n_days) * 0.02  # daily returns
# Convert the NumPy array to a pandas Series so we can use rolling methods
returns_series = pd.Series(returns)
volatility = returns_series.abs().rolling(20).mean().fillna(0.02)  # volatility
volume = np.random.randn(n_days) * 1000000 + 5000000  # trading volume
volume_series = pd.Series(volume)

# Feature matrix
features = pd.DataFrame({
    'ret_1': returns_series.shift(1),             # yesterday's return
    'ret_2': returns_series.shift(2),             # return two days ago
    'ret_5': returns_series.rolling(5).mean(),    # 5-day moving average
    'volume_change': volume_series.pct_change(),  # volume change rate
    'volatility': volatility                      # volatility
})

# Target: return over the next 5 days
target = returns_series.rolling(5).sum().shift(-5)

# Drop missing values
df_stock = pd.concat([features, target], axis=1)
df_stock.columns = ['ret_1', 'ret_2', 'ret_5', 'volume_change', 'volatility', 'future_return']
df_stock = df_stock.dropna()
print("Financial data shape:", df_stock.shape)
df_stock.head()
Financial data shape: (991, 6)
|   | ret_1 | ret_2 | ret_5 | volume_change | volatility | future_return |
|---|---|---|---|---|---|---|
| 4 | 0.030461 | 0.012954 | 0.009180 | 0.309015 | 0.02 | 0.043712 |
| 5 | -0.004683 | 0.030461 | 0.006257 | -0.053479 | 0.02 | 0.039126 |
| 6 | -0.004683 | -0.004683 | 0.013127 | 0.093021 | 0.02 | -0.001773 |
| 7 | 0.031584 | -0.004683 | 0.013606 | -0.044107 | 0.02 | -0.012282 |
| 8 | 0.015349 | 0.031584 | 0.005636 | 0.073535 | 0.02 | -0.041158 |
6.2 Split the Data and Train Models
# Split chronologically
split_idx = int(len(df_stock) * 0.7)
train_df = df_stock[:split_idx]
test_df = df_stock[split_idx:]
X_train_f = train_df.drop('future_return', axis=1)
y_train_f = train_df['future_return']
X_test_f = test_df.drop('future_return', axis=1)
y_test_f = test_df['future_return']

# Standardize the features
scaler_f = StandardScaler()
X_train_scaled = scaler_f.fit_transform(X_train_f)
X_test_scaled = scaler_f.transform(X_test_f)
print(f"Training set size: {len(train_df)}")
print(f"Test set size: {len(test_df)}")

# Train several models
models_finance = {
    'Linear Regression': LinearRegression(),
    'Ridge (α=1)': Ridge(alpha=1),
    'Ridge (α=10)': Ridge(alpha=10),
    'Lasso (α=0.01)': Lasso(alpha=0.01),
    'Lasso (α=0.1)': Lasso(alpha=0.1),
    'Elastic Net': ElasticNet(alpha=0.01, l1_ratio=0.5)
}

# Evaluate
results_finance = []
for name, model in models_finance.items():
    model.fit(X_train_scaled, y_train_f)
    train_pred = model.predict(X_train_scaled)
    test_pred = model.predict(X_test_scaled)
    results_finance.append({
        'Model': name,
        'Train MSE': mean_squared_error(y_train_f, train_pred),
        'Test MSE': mean_squared_error(y_test_f, test_pred),
        'Train R²': r2_score(y_train_f, train_pred),
        'Test R²': r2_score(y_test_f, test_pred)
    })
results_finance_df = pd.DataFrame(results_finance)
print(results_finance_df.to_string(index=False))
Training set size: 693
Test set size: 298
Model Train MSE Test MSE Train R² Test R²
Linear Regression 0.001757 0.002146 0.030278 -0.095801
Ridge (α=1) 0.001757 0.002146 0.030278 -0.095681
Ridge (α=10) 0.001757 0.002143 0.030270 -0.094621
Lasso (α=0.01) 0.001812 0.002048 0.000000 -0.046115
Lasso (α=0.1) 0.001812 0.002048 0.000000 -0.046115
Elastic Net 0.001794 0.002052 0.010203 -0.048057
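Note that every model posts a negative test R², i.e. it predicts out of sample worse than a constant mean forecast. That is expected here: the simulated returns are pure noise, so the features carry no real signal. Lasso's train R² of 0 means it shrank every coefficient to zero and simply predicts the training mean.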
6.3 Feature Importance Analysis
# Coefficients of the selected model (Ridge α=1 as an example)
best_ridge = Ridge(alpha=1)
best_ridge.fit(X_train_scaled, y_train_f)

# Visualize feature importance
feature_names = X_train_f.columns
coef_df = pd.DataFrame({
    'Feature': feature_names,
    'Coefficient': best_ridge.coef_
})
coef_df = coef_df.sort_values('Coefficient', key=abs, ascending=False)

plt.figure(figsize=(10, 6))
plt.barh(coef_df['Feature'], coef_df['Coefficient'])
plt.xlabel('Coefficient')
plt.ylabel('Feature')
plt.title('Ridge feature importance (larger absolute value = more important)')
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

print("\nFeatures ranked by importance:")
for idx, row in coef_df.iterrows():
    print(f"{row['Feature']}: {row['Coefficient']:.4f}")

Features ranked by importance:
volatility: -0.0068
ret_5: -0.0036
volume_change: -0.0009
ret_2: 0.0004
ret_1: 0.0004
6.4 Visualizing the Predictions
# Predict with the selected model
final_model = Ridge(alpha=1)
final_model.fit(X_train_scaled, y_train_f)
test_pred_final = final_model.predict(X_test_scaled)

# Predicted vs actual
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.scatter(y_test_f, test_pred_final, alpha=0.5)
plt.plot([y_test_f.min(), y_test_f.max()],
         [y_test_f.min(), y_test_f.max()], 'r--', lw=2)
plt.xlabel('Actual return')
plt.ylabel('Predicted return')
plt.title(f'Predicted vs actual (test R²={r2_score(y_test_f, test_pred_final):.4f})')
plt.grid(True, alpha=0.3)

plt.subplot(1, 2, 2)
plt.plot(y_test_f.values, label='Actual', alpha=0.7)
plt.plot(test_pred_final, label='Predicted', alpha=0.7)
plt.xlabel('Sample index')
plt.ylabel('5-day-ahead return')
plt.title('Time-series prediction')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

7. Hyperparameter Tuning in Practice
7.1 Selecting the Best α with Cross-Validation
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit

# Time-series cross-validation
tscv = TimeSeriesSplit(n_splits=5)

# Tune Ridge
param_grid_ridge = {'alpha': [0.001, 0.01, 0.1, 1, 10, 50, 100]}
ridge_cv = GridSearchCV(Ridge(), param_grid_ridge, cv=tscv, scoring='r2')
ridge_cv.fit(X_train_scaled, y_train_f)

# Tune Lasso
param_grid_lasso = {'alpha': [0.0001, 0.001, 0.01, 0.05, 0.1, 0.5]}
lasso_cv = GridSearchCV(Lasso(max_iter=5000), param_grid_lasso, cv=tscv, scoring='r2')
lasso_cv.fit(X_train_scaled, y_train_f)

print("Best Ridge parameters:")
print(f"α = {ridge_cv.best_params_['alpha']}")
print(f"Best CV R² = {ridge_cv.best_score_:.4f}")
print("\nBest Lasso parameters:")
print(f"α = {lasso_cv.best_params_['alpha']}")
print(f"Best CV R² = {lasso_cv.best_score_:.4f}")

# Use the best models
best_ridge = ridge_cv.best_estimator_
best_lasso = lasso_cv.best_estimator_
test_ridge_pred = best_ridge.predict(X_test_scaled)
test_lasso_pred = best_lasso.predict(X_test_scaled)
print("\nTest-set performance:")
print(f"Ridge (α={ridge_cv.best_params_['alpha']}): R²={r2_score(y_test_f, test_ridge_pred):.4f}")
print(f"Lasso (α={lasso_cv.best_params_['alpha']}): R²={r2_score(y_test_f, test_lasso_pred):.4f}")
Best Ridge parameters:
α = 100
Best CV R² = -0.0382
Best Lasso parameters:
α = 0.05
Best CV R² = -0.0434
Test-set performance:
Ridge (α=100): R²=-0.0862
Lasso (α=0.05): R²=-0.0461
8. Wrap-Up
Key Takeaways
Linear regression basics:
- Ordinary least squares: analytic solution, computed in one step
- Gradient descent: iterative optimization, suited to large datasets
- Feature scaling: essential
Regularization:
- Ridge (L2): shrinks coefficients; handles collinearity
- Lasso (L1): sparse solutions; performs feature selection
- Elastic Net: combines the two
Tuning:
- The larger α is, the stronger the regularization
- Pick the best α with cross-validation
- Use TimeSeriesSplit for time-series data
Quant applications:
- Multi-factor models
- Return prediction
- Risk control
Exercises
# Exercise 1: vary the gradient-descent learning rate and iteration count and watch the convergence curve
# Exercise 2: try different α values on the financial data and find the best one
# Exercise 3: compare how Ridge and Lasso differ at feature selection
# Exercise 4: try Elastic Net and tune the l1_ratio parameter
Questions to Think About
- Why can't financial time-series data be split with random cross-validation? (a hint sketch follows below)
- In what situations does Lasso beat Ridge?
- How can you tell the regularization strength is too large?
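As a hint for the first question, the sketch below (with 12 stand-in observations) prints the folds TimeSeriesSplit produces: the training indices always precede the test indices, so no future information leaks into model fitting, unlike a randomly shuffled split:
# Each fold trains on the past and tests strictly on the future
toy = np.arange(12).reshape(-1, 1)  # stand-in for 12 trading days
for fold, (tr, te) in enumerate(TimeSeriesSplit(n_splits=3).split(toy)):
    print(f"Fold {fold}: train={tr.tolist()} -> test={te.tolist()}")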