
Contents

3.2.1 Principles

1. Ordinary linear regression

2. Generalized linear models

3. Logistic regression

3.2.2 Algorithm steps

3.2.3 Hands-on practice

1. Dataset

2. sklearn implementation

3. Algorithm implementation

3.2.4 Experiment

1. Experiment objectives

2. Experiment data

3. Experiment requirements

4. Experiment code

3.2.1 Principles

1. Ordinary linear regression

2. Generalized linear models

3. Logistic regression
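In brief, the three models build on one another. Ordinary linear regression fits a linear function of the inputs; a generalized linear model passes that linear function through a link function; and logistic regression is the special case with the logit link, trained by minimizing the same log-loss that the code in 3.2.3 implements:

$$f(\mathbf{x}) = \mathbf{w}^\top \mathbf{x} + b \qquad \text{(ordinary linear regression)}$$

$$y = g^{-1}(\mathbf{w}^\top \mathbf{x} + b) \qquad \text{(generalized linear model, with link function } g\text{)}$$

$$P(y = 1 \mid \mathbf{x}) = \sigma(\mathbf{w}^\top \mathbf{x} + b) = \frac{1}{1 + e^{-(\mathbf{w}^\top \mathbf{x} + b)}} \qquad \text{(logistic regression)}$$

$$L(\mathbf{w}) = -\frac{1}{m} \sum_{i=1}^{m} \big[\, y_i \log h_i + (1 - y_i) \log(1 - h_i) \,\big], \qquad h_i = \sigma(\mathbf{w}^\top \mathbf{x}_i + b)$$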

3.2.2 Algorithm steps
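The original step list is not reproduced here; judging from the implementation in section 3.2.3, training reduces to the following loop:

1. Prepend a column of ones to X for the intercept and initialize the weights W with small random values.

2. Compute the predictions h = sigmoid(XW).

3. Compute the log-loss and its gradient dW = X^T (h - y) / m.

4. Update W <- W - lr * dW and repeat steps 2-4 for a fixed number of iterations.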

3.2.3 Hands-on practice

1. Dataset
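The code below uses two datasets that ship with scikit-learn: the diabetes dataset (442 samples, 10 features, a continuous target) for linear regression, and the iris dataset (150 samples, 4 features, 3 classes) for logistic regression.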

2. sklearn implementation

from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
import pytest


def load_data():
    """Load the diabetes dataset and split it 75/25 into train/test."""
    diabetes = datasets.load_diabetes()
    return train_test_split(diabetes.data, diabetes.target, test_size=0.25, random_state=0)


@pytest.fixture
def data():
    """Fixture to load data."""
    return load_data()


def test_LinearRegression(data):
    X_train, X_test, y_train, y_test = data
    regression = linear_model.LinearRegression()
    regression.fit(X_train, y_train)
    print('\nCoefficients:%s, intercept:%.2f' % (regression.coef_, regression.intercept_))
    print("Residual sum of squares:%.2f" % np.mean((regression.predict(X_test) - y_test) ** 2))
    print('Score:%.2f' % regression.score(X_test, y_test))

The output is as follows (run the file with pytest -s so the print statements are shown):

def load_iris():
    """Load the iris dataset and split it 70/30 with stratification."""
    iris = datasets.load_iris()
    x = iris.data
    y = iris.target
    return train_test_split(x, y, test_size=0.3, random_state=1, shuffle=True, stratify=y)


@pytest.fixture
def data2():
    """Fixture to load data."""
    return load_iris()


def test_LogisticRegression(data2):
    X_train, X_test, y_train, y_test = data2
    regression = linear_model.LogisticRegression()
    regression.fit(X_train, y_train)
    print('\nCoefficients:%s, intercept:%s' % (regression.coef_, regression.intercept_))
    print('Score:%.2f' % regression.score(X_test, y_test))

The output is as follows:

def test_LogisticRegression_multinomial(data2):
    X_train, X_test, y_train, y_test = data2
    # Use a true multinomial (softmax) model instead of one-vs-rest
    regression = linear_model.LogisticRegression(multi_class='multinomial', solver='lbfgs')
    regression.fit(X_train, y_train)
    print('\nCoefficients:%s, intercept:%s' % (regression.coef_, regression.intercept_))
    print('Score:%.2f' % regression.score(X_test, y_test))

The output is as follows:

def test_LogisticRegression_C(data2):
    X_train, X_test, y_train, y_test = data2
    # C is the inverse of the regularization strength: larger C, weaker penalty
    Cs = np.logspace(-2, 4, num=100)
    scores = []
    for C in Cs:
        regression = linear_model.LogisticRegression(C=C)
        regression.fit(X_train, y_train)
        scores.append(regression.score(X_test, y_test))
    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax.plot(Cs, scores)
    ax.set_xlabel(r"C")
    ax.set_ylabel(r"score")
    ax.set_xscale('log')
    ax.set_title(r"Logistic Regression")
    plt.show()

The output is as follows:
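As a side note, scikit-learn can run this search over C internally via LogisticRegressionCV; a minimal sketch (not part of the original tests), reusing the data2 fixture:

def test_LogisticRegressionCV(data2):
    X_train, X_test, y_train, y_test = data2
    # Cross-validates over a grid of C values and keeps the best one per class
    regression = linear_model.LogisticRegressionCV(Cs=10, cv=5)
    regression.fit(X_train, y_train)
    print('\nChosen C per class:%s' % regression.C_)
    print('Score:%.2f' % regression.score(X_test, y_test))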

3. Algorithm implementation

from sklearn.datasets import load_iris
import matplotlib.pyplot as plt
import numpy as np


# Take the first 100 samples (classes 0 and 1: setosa and versicolor) and
# two features: sepal length (column 0) and petal length (column 2)
iris = load_iris()
data = iris.data
target = iris.target
X = data[0:100, [0, 2]]
y = target[0:100]
label = np.array(y)
index_0 = np.where(label == 0)
plt.scatter(X[index_0, 0], X[index_0, 1], marker='x', color='b', label='0', s=15)
index_1 = np.where(label == 1)
plt.scatter(X[index_1, 0], X[index_1, 1], marker='o', color='r', label='1', s=15)
plt.xlabel('X1')
plt.ylabel('X2')
plt.legend(loc='upper left')
plt.show()

The output is as follows:

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris


# The target variable y of the iris dataset is an integer vector of class labels,
# but this logistic regression model expects y in binary form (0 and 1) for
# binary classification, so the labels are converted accordingly.
class LogisticRegressionBinary(object):
    def __init__(self):
        self.W = None

    def train(self, X, y, lr=0.01, num_iters=5000):
        num_train, num_feature = X.shape
        # Initialize the weights with small random values
        self.W = 0.001 * np.random.randn(num_feature, 1)
        loss = []
        for i in range(num_iters):
            error, dW = self.compute_loss(X, y)
            self.W += -lr * dW
            loss.append(error)
            if i % 200 == 0:
                print('i= %d, error= %f' % (i, error))
        return loss

    def compute_loss(self, X, y):
        num_train = X.shape[0]
        h = self.output(X)
        # Cross-entropy (log) loss and its gradient
        loss = -np.sum((y * np.log(h) + (1 - y) * np.log(1 - h))) / num_train
        dW = X.T.dot(h - y) / num_train
        return loss, dW

    def output(self, X):
        g = np.dot(X, self.W)
        return self.sigmoid(g)

    def sigmoid(self, X):
        return 1 / (1 + np.exp(-X))

    def predict(self, X_test):
        h = self.output(X_test)
        return np.where(h >= 0.5, 1, 0)


# Load the iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# The iris dataset has three classes (0, 1, 2); keep only classes 0 and 1
# for binary classification
binary_filter = y < 2
X = X[binary_filter]
y = y[binary_filter].reshape((-1, 1))

# Prepend a column of ones to X to account for the intercept term
one = np.ones((X.shape[0], 1))
X_train = np.hstack((one, X))

# Train the logistic regression model for binary classification
classify = LogisticRegressionBinary()
loss = classify.train(X_train, y)

# Print the learned weights
print("Learned weights:\n", classify.W)

# Plot the loss curve over iterations
plt.plot(loss)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.title('Loss curve for Logistic Regression')
plt.show()
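The class above defines a predict method that the script never calls; a minimal follow-up (an added example, not in the original) that reports the training accuracy:

# Added example: evaluate the trained model on its training data
y_pred = classify.predict(X_train)
print("Training accuracy: %.2f" % np.mean(y_pred == y))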

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris


# Same model as above, trained on two features so the decision boundary
# can be plotted
class logistic(object):
    def __init__(self):
        self.W = None

    def train(self, X, y, lr=0.01, num_iters=5000):
        num_train, num_feature = X.shape
        self.W = 0.001 * np.random.randn(num_feature, 1)
        loss = []
        for i in range(num_iters):
            error, dW = self.compute_loss(X, y)
            self.W += -lr * dW
            loss.append(error)
            if i % 200 == 0:
                print('i= %d, error= %f' % (i, error))
        return loss

    def compute_loss(self, X, y):
        num_train = X.shape[0]
        h = self.output(X)
        loss = -np.sum((y * np.log(h) + (1 - y) * np.log(1 - h))) / num_train
        dW = X.T.dot(h - y) / num_train
        return loss, dW

    def output(self, X):
        g = np.dot(X, self.W)
        return self.sigmoid(g)

    def sigmoid(self, X):
        return 1 / (1 + np.exp(-X))

    def predict(self, X_test):
        h = self.output(X_test)
        return np.where(h >= 0.5, 1, 0)


iris = load_iris()
data = iris.data
target = iris.target
X = data[0:100, [0, 2]]
y = target[0:100]
y = y.reshape((-1, 1))
one = np.ones((X.shape[0], 1))
X_train = np.hstack((one, X))
classify = logistic()
loss = classify.train(X_train, y)
label = np.array(y)
index_0 = np.where(label == 0)
plt.scatter(X[index_0, 0], X[index_0, 1], marker='x', c='b', label='0', s=15)
index_1 = np.where(label == 1)
plt.scatter(X[index_1, 0], X[index_1, 1], marker='o', c='r', label='1', s=15)
# Plot the decision boundary: W0 + W1*x1 + W2*x2 = 0, i.e.
# x2 = -(W0 + W1*x1) / W2
x1 = np.arange(4, 7.5, 0.5)
x2 = (- classify.W[0] - classify.W[1] * x1) / classify.W[2]
plt.plot(x1, x2, color='black')
plt.xlabel('X1')
plt.ylabel('X2')
plt.legend(loc='upper left')
plt.show()

3.2.4 Experiment

1. Experiment objectives

2. Experiment data

3. Experiment requirements

4. Experiment code

import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
import pandas as pd

# Load the iris dataset
iris = load_iris()
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_df['species'] = iris.target

# Map the numeric labels to class names
iris_df['species'] = iris_df['species'].map({0: 'setosa', 1: 'versicolor', 2: 'virginica'})

# Scatter plot
sns.scatterplot(data=iris_df, x='sepal length (cm)', y='sepal width (cm)', hue='species')
plt.title('Iris Dataset Scatter Plot')
plt.xlabel('Sepal Length (cm)')
plt.ylabel('Sepal Width (cm)')
plt.legend(title='Species')
plt.show()

The output is as follows:

import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

# Load the diabetes dataset
diabetes = load_diabetes()
X = diabetes.data  # feature matrix
y = diabetes.target  # target values

# Reshape y into a column vector
y = y.reshape(-1, 1)

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

# Initialize the weights W and the bias b
W = np.zeros((X_train.shape[1], 1))  # initialized to 0; X_train.shape[1] is the number of features
b = 0

# Initial predictions h(x) are all zero
y_pred = np.dot(X_train, W) + b  # W and b are both 0, so every prediction is 0

# Initial loss (half mean squared error)
initial_loss = (1 / (2 * len(y_train))) * np.sum((y_pred - y_train) ** 2)

print("Initial loss on the diabetes dataset:", initial_loss)

The output is as follows:
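The script above stops at the initial loss; a minimal continuation (not in the original) that reduces it with batch gradient descent on the same half-MSE loss:

# Hypothetical continuation: batch gradient descent on the half-MSE loss
lr = 0.1
m = len(y_train)
for i in range(1000):
    y_pred = np.dot(X_train, W) + b         # predictions with the current parameters
    error = y_pred - y_train                # (m, 1) residuals
    W -= lr * np.dot(X_train.T, error) / m  # gradient step for the weights
    b -= lr * np.sum(error) / m             # gradient step for the bias

final_loss = (1 / (2 * m)) * np.sum((np.dot(X_train, W) + b - y_train) ** 2)
print("Loss after gradient descent:", final_loss)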

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold, train_test_split
from sklearn.preprocessing import StandardScaler

# Load the iris dataset
iris = load_iris()
X = iris.data
y = iris.target

# Keep only the Setosa and Versicolor classes (target == 0 is Setosa, target == 1 is Versicolor)
mask = y < 2
X = X[mask]
y = y[mask]

# Recode the target as 0 and 1 (here Setosa becomes 1)
y = (y == 0).astype(int)

# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)


# Sigmoid function
def sigmoid(z):
    return 1 / (1 + np.exp(-z))


# Gradient of the log-loss with respect to theta
def compute_gradient(X, y, theta):
    m = X.shape[0]
    h = sigmoid(X @ theta)
    gradient = (X.T @ (h - y)) / m
    return gradient


# Gradient descent
def gradient_descent(X, y, theta, learning_rate, num_iterations):
    for i in range(num_iterations):
        gradient = compute_gradient(X, y, theta)
        theta = theta - learning_rate * gradient
    return theta


# Hessian of the log-loss: H = X^T R X / m with R = diag(h * (1 - h))
def compute_hessian(X, theta):
    m = X.shape[0]
    h = sigmoid(X @ theta)
    R = np.diag(h * (1 - h))  # builds an m x m matrix; acceptable for small datasets
    H = X.T @ R @ X / m
    return H


# Newton's method
def newton_method(X, y, theta, num_iterations):
    for i in range(num_iterations):
        gradient = compute_gradient(X, y, theta)
        hessian = compute_hessian(X, theta)
        theta = theta - np.linalg.inv(hessian) @ gradient
    return theta


# Prediction function
def predict(X, theta):
    return (sigmoid(X @ theta) >= 0.5).astype(int)


# Cross-validation
def cross_validate_model(X, y, method, num_folds=5, learning_rate=0.01, num_iterations=1000):
    # Shuffle before splitting: the samples are ordered by class, so without
    # shuffling each validation fold would contain mostly one class
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=0)
    accuracies = []

    for train_index, val_index in kf.split(X):
        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index], y[val_index]

        theta = np.zeros(X_train.shape[1])

        # Optimize the parameters with the chosen method
        if method == 'gd':
            theta_optimal = gradient_descent(X_train, y_train, theta, learning_rate, num_iterations)
        elif method == 'newton':
            theta_optimal = newton_method(X_train, y_train, theta, num_iterations=10)

        # Predict on the validation fold
        y_pred = predict(X_val, theta_optimal)

        # Fold accuracy
        accuracy = np.mean(y_pred == y_val)
        accuracies.append(accuracy)

    avg_accuracy = np.mean(accuracies)
    return avg_accuracy
# Plot the decision boundary
def plot_decision_boundary(X, y, theta, title):
    # Build a grid covering the feature range
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01),
                         np.arange(y_min, y_max, 0.01))

    # Predicted probability at each grid point
    Z = sigmoid(np.c_[xx.ravel(), yy.ravel()] @ theta)
    Z = Z.reshape(xx.shape)

    # Color the two sides of the p = 0.5 boundary
    plt.contourf(xx, yy, Z, levels=[0, 0.5, 1], alpha=0.5, cmap='coolwarm')
    plt.scatter(X[:, 0], X[:, 1], c=y, edgecolor='k', cmap='coolwarm')
    plt.xlabel('Sepal length (standardized)')
    plt.ylabel('Sepal width (standardized)')
    plt.title(title)
    plt.show()
# Main program
if __name__ == "__main__":
    # Cross-validate both gradient descent and Newton's method
    accuracy_gd = cross_validate_model(X, y, method='gd', num_folds=5)
    accuracy_newton = cross_validate_model(X, y, method='newton', num_folds=5)

    print(f"Average cross-validation accuracy of gradient descent: {accuracy_gd:.9f}")
    print(f"Average cross-validation accuracy of Newton's method: {accuracy_newton:.9f}")

    # Keep the first two features for visualization
    X_selected = X[:, :2]
    X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.3, random_state=42)

    # Optimize with gradient descent
    theta = np.zeros(X_train.shape[1])
    theta_optimal_gd = gradient_descent(X_train, y_train, theta, learning_rate=0.01, num_iterations=1000)

    # Optimize with Newton's method
    theta_optimal_newton = newton_method(X_train, y_train, theta, num_iterations=10)

    # Plot both decision boundaries
    plot_decision_boundary(X_train, y_train, theta_optimal_gd, 'Gradient descent decision boundary')
    plot_decision_boundary(X_train, y_train, theta_optimal_newton, "Newton's method decision boundary")
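Note the iteration counts in the script: gradient descent takes 1000 first-order steps while Newton's method needs only 10, because each Newton step also uses the curvature information in the Hessian. The trade-off is cost per step: compute_hessian builds an m x m diagonal matrix and the update inverts a d x d Hessian, which is only cheap on small, low-dimensional datasets like this one.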

The results are as follows:
