文章目录
 
  
 
 
🧡🧡实验内容🧡🧡
 
 
🧡🧡数据预处理🧡🧡
 
代码
 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# --- Load the iris data set and print a short summary of it. ---
iris = datasets.load_iris()
print("Feature names: {}".format(iris['feature_names']))
print("Target names: {}".format(iris["target_names"]))
print("target:\n{}".format(iris['target']))
print("shape of data: {}".format(iris['data'].shape))

# Put the samples in a DataFrame with short column names; the class id goes
# into the trailing 'label' column.
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['label'] = iris.target
df.columns = ['sepal length', 'sepal width', 'petal length', 'petal width', 'label']
feature_df = df.drop('label', axis=1, inplace=False)
print(df)

# --- Correlation between the four features (label column excluded). ---
corr_matrix = feature_df.corr()
plt.figure(figsize=(8, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

# --- Radial visualisation, with a red unit circle drawn on the same axes. ---
ax = pd.plotting.radviz(df, 'label', colormap='brg')
ax.add_artist(plt.Circle((0, 0), 1, color='r', fill=False))
# Without an explicit show() this figure is never displayed when the code is
# run as a plain script (only the heatmap above had one).
plt.show()

# --- Pairwise feature relationships, coloured by class. ---
g = sns.pairplot(data=df, palette="pastel", hue='label')
plt.show()
 
认识数据
 
 
相关性分析
 
 
径向可视化
 
 
各个特征之间的关系图
 
 
🧡🧡支持向量机SVM求解🧡🧡
 
直觉理解:
 
 
数学推导
 
 
 
 
 
 
代码
 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
class SMO:
    """Binary soft-margin SVM trained with the simplified SMO algorithm.

    The second multiplier of every pair is picked uniformly at random
    (via ``np.random``), so training is only reproducible if the caller
    seeds NumPy's global random generator.

    Parameters
    ----------
    X : array of shape (m, n) holding the training samples.
    y : array of m class labels; the update rules assume values +1 / -1.
    C : upper bound of the box constraint ``0 <= alpha <= C``.
    kernel : ``'linear'`` or ``'rbf'``; any other value raises ``ValueError``
        as soon as a kernel value is needed.
    tol : tolerance used when checking the KKT conditions.
    max_passes : number of consecutive sweeps without any multiplier change
        after which training stops.
    gamma : width parameter of the RBF kernel ``exp(-gamma * ||a - b||^2)``.
    """

    def __init__(self, X, y, C, kernel, tol, max_passes=10, gamma=0.5):
        # np.asarray is a no-op for ndarrays; it only makes boolean-mask
        # indexing work if plain sequences are passed in.
        self.X = np.asarray(X)
        self.y = np.asarray(y)
        self.C = C
        self.kernel = kernel
        self.tol = tol
        self.max_passes = max_passes
        self.gamma = gamma
        self.m, self.n = self.X.shape
        self.alpha = np.zeros(self.m)
        self.b = 0
        # Only meaningful for the linear kernel; stays all-zero otherwise.
        self.w = np.zeros(self.n)

    def K(self, i, j):
        """Return the kernel value between training samples ``i`` and ``j``."""
        if self.kernel == 'linear':
            return np.dot(self.X[i].T, self.X[j])
        elif self.kernel == 'rbf':
            return np.exp(-self.gamma * np.linalg.norm(self.X[i] - self.X[j]) ** 2)
        else:
            raise ValueError('Invalid kernel specified')

    def _kernel_matrix(self, A, B):
        """Return the matrix of kernel values between rows of A and rows of B."""
        A = np.asarray(A, dtype=float)
        B = np.asarray(B, dtype=float)
        if self.kernel == 'linear':
            return np.dot(A, B.T)
        if self.kernel == 'rbf':
            # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b ; clamp tiny negative
            # values produced by rounding.
            sq_dist = (
                np.sum(A ** 2, axis=1)[:, None]
                + np.sum(B ** 2, axis=1)[None, :]
                - 2 * np.dot(A, B.T)
            )
            return np.exp(-self.gamma * np.maximum(sq_dist, 0.0))
        raise ValueError('Invalid kernel specified')

    def predict(self, X_test):
        """Return ``sign(f(x))`` for every row of ``X_test``.

        For the linear kernel the explicit weight vector is used. For any
        other kernel the decision function is evaluated in its dual form,
        ``sum_i alpha_i * y_i * K(x_i, x) + b``, because no weight vector
        exists in input space. Samples lying exactly on the boundary yield 0.
        """
        X_test = np.asarray(X_test, dtype=float)
        if self.kernel == 'linear':
            scores = np.dot(X_test, self.w) + self.b
        else:
            sv = self.alpha > 0
            coef = self.alpha[sv] * self.y[sv]
            scores = np.dot(self._kernel_matrix(X_test, self.X[sv]), coef) + self.b
        return np.sign(scores)

    def train(self):
        """Fit the multipliers ``alpha`` and the bias ``b`` on the training set.

        Sweeps over all samples; every sample violating the KKT conditions
        (within ``tol``) is optimised jointly with a randomly chosen partner.
        Stops after ``max_passes`` consecutive sweeps without any change.
        Afterwards ``w`` is computed for the linear kernel and ``b`` is
        re-estimated from the support vectors; both are printed.

        Raises
        ------
        ValueError
            If fewer than two training samples are available or the kernel
            name is unknown.
        """
        if self.m < 2:
            raise ValueError('SMO needs at least two training samples')

        # Kernel values never change during training, so compute them once
        # instead of O(m) kernel calls per error evaluation.
        gram = self._kernel_matrix(self.X, self.X)
        y = self.y
        alpha = self.alpha  # alias: in-place writes update self.alpha

        passes = 0
        while passes < self.max_passes:
            num_changed_alphas = 0
            for i in range(self.m):
                # Prediction error on sample i.
                E_i = np.dot(alpha * y, gram[:, i]) + self.b - y[i]
                r_i = y[i] * E_i
                violates_kkt = (r_i < -self.tol and alpha[i] < self.C) or \
                               (r_i > self.tol and alpha[i] > 0)
                if not violates_kkt:
                    continue

                # Uniformly pick a partner j != i.
                j = np.random.randint(self.m - 1)
                if j >= i:
                    j += 1
                E_j = np.dot(alpha * y, gram[:, j]) + self.b - y[j]

                alpha_i_old = float(alpha[i])
                alpha_j_old = float(alpha[j])

                # Feasible segment for alpha_j inside the box [0, C]^2.
                if y[i] != y[j]:
                    L = max(0, alpha_j_old - alpha_i_old)
                    H = min(self.C, self.C + alpha_j_old - alpha_i_old)
                else:
                    L = max(0, alpha_i_old + alpha_j_old - self.C)
                    H = min(self.C, alpha_i_old + alpha_j_old)
                if L == H:
                    continue

                # Second derivative of the objective along the segment;
                # a maximum exists only when it is negative.
                eta = 2 * gram[i, j] - gram[i, i] - gram[j, j]
                if eta >= 0:
                    continue

                alpha_j_new = np.clip(alpha_j_old - (y[j] * (E_i - E_j)) / eta, L, H)
                if abs(alpha_j_new - alpha_j_old) < 1e-5:
                    # Negligible step: leave both multipliers untouched so
                    # that sum(alpha * y) = 0 keeps holding.
                    continue

                alpha[j] = alpha_j_new
                alpha[i] = alpha_i_old + y[i] * y[j] * (alpha_j_old - alpha_j_new)

                delta_i = alpha[i] - alpha_i_old
                delta_j = alpha[j] - alpha_j_old
                b1 = self.b - E_i - y[i] * delta_i * gram[i, i] - y[j] * delta_j * gram[i, j]
                b2 = self.b - E_j - y[i] * delta_i * gram[i, j] - y[j] * delta_j * gram[j, j]

                if 0 < alpha[i] < self.C:
                    self.b = b1
                elif 0 < alpha[j] < self.C:
                    self.b = b2
                else:
                    self.b = (b1 + b2) / 2
                num_changed_alphas += 1

            passes = passes + 1 if num_changed_alphas == 0 else 0

        sv = alpha > 0
        if self.kernel == 'linear':
            self.w = np.sum(alpha[sv][:, None] * y[sv][:, None] * self.X[sv], axis=0)

        # Re-estimate b as the mean of y_k - sum_i alpha_i y_i K(x_i, x_k).
        # Free support vectors (0 < alpha < C) lie exactly on the margin, so
        # prefer them; fall back to all support vectors, and keep the value
        # maintained by the loop when there are none at all.
        free = sv & (alpha < self.C)
        ref = free if free.any() else sv
        if ref.any():
            self.b = np.mean(y[ref] - np.dot(alpha * y, gram[:, ref]))

        print("w", self.w)
        print("b", self.b)

    def score(self, X, y):
        """Print predictions and targets; return the fraction predicted correctly."""
        predict = self.predict(X)
        print("predict", predict)
        print("target", y)
        return np.mean(predict == y)
        
# --- Build a binary problem: class 0 -> +1, every other class -> -1. ---
iris = datasets.load_iris()
X = iris.data
# np.where builds a fresh label array instead of overwriting iris.target.
y = np.where(iris.target == 0, 1, -1)

# --- Scatter plot of the first two features (sepal length, sepal width). ---
X2 = X[:, :2]
plt.scatter(X2[y == 1, 0], X2[y == 1, 1], color='red', label="class 1")
plt.scatter(X2[y == -1, 0], X2[y == -1, 1], color='blue', label="class -1")
# Column 0 is sepal length and column 1 is sepal width.
plt.xlabel("Sepal Length")
plt.ylabel("Sepal Width")
plt.legend()
plt.show()

# --- Split, standardise (statistics from the training part only), train. ---
scaler = StandardScaler()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=3706)
X_train_std = scaler.fit_transform(X_train)
svm = SMO(X_train_std, y_train, C=0.6, kernel='rbf', tol=0.001)
svm.train()

# --- Evaluate on the held-out part, scaled with the training statistics. ---
X_test_std = scaler.transform(X_test)
accuracy = svm.score(X_test_std, y_test)
print('正确率: {:.2%}'.format(accuracy))
from sklearn.metrics import confusion_matrix, roc_curve, auc
y_pred = svm.predict(X_test_std)
def cal_ConfusialMatrix(y_true_labels, y_pred_labels):
    """Plot the 2x2 confusion matrix of a -1/+1 classification and return it.

    Label -1 is mapped to row/column 0 ("Negative"); every other label is
    used as the index directly after casting to int, so +1 maps to
    row/column 1 ("Positive"). Float labels such as the output of
    ``np.sign`` are accepted.

    Parameters
    ----------
    y_true_labels : sequence of true labels.
    y_pred_labels : sequence of predicted labels, same length.

    Returns
    -------
    The (2, 2) float array of counts, indexed as ``cm[true, predicted]``.

    Raises
    ------
    ValueError
        If the two label sequences differ in shape.
    IndexError
        If a label maps to an index outside the 2x2 matrix.
    """
    def to_index(labels):
        arr = np.asarray(labels)
        # Cast so float labels can be used as array indices.
        return np.where(arr == -1, 0, arr).astype(int)

    true_idx = to_index(y_true_labels)
    pred_idx = to_index(y_pred_labels)
    if true_idx.shape != pred_idx.shape:
        raise ValueError('y_true_labels and y_pred_labels must have the same length')

    cm = np.zeros((2, 2))
    # add.at accumulates correctly when the same cell is hit repeatedly.
    np.add.at(cm, (true_idx, pred_idx), 1)

    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='g', cmap='Blues', xticklabels=['Predicted Negative', 'Predicted Positive'], yticklabels=['Actual Negative', 'Actual Positive'])
    plt.xlabel('Predicted label')
    plt.ylabel('True label')
    plt.title('Confusion Matrix')
    plt.show()
    return cm
# Convert both label sequences to plain Python ints, then draw the
# confusion matrix for the held-out samples.
y_pred = list(map(int, y_pred))
y_test = list(map(int, y_test))
cal_ConfusialMatrix(y_test, y_pred)
 
运行结果
 
 
 
 
 
🧡🧡总结🧡🧡