Dive into Deep Learning (PyTorch) 3.6 Softmax Regression Implementation from Scratch: Study Notes

舍予兄 2022-01-17

Preface

Softmax regression, also called multinomial (multiclass) logistic regression, is the generalization of logistic regression to multiclass classification problems.
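
In LaTeX notation (the standard form used in the book), the softmax mapping over q classes is

\hat{y}_j = \mathrm{softmax}(\mathbf{o})_j = \frac{\exp(o_j)}{\sum_{k=1}^{q} \exp(o_k)}, \qquad j = 1, \dots, q,

which for q = 2 reduces (up to reparameterization) to the logistic function, hence the name.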


I. Training and Test Sets

We use the Fashion-MNIST dataset obtained in the previous section.

II. Steps

1. Import the libraries

import torch
import torchvision
import numpy as np
import sys
sys.path.append("..")  # so that d2lzh_pytorch in the parent directory can be imported
from d2lzh_pytorch import *
import d2lzh_pytorch as d2l

2. Read the data

batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

d2l.load_data_fashion_mnist(batch_size) loads the Fashion-MNIST training and test sets and wraps them in DataLoader iterators that yield minibatches of the given batch size.
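
For context, here is a rough sketch of what this helper does internally, based on the previous section; the _sketch suffix and the root path are placeholders, and the real d2lzh_pytorch version may also set num_workers:

import torchvision
import torchvision.transforms as transforms
import torch.utils.data as Data

def load_data_fashion_mnist_sketch(batch_size, root='~/Datasets/FashionMNIST'):
    # Download Fashion-MNIST and convert each image to a (1, 28, 28) float tensor in [0, 1]
    transform = transforms.ToTensor()
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True,
                                                    download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False,
                                                   download=True, transform=transform)
    train_iter = Data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
    test_iter = Data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False)
    return train_iter, test_iter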

3. Initialize model parameters

num_inputs = 784   # each 28 x 28 image is flattened into a vector of length 784
num_outputs = 10   # Fashion-MNIST has 10 classes

W = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_outputs)),
                 dtype=torch.float)
b = torch.zeros(num_outputs, dtype=torch.float)

# Track gradients so the parameters can be updated by SGD
W.requires_grad_(requires_grad=True)
b.requires_grad_(requires_grad=True)
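
An optional sanity check confirms the parameter shapes and that autograd is tracking both tensors:

print(W.shape, b.shape)                  # torch.Size([784, 10]) torch.Size([10])
print(W.requires_grad, b.requires_grad)  # True True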

4. Define the model

def softmax(X):
    X_exp = X.exp()                              # elementwise exponential
    partition = X_exp.sum(dim=1, keepdim=True)   # row sums, shape (n, 1)
    return X_exp / partition                     # broadcasting: each row sums to 1

def net(X):
    # Flatten each image to a 784-vector, apply the linear layer, then softmax
    return softmax(torch.mm(X.view((-1, num_inputs)), W) + b)
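
In matrix form, for a minibatch X of n flattened images, the model computes

\hat{\mathbf{Y}} = \mathrm{softmax}(\mathbf{X}\mathbf{W} + \mathbf{b}), \qquad \mathbf{X} \in \mathbb{R}^{n \times 784},\ \mathbf{W} \in \mathbb{R}^{784 \times 10},\ \mathbf{b} \in \mathbb{R}^{1 \times 10}.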

X.exp() computes the elementwise exponential. X_exp.sum(dim=1, keepdim=True) sums each row while keeping the column dimension, so that X_exp / partition broadcasts correctly and every row of the output sums to 1.
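
To see the normalization in action, the book runs a quick check on random input; each row of the output is a valid probability distribution:

X = torch.rand((2, 5))
X_prob = softmax(X)
print(X_prob, X_prob.sum(dim=1))   # each row sums to 1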

5. Define the loss function

# Example: pick out the predicted probability of the true label for each sample
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = torch.LongTensor([0, 2])
y_hat.gather(1, y.view(-1, 1))    # tensor([[0.1000], [0.5000]])

def cross_entropy(y_hat, y):
    return - torch.log(y_hat.gather(1, y.view(-1, 1)))

torch.LongTensor creates a 64-bit integer tensor; gather requires integer indices, which is why the labels y are stored this way.

torch.gather(input, dim, index, out=None) → Tensor collects, along dimension dim, the entries of input at the positions given by index; here it picks out each sample's predicted probability for its true class.
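
Applying cross_entropy to the small example above returns the negative log of the gathered probabilities, i.e. -log 0.1 ≈ 2.30 for the first sample and -log 0.5 ≈ 0.69 for the second:

print(cross_entropy(y_hat, y))   # tensor([[2.3026], [0.6931]])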

6. Compute classification accuracy

def accuracy(y_hat, y):
    # Fraction of samples whose highest-probability class matches the label
    return (y_hat.argmax(dim=1) == y).float().mean().item()

print(accuracy(y_hat, y))    # 0.5 for the example y_hat and y above

.argmax(dim=1) returns the column index of the largest entry in each row, i.e. the predicted class. .item() converts a one-element tensor into a Python number.

# This function is saved in the d2lzh_pytorch package for later use. It will be
# improved step by step: its full implementation is described in the
# "Image Augmentation" section.
def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n
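
Before training, the randomly initialized model is no better than random guessing over 10 classes, so its test accuracy should come out at roughly 0.1:

print(evaluate_accuracy(test_iter, net))   # about 0.1 before training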

7. Train the model

num_epochs, lr = 4, 0.1
train_ch3(net, train_iter, test_iter, cross_entropy, num_epochs, batch_size, [W, b], lr)

train_ch3 is the general training function saved in the d2lzh_pytorch package: for num_epochs epochs it iterates over train_iter, computes the cross-entropy loss on each minibatch, backpropagates, updates the parameters [W, b] by minibatch SGD with learning rate lr, and prints the training loss and the train/test accuracy after every epoch.
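
For reference, a sketch of train_ch3 as it is saved in d2lzh_pytorch in the book's code; your local copy may differ in details such as the optimizer handling:

def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            # Zero the gradients before backpropagation
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward()
            if optimizer is None:
                d2l.sgd(params, lr, batch_size)   # minibatch SGD from section 3.2
            else:
                optimizer.step()
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))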

8. Predict

X, y = next(iter(test_iter))    # take one minibatch from the test set

true_labels = d2l.get_fashion_mnist_labels(y.numpy())
pred_labels = d2l.get_fashion_mnist_labels(net(X).argmax(dim=1).numpy())
titles = [true + '\n' + pred for true, pred in zip(true_labels, pred_labels)]

d2l.show_fashion_mnist(X[0:9], titles[0:9])
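
For reference, a sketch of d2l.get_fashion_mnist_labels as defined in the previous section's notes; the label strings follow the Fashion-MNIST class order:

def get_fashion_mnist_labels(labels):
    # Map numeric class indices (0-9) to human-readable Fashion-MNIST names
    text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    return [text_labels[int(i)] for i in labels]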

Summary

Study notes for section 3.6, "Implementation of Softmax Regression from Scratch", of Dive into Deep Learning (PyTorch).
