从零和使用mxnet实现softmax分类

2022-11-19,,

from mxnet.gluon import data as gdata
from sklearn import datasets
from mxnet import nd,autograd
# 加载数据集
digits = datasets.load_digits()
features,labels = nd.array(digits['data']),nd.array(digits['target'])
print(features.shape,labels.shape)
labels_onehot = nd.one_hot(labels,10)
print(labels_onehot.shape)
(1797, 64) (1797,)
(1797, 10)
class softmaxClassifier:
    def __init__(self,inputs,outputs):
        self.inputs = inputs
        self.outputs = outputs
        
        self.weight = nd.random.normal(scale=0.01,shape=(inputs,outputs))
        self.bias = nd.zeros(shape=(1,outputs))
        self.weight.attach_grad()
        self.bias.attach_grad()
        
    def forward(self,x):
        output = nd.dot(x,self.weight) + self.bias
        return self._softmax(output)
        
    def _softmax(self,x):
        step1 = x.exp()
        step2 = step1.sum(axis=1,keepdims=True)
        return step1 / step2
    
    def _bgd(self,params,learning_rate,batch_size):
        '''
        批量梯度下降
        '''
        for param in params:       # 直接使用mxnet的自动求梯度
            param[:] = param - param.grad * learning_rate / batch_size
            
    def loss(self,y_pred,y):
        return nd.sum((-y * y_pred.log())) / len(y)
            
    def dataIter(self,x,y,batch_size):
        dataset = gdata.ArrayDataset(x,y)
        return gdata.DataLoader(dataset,batch_size,shuffle=True)
    
    def fit(self,x,y,learning_rate,epoches,batch_size):
        for epoch in range(epoches):
            for x_batch,y_batch in self.dataIter(x,y,batch_size):
                with autograd.record():
                    y_pred = self.forward(x_batch)
                    l = self.loss(y_pred,y_batch)
                l.backward()
                self._bgd([self.weight,self.bias],learning_rate,batch_size)
            if epoch % 50 == 0:
                y_all_pred = self.forward(x)
                print('epoch:{},loss:{},accuracy:{}'.format(epoch+50,self.loss(y_all_pred,y),self.accuracyScore(y_all_pred,y)))
            
    def predict(self,x):
        y_pred = self.forward(x)
        return y_pred.argmax(axis=0)
    
    def accuracyScore(self,y_pred,y):
        acc_sum = (y_pred.argmax(axis=1) == y.argmax(axis=1)).sum().asscalar()
        return acc_sum / len(y)
sfm_clf = softmaxClassifier(64,10)
sfm_clf.fit(features,labels_onehot,learning_rate=0.1,epoches=500,batch_size=200)
epoch:50,loss:
[1.9941667]
<NDArray 1 @cpu(0)>,accuracy:0.3550361713967724
epoch:100,loss:
[0.37214527]
<NDArray 1 @cpu(0)>,accuracy:0.9393433500278241
epoch:150,loss:
[0.25443634]
<NDArray 1 @cpu(0)>,accuracy:0.9549248747913188
epoch:200,loss:
[0.20699367]
<NDArray 1 @cpu(0)>,accuracy:0.9588202559821926
epoch:250,loss:
[0.1799827]
<NDArray 1 @cpu(0)>,accuracy:0.9660545353366722
epoch:300,loss:
[0.1619963]
<NDArray 1 @cpu(0)>,accuracy:0.9677239844184753
epoch:350,loss:
[0.14888664]
<NDArray 1 @cpu(0)>,accuracy:0.9716193656093489
epoch:400,loss:
[0.13875261]
<NDArray 1 @cpu(0)>,accuracy:0.9738452977184195
epoch:450,loss:
[0.13058177]
<NDArray 1 @cpu(0)>,accuracy:0.9760712298274903
epoch:500,loss:
[0.12379646]
<NDArray 1 @cpu(0)>,accuracy:0.9777406789092933
print('预测结果:',sfm_clf.predict(features[:10]))
print('真实结果:',labels[:10])
预测结果: 
[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]
<NDArray 10 @cpu(0)>
真实结果: 
[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]
<NDArray 10 @cpu(0)>

2.使用mxnet实现softmax分类

from mxnet import gluon,nd,autograd,init
from mxnet.gluon import nn,trainer,loss as gloss,data as gdata
# 定义模型
net = nn.Sequential()
net.add(nn.Dense(10))

# 初始化模型
net.initialize(init=init.Normal(sigma=0.01))

# 损失函数
loss = gloss.SoftmaxCrossEntropyLoss(sparse_label=False)

# 优化算法
optimizer = trainer.Trainer(net.collect_params(),'sgd',{'learning_rate':0.1})

# 训练
epoches = 500
batch_size = 200

dataset = gdata.ArrayDataset(features, labels_onehot)
data_iter = gdata.DataLoader(dataset,batch_size,shuffle=True)
for epoch in range(epoches):
    for x_batch,y_batch in data_iter:
        with autograd.record():
            l = loss(net.forward(x_batch), y_batch).sum() / batch_size
        l.backward()
        optimizer.step(batch_size)
    if epoch % 50 == 0:
        y_all_pred = net.forward(features)
        acc_sum = (y_all_pred.argmax(axis=1) == labels_onehot.argmax(axis=1)).sum().asscalar()
        print('epoch:{},loss:{},accuracy:{}'.format(epoch+50,loss(y_all_pred,labels_onehot).sum() / len(labels_onehot),acc_sum/len(y_all_pred)))
epoch:50,loss:
[2.1232333]
<NDArray 1 @cpu(0)>,accuracy:0.24652198107957707
epoch:100,loss:
[0.37193483]
<NDArray 1 @cpu(0)>,accuracy:0.9410127991096272
epoch:150,loss:
[0.25408813]
<NDArray 1 @cpu(0)>,accuracy:0.9543683917640512
epoch:200,loss:
[0.20680156]
<NDArray 1 @cpu(0)>,accuracy:0.9627156371730662
epoch:250,loss:
[0.1799252]
<NDArray 1 @cpu(0)>,accuracy:0.9666110183639399
epoch:300,loss:
[0.16203885]
<NDArray 1 @cpu(0)>,accuracy:0.9699499165275459
epoch:350,loss:
[0.14899409]
<NDArray 1 @cpu(0)>,accuracy:0.9738452977184195
epoch:400,loss:
[0.13890252]
<NDArray 1 @cpu(0)>,accuracy:0.9749582637729549
epoch:450,loss:
[0.13076076]
<NDArray 1 @cpu(0)>,accuracy:0.9755147468002225
epoch:500,loss:
[0.1239901]
<NDArray 1 @cpu(0)>,accuracy:0.9777406789092933