Task:

  • Implement dropout from scratch and with mxnet

Dataset:

  • The load_digits() handwritten-digit dataset

Requirements:

  • One hidden layer with n_hidden1 = 36, ReLU activation, and the softmax cross-entropy loss (see the formula below)
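
For reference, with one-hot labels y and predicted class probabilities ŷ = softmax(o) over the 10 digit classes, the loss computed by the code in this post is the batch average

  \ell = -\frac{1}{n}\sum_{i=1}^{n}\sum_{k=1}^{10} y_{ik}\,\log\hat{y}_{ik}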

Notes:

  • How the drop function is implemented
  • How dropout differs between training and testing (see the sketch below)
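
Both notes come down to inverted dropout: during training each activation is zeroed with probability p and the survivors are scaled by 1/(1-p) so the expected activation stays the same, while at test time the input passes through untouched. A minimal sketch (drop_sketch is just an illustrative name; the drop method in section 1 follows the same pattern):

from mxnet import nd

def drop_sketch(x, p, train=True):
    if train:
        # keep each element with probability 1-p and rescale the survivors
        mask = nd.random.uniform(0, 1, shape=x.shape) > p
        return mask * x / (1 - p)
    # at test time dropout is a no-op: use the full activations
    return x

x = nd.ones(shape=(2, 8))
print(drop_sketch(x, 0.5, train=True))   # roughly half zeros, surviving entries scaled to 2
print(drop_sketch(x, 0.5, train=False))  # returned unchanged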

1. Implementing dropout from scratch

from sklearn import datasets
from mxnet import gluon,nd,autograd,init
from mxnet.gluon import nn,data as gdata,loss as gloss,trainer
# load the dataset
digits = datasets.load_digits()
features,labels = nd.array(digits['data']),nd.array(digits['target'])
print(features.shape,labels.shape)
labels_onehot = nd.one_hot(labels,10)
print(labels_onehot.shape)
(1797, 64) (1797,)
(1797, 10)
class NeuroNet:
    def __init__(self, n_inputs, n_hidden1, n_outputs):
        hidden_layer = Layer(n_inputs, n_hidden1)
        output_layer = Layer(n_hidden1, n_outputs)
        self.layers = [hidden_layer, output_layer]
        for layer in self.layers:
            for param in layer.params:
                param.attach_grad()

    def softmax(self, x):
        step1 = x.exp()
        step2 = step1 / step1.sum(axis=1, keepdims=True)
        return step2

    def softmaxCrossEntropyLoss(self, y_pred, y):
        step1 = -y * y_pred.log()
        step2 = step1.sum(axis=1)
        loss = step2.sum(axis=0) / len(y)
        return loss

    def drop(self, x, drop_probability, train=True):
        '''
        Drop each neuron with probability p; only applied during training.
        '''
        if train:
            mask = nd.random.uniform(0, 1, shape=x.shape, dtype='float32') > drop_probability
            return mask * x / (1 - drop_probability)
        else:
            return x

    def forward(self, x, train=True):
        for layer in self.layers[:-1]:
            step1 = layer.forward(x)
            step2 = self.drop(step1, 0.2, train)
            x = step2
        output_layer = self.layers[-1]
        return self.softmax(output_layer.forward(x))

    def sgd(self, learning_rate, batch_size):
        '''
        Update all weights and biases with mini-batch stochastic gradient descent.
        '''
        for layer in self.layers:
            layer.sgd(learning_rate, batch_size)

    def dataIter(self, x, y, batch_size):
        dataset = gdata.ArrayDataset(x, y)
        return gdata.DataLoader(dataset, batch_size, shuffle=True)

    def fit(self, x, y, epoches, batch_size, learning_rate):
        for epoch in range(epoches):
            for x_batch, y_batch in self.dataIter(x, y, batch_size):
                with autograd.record():
                    y_pred = self.forward(x_batch, train=True)
                    loss = self.softmaxCrossEntropyLoss(y_pred, y_batch)
                loss.backward()
                self.sgd(learning_rate, batch_size)
            if epoch % 50 == 0:
                # evaluate on the full data set with dropout turned off
                y_pred_all = self.forward(x, train=False)
                loss_all = self.softmaxCrossEntropyLoss(y_pred_all, y)
                accuracy_score = self.accuracyScore(y_pred_all, y)
                print('epoch:{},loss:{},accuracy:{}'.format(epoch + 50, loss_all, accuracy_score))

    def predict(self, x):
        # inference: dropout disabled, predicted class is the argmax over the class axis
        y_pred = self.forward(x, train=False)
        return y_pred.argmax(axis=1)

    def accuracyScore(self, y_pred, y):
        acc_sum = (y_pred.argmax(axis=1) == y.argmax(axis=1)).sum().asscalar()
        return acc_sum / len(y)

class Layer:
    def __init__(self, n_inputs, n_outputs):
        weight = nd.random.normal(scale=0.01, shape=(n_inputs, n_outputs))
        bias = nd.zeros(shape=(n_outputs,))
        self.params = [weight, bias]

    def relu(self, x):
        return nd.maximum(x, 0)

    def forward(self, x):
        step1 = nd.dot(x, self.params[0]) + self.params[1]
        return self.relu(step1)

    def sgd(self, learning_rate, batch_size):
        for param in self.params:
            param[:] = param - learning_rate * param.grad / batch_size

    def print_params(self):
        for param in self.params:
            print(param)
net = NeuroNet(64,36,10)
net.fit(features,labels_onehot,epoches=500,batch_size=200,learning_rate=0.5)
epoch:50,loss:
[2.2988722]
<NDArray 1 @cpu(0)>,accuracy:0.18308291597106288
epoch:100,loss:
[1.4126126]
<NDArray 1 @cpu(0)>,accuracy:0.7395659432387313
epoch:150,loss:
[0.46316707]
<NDArray 1 @cpu(0)>,accuracy:0.9259877573734001
epoch:200,loss:
[0.24678323]
<NDArray 1 @cpu(0)>,accuracy:0.9493600445186422
epoch:250,loss:
[0.17839472]
<NDArray 1 @cpu(0)>,accuracy:0.9610461880912632
epoch:300,loss:
[0.14298467]
<NDArray 1 @cpu(0)>,accuracy:0.9688369504730105
epoch:350,loss:
[0.1198809]
<NDArray 1 @cpu(0)>,accuracy:0.9738452977184195
epoch:400,loss:
[0.10388324]
<NDArray 1 @cpu(0)>,accuracy:0.9782971619365609
epoch:450,loss:
[0.0917427]
<NDArray 1 @cpu(0)>,accuracy:0.9827490261547023
epoch:500,loss:
[0.08237094]
<NDArray 1 @cpu(0)>,accuracy:0.9849749582637729
print('Predicted:', net.predict(features[:10]))
print('Actual:', labels[:10])
Predicted:
[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]
<NDArray 10 @cpu(0)>
Actual:
[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]
<NDArray 10 @cpu(0)>

2. Implementing dropout with mxnet

n_inputs = 64
n_hiddens = 36
n_outputs = 10

# define the model
net = nn.Sequential()
net.add(nn.Dense(n_hiddens, activation='relu'))
net.add(nn.Dropout(rate=0.2))
net.add(nn.Dense(n_outputs))

# initialize the model parameters
net.initialize(init.Normal(sigma=0.01))

# loss function and optimizer
loss = gloss.SoftmaxCrossEntropyLoss(sparse_label=False)
optimizer = trainer.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.5})

# train the model
epoches = 500
batch_size = 200
dataset = gdata.ArrayDataset(features, labels_onehot)
dataIter = gdata.DataLoader(dataset, batch_size, shuffle=True)
for epoch in range(epoches):
    for x_batch, y_batch in dataIter:
        with autograd.record():
            y_pred = net.forward(x_batch)
            l = loss(y_pred, y_batch).sum() / batch_size
        l.backward()
        optimizer.step(batch_size)
    if epoch % 50 == 0:
        y_all_pred = net.forward(features)
        acc_sum = (y_all_pred.argmax(axis=1) == labels_onehot.argmax(axis=1)).sum().asscalar()
        print('epoch:{},loss:{},accuracy:{}'.format(
            epoch + 50,
            loss(y_all_pred, labels_onehot).sum() / len(labels_onehot),
            acc_sum / len(y_all_pred)))
epoch:50,loss:
[2.2981045]
<NDArray 1 @cpu(0)>,accuracy:0.16304952698942682
epoch:100,loss:
[0.97166663]
<NDArray 1 @cpu(0)>,accuracy:0.867557039510295
epoch:150,loss:
[0.3836201]
<NDArray 1 @cpu(0)>,accuracy:0.9243183082915971
epoch:200,loss:
[0.24329802]
<NDArray 1 @cpu(0)>,accuracy:0.9449081803005008
epoch:250,loss:
[0.18068495]
<NDArray 1 @cpu(0)>,accuracy:0.9577072899276572
epoch:300,loss:
[0.14546551]
<NDArray 1 @cpu(0)>,accuracy:0.9660545353366722
epoch:350,loss:
[0.1219953]
<NDArray 1 @cpu(0)>,accuracy:0.9727323316638843
epoch:400,loss:
[0.10563282]
<NDArray 1 @cpu(0)>,accuracy:0.9760712298274903
epoch:450,loss:
[0.09357208]
<NDArray 1 @cpu(0)>,accuracy:0.9788536449638287
epoch:500,loss:
[0.08368526]
<NDArray 1 @cpu(0)>,accuracy:0.9816360601001669
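
To predict with the trained Gluon model no extra flag is needed: outside of autograd.record() the network runs in inference mode, so the Dropout layer is skipped automatically, mirroring train=False in the from-scratch version. A usage sketch:

y_pred = net(features[:10]).argmax(axis=1)
print('Predicted:', y_pred)
print('Actual:', labels[:10])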
