TensorFlow使用记录 (十二): ℓ1 and ℓ2 Regularization
2024-09-05 10:50:00
实现方式
以 ℓ2 Regularization 为例，主要有两种实现方式：
1. 手动累加
# Method 1: accumulate the L2 penalty by hand over the trainable variables.
with tf.name_scope('loss'):
    # Labels are expected to be one-hot encoded.
    loss = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=logits)

    # Variables whose name contains one of these tags are left unregularized
    # (presumably batch-norm / group-norm parameters).
    skip_tags = ('bn', 'batchnorm', 'batch_norm', 'batch_normalization', 'gn')

    l2_reg_loss = tf.constant(0.0, tf.float32)
    for var in tf.trainable_variables():
        if not any(tag in var.name for tag in skip_tags):
            l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(var))
    l2_reg_loss *= 0.001  # regularization strength

    # Total training loss = cross-entropy + scaled L2 penalty.
    loss = loss + l2_reg_loss
2. 借助于 kernel_regularizer
# Method 2: attach a regularizer to each layer and collect the penalties later.
with tf.name_scope('dnn'):
    hidden1 = tf.layers.dense(
        X,
        300,
        kernel_initializer=he_init,
        name='hidden1',
        kernel_regularizer=tf.contrib.layers.l2_regularizer(0.001),
    )
    # ...... (remaining layers omitted)

with tf.name_scope('loss'):
    # Labels are expected to be one-hot encoded.
    loss = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=logits)
    # Penalties registered by the layers are read back from this collection
    # and summed together with the cross-entropy term.
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    loss = tf.add_n([loss] + reg_losses)
实例验证：在同一个已训练好的模型上同时计算两种方式的正则项并进行对比（方式 1 遍历所有未被过滤的可训练变量，应当还包含各层的 bias，因此数值会略大于方式 2）。
# Sanity check for the two L2-regularization implementations: both penalties
# are built on the same MNIST classifier, a trained checkpoint is restored,
# and the two values are printed side by side. No training step is executed.
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Regularization strength shared by both implementations so they stay in sync.
REG_SCALE = 0.001

# 1. create data
mnist = input_data.read_data_sets('../MNIST_data', one_hot=True)

X = tf.placeholder(tf.float32, shape=(None, 784), name='X')
# Labels are fed one-hot (one_hot=True above), one column per output class.
y = tf.placeholder(tf.int32, shape=(None, 10), name='y')
is_training = tf.placeholder(tf.bool, None, name='is_training')

# 2. define network
he_init = tf.contrib.layers.variance_scaling_initializer()
with tf.name_scope('dnn'):
    hidden1 = tf.layers.dense(
        X, 300, kernel_initializer=he_init, name='hidden1',
        kernel_regularizer=tf.contrib.layers.l2_regularizer(REG_SCALE))
    # `training` has to be passed to every batch-norm layer; without it the
    # layer stays in inference mode regardless of the is_training feed.
    hidden1 = tf.layers.batch_normalization(
        hidden1, training=is_training, momentum=0.9)
    hidden1 = tf.nn.relu(hidden1)

    hidden2 = tf.layers.dense(
        hidden1, 100, kernel_initializer=he_init, name='hidden2',
        kernel_regularizer=tf.contrib.layers.l2_regularizer(REG_SCALE))
    hidden2 = tf.layers.batch_normalization(
        hidden2, training=is_training, momentum=0.9)
    hidden2 = tf.nn.relu(hidden2)

    logits = tf.layers.dense(
        hidden2, 10, kernel_initializer=he_init, name='output',
        kernel_regularizer=tf.contrib.layers.l2_regularizer(REG_SCALE))

# 3. define loss
with tf.name_scope('loss'):
    # Plain cross-entropy on one-hot labels. Neither penalty below is added to
    # it in this script, so the two penalties can be inspected independently.
    loss = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=logits)

    # ---- Method 2: penalties registered through kernel_regularizer ----
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    reg_loss = tf.reduce_sum(reg_losses)
    # To train with this penalty: loss = tf.add_n([loss] + reg_losses)

    # ---- Method 1: manual accumulation over trainable variables ----
    # Variables whose name contains one of these tags are skipped
    # (presumably batch-norm / group-norm parameters).
    norm_tags = ('bn', 'batchnorm', 'batch_norm', 'batch_normalization', 'gn')
    l2_reg_loss = tf.constant(0.0, tf.float32)
    for vv in tf.trainable_variables():
        if any(tag in vv.name for tag in norm_tags):
            continue
        l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(vv))
    l2_reg_loss *= REG_SCALE
    # To train with this penalty: loss = loss + l2_reg_loss

# 4. define optimizer
learning_rate_init = 0.01
global_step = tf.Variable(0, trainable=False)
with tf.name_scope('train'):
    learning_rate = tf.train.polynomial_decay(  # polynomial decay schedule
        learning_rate=learning_rate_init,  # initial learning rate
        global_step=global_step,  # current training step
        decay_steps=22000,  # steps over which the rate decays to end_learning_rate
        end_learning_rate=learning_rate_init / 10,  # final (minimum) learning rate
        power=0.9,
        cycle=False
    )
    # Batch normalization registers its moving-average updates in UPDATE_OPS;
    # make the train op depend on them.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer_op = tf.train.MomentumOptimizer(
            learning_rate=learning_rate, momentum=0.9).minimize(
            loss=loss,
            var_list=tf.trainable_variables(),
            # Without global_step the step counter is never incremented and
            # the learning rate schedule does not advance.
            global_step=global_step
        )

with tf.name_scope('eval'):
    # True where the target class is among the top-1 predictions;
    # in_top_k needs integer class indices, hence the argmax over one-hot y.
    correct = tf.nn.in_top_k(logits, tf.argmax(y, axis=1), 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

# 5. initialize
# NOTE: init_op is built but not run below; variables come from the checkpoint.
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
saver = tf.train.Saver()

# 6. evaluate the restored model (optimizer_op is never run here)
n_epochs = 1
batch_size = 55000
loss_fetches = [loss, l2_reg_loss, reg_loss]
with tf.Session() as sess:
    saver.restore(sess, './my_model_final.ckpt')
    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            loss_, l2_reg_loss_, reg_loss_ = sess.run(
                loss_fetches,
                feed_dict={X: X_batch, y: y_batch, is_training: True})

    # Accuracy on the last training batch.
    acc_train = accuracy.eval(
        feed_dict={X: X_batch, y: y_batch, is_training: False})

    # One forward pass over the test set yields all four test metrics.
    test_feed = {X: mnist.test.images, y: mnist.test.labels, is_training: False}
    loss_test, l2_reg_loss_test, reg_loss_test, acc_test = sess.run(
        loss_fetches + [accuracy], feed_dict=test_feed)

    print("Train loss:", loss_, "Train l2_reg_loss:", l2_reg_loss_, "Train reg_loss:", reg_loss_, "Train accuracy:", acc_train)
    print("Test loss:", loss_test, "Test l2_reg_loss:", l2_reg_loss_test, "Test reg_loss:", reg_loss_test, "Test accuracy:", acc_test)

# Output recorded by the author for one run. It predates passing
# `training=is_training` to the first batch-norm layer, so the cross-entropy
# figures may differ slightly; the two regularization values depend only on
# the restored weights. l2_reg_loss is marginally larger than reg_loss,
# presumably because the manual loop also covers the dense-layer biases.
"""
# =================
Train loss: 0.000636433 Train l2_reg_loss: 0.48696715 Train reg_loss: 0.48683384 Train accuracy: 1.0
Test loss: 0.059231624 Test l2_reg_loss: 0.48696715 Test reg_loss: 0.48683384 Test accuracy: 0.983
"""
最新文章
- 如何处理json数据
- loadView加载(变换成ScrollView)
- 开源工作流引擎CCFlow 学习专区
- android开源项目---项目篇
- hadoop集群中的日志文件
- Linux SSH 互信
- Scala入门系列(十三):类型参数
- splay模板(BZOJ3224)
- listview下拉刷新上拉加载扩展(一)
- cocos2d-js(二)cocos2d-js的基本语法与类的简介
- Python_正则表达式一
- 进击Node.js基础(一)
- C# Post方式下,取得其它端传过来的数据
- Tesseract训练
- Django请求生命周期
- python:实例化configparser模块读写配置文件
- 百度的富文本编辑器UEditor批量添加图片自动加上宽度和高度的属性
- Qt的Radio Button(单选按钮)
- TP框架做网页静态化
- Java 集合系列Stack详细介绍(源码解析)和使用示例