tensorflow 在 checkpoint 中记录 global_step
·
新增加一个变量:
...
# Non-trainable counter persisted in the checkpoint so training can resume
# from the cumulative step count instead of restarting at 0.
global_step = tf.Variable(0, name='global_step', trainable=False)
# Saver over ALL global variables (including global_step); max_to_keep=1
# keeps only the most recent checkpoint on disk.
saver = tf.train.Saver(tf.global_variables(),max_to_keep=1)
加载 checkpoint 时读入变量 restored_global_step :
restored_global_step = 0
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # NOTE: model_dir is the DIRECTORY that contains the checkpoint files,
    # not the path of a single checkpoint file.
    ckpt = tf.train.get_checkpoint_state(model_dir)
    if ckpt and ckpt.model_checkpoint_path:
        # model_checkpoint_path points at the most recent checkpoint;
        # restoring it also restores the saved global_step value.
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored ...")
        restored_global_step = global_step.eval()
        print('restored_global_step: ', restored_global_step)
    else:
        print('Start from scratch ...')

    # Build the assign op ONCE, outside the training loop. Calling
    # tf.assign(...) inside the loop (as the original did) adds a new node
    # to the graph on every save, so the graph grows without bound.
    new_step_ph = tf.placeholder(tf.int32, shape=[], name='new_global_step')
    update_global_step = tf.assign(global_step, new_step_ph)

    for step in range(max_steps):
        ...
        if step % 10 == 0 or (step + 1) == max_steps:
            checkpoint_path = os.path.join(model_dir, 'model.ckpt')
            # Cumulative step across runs. NOTE(review): on resume, step 0
            # re-saves the same g_step as the restored checkpoint — harmless
            # with max_to_keep=1, but worth confirming for your numbering.
            g_step = restored_global_step + step
            # Write the new count into the in-graph variable so it is
            # captured by the checkpoint that follows.
            sess.run(update_global_step, feed_dict={new_step_ph: g_step})
            # global_step= suffixes the checkpoint filename with g_step.
            saver.save(sess, checkpoint_path, global_step=g_step)
这样一来每次保存 checkpoint 时,都会记录下 global_step,而且 checkpoint 的文件名也是累计次数的
更多推荐
已为社区贡献6条内容
所有评论(0)