From 2edb75912124eaaa4e0eef332d4c2078bf9e252f Mon Sep 17 00:00:00 2001
From: Vinh Nguyen
Date: Wed, 24 Jul 2019 11:48:59 +1000
Subject: [PATCH 1/3] add auto mixed precision training support for dqn

---
 dopamine/agents/dqn/configs/dqn.gin | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/dopamine/agents/dqn/configs/dqn.gin b/dopamine/agents/dqn/configs/dqn.gin
index 13b5727d..0456f01e 100644
--- a/dopamine/agents/dqn/configs/dqn.gin
+++ b/dopamine/agents/dqn/configs/dqn.gin
@@ -1,6 +1,7 @@
 # Hyperparameters follow the classic Nature DQN, but we modify as necessary to
 # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples
 # comparison.
+import os
 import dopamine.discrete_domains.atari_lib
 import dopamine.discrete_domains.run_experiment
 import dopamine.agents.dqn.dqn_agent
@@ -16,7 +17,11 @@ DQNAgent.epsilon_train = 0.01
 DQNAgent.epsilon_eval = 0.001
 DQNAgent.epsilon_decay_period = 250000  # agent steps
 DQNAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
-DQNAgent.optimizer = @tf.train.RMSPropOptimizer()
+if os.environ.get('TF_ENABLE_AUTO_MIXED_PRECISION', default='0') == '1':
+  optimizer = tf.train.RMSPropOptimizer()
+  DQNAgent.optimizer = @tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer)
+else:
+  DQNAgent.optimizer = @tf.train.RMSPropOptimizer()
 tf.train.RMSPropOptimizer.learning_rate = 0.00025
 tf.train.RMSPropOptimizer.decay = 0.95

From 1ff6ee2761a73c24d487e73ee7aea1064825e260 Mon Sep 17 00:00:00 2001
From: Vinh Nguyen
Date: Wed, 24 Jul 2019 12:22:48 +1000
Subject: [PATCH 2/3] add auto mixed precision training support for rainbow agent

---
 dopamine/agents/dqn/configs/dqn.gin      | 7 +------
 dopamine/agents/rainbow/rainbow_agent.py | 4 +++-
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/dopamine/agents/dqn/configs/dqn.gin b/dopamine/agents/dqn/configs/dqn.gin
index 0456f01e..13b5727d 100644
--- a/dopamine/agents/dqn/configs/dqn.gin
+++ b/dopamine/agents/dqn/configs/dqn.gin
@@ -1,7 +1,6 @@
 # Hyperparameters follow the classic Nature DQN, but we modify as necessary to
 # match those used in Rainbow (Hessel et al., 2018), to ensure apples-to-apples
 # comparison.
-import os
 import dopamine.discrete_domains.atari_lib
 import dopamine.discrete_domains.run_experiment
 import dopamine.agents.dqn.dqn_agent
@@ -17,11 +16,7 @@ DQNAgent.epsilon_train = 0.01
 DQNAgent.epsilon_eval = 0.001
 DQNAgent.epsilon_decay_period = 250000  # agent steps
 DQNAgent.tf_device = '/gpu:0'  # use '/cpu:*' for non-GPU version
-if os.environ.get('TF_ENABLE_AUTO_MIXED_PRECISION', default='0') == '1':
-  optimizer = tf.train.RMSPropOptimizer()
-  DQNAgent.optimizer = @tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer)
-else:
-  DQNAgent.optimizer = @tf.train.RMSPropOptimizer()
+DQNAgent.optimizer = @tf.train.RMSPropOptimizer()
 tf.train.RMSPropOptimizer.learning_rate = 0.00025
 tf.train.RMSPropOptimizer.decay = 0.95

diff --git a/dopamine/agents/rainbow/rainbow_agent.py b/dopamine/agents/rainbow/rainbow_agent.py
index 7e3e4ebb..fb7c734d 100644
--- a/dopamine/agents/rainbow/rainbow_agent.py
+++ b/dopamine/agents/rainbow/rainbow_agent.py
@@ -38,7 +38,7 @@ from __future__ import print_function

 import collections
-
+import os
 from dopamine.agents.dqn import dqn_agent
@@ -127,6 +127,8 @@ def __init__(self,
     self._replay_scheme = replay_scheme
     # TODO(b/110897128): Make agent optimizer attribute private.
     self.optimizer = optimizer
+    if os.environ.get('TF_ENABLE_AUTO_MIXED_PRECISION', default='0') == '1':
+      self.optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer)

     dqn_agent.DQNAgent.__init__(
         self,

From 2ca68bbbd6936931d5749eaac26761cd5bf06e4c Mon Sep 17 00:00:00 2001
From: Vinh Nguyen
Date: Wed, 24 Jul 2019 12:35:06 +1000
Subject: [PATCH 3/3] add auto mixed precision training support for dqn agent

---
 dopamine/agents/dqn/dqn_agent.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/dopamine/agents/dqn/dqn_agent.py b/dopamine/agents/dqn/dqn_agent.py
index 2c2c9808..795d2ed1 100644
--- a/dopamine/agents/dqn/dqn_agent.py
+++ b/dopamine/agents/dqn/dqn_agent.py
@@ -181,6 +181,8 @@ def __init__(self,
     self.eval_mode = eval_mode
     self.training_steps = 0
     self.optimizer = optimizer
+    if os.environ.get('TF_ENABLE_AUTO_MIXED_PRECISION', default='0') == '1':
+      self.optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(optimizer)
     self.summary_writer = summary_writer
     self.summary_writing_frequency = summary_writing_frequency
     self.allow_partial_reload = allow_partial_reload
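
Note: all three patches use the same pattern, gating mixed precision behind the
TF_ENABLE_AUTO_MIXED_PRECISION environment variable and wrapping the agent's
optimizer with tf.train.experimental.enable_mixed_precision_graph_rewrite
(available from TensorFlow 1.14). Below is a minimal standalone sketch of that
pattern; the toy variable and loss are illustrative stand-ins for the agent's
Q-network and TD loss, and the RMSProp hyperparameters are the ones from
dqn.gin above.

    import os

    import tensorflow as tf

    # RMSProp hyperparameters taken from dopamine/agents/dqn/configs/dqn.gin.
    optimizer = tf.train.RMSPropOptimizer(learning_rate=0.00025, decay=0.95)

    # Same opt-in gate the patches use: when the variable is '1', the graph
    # rewrite runs eligible ops in float16 and wraps the optimizer with
    # automatic loss scaling.
    if os.environ.get('TF_ENABLE_AUTO_MIXED_PRECISION', default='0') == '1':
      optimizer = tf.train.experimental.enable_mixed_precision_graph_rewrite(
          optimizer)

    # Toy variable and loss standing in for the agent's network and TD loss.
    x = tf.Variable(1.0)
    loss = tf.square(x - 2.0)
    train_op = optimizer.minimize(loss)

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      sess.run(train_op)

The wrapper keeps variables in float32 and returns a loss-scaling optimizer,
which is what makes float16 gradients numerically stable; the rewrite is
designed for GPUs with Tensor Cores (Volta and newer).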