diff --git a/tensorflow_probability/python/optimizer/BUILD b/tensorflow_probability/python/optimizer/BUILD index 6662a9c33c..d57fcc27cd 100644 --- a/tensorflow_probability/python/optimizer/BUILD +++ b/tensorflow_probability/python/optimizer/BUILD @@ -30,9 +30,9 @@ py_library( srcs = ["__init__.py"], srcs_version = "PY2AND3", deps = [ - ":hager_zhang", ":sgld", ":variational_sgd", + "//tensorflow_probability/python/optimizer/linesearch", ], ) @@ -45,26 +45,6 @@ py_library( ], ) -py_library( - name = "hager_zhang", - srcs = ["linesearch/hager_zhang.py"], - srcs_version = "PY2AND3", - deps = [ - # tensorflow dep, - ], -) - -py_test( - name = "hager_zhang_test", - size = "medium", - srcs = ["linesearch/hager_zhang_test.py"], - deps = [ - # numpy dep, - # tensorflow dep, - "//tensorflow_probability", - ], -) - py_test( name = "sgld_test", size = "small", diff --git a/tensorflow_probability/python/optimizer/__init__.py b/tensorflow_probability/python/optimizer/__init__.py index 289bdfe0f5..9cb0ebede4 100644 --- a/tensorflow_probability/python/optimizer/__init__.py +++ b/tensorflow_probability/python/optimizer/__init__.py @@ -18,7 +18,7 @@ from __future__ import division from __future__ import print_function -from tensorflow_probability.python.optimizer.linesearch.hager_zhang import line_search as hz_line_search +from tensorflow_probability.python.optimizer import linesearch from tensorflow_probability.python.optimizer.sgld import StochasticGradientLangevinDynamics from tensorflow_probability.python.optimizer.variational_sgd import VariationalSGD @@ -26,7 +26,7 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ - 'hz_line_search', + 'linesearch', 'StochasticGradientLangevinDynamics', 'VariationalSGD', ] diff --git a/tensorflow_probability/python/optimizer/linesearch/BUILD b/tensorflow_probability/python/optimizer/linesearch/BUILD new file mode 100644 index 0000000000..9dd532c15f --- /dev/null +++ 
b/tensorflow_probability/python/optimizer/linesearch/BUILD
@@ -0,0 +1,57 @@
+# Copyright 2018 The TensorFlow Probability Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Line search optimization methods
+
+package(
+    default_visibility = [
+        "//tensorflow_probability:__subpackages__",
+    ],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+# Package target for the line-search API; its __init__.py re-exports hager_zhang.
+py_library(
+    name = "linesearch",
+    srcs = ["__init__.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":hager_zhang",
+    ],
+)
+
+# Hager-Zhang line search implementation.
+py_library(
+    name = "hager_zhang",
+    srcs = ["hager_zhang.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        # tensorflow dep,
+    ],
+)
+
+# Tests for the Hager-Zhang line search.
+py_test(
+    name = "hager_zhang_test",
+    size = "medium",
+    srcs = ["hager_zhang_test.py"],
+    deps = [
+        # numpy dep,
+        # tensorflow dep,
+        "//tensorflow_probability",
+    ],
+)
diff --git a/tensorflow_probability/python/optimizer/linesearch/__init__.py b/tensorflow_probability/python/optimizer/linesearch/__init__.py
index 30ddc4e382..080acc5b56 100644
--- a/tensorflow_probability/python/optimizer/linesearch/__init__.py
+++ b/tensorflow_probability/python/optimizer/linesearch/__init__.py
@@ -12,3 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
# ============================================================================ +"""Line-search optimizers package.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow_probability.python.optimizer.linesearch.hager_zhang import hager_zhang + +from tensorflow.python.util.all_util import remove_undocumented + +_allowed_symbols = [ + 'hager_zhang', +] + +remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow_probability/python/optimizer/linesearch/hager_zhang.py b/tensorflow_probability/python/optimizer/linesearch/hager_zhang.py index 282a7c7b65..f545accde1 100644 --- a/tensorflow_probability/python/optimizer/linesearch/hager_zhang.py +++ b/tensorflow_probability/python/optimizer/linesearch/hager_zhang.py @@ -36,6 +36,9 @@ from tensorflow.python.framework import smart_cond +__all__ = [ + 'hager_zhang', +] # Container to hold the function value and the derivative at a given point. # Each entry is a scalar tensor of real dtype. Used for internal data passing. @@ -76,7 +79,7 @@ ]) -def line_search(value_and_gradients_function, +def hager_zhang(value_and_gradients_function, initial_step_size=None, objective_at_zero=None, grad_objective_at_zero=None, @@ -137,8 +140,8 @@ def line_search(value_and_gradients_function, value_and_gradients_function = lambda x: ((x - 1.3) ** 2, 2 * (x-1.3)) # Set initial step size. step_size = tf.constant(0.1) - ls_result = tfp.optimizer.hz_line_search(value_and_gradients_function, - initial_step_size=step_size) + ls_result = tfp.optimizer.linesearch.hager_zhang( + value_and_gradients_function, initial_step_size=step_size) # Evaluate the results. with tf.Session() as session: results = session.run(ls_result) @@ -209,7 +212,7 @@ def line_search(value_and_gradients_function, by `1.`. Corresponds to 'sigma' in the terminology of [Hager and Zhang (2006)][2]. name: (Optional) Python str. The name prefixed to the ops created by this - function. 
If not supplied, the default name 'line_search' is used. + function. If not supplied, the default name 'hager_zhang' is used. Returns: results: A namedtuple containing the following attributes. @@ -244,7 +247,7 @@ def line_search(value_and_gradients_function, If converged is True, it is equal to the dfn_step. Otherwise it corresponds to the last interval computed. """ - with tf.name_scope(name, 'line_search', + with tf.name_scope(name, 'hager_zhang', [initial_step_size, objective_at_zero, grad_objective_at_zero, diff --git a/tensorflow_probability/python/optimizer/linesearch/hager_zhang_test.py b/tensorflow_probability/python/optimizer/linesearch/hager_zhang_test.py index a102a608fe..f54a65e431 100644 --- a/tensorflow_probability/python/optimizer/linesearch/hager_zhang_test.py +++ b/tensorflow_probability/python/optimizer/linesearch/hager_zhang_test.py @@ -45,7 +45,7 @@ def test_quadratic(self): # Case 1: The starting value is close to 0 and doesn't bracket the min. with self.test_session() as session: close_start, far_start = tf.constant(0.1), tf.constant(7.0) - results_close = session.run(tfp.optimizer.hz_line_search( + results_close = session.run(tfp.optimizer.linesearch.hager_zhang( fdf, initial_step_size=close_start)) self.assertTrue(results_close.converged) self.assertAlmostEqual(results_close.left_pt, results_close.right_pt) @@ -57,7 +57,7 @@ def test_quadratic(self): df0, 0.1, 0.9)) - results_far = session.run(tfp.optimizer.hz_line_search( + results_far = session.run(tfp.optimizer.linesearch.hager_zhang( fdf, initial_step_size=far_start)) self.assertTrue(results_far.converged) self.assertAlmostEqual(results_far.left_pt, results_far.right_pt) @@ -81,7 +81,7 @@ def fdf(x): starts = (tf.constant(0.1), tf.constant(1.5), tf.constant(2.0), tf.constant(4.0)) for start in starts: - results = session.run(tfp.optimizer.hz_line_search( + results = session.run(tfp.optimizer.linesearch.hager_zhang( fdf, initial_step_size=start)) self.assertTrue(results.converged) 
self.assertAlmostEqual(results.left_pt, results.right_pt) @@ -130,8 +130,8 @@ def fdf(t): coord = x0 + t * dirn ft, df = rosenbrock(coord) return ft, tf.reduce_sum(df * dirn) - results = session.run(tfp.optimizer.hz_line_search(fdf, - initial_step_size=1.0)) + results = session.run(tfp.optimizer.linesearch.hager_zhang( + fdf, initial_step_size=1.0)) self.assertTrue(results.converged) def test_eval_count(self): @@ -152,7 +152,7 @@ def _fdf(x): for start in starts: fdf, counter = get_fn() - results = tfp.optimizer.hz_line_search( + results = tfp.optimizer.linesearch.hager_zhang( fdf, initial_step_size=tf.constant(start)) init = tf.global_variables_initializer() with self.test_session() as session: @@ -178,7 +178,7 @@ def fdf(x): with self.test_session() as session: start = tf.constant(dtype(1e-8)) results = session.run( - tfp.optimizer.hz_line_search( + tfp.optimizer.linesearch.hager_zhang( fdf, initial_step_size=start, sufficient_decrease_param=0.1, @@ -209,7 +209,7 @@ def fdf(x): def get_results(): with self.test_session() as session: start = tf.constant(0.9) - results = tfp.optimizer.hz_line_search( + results = tfp.optimizer.linesearch.hager_zhang( fdf, initial_step_size=start, sufficient_decrease_param=0.1, @@ -235,7 +235,7 @@ def rastrigin(x, use_np=False): with self.test_session() as session: start = tf.constant(0.1, dtype=tf.float64) results = session.run( - tfp.optimizer.hz_line_search( + tfp.optimizer.linesearch.hager_zhang( rastrigin, initial_step_size=start, sufficient_decrease_param=0.1,