diff --git a/tensorflow_probability/python/optimizer/BUILD b/tensorflow_probability/python/optimizer/BUILD
index 6662a9c33c..d57fcc27cd 100644
--- a/tensorflow_probability/python/optimizer/BUILD
+++ b/tensorflow_probability/python/optimizer/BUILD
@@ -30,9 +30,9 @@ py_library(
     srcs = ["__init__.py"],
     srcs_version = "PY2AND3",
     deps = [
-        ":hager_zhang",
         ":sgld",
         ":variational_sgd",
+        "//tensorflow_probability/python/optimizer/linesearch",
     ],
 )
 
@@ -45,26 +45,6 @@ py_library(
     ],
 )
 
-py_library(
-    name = "hager_zhang",
-    srcs = ["linesearch/hager_zhang.py"],
-    srcs_version = "PY2AND3",
-    deps = [
-        # tensorflow dep,
-    ],
-)
-
-py_test(
-    name = "hager_zhang_test",
-    size = "medium",
-    srcs = ["linesearch/hager_zhang_test.py"],
-    deps = [
-        # numpy dep,
-        # tensorflow dep,
-        "//tensorflow_probability",
-    ],
-)
-
 py_test(
     name = "sgld_test",
     size = "small",
diff --git a/tensorflow_probability/python/optimizer/__init__.py b/tensorflow_probability/python/optimizer/__init__.py
index 289bdfe0f5..9cb0ebede4 100644
--- a/tensorflow_probability/python/optimizer/__init__.py
+++ b/tensorflow_probability/python/optimizer/__init__.py
@@ -18,7 +18,7 @@
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow_probability.python.optimizer.linesearch.hager_zhang import line_search as hz_line_search
+from tensorflow_probability.python.optimizer import linesearch
 from tensorflow_probability.python.optimizer.sgld import StochasticGradientLangevinDynamics
 from tensorflow_probability.python.optimizer.variational_sgd import VariationalSGD
 
@@ -26,7 +26,7 @@
 from tensorflow.python.util.all_util import remove_undocumented
 
 _allowed_symbols = [
-    'hz_line_search',
+    'linesearch',
     'StochasticGradientLangevinDynamics',
     'VariationalSGD',
 ]
diff --git a/tensorflow_probability/python/optimizer/linesearch/BUILD b/tensorflow_probability/python/optimizer/linesearch/BUILD
new file mode 100644
index 0000000000..9dd532c15f
--- /dev/null
+++ b/tensorflow_probability/python/optimizer/linesearch/BUILD
@@ -0,0 +1,57 @@
+# Copyright 2018 The TensorFlow Probability Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Line search optimization methods
+
+package(
+    default_visibility = [
+        "//tensorflow_probability:__subpackages__",
+    ],
+)
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+# Package entry point: its __init__.py re-exports the line search functions.
+py_library(
+    name = "linesearch",
+    srcs = ["__init__.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":hager_zhang",
+    ],
+)
+
+# Implementation of the Hager-Zhang line search.
+py_library(
+    name = "hager_zhang",
+    srcs = ["hager_zhang.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        # tensorflow dep,
+    ],
+)
+
+# Calls the search via `tfp.optimizer.linesearch`, hence the top-level dep.
+py_test(
+    name = "hager_zhang_test",
+    size = "medium",
+    srcs = ["hager_zhang_test.py"],
+    deps = [
+        # numpy dep,
+        # tensorflow dep,
+        "//tensorflow_probability",
+    ],
+)
diff --git a/tensorflow_probability/python/optimizer/linesearch/__init__.py b/tensorflow_probability/python/optimizer/linesearch/__init__.py
index 30ddc4e382..080acc5b56 100644
--- a/tensorflow_probability/python/optimizer/linesearch/__init__.py
+++ b/tensorflow_probability/python/optimizer/linesearch/__init__.py
@@ -12,3 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ============================================================================
+"""Line search methods; exports the `hager_zhang` line search function."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow_probability.python.optimizer.linesearch.hager_zhang import hager_zhang
+
+from tensorflow.python.util.all_util import remove_undocumented
+
+_allowed_symbols = [
+    'hager_zhang',
+]
+
+remove_undocumented(__name__, _allowed_symbols)
diff --git a/tensorflow_probability/python/optimizer/linesearch/hager_zhang.py b/tensorflow_probability/python/optimizer/linesearch/hager_zhang.py
index 282a7c7b65..f545accde1 100644
--- a/tensorflow_probability/python/optimizer/linesearch/hager_zhang.py
+++ b/tensorflow_probability/python/optimizer/linesearch/hager_zhang.py
@@ -36,6 +36,9 @@
 
 from tensorflow.python.framework import smart_cond
 
+__all__ = [
+    'hager_zhang',
+]
 
 # Container to hold the function value and the derivative at a given point.
 # Each entry is a scalar tensor of real dtype. Used for internal data passing.
@@ -76,7 +79,7 @@
     ])
 
 
-def line_search(value_and_gradients_function,
+def hager_zhang(value_and_gradients_function,
                 initial_step_size=None,
                 objective_at_zero=None,
                 grad_objective_at_zero=None,
@@ -137,8 +140,8 @@ def line_search(value_and_gradients_function,
     value_and_gradients_function = lambda x: ((x - 1.3) ** 2, 2 * (x-1.3))
     # Set initial step size.
     step_size = tf.constant(0.1)
-    ls_result = tfp.optimizer.hz_line_search(value_and_gradients_function,
-                                             initial_step_size=step_size)
+    ls_result = tfp.optimizer.linesearch.hager_zhang(
+        value_and_gradients_function, initial_step_size=step_size)
     # Evaluate the results.
     with tf.Session() as session:
       results = session.run(ls_result)
@@ -209,7 +212,7 @@ def line_search(value_and_gradients_function,
       by `1.`. Corresponds to 'sigma' in the terminology of
       [Hager and Zhang (2006)][2].
     name: (Optional) Python str. The name prefixed to the ops created by this
-      function. If not supplied, the default name 'line_search' is used.
+      function. If not supplied, the default name 'hager_zhang' is used.
 
   Returns:
     results: A namedtuple containing the following attributes.
@@ -244,7 +247,7 @@ def line_search(value_and_gradients_function,
         If converged is True, it is equal to the dfn_step.
         Otherwise it corresponds to the last interval computed.
   """
-  with tf.name_scope(name, 'line_search',
+  with tf.name_scope(name, 'hager_zhang',
                      [initial_step_size,
                       objective_at_zero,
                       grad_objective_at_zero,
diff --git a/tensorflow_probability/python/optimizer/linesearch/hager_zhang_test.py b/tensorflow_probability/python/optimizer/linesearch/hager_zhang_test.py
index a102a608fe..f54a65e431 100644
--- a/tensorflow_probability/python/optimizer/linesearch/hager_zhang_test.py
+++ b/tensorflow_probability/python/optimizer/linesearch/hager_zhang_test.py
@@ -45,7 +45,7 @@ def test_quadratic(self):
     # Case 1: The starting value is close to 0 and doesn't bracket the min.
     with self.test_session() as session:
       close_start, far_start = tf.constant(0.1), tf.constant(7.0)
-      results_close = session.run(tfp.optimizer.hz_line_search(
+      results_close = session.run(tfp.optimizer.linesearch.hager_zhang(
           fdf, initial_step_size=close_start))
       self.assertTrue(results_close.converged)
       self.assertAlmostEqual(results_close.left_pt, results_close.right_pt)
@@ -57,7 +57,7 @@ def test_quadratic(self):
                                       df0,
                                       0.1,
                                       0.9))
-      results_far = session.run(tfp.optimizer.hz_line_search(
+      results_far = session.run(tfp.optimizer.linesearch.hager_zhang(
           fdf, initial_step_size=far_start))
       self.assertTrue(results_far.converged)
       self.assertAlmostEqual(results_far.left_pt, results_far.right_pt)
@@ -81,7 +81,7 @@ def fdf(x):
       starts = (tf.constant(0.1), tf.constant(1.5), tf.constant(2.0),
                 tf.constant(4.0))
       for start in starts:
-        results = session.run(tfp.optimizer.hz_line_search(
+        results = session.run(tfp.optimizer.linesearch.hager_zhang(
             fdf, initial_step_size=start))
         self.assertTrue(results.converged)
         self.assertAlmostEqual(results.left_pt, results.right_pt)
@@ -130,8 +130,8 @@ def fdf(t):
         coord = x0 + t * dirn
         ft, df = rosenbrock(coord)
         return ft, tf.reduce_sum(df * dirn)
-      results = session.run(tfp.optimizer.hz_line_search(fdf,
-                                                         initial_step_size=1.0))
+      results = session.run(tfp.optimizer.linesearch.hager_zhang(
+          fdf, initial_step_size=1.0))
       self.assertTrue(results.converged)
 
   def test_eval_count(self):
@@ -152,7 +152,7 @@ def _fdf(x):
 
     for start in starts:
       fdf, counter = get_fn()
-      results = tfp.optimizer.hz_line_search(
+      results = tfp.optimizer.linesearch.hager_zhang(
           fdf, initial_step_size=tf.constant(start))
       init = tf.global_variables_initializer()
       with self.test_session() as session:
@@ -178,7 +178,7 @@ def fdf(x):
     with self.test_session() as session:
       start = tf.constant(dtype(1e-8))
       results = session.run(
-          tfp.optimizer.hz_line_search(
+          tfp.optimizer.linesearch.hager_zhang(
               fdf,
               initial_step_size=start,
               sufficient_decrease_param=0.1,
@@ -209,7 +209,7 @@ def fdf(x):
     def get_results():
       with self.test_session() as session:
         start = tf.constant(0.9)
-        results = tfp.optimizer.hz_line_search(
+        results = tfp.optimizer.linesearch.hager_zhang(
             fdf,
             initial_step_size=start,
             sufficient_decrease_param=0.1,
@@ -235,7 +235,7 @@ def rastrigin(x, use_np=False):
     with self.test_session() as session:
       start = tf.constant(0.1, dtype=tf.float64)
       results = session.run(
-          tfp.optimizer.hz_line_search(
+          tfp.optimizer.linesearch.hager_zhang(
               rastrigin,
               initial_step_size=start,
               sufficient_decrease_param=0.1,