diff --git a/pdftotree/TreeExtract.py b/pdftotree/TreeExtract.py
index 7588337..9ae9ce3 100644
--- a/pdftotree/TreeExtract.py
+++ b/pdftotree/TreeExtract.py
@@ -2,7 +2,7 @@
 import logging
 import os
 from functools import cmp_to_key
-from typing import Any, Dict, List, Tuple
+from typing import Any, Dict, List, Optional, Tuple
 from xml.dom.minidom import Document, Element
 
 import numpy as np
@@ -29,6 +29,8 @@
 from pdftotree.utils.pdf.pdf_utils import CustomPDFPageAggregator, PDFElems
 from pdftotree.utils.pdf.vector_utils import column_order, reading_order
 
+logger = logging.getLogger(__name__)
+
 
 class TreeExtractor(object):
     """
@@ -36,7 +38,6 @@ class TreeExtractor(object):
     """
 
     def __init__(self, pdf_file):
-        self.log = logging.getLogger(__name__)
         self.pdf_file = pdf_file
         self.elems: Dict[int, PDFElems] = {}  # key represents page_num
         self.font_stats: Dict[int, Any] = {}  # key represents page_num
@@ -165,7 +166,7 @@ def get_candidates_and_features_page_num(self, page_num):
 
         boxes = alignments_bboxes
         if len(boxes) == 0:
-            self.log.info("No boxes were found on page {}.".format(page_num))
+            logger.info("No boxes were found on page {}.".format(page_num))
             return [], []
 
         lines_features = get_lines_features(boxes, elems)
@@ -197,7 +198,7 @@ def get_candidates_alignments(self, page_num, elems):
         try:
             nodes, features = parse_layout(elems, font_stat)
         except Exception as e:
-            self.log.exception(e)
+            logger.exception(e)
             nodes, features = [], []
         return (
             [
@@ -348,7 +349,7 @@ def get_word_boundaries(
                     char_idx += 1
                     continue
                 if word[len_idx] != mention_chars[char_idx][0]:
-                    self.log.warning(
+                    logger.warning(
                         "Out of order ({}, {})".format(word, mention_chars[char_idx][0])
                     )
                 curr_word[1] = min(curr_word[1], mention_chars[char_idx][1])
@@ -402,42 +403,72 @@ def get_html_others(self, tag: str, box: List[float], page_num: int) -> Element:
                 word_element.appendChild(self.doc.createTextNode(text))
         return element
 
-    def get_html_table(self, table, page_num) -> Element:
-        table_str = [str(i) for i in table]
+    def get_html_table(self, table: List[float], page_num: int) -> Optional[Element]:
+        """Recognize a table using tabula and return a DOM element.
+
+        :param table: bbox for a table (top,left,bottom,right)
+        :param page_num: 1-based page number
+        :return: DOM element for the table, or None if tabula recognizes no table
+        """
+        logger.debug(f"Calling tabula at page: {page_num} and area: {table}.")
         table_json = tabula.read_pdf(
-            self.pdf_file, pages=page_num, area=table_str, output_format="json"
+            self.pdf_file, pages=page_num, area=table, output_format="json"
         )
-        if len(table_json) > 0:
-            table_element = self.doc.createElement("table")
-            for i, row in enumerate(table_json[0]["data"]):
-                row_element = self.doc.createElement("tr")
-                table_element.appendChild(row_element)
-                for j, column in enumerate(row):
-                    col_element = self.doc.createElement("td")
-                    row_element.appendChild(col_element)
-                    box = [
-                        column["top"],
-                        column["left"],
-                        column["top"] + column["height"],
-                        column["left"] + column["width"],
-                    ]
-                    elems = get_mentions_within_bbox(box, self.elems[page_num].mentions)
-                    elems.sort(key=cmp_to_key(reading_order))
-                    for elem in elems:
-                        words = self.get_word_boundaries(elem)
-                        for word in words:
-                            top = int(word[1])
-                            left = int(word[2])
-                            bottom = int(word[3])
-                            right = int(word[4])
-                            # escape special HTML chars
-                            text = html.escape(word[0])
-
-                            word_element = self.doc.createElement("span")
-                            col_element.appendChild(word_element)
-                            word_element.setAttribute("class", "ocrx_word")
-                            word_element.setAttribute(
-                                "title", f"bbox {left} {top} {right} {bottom}"
-                            )
-                            word_element.appendChild(self.doc.createTextNode(text))
+        logger.debug(f"Tabula recognized {len(table_json)} table(s).")
+        if not table_json:
+            return None
+        table_element = self.doc.createElement("table")
+        table_element.setAttribute("class", "ocr_table")
+        top = int(table_json[0]["top"])
+        left = int(table_json[0]["left"])
+        bottom = int(table_json[0]["bottom"])
+        right = int(table_json[0]["right"])
+        table_element.setAttribute("title", f"bbox {left} {top} {right} {bottom}")
+        for row in table_json[0]["data"]:
+            row_element = self.doc.createElement("tr")
+            table_element.appendChild(row_element)
+            for cell in row:
+                # It is not explicitly stated anywhere but tabula seems to use the cell
+                # bbox to represent that of cell itself rather than that of text inside.
+                # Note: bbox could be [0, 0, 0, 0] if tabula recognizes no text inside.
+                box: List[float] = [
+                    cell["top"],
+                    cell["left"],
+                    cell["top"] + cell["height"],
+                    cell["left"] + cell["width"],
+                ]
+                cell_element = self.doc.createElement("td")
+                row_element.appendChild(cell_element)
+                elems = get_mentions_within_bbox(box, self.elems[page_num].mentions)
+                if len(elems) == 0:
+                    continue
+                cell_element.setAttribute(
+                    "title",
+                    f"bbox {int(box[1])} {int(box[0])} {int(box[3])} {int(box[2])}",
+                )
+                elems.sort(key=cmp_to_key(reading_order))
+                for elem in elems:
+                    line_element = self.doc.createElement("span")
+                    cell_element.appendChild(line_element)
+                    line_element.setAttribute("class", "ocrx_line")
+                    line_element.setAttribute(
+                        "title",
+                        " ".join(["bbox"] + [str(int(_)) for _ in elem.bbox]),
+                    )
+                    words = self.get_word_boundaries(elem)
+                    for word in words:
+                        top = int(word[1])
+                        left = int(word[2])
+                        bottom = int(word[3])
+                        right = int(word[4])
+                        # escape special HTML chars
+                        text = html.escape(word[0])
+
+                        word_element = self.doc.createElement("span")
+                        line_element.appendChild(word_element)
+                        word_element.setAttribute("class", "ocrx_word")
+                        word_element.setAttribute(
+                            "title", f"bbox {left} {top} {right} {bottom}"
+                        )
+                        word_element.appendChild(self.doc.createTextNode(text))
         return table_element
diff --git a/pdftotree/core.py b/pdftotree/core.py
index 29d77b7..77a3036 100644
--- a/pdftotree/core.py
+++ b/pdftotree/core.py
@@ -24,17 +24,18 @@
 from pdftotree.TreeExtract import TreeExtractor
 from pdftotree.TreeVisualizer import TreeVisualizer
 
+logger = logging.getLogger(__name__)
+
 
 def load_model(model_type, model_path):
-    log = logging.getLogger(__name__)
-    log.info("Loading pretrained {} model for table detection".format(model_type))
+    logger.info("Loading pretrained {} model for table detection".format(model_type))
     if model_type == "ml":
         model = pickle.load(open(model_path, "rb"))
     else:
         from keras.models import load_model as load_vision_model
 
         model = load_vision_model(model_path)
-    log.info("Model loaded!")
+    logger.info("Model loaded!")
     return model
 
 
@@ -51,20 +52,19 @@ def parse(
     model_path=None,
     visualize=False,
 ):
-    log = logging.getLogger(__name__)
     model = None
     if model_type is not None and model_path is not None:
         model = load_model(model_type, model_path)
     extractor = TreeExtractor(pdf_file)
     if extractor.is_scanned():
-        log.warning("Document looks scanned, the result may be far from expected.")
+        logger.warning("Document looks scanned, the result may be far from expected.")
     else:
-        log.info("Digitized PDF detected, building tree structure...")
+        logger.info("Digitized PDF detected, building tree structure...")
 
     pdf_tree = extractor.get_tree_structure(model_type, model)
-    log.info("Tree structure built, creating html...")
+    logger.info("Tree structure built, creating html...")
     pdf_html = extractor.get_html_tree()
-    log.info("HTML created.")
+    logger.info("HTML created.")
     # TODO: what is the following substition for and is it required?
     # pdf_html = re.sub(r"[\x00-\x1F]+", "", pdf_html)
 
diff --git a/pdftotree/ml/TableExtractML.py b/pdftotree/ml/TableExtractML.py
index b9b6b53..57272e7 100644
--- a/pdftotree/ml/TableExtractML.py
+++ b/pdftotree/ml/TableExtractML.py
@@ -20,6 +20,8 @@
 from pdftotree.utils.pdf.pdf_parsers import parse_layout
 from pdftotree.utils.pdf.pdf_utils import analyze_pages, normalize_pdf
 
+logger = logging.getLogger(__name__)
+
 
 class TableExtractorML(object):
     """
@@ -27,7 +29,6 @@ class TableExtractorML(object):
     """
 
     def __init__(self, pdf_file):
-        self.log = logging.getLogger(__name__)
         self.pdf_file = pdf_file
         self.elems = {}
         self.font_stats = {}
@@ -97,7 +98,7 @@ def parse(self):
                     and round(fig.bbox[2]) == round(elems.layout.width)
                     and round(fig.bbox[3]) == round(elems.layout.height)
                 ):
-                    self.log.debug(
+                    logger.debug(
                         "{} is scanned because of full-page figure.".format(
                             self.pdf_file
                         )
@@ -111,7 +112,7 @@ def parse(self):
             )
             # doc is scanned if any page is scanned
             if page_scanned:
-                self.log.debug(
+                logger.debug(
                     "{} is scanned one of its pages is scanned.".format(self.pdf_file)
                 )
                 is_scanned = True
@@ -139,7 +140,7 @@ def get_candidates(self):
     def get_candidates_and_features(self):
         self.parse()
         if self.scanned:
-            self.log.info("{} is scanned.".format(self.pdf_file))
+            logger.info("{} is scanned.".format(self.pdf_file))
             return [], [], self.scanned
         for page_num in list(self.elems.keys()):
             page_boxes, page_features = self.get_candidates_and_features_page_num(
@@ -161,7 +162,7 @@ def get_candidates_and_features_page_num(self, page_num):
         alignments_bboxes, alignment_features = self.get_candidates_alignments(
             page_num, elems
         )
-        self.log.info(
+        logger.info(
             "Page Num: {}, Line bboxes: {}, Alignment bboxes: {}".format(
                 page_num, len(lines_bboxes), len(alignments_bboxes)
             )
diff --git a/pdftotree/ml/features.py b/pdftotree/ml/features.py
index 5927457..78167db 100644
--- a/pdftotree/ml/features.py
+++ b/pdftotree/ml/features.py
@@ -1,7 +1,7 @@
 import string
 from builtins import str
 from collections import defaultdict
-from typing import List
+from typing import Any, List
 
 from pdfminer.layout import LTTextLine
 
@@ -35,7 +35,15 @@ def get_height_coverage(bbox):
 # ******************* Text Coverage Features *************************************
 
 
-def get_mentions_within_bbox(bbox, mentions) -> List[LTTextLine]:
+def get_mentions_within_bbox(
+    bbox: List[Any], mentions: List[LTTextLine]
+) -> List[LTTextLine]:
+    """Get textlines within bbox.
+
+    :param bbox: a list whose last 4 elements are (top, left, bottom, right)
+    :param mentions: a list of textlines
+    :return: a list of textlines within the given bbox
+    """
     mentions_within_bbox = []
     for mention in mentions:
         bbox_mention = (
diff --git a/pdftotree/utils/pdf/grid.py b/pdftotree/utils/pdf/grid.py
index c2f18d9..6f2d257 100644
--- a/pdftotree/utils/pdf/grid.py
+++ b/pdftotree/utils/pdf/grid.py
@@ -16,6 +16,8 @@
 
 from pdftotree.utils.pdf.vector_utils import inside, reading_order
 
+logger = logging.getLogger(__name__)
+
 
 class Cell(object):
     """Represents a cell with no visual dividers inside"""
@@ -117,7 +119,6 @@ def get_normalized_grid(self):
         """
         Analyzes subcell structure
         """
-        log = logging.getLogger(__name__)
         # Resolve multirow mentions, TODO: validate against all PDFs
         #  subcol_count = 0
         mega_rows = []
@@ -127,12 +128,12 @@ def get_normalized_grid(self):
             for col_id, cell in enumerate(row):
                 # Keep cell text in reading order
                 cell.texts.sort(key=cmp_to_key(reading_order))
-                log.debug("=" * 50)
+                logger.debug("=" * 50)
                 for m in cell.texts:
                     subrow_across_cell[m.yc_grid].append(m)
                     #  prev = m
 
-            log.debug(pformat(dict(subrow_across_cell)))
+            logger.debug(pformat(dict(subrow_across_cell)))
 
             mega_rows.append(subrow_across_cell)
 
diff --git a/pdftotree/utils/pdf/pdf_parsers.py b/pdftotree/utils/pdf/pdf_parsers.py
index 7b4b912..1e4cd62 100644
--- a/pdftotree/utils/pdf/pdf_parsers.py
+++ b/pdftotree/utils/pdf/pdf_parsers.py
@@ -19,6 +19,8 @@
 from pdftotree.utils.pdf.pdf_utils import PDFElems
 from pdftotree.utils.pdf.vector_utils import center, intersect, l1, xy_reading_order
 
+logger = logging.getLogger(__name__)
+
 
 def parse_layout(elems, font_stat, combine=False):
     """
@@ -75,7 +77,6 @@ def cluster_vertically_aligned_boxes(
     page_width,
     combine,
 ):
-    log = logging.getLogger(__name__)
     # Filter out boxes with zero width or height
     filtered_boxes = []
     for bbox in boxes:
@@ -85,10 +86,10 @@ def cluster_vertically_aligned_boxes(
 
     # Too many "." in the Table of Content pages
     if len(boxes) == 0:
-        log.warning("No boxes were found to cluster.")
+        logger.warning("No boxes were found to cluster.")
         return [], []
     elif len(boxes) > 3500:
-        log.warning("Too many '.' in the Table of Content pages?")
+        logger.warning("Too many '.' in the Table of Content pages?")
         return [], []
 
     plane = Plane(page_bbox)
@@ -810,7 +811,6 @@ def extract_text_candidates(
     page_width,
     page_height,
 ) -> Tuple[Dict[str, List], bool]:
-    log = logging.getLogger(__name__)
     # Filter out boxes with zero width or height
     filtered_boxes = []
     for bbox in boxes:
@@ -1052,7 +1052,7 @@ def extract_text_candidates(
         min_y_page = min(min_y_page, box.bbox[1])
     if page_num == -1:
         # handle title, authors and abstract here
-        log.error("TODO: no way to handle title authors abstract yet.")
+        logger.error("TODO: no way to handle title authors abstract yet.")
     else:
         # eliminate header, footer, page number
         # sort other text and classify as header/paragraph
@@ -1180,7 +1180,6 @@ def extract_text_candidates(
 
 
 def get_figures(boxes, page_bbox, page_num, boxes_figures, page_width, page_height):
-    log = logging.getLogger(__name__)
     # Filter out boxes with zero width or height
     filtered_boxes = []
     for bbox in boxes:
@@ -1189,7 +1188,7 @@ def get_figures(boxes, page_bbox, page_num, boxes_figures, page_width, page_heig
     boxes = filtered_boxes
 
     if len(boxes) == 0:
-        log.warning("No boxes to get figures from on page {}.".format(page_num))
+        logger.warning("No boxes to get figures from on page {}.".format(page_num))
         return []
 
     plane = Plane(page_bbox)
@@ -1255,7 +1254,6 @@ def get_most_common_font_pts(mentions, font_stat):
     """
     font_stat: Counter object of font sizes
     """
-    log = logging.getLogger(__name__)
     try:
         # default min font size of 1 pt in case no font present
         most_common_font_size = font_stat.most_common(1)[0][0]
@@ -1269,7 +1267,7 @@ def get_most_common_font_pts(mentions, font_stat):
         return height_sum / count
 
     except IndexError:
-        log.info("No text found on page. Default most_common_font_pts to 2.0")
+        logger.info("No text found on page. Default most_common_font_pts to 2.0")
         return 2.0
 
 
@@ -1284,7 +1282,6 @@ def get_page_width(boxes):
 
 
 def get_char_width(boxes: List[LTTextLine]) -> float:
-    log = logging.getLogger(__name__)
     box_len_sum = 0
     num_char_sum = 0
     for i, b in enumerate(boxes):
@@ -1293,5 +1290,5 @@ def get_char_width(boxes: List[LTTextLine]) -> float:
     try:
         return box_len_sum / num_char_sum
     except ZeroDivisionError:
-        log.warning("No text found. Defaulting to char_width = 2.0.")
+        logger.warning("No text found. Defaulting to char_width = 2.0.")
         return 2.0
diff --git a/pdftotree/utils/pdf/render.py b/pdftotree/utils/pdf/render.py
index cf806ef..d195f72 100644
--- a/pdftotree/utils/pdf/render.py
+++ b/pdftotree/utils/pdf/render.py
@@ -11,6 +11,8 @@
 import numpy as np
 from pdf.vector_utils import x0, x1, y0, y1
 
+logger = logging.getLogger(__name__)
+
 
 class Renderer(object):
     """
@@ -32,7 +34,6 @@ def __init__(self, elems, scaler=1):
         scaler so we can map original coordinates into the
         new grid map.
         """
-        self.log = logging.getLogger(__name__)
         self.scaler = scaler
         layout = elems.layout
         width = int(np.ceil(scaler * layout.width))
@@ -41,7 +42,7 @@ def __init__(self, elems, scaler=1):
         self.grid = np.zeros((width, height), dtype=np.int8)
 
         # Estimates the grid size in megabytes
-        self.log.info(self.grid.nbytes / float(1048576))
+        logger.info(self.grid.nbytes / float(1048576))
         for line in elems.segments:
             if line.height < 0.1:  # Horizontal lines
                 self.draw_rect(line.bbox, self.horizontal_line)
diff --git a/tests/test_basic.py b/tests/test_basic.py
index 8f9fd0d..92c9463 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -10,6 +10,27 @@
 import pdftotree
 
 
+# Adapted from https://github.com/ocropus/hocr-tools/blob/v1.3.0/hocr-check
+def get_prop(node: Tag, name: str) -> Optional[str]:
+    """Return the arguments of hOCR property `name` from node's title, if any."""
+    title = node.get("title")
+    if not title:
+        return None
+    for prop in title.split(";"):
+        key, *args = prop.split(None, 1) or [""]  # tolerate empty/arg-less props
+        if key == name:
+            return args[0] if args else ""
+    return None
+
+
+# Adapted from https://github.com/ocropus/hocr-tools/blob/v1.3.0/hocr-check
+def get_bbox(node: Tag) -> Optional[box]:
+    """Return the node's hOCR bbox as a `box`, or None if it has no bbox."""
+    bbox = get_prop(node, "bbox")
+    coords = [int(x) for x in bbox.split()] if bbox else None
+    return box(*coords) if coords is not None else None
+
+
 def test_heuristic_completion():
     """Simply test that parse runs to completion without errors."""
     output = pdftotree.parse("tests/input/paleo.pdf")
@@ -48,25 +69,6 @@ def test_looks_scanned():
     assert len(soup.find_all(class_="ocrx_word")) >= 1000
     assert len(soup.find_all("figure")) == 3
 
-    # Adapted from https://github.com/ocropus/hocr-tools/blob/v1.3.0/hocr-check
-    def get_prop(node: Tag, name: str) -> Optional[str]:
-        title = node.get("title")
-        if not title:
-            return None
-        props = title.split(";")
-        for prop in props:
-            (key, args) = prop.split(None, 1)
-            if key == name:
-                return args
-        return None
-
-    # Adapted from https://github.com/ocropus/hocr-tools/blob/v1.3.0/hocr-check
-    def get_bbox(node: Tag) -> box:
-        bbox = get_prop(node, "bbox")
-        if not bbox:
-            return None
-        return box(*[int(x) for x in bbox.split()])
-
     # Check if words are extracted even though they are overlapped by a figure (#77).
     page = soup.find(class_="ocr_page")  # checking only 1st page is good enough.
     words = [get_bbox(word) for word in page.find_all(class_="ocrx_word")]
@@ -74,10 +76,12 @@ def get_bbox(node: Tag) -> box:
     assert all([figure.contains(word) for word in words])
 
 
-def test_LTChar_under_LTFigure():
+def test_LTChar_under_LTFigure(tmp_path):
     """Test on a PDF where LTChar(s) are children of LTFigure."""
-    output = pdftotree.parse("tests/input/CentralSemiconductorCorp_2N4013.pdf")
-    soup = BeautifulSoup(output, "lxml")
+    html_path = os.path.join(tmp_path, "paleo.html")
+    pdftotree.parse("tests/input/CentralSemiconductorCorp_2N4013.pdf", html_path)
+    with open(html_path) as f:
+        soup = BeautifulSoup(f, "lxml")
     line: Tag = soup.find(class_="ocrx_line")
     assert [word.text for word in line.find_all(class_="ocrx_word")] == [
         "Small",
@@ -87,8 +91,16 @@ def test_LTChar_under_LTFigure():
 
     # The table in the 1st page should contain 18 columns
     page = soup.find(class_="ocr_page")
-    table = page.find("table")
+    table = page.find(class_="ocr_table")
     assert len(table.find("tr").find_all("td")) == 18
+    assert get_bbox(table) is not None
+
+    # Find a cell containing one or more of ocrx_word and check if it has bbox
+    cell = table.find(class_="ocrx_word").parent.parent
+    assert get_bbox(cell) is not None
+
+    with Popen(["hocr-check", html_path], stderr=PIPE) as proc:
+        assert all([line.decode("utf-8").startswith("ok") for line in proc.stderr])
 
 
 def test_ml_completion():