diff --git a/README.md b/README.md index e42f937..9b58bcb 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,12 @@ Ensure you have a Java runtime and set the PATH for it. pip install tabula-py ``` +If you want to leverage faster execution with jpype, install with the `jpype` extra. + +```sh +pip install tabula-py[jpype] +``` + ### Example tabula-py enables you to extract tables from a PDF into a DataFrame, or a JSON. It can also extract tables from a PDF and save the file as a CSV, a TSV, or a JSON.   diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 7fe63bc..9be7b08 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -26,6 +26,12 @@ You can install tabula-py from PyPI with ``pip`` command. pip install tabula-py +If you want to leverage faster execution with jpype, install with the ``jpype`` extra. + +.. code-block:: bash + + pip install tabula-py[jpype] + .. Note:: conda recipe on conda-forge is not maintained by us. We recommend installing via ``pip`` to use the latest version of tabula-py. 
diff --git a/noxfile.py b/noxfile.py index 50f6711..668d266 100644 --- a/noxfile.py +++ b/noxfile.py @@ -20,8 +20,30 @@ def lint(session): @nox.session -def tests(session): +@nox.parametrize( + "python,jpype", + [ + ("3.8", True), + ("3.9", True), + ("3.10", True), + ("3.11", True), + # ("3.12", False), + ], +) +def tests(session, jpype): + if jpype: + tests_with_jpype(session) + else: + tests_without_jpype(session) + + +def tests_without_jpype(session): session.install(".[test]") session.run("pytest", "-v", "tests/test_read_pdf_table.py") + + +def tests_with_jpype(session): + session.install(".[jpype,test]") + session.run("pytest", "-v", "tests/test_read_pdf_table.py") session.run("pytest", "-v", "tests/test_read_pdf_jar_path.py") session.run("pytest", "-v", "tests/test_read_pdf_silent.py") diff --git a/pyproject.toml b/pyproject.toml index 7addee9..8384caf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,11 +32,11 @@ dependencies = [ "pandas >= 0.25.3", "numpy", "distro", - "jpype1", ] dynamic = ["version"] [project.optional-dependencies] +jpype = ["jpype1"] dev = [ "pytest", "flake8", diff --git a/tabula/backend.py b/tabula/backend.py index 964c01b..f43e160 100644 --- a/tabula/backend.py +++ b/tabula/backend.py @@ -3,9 +3,6 @@ from logging import getLogger from typing import List, Optional -import jpype -import jpype.imports - from .errors import JavaNotFoundError from .util import TabulaOption @@ -27,23 +24,26 @@ def jar_path() -> str: class TabulaVm: def __init__(self, java_options: List[str], silent: Optional[bool]) -> None: - if not jpype.isJVMStarted(): - jpype.addClassPath(jar_path()) - - # Workaround to enforce the silent option. 
See: - # https://github.com/tabulapdf/tabula-java/issues/231#issuecomment-397281157 - if silent: - java_options.extend( - ( - "-Dorg.slf4j.simpleLogger.defaultLogLevel=off", - "-Dorg.apache.commons.logging.Log" - "=org.apache.commons.logging.impl.NoOpLog", + try: + import jpype + import jpype.imports + + if not jpype.isJVMStarted(): + jpype.addClassPath(jar_path()) + + # Workaround to enforce the silent option. See: + # https://github.com/tabulapdf/tabula-java/issues/231#issuecomment-397281157 + if silent: + java_options.extend( + ( + "-Dorg.slf4j.simpleLogger.defaultLogLevel=off", + "-Dorg.apache.commons.logging.Log" + "=org.apache.commons.logging.impl.NoOpLog", + ) ) - ) - jpype.startJVM(*java_options, convertStrings=False) + jpype.startJVM(*java_options, convertStrings=False) - try: import java.lang as lang import technology.tabula as tabula from org.apache.commons.cli import DefaultParser @@ -51,11 +51,11 @@ def __init__(self, java_options: List[str], silent: Optional[bool]) -> None: self.tabula = tabula self.parser = DefaultParser() self.lang = lang + except (ModuleNotFoundError, ImportError) as e: logger.warning( "Error importing jpype dependencies. Fallback to subprocess." 
) - logger.warning(jpype.java.lang.System.getProperty("java.class.path")) logger.warning(e) self.tabula = None self.parse = None diff --git a/tests/test_read_pdf_jar_path.py b/tests/test_read_pdf_jar_path.py index e146271..ee55b45 100644 --- a/tests/test_read_pdf_jar_path.py +++ b/tests/test_read_pdf_jar_path.py @@ -3,6 +3,7 @@ from subprocess import CalledProcessError from unittest.mock import patch +import jpype import pytest import tabula @@ -19,5 +20,5 @@ def test_read_pdf_with_jar_path(self, jar_func): # Fallback to subprocess with pytest.raises(CalledProcessError): tabula.read_pdf(self.pdf_path, encoding="utf-8") - file_name = Path(tabula.backend.jpype.getClassPath()).name + file_name = Path(jpype.getClassPath()).name self.assertEqual(file_name, "tabula-java.jar") diff --git a/tests/test_read_pdf_silent.py b/tests/test_read_pdf_silent.py index a463e65..97e085f 100644 --- a/tests/test_read_pdf_silent.py +++ b/tests/test_read_pdf_silent.py @@ -2,8 +2,6 @@ import unittest from unittest.mock import patch -import pytest - import tabula @@ -11,10 +9,9 @@ class TestReadPdfJarPath(unittest.TestCase): def setUp(self): self.pdf_path = "tests/resources/data.pdf" - @patch("tabula.backend.jpype.startJVM") + @patch("jpype.startJVM") def test_read_pdf_with_silent_true(self, jvm_func): - with pytest.raises(RuntimeError): - tabula.read_pdf(self.pdf_path, encoding="utf-8", silent=True) + tabula.read_pdf(self.pdf_path, encoding="utf-8", silent=True) target_args = [] if platform.system() == "Darwin":