Skip to content

Commit

Permalink
Merge pull request #390 from rbubley/patch-1
Browse files Browse the repository at this point in the history
  • Loading branch information
chezou authored May 20, 2024
2 parents 7f2cad1 + df3ba02 commit 79792ad
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 5 deletions.
11 changes: 6 additions & 5 deletions tabula/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import shlex
from dataclasses import dataclass
from logging import getLogger
from typing import IO, Iterable, List, Optional, Union, cast
from typing import IO, Iterable, List, Optional, Sequence, Union, cast

logger = getLogger(__name__)

Expand Down Expand Up @@ -115,8 +115,9 @@ class TabulaOption:
Password to decrypt document. Default: empty
silent (bool, optional):
Suppress all stderr output.
columns (iterable, optional):
X coordinates of column boundaries.
columns (Sequence, optional):
X coordinates of column boundaries. Must be sorted and of a datatype that
preserves order, e.g. tuple or list
Example:
``[10.1, 20.2, 30.3]``
Expand Down Expand Up @@ -147,7 +148,7 @@ class TabulaOption:
stream: bool = False
password: Optional[str] = None
silent: Optional[bool] = None
columns: Optional[Iterable[float]] = None
columns: Optional[Sequence[float]] = None
relative_columns: bool = False
format: Optional[str] = None
batch: Optional[str] = None
Expand Down Expand Up @@ -235,7 +236,7 @@ def build_option_list(self) -> List[str]:
__options += ["--outfile", self.output_path]

if self.columns:
if self.columns != sorted(self.columns):
if list(self.columns) != sorted(self.columns):
raise ValueError("columns option should be sorted")

__columns = _format_with_relative(self.columns, self.relative_columns)
Expand Down
9 changes: 9 additions & 0 deletions tests/test_read_pdf_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,15 @@ def test_read_pdf_with_columns(self):
)[0].equals(pd.read_csv(expected_csv))
)

def test_read_pdf_with_tuple_columns(self):
    """Check that ``columns`` given as a tuple yields the expected table.

    Reads the campaign donors PDF with explicit column boundaries passed
    as a tuple (with ``guess=False``) and compares the first returned
    table against the reference CSV.
    """
    column_boundaries = (47, 147, 256, 310, 375, 431, 504)

    # Extract first, then load the reference data, so the PDF read runs
    # before the CSV read.
    extracted_tables = tabula.read_pdf(
        "tests/resources/campaign_donors.pdf",
        columns=column_boundaries,
        guess=False,
    )
    reference_frame = pd.read_csv("tests/resources/campaign_donors.csv")

    first_table = extracted_tables[0]
    self.assertTrue(first_table.equals(reference_frame))

def test_read_pdf_with_relative_columns(self):
pdf_path = "tests/resources/campaign_donors.pdf"
expected_csv = "tests/resources/campaign_donors.csv"
Expand Down

0 comments on commit 79792ad

Please sign in to comment.