Skip to content

Commit

Permalink
Merge branch 'master' into selitvin/fix_all_none_return_by_transformer
Browse files (browse the repository at this point in the history)
  • Loading branch information
selitvin authored Apr 21, 2022
2 parents 2306edb + d32709d commit 5d28818
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 26 deletions.
22 changes: 4 additions & 18 deletions .github/workflows/unittest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
config: [pyspark-2.4, tf-1.15, pyarrow-3.0, pyarrow-4.0, latest, pyarrow-0.17.1, py39]
config: [pyspark-2.4, tf-1.15, pyarrow-3.0, pyarrow-4.0, latest]
include:
- config: pyspark-2.4
PYARROW_VERSION: "2.0.0"
Expand Down Expand Up @@ -55,23 +55,9 @@ jobs:
ARROW_PRE_0_15_IPC_FORMAT: 0
PY: "3.7"
- config: latest
PYARROW_VERSION: "5.0.0"
NUMPY_VERSION: "1.20.1"
TF_VERSION: "2.5.0"
PYSPARK_VERSION: "3.0.0"
ARROW_PRE_0_15_IPC_FORMAT: "0"
PY: "3.7"
- config: pyarrow-0.17.1
PYARROW_VERSION: "0.17.1"
NUMPY_VERSION: "1.19.1"
TF_VERSION: "2.5.0"
PYSPARK_VERSION: "3.0.0"
ARROW_PRE_0_15_IPC_FORMAT: 0
PY: "3.7"
- config: py39
PYARROW_VERSION: "3.0.0"
NUMPY_VERSION: "1.20.1"
TF_VERSION: "2.5.0"
PYARROW_VERSION: "6.0.1"
NUMPY_VERSION: "1.21.5"
TF_VERSION: "2.8.0"
PYSPARK_VERSION: "3.0.0"
ARROW_PRE_0_15_IPC_FORMAT: "0"
PY: "3.9"
Expand Down
4 changes: 2 additions & 2 deletions petastorm/predicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
Predicates for petastorm
"""
import abc
import collections
import collections.abc
import hashlib
import numpy as np
import six
Expand Down Expand Up @@ -66,7 +66,7 @@ def get_fields(self):
return {self._predicate_field}

def do_include(self, values):
if not isinstance(values[self._predicate_field], collections.Iterable):
if not isinstance(values[self._predicate_field], collections.abc.Iterable):
raise ValueError('Predicate field should have iterable type')
return any(np.in1d(values[self._predicate_field], self._inclusion_values))

Expand Down
6 changes: 3 additions & 3 deletions petastorm/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import collections.abc
import decimal
# Must import pyarrow before torch. See: https://github.com/uber/petastorm/blob/master/docs/troubleshoot.rst
import re
Expand Down Expand Up @@ -84,11 +84,11 @@ def decimal_friendly_collate(batch):

if isinstance(batch[0], decimal.Decimal):
return batch
elif isinstance(batch[0], collections.Mapping):
elif isinstance(batch[0], collections.abc.Mapping):
return {key: decimal_friendly_collate([d[key] for d in batch]) for key in batch[0]}
elif isinstance(batch[0], _string_classes):
return batch
elif isinstance(batch[0], collections.Sequence):
elif isinstance(batch[0], collections.abc.Sequence):
transposed = zip(*batch)
return [decimal_friendly_collate(samples) for samples in transposed]
else:
Expand Down
6 changes: 3 additions & 3 deletions petastorm/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import collections.abc
import logging
import warnings

Expand Down Expand Up @@ -395,7 +395,7 @@ def __init__(self, pyarrow_filesystem, dataset_path, schema_fields=None,
# c. partition: used to get a subset of data for distributed training
# 4. Create a rowgroup ventilator object
# 5. Start workers pool
if not (isinstance(schema_fields, collections.Iterable) or isinstance(schema_fields, NGram)
if not (isinstance(schema_fields, collections.abc.Iterable) or isinstance(schema_fields, NGram)
or schema_fields is None):
raise ValueError('Fields must be either None, an iterable collection of Unischema fields '
'or an NGram object.')
Expand Down Expand Up @@ -431,7 +431,7 @@ def __init__(self, pyarrow_filesystem, dataset_path, schema_fields=None,
if self.ngram:
fields = self.ngram.get_field_names_at_all_timesteps()
else:
fields = schema_fields if isinstance(schema_fields, collections.Iterable) else None
fields = schema_fields if isinstance(schema_fields, collections.abc.Iterable) else None

storage_schema = stored_schema.create_schema_view(fields) if fields else stored_schema
if len(storage_schema.fields) == 0:
Expand Down

0 comments on commit 5d28818

Please sign in to comment.