Skip to content

Commit

Permalink
Add some clarification to Shuffler error message.
Browse files (browse the repository at this point in the history)
PiperOrigin-RevId: 713225767
  • Loading branch information
The TensorFlow Datasets Authors committed Jan 8, 2025
1 parent 1a8fed7 commit 606754a
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions tensorflow_datasets/core/shuffle.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,6 +19,7 @@
import math
import os
import struct
+ import typing
from typing import Optional
import uuid
from absl import logging
Expand Down Expand Up @@ -280,7 +281,9 @@ def add(self, key: type_utils.Key, data: bytes) -> bool:
raise AssertionError('add() cannot be called after __iter__.')
if not isinstance(data, bytes):
raise AssertionError(
- f'Only bytes (not {type(data)}) can be stored in Shuffler!'
+ f'Only bytes (not {type(data)}) can be stored in Shuffler! This'
+ ' likely indicates that non-integer keys were used when generating'
+ ' the dataset.'
)
hkey = self._hasher.hash_key(key)
if self._ignore_duplicates:
Expand All @@ -289,7 +292,7 @@ def add(self, key: type_utils.Key, data: bytes) -> bool:
self._seen_keys.add(hkey)
if self._disable_shuffling:
# Use the original key and not the hashed key to maintain the order.
- hkey = key
+ hkey = typing.cast(int, key)
self._total_bytes += len(data)
if self._in_memory:
self._add_to_mem_buffer(hkey, data)
Expand Down

0 comments on commit 606754a

Please sign in to comment.