From 7a068ec4e15224f5a1e3f23a7bcbd9bd4b030181 Mon Sep 17 00:00:00 2001 From: tf-model-analysis-team Date: Thu, 17 Oct 2024 11:34:28 -0700 Subject: [PATCH] Add nltk.download('punkt_tab') to Rouge metric. The `punkt_tab` resource is needed by nltk.sent_tokenize(), which is used downstream. PiperOrigin-RevId: 686987036 --- RELEASE.md | 1 + tensorflow_model_analysis/metrics/rouge.py | 1 + 2 files changed, 2 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index ea163a3272..3eb8b48d36 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -19,6 +19,7 @@ * Modifies a ROUGE Test to be compatible with NumPy v2.0.1. * Remove keras_util_test.py which is based on estimator models. * Remove dependency on eval_saved_model encodings. +* Downloads `punkt_tab` in Rouge metric. ## Breaking Changes diff --git a/tensorflow_model_analysis/metrics/rouge.py b/tensorflow_model_analysis/metrics/rouge.py index 45eef52b42..bd5006ff70 100644 --- a/tensorflow_model_analysis/metrics/rouge.py +++ b/tensorflow_model_analysis/metrics/rouge.py @@ -93,6 +93,7 @@ def setup(self): if not tokenizer_installed: logging.info(_LOGGING_MESSAGE_TOKENIZER_PREPARER) nltk.download('punkt') + nltk.download('punkt_tab') def create_accumulator(self) -> _Accumulator: return _Accumulator()