diff --git a/docs/conf.py b/docs/conf.py index 1e3ac31..574242d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -29,7 +29,7 @@ author = 'Dimo Angelov' # The full version, including alpha/beta/rc tags -release = '1.0.21' +release = '1.0.22' # -- General configuration --------------------------------------------------- diff --git a/requirements.txt b/requirements.txt index 2dda7b0..b60cbf0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,7 @@ -numpy==1.20.0 -numpy pandas gensim -pynndescent>=0.4 -umap-learn -hdbscan +umap-learn>=0.5.1 +hdbscan>=0.8.27 wordcloud tensorflow tensorflow_hub @@ -12,4 +9,3 @@ tensorflow_text torch sentence_transformers hnswlib -joblib<1.0.0 diff --git a/setup.py b/setup.py index bce713a..fbb2c2d 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setuptools.setup( name="top2vec", packages=["top2vec"], - version="1.0.21", + version="1.0.22", author="Dimo Angelov", author_email="dimo.angelov@gmail.com", description="Top2Vec learns jointly embedded topic, document and word vectors.", @@ -26,11 +26,8 @@ "Operating System :: OS Independent", ], install_requires=[ - #'numpy == 1.20.0', - #'joblib < 1.0.0', 'pandas', 'gensim', - #'pynndescent >= 0.4', 'umap-learn >= 0.5.1', 'hdbscan >= 0.8.27', 'wordcloud', diff --git a/top2vec/Top2Vec.py b/top2vec/Top2Vec.py index 3f83e57..ed1bddc 100644 --- a/top2vec/Top2Vec.py +++ b/top2vec/Top2Vec.py @@ -884,12 +884,15 @@ def _validate_doc_ids(self, doc_ids, doc_ids_neg): doc_ids_neg = list(doc_ids_neg) doc_ids_all = doc_ids + doc_ids_neg - for doc_id in doc_ids_all: - if self.document_ids is not None: - if doc_id not in self.document_ids: + + if self.document_ids is not None: + for doc_id in doc_ids_all: + if doc_id not in self.doc_id2index: raise ValueError(f"{doc_id} is not a valid document id.") - elif doc_id < 0 or doc_id > len(self.doc_top) - 1: - raise ValueError(f"{doc_id} is not a valid document id.") + elif doc_ids_all and min(doc_ids_all) < 0: + raise ValueError(f"{min(doc_ids_all)} is not a valid 
document id.") + elif doc_ids_all and max(doc_ids_all) > len(self.doc_top) - 1: + raise ValueError(f"{max(doc_ids_all)} is not a valid document id.") def _validate_keywords(self, keywords, keywords_neg): if not (isinstance(keywords, list) or isinstance(keywords, np.ndarray)): diff --git a/top2vec/__init__.py b/top2vec/__init__.py index d90a6f3..3117174 100644 --- a/top2vec/__init__.py +++ b/top2vec/__init__.py @@ -1,3 +1,3 @@ from top2vec.Top2Vec import Top2Vec -__version__ = '1.0.21' +__version__ = '1.0.22'