Skip to content

Commit

Permalink
validation of document_ids efficiency
Browse files: browse the repository at this point in the history
  • Loading branch information
ddangelov committed Feb 12, 2021
1 parent abb917c commit c67c866
Show file tree
Hide file tree
Showing 5 changed files with 13 additions and 17 deletions.
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
author = 'Dimo Angelov'

# The full version, including alpha/beta/rc tags
release = '1.0.21'
release = '1.0.22'


# -- General configuration ---------------------------------------------------
Expand Down
8 changes: 2 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,15 +1,11 @@
numpy==1.20.0
numpy
pandas
gensim
pynndescent>=0.4
umap-learn
hdbscan
umap-learn>=0.5.1
hdbscan>=0.8.27
wordcloud
tensorflow
tensorflow_hub
tensorflow_text
torch
sentence_transformers
hnswlib
joblib<1.0.0
5 changes: 1 addition & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
setuptools.setup(
name="top2vec",
packages=["top2vec"],
version="1.0.21",
version="1.0.22",
author="Dimo Angelov",
author_email="dimo.angelov@gmail.com",
description="Top2Vec learns jointly embedded topic, document and word vectors.",
Expand All @@ -26,11 +26,8 @@
"Operating System :: OS Independent",
],
install_requires=[
#'numpy == 1.20.0',
#'joblib < 1.0.0',
'pandas',
'gensim',
#'pynndescent >= 0.4',
'umap-learn >= 0.5.1',
'hdbscan >= 0.8.27',
'wordcloud',
Expand Down
13 changes: 8 additions & 5 deletions top2vec/Top2Vec.py
Original file line number Diff line number Diff line change
Expand Up @@ -884,12 +884,15 @@ def _validate_doc_ids(self, doc_ids, doc_ids_neg):
doc_ids_neg = list(doc_ids_neg)

doc_ids_all = doc_ids + doc_ids_neg
for doc_id in doc_ids_all:
if self.document_ids is not None:
if doc_id not in self.document_ids:

if self.document_ids is not None:
for doc_id in doc_ids_all:
if doc_id not in self.doc_id2index:
raise ValueError(f"{doc_id} is not a valid document id.")
elif doc_id < 0 or doc_id > len(self.doc_top) - 1:
raise ValueError(f"{doc_id} is not a valid document id.")
elif min(doc_ids) < 0:
raise ValueError(f"{min(doc_ids)} is not a valid document id.")
elif max(doc_ids) > len(self.doc_top) - 1:
raise ValueError(f"{max(doc_ids)} is not a valid document id.")

def _validate_keywords(self, keywords, keywords_neg):
if not (isinstance(keywords, list) or isinstance(keywords, np.ndarray)):
Expand Down
2 changes: 1 addition & 1 deletion top2vec/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from top2vec.Top2Vec import Top2Vec

__version__ = '1.0.21'
__version__ = '1.0.22'

0 comments on commit c67c866

Please sign in to comment.