From 1eb532318cdeab3b1f35d93ba5a43ebe4803f74e Mon Sep 17 00:00:00 2001 From: Tibor Reiss Date: Wed, 14 Aug 2024 20:50:32 +0200 Subject: [PATCH 1/7] Add fetch_objects_by_ids --- integration/test_collection_async.py | 79 ++++++ integration/test_collection_filter.py | 41 ++- weaviate/collections/generate.py | 6 + .../queries/fetch_objects_by_ids/__init__.py | 9 + .../queries/fetch_objects_by_ids/generate.py | 73 +++++ .../queries/fetch_objects_by_ids/generate.pyi | 260 ++++++++++++++++++ .../queries/fetch_objects_by_ids/query.py | 64 +++++ .../queries/fetch_objects_by_ids/query.pyi | 213 ++++++++++++++ weaviate/collections/query.py | 6 + 9 files changed, 750 insertions(+), 1 deletion(-) create mode 100644 weaviate/collections/queries/fetch_objects_by_ids/__init__.py create mode 100644 weaviate/collections/queries/fetch_objects_by_ids/generate.py create mode 100644 weaviate/collections/queries/fetch_objects_by_ids/generate.pyi create mode 100644 weaviate/collections/queries/fetch_objects_by_ids/query.py create mode 100644 weaviate/collections/queries/fetch_objects_by_ids/query.pyi diff --git a/integration/test_collection_async.py b/integration/test_collection_async.py index 0cf274390..711208ad7 100644 --- a/integration/test_collection_async.py +++ b/integration/test_collection_async.py @@ -1,14 +1,19 @@ import datetime import uuid +from typing import Iterable import pytest import weaviate.classes as wvc from weaviate.collections.classes.config import DataType, Property +from weaviate.collections.classes.data import DataObject +from weaviate.types import UUID from .conftest import AsyncCollectionFactory, AsyncOpenAICollectionFactory UUID1 = uuid.UUID("806827e0-2b31-43ca-9269-24fa95a221f9") +UUID2 = uuid.uuid4() +UUID3 = uuid.uuid4() DATE1 = datetime.datetime.strptime("2012-02-09", "%Y-%m-%d").replace(tzinfo=datetime.timezone.utc) @@ -32,6 +37,43 @@ async def test_fetch_objects(async_collection_factory: AsyncCollectionFactory) - assert res.objects[0].properties["name"] == "John Doe" +@pytest.mark.asyncio +@pytest.mark.parametrize( + "ids, expected_len, expected", + [ + ([], 0, set()), + ((), 0, set()), + ([UUID3, ], 1, {UUID3, }), + ([UUID1, UUID2], 2, {UUID1, UUID2}), + ((UUID1, UUID3), 2, {UUID1, UUID3}), + ((UUID1, UUID3, UUID3), 2, {UUID1, UUID3}), + ], +) +async def test_fetch_objects_by_ids( + async_collection_factory: AsyncCollectionFactory, + ids: Iterable[UUID], + expected_len: int, + expected: set, +) -> None: + collection = await async_collection_factory( + properties=[ + Property(name="name", data_type=DataType.TEXT), + ], + vectorizer_config=wvc.config.Configure.Vectorizer.none(), + ) + await collection.data.insert_many( + [ + DataObject(properties={"name": "first"}, uuid=UUID1), + DataObject(properties={"name": "second"}, uuid=UUID2), + DataObject(properties={"name": "third"}, uuid=UUID3), + ] + ) + + res = await collection.query.fetch_objects_by_ids(ids) + assert len(res.objects) == expected_len + assert {o.uuid for o in res.objects} == expected + + @pytest.mark.asyncio async def test_config_update(async_collection_factory: AsyncCollectionFactory) -> None: collection = await async_collection_factory( @@ -200,3 +242,40 @@ async def test_generate(async_openai_collection: AsyncOpenAICollectionFactory) - assert len(res.objects) == 2 for obj in res.objects: assert obj.generated is not None + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "ids, expected_len, expected", + [ + ([], 0, set()), + ((), 0, set()), + ([UUID3, ], 1, {UUID3, }), + ([UUID1, UUID2], 2, {UUID1, UUID2}), + ((UUID1, UUID3), 2, {UUID1, UUID3}), + ((UUID1, UUID3, UUID3), 2, {UUID1, UUID3}), + ], +) +async def test_generate_by_ids( + async_collection_factory: AsyncCollectionFactory, + ids: Iterable[UUID], + expected_len: int, + expected: set, +) -> None: + collection = await async_collection_factory( + properties=[ + Property(name="name", data_type=DataType.TEXT), + ], + vectorizer_config=wvc.config.Configure.Vectorizer.none(), + ) + await collection.data.insert_many( + [ + DataObject(properties={"name": "first"}, uuid=UUID1), + DataObject(properties={"name": "second"}, uuid=UUID2), + DataObject(properties={"name": "third"}, uuid=UUID3), + ] + ) + res = await collection.generate.fetch_objects_by_ids(ids) + assert res is not None + assert len(res.objects) == expected_len + assert {o.uuid for o in res.objects} == expected diff --git a/integration/test_collection_filter.py b/integration/test_collection_filter.py index a483e6cfd..814838537 100644 --- a/integration/test_collection_filter.py +++ b/integration/test_collection_filter.py @@ -1,7 +1,7 @@ import datetime import time import uuid -from typing import Callable, List, Optional +from typing import Callable, Iterable, List, Optional import pytest as pytest @@ -21,6 +21,7 @@ ) from weaviate.collections.classes.grpc import MetadataQuery, QueryReference, Sort from weaviate.collections.classes.internal import ReferenceToMulti +from weaviate.types import UUID NOW = datetime.datetime.now(datetime.timezone.utc) LATER = NOW + datetime.timedelta(hours=1) @@ -548,6 +549,44 @@ def test_filter_id(collection_factory: CollectionFactory, weav_filter: _FilterVa assert objects[0].uuid == UUID1 +@pytest.mark.parametrize( + "ids, expected_len, expected", + [ + ([], 0, set()), + ((), 0, set()), + ([UUID3, ], 1, {UUID3, }), + ([UUID1, UUID2], 2, {UUID1, UUID2}), + ((UUID1, UUID3), 2, {UUID1, UUID3}), + ((UUID1, UUID3, UUID3), 2, {UUID1, UUID3}), + ], +) +def test_filter_ids( + collection_factory: CollectionFactory, + ids: Iterable[UUID], + expected_len: int, + expected: set, +) -> None: + collection = collection_factory( + properties=[ + Property(name="Name", data_type=DataType.TEXT), + ], + vectorizer_config=Configure.Vectorizer.none(), + ) + + collection.data.insert_many( + [ + DataObject(properties={"name": "first"}, uuid=UUID1), + DataObject(properties={"name": "second"}, uuid=UUID2), + DataObject(properties={"name": "third"}, uuid=UUID3), + ] + ) + + objects = collection.query.fetch_objects_by_ids(ids).objects + + assert len(objects) == expected_len + assert {o.uuid for o in objects} == expected + + @pytest.mark.parametrize("path", ["_creationTimeUnix", "_lastUpdateTimeUnix"]) def test_filter_timestamp_direct_path(collection_factory: CollectionFactory, path: str) -> None: collection = collection_factory( diff --git a/weaviate/collections/generate.py b/weaviate/collections/generate.py index b818e86c0..e8122c19c 100644 --- a/weaviate/collections/generate.py +++ b/weaviate/collections/generate.py @@ -7,6 +7,10 @@ _FetchObjectsGenerateAsync, _FetchObjectsGenerate, ) +from weaviate.collections.queries.fetch_objects_by_ids import ( + _FetchObjectsByIDsGenerateAsync, + _FetchObjectsByIDsGenerate, +) from weaviate.collections.queries.hybrid import _HybridGenerateAsync, _HybridGenerate from weaviate.collections.queries.near_image import _NearImageGenerateAsync, _NearImageGenerate from weaviate.collections.queries.near_media import _NearMediaGenerateAsync, _NearMediaGenerate @@ -19,6 +23,7 @@ class _GenerateCollectionAsync( Generic[TProperties, References], _BM25GenerateAsync[TProperties, References], _FetchObjectsGenerateAsync[TProperties, References], + _FetchObjectsByIDsGenerateAsync[TProperties, References], _HybridGenerateAsync[TProperties, References], _NearImageGenerateAsync[TProperties, References], _NearMediaGenerateAsync[TProperties, References], @@ -33,6 +38,7 @@ class _GenerateCollection( Generic[TProperties, References], _BM25Generate[TProperties, References], _FetchObjectsGenerate[TProperties, References], + _FetchObjectsByIDsGenerate[TProperties, References], _HybridGenerate[TProperties, References], _NearImageGenerate[TProperties, References], _NearMediaGenerate[TProperties, References], diff --git a/weaviate/collections/queries/fetch_objects_by_ids/__init__.py b/weaviate/collections/queries/fetch_objects_by_ids/__init__.py new file mode 100644 index 000000000..4cf61115f --- /dev/null +++ b/weaviate/collections/queries/fetch_objects_by_ids/__init__.py @@ -0,0 +1,9 @@ +from .generate import _FetchObjectsByIDsGenerateAsync, _FetchObjectsByIDsGenerate +from .query import _FetchObjectsByIDsQueryAsync, _FetchObjectsByIDsQuery + +__all__ = [ + "_FetchObjectsByIDsGenerate", + "_FetchObjectsByIDsGenerateAsync", + "_FetchObjectsByIDsQuery", + "_FetchObjectsByIDsQueryAsync", +] diff --git a/weaviate/collections/queries/fetch_objects_by_ids/generate.py b/weaviate/collections/queries/fetch_objects_by_ids/generate.py new file mode 100644 index 000000000..658320ca6 --- /dev/null +++ b/weaviate/collections/queries/fetch_objects_by_ids/generate.py @@ -0,0 +1,73 @@ +from typing import Generic, Iterable, List, Optional + +from weaviate import syncify +from weaviate.collections.classes.filters import Filter +from weaviate.collections.classes.grpc import METADATA, Sorting +from weaviate.collections.classes.internal import ( + GenerativeReturnType, + _Generative, + ReturnProperties, + ReturnReferences, + _QueryOptions, +) +from weaviate.collections.classes.types import Properties, TProperties, References, TReferences +from weaviate.collections.queries.base import _Base +from weaviate.proto.v1 import search_get_pb2 +from weaviate.types import UUID, INCLUDE_VECTOR + + +class _FetchObjectsByIDsGenerateAsync(Generic[Properties, References], _Base[Properties, References]): + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[ReturnProperties[TProperties]] = None, + return_references: Optional[ReturnReferences[TReferences]] = None + ) -> GenerativeReturnType[Properties, References, TProperties, TReferences]: + """Special case of fetch_objects based on filters on uuid""" + if not ids: + res = search_get_pb2.SearchReply(results=None) + else: + res = await self._query.get( + limit=limit, + offset=offset, + after=after, + filters=Filter.any_of([Filter.by_id().equal(uuid) for uuid in ids]), + sort=sort, + return_metadata=self._parse_return_metadata(return_metadata, include_vector), + return_properties=self._parse_return_properties(return_properties), + return_references=self._parse_return_references(return_references), + generative=_Generative( + single=single_prompt, + grouped=grouped_task, + grouped_properties=grouped_properties, + ), + ) + return self._result_to_generative_query_return( + res, + _QueryOptions.from_input( + return_metadata, + return_properties, + include_vector, + self._references, + return_references, + ), + return_properties, + return_references, + ) + + +@syncify.convert +class _FetchObjectsByIDsGenerate( + Generic[Properties, References], _FetchObjectsByIDsGenerateAsync[Properties, References] +): + pass diff --git a/weaviate/collections/queries/fetch_objects_by_ids/generate.pyi b/weaviate/collections/queries/fetch_objects_by_ids/generate.pyi new file mode 100644 index 000000000..a15d86fdb --- /dev/null +++ b/weaviate/collections/queries/fetch_objects_by_ids/generate.pyi @@ -0,0 +1,260 @@ +from typing import Generic, Iterable, List, Literal, Optional, Type, overload + +from weaviate.collections.classes.grpc import ( + METADATA, + PROPERTIES, + REFERENCES, + Sorting, +) +from weaviate.collections.classes.internal import ( + GenerativeReturn, + CrossReferences, + ReturnProperties, + ReturnReferences, + GenerativeReturnType, +) +from weaviate.collections.classes.types import Properties, TProperties, References, TReferences +from weaviate.collections.queries.base import _Base +from weaviate.types import UUID, INCLUDE_VECTOR + +class _FetchObjectsByIDsGenerateAsync(Generic[Properties, References], _Base[Properties, References]): + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[PROPERTIES] = None, + return_references: Literal[None] = None + ) -> GenerativeReturn[Properties, References]: ... + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[PROPERTIES] = None, + return_references: REFERENCES + ) -> GenerativeReturn[Properties, CrossReferences]: ... + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[PROPERTIES] = None, + return_references: Type[TReferences] + ) -> GenerativeReturn[Properties, TReferences]: ... + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Type[TProperties], + return_references: Literal[None] = None + ) -> GenerativeReturn[TProperties, References]: ... + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Type[TProperties], + return_references: REFERENCES + ) -> GenerativeReturn[TProperties, CrossReferences]: ... + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Type[TProperties], + return_references: Type[TReferences] + ) -> GenerativeReturn[TProperties, TReferences]: ... + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[ReturnProperties[TProperties]] = None, + return_references: Optional[ReturnReferences[TReferences]] = None + ) -> GenerativeReturnType[Properties, References, TProperties, TReferences]: ... + +class _FetchObjectsGenerate(Generic[Properties, References], _Base[Properties, References]): + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[PROPERTIES] = None, + return_references: Literal[None] = None + ) -> GenerativeReturn[Properties, References]: ... + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[PROPERTIES] = None, + return_references: REFERENCES + ) -> GenerativeReturn[Properties, CrossReferences]: ... + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[PROPERTIES] = None, + return_references: Type[TReferences] + ) -> GenerativeReturn[Properties, TReferences]: ... + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Type[TProperties], + return_references: Literal[None] = None + ) -> GenerativeReturn[TProperties, References]: ... + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Type[TProperties], + return_references: REFERENCES + ) -> GenerativeReturn[TProperties, CrossReferences]: ... + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Type[TProperties], + return_references: Type[TReferences] + ) -> GenerativeReturn[TProperties, TReferences]: ... + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[ReturnProperties[TProperties]] = None, + return_references: Optional[ReturnReferences[TReferences]] = None + ) -> GenerativeReturnType[Properties, References, TProperties, TReferences]: ... diff --git a/weaviate/collections/queries/fetch_objects_by_ids/query.py b/weaviate/collections/queries/fetch_objects_by_ids/query.py new file mode 100644 index 000000000..bc791f17c --- /dev/null +++ b/weaviate/collections/queries/fetch_objects_by_ids/query.py @@ -0,0 +1,64 @@ +from typing import Generic, Iterable, Optional + +from weaviate import syncify +from weaviate.collections.classes.filters import Filter +from weaviate.collections.classes.grpc import METADATA, Sorting +from weaviate.collections.classes.internal import ( + QueryReturnType, + ReturnProperties, + ReturnReferences, + _QueryOptions, +) +from weaviate.collections.classes.types import Properties, TProperties, References, TReferences +from weaviate.collections.queries.base import _Base +from weaviate.proto.v1 import search_get_pb2 +from weaviate.types import UUID, INCLUDE_VECTOR + + +class _FetchObjectsByIDsQueryAsync(Generic[Properties, References], _Base[Properties, References]): + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[ReturnProperties[TProperties]] = None, + return_references: Optional[ReturnReferences[TReferences]] = None + ) -> QueryReturnType[Properties, References, TProperties, TReferences]: + """Special case of fetch_objects based on filters on uuid""" + if not ids: + res = search_get_pb2.SearchReply(results=None) + else: + res = await self._query.get( + limit=limit, + offset=offset, + after=after, + filters=Filter.any_of([Filter.by_id().equal(uuid) for uuid in ids]), + sort=sort, + return_metadata=self._parse_return_metadata(return_metadata, include_vector), + return_properties=self._parse_return_properties(return_properties), + return_references=self._parse_return_references(return_references), + ) + return self._result_to_query_return( + res, + _QueryOptions.from_input( + return_metadata, + return_properties, + include_vector, + self._references, + return_references, + ), + return_properties, + return_references, + ) + + +@syncify.convert +class _FetchObjectsByIDsQuery( + Generic[Properties, References], _FetchObjectsByIDsQueryAsync[Properties, References] +): + pass diff --git a/weaviate/collections/queries/fetch_objects_by_ids/query.pyi b/weaviate/collections/queries/fetch_objects_by_ids/query.pyi new file mode 100644 index 000000000..3c4575f58 --- /dev/null +++ b/weaviate/collections/queries/fetch_objects_by_ids/query.pyi @@ -0,0 +1,213 @@ +from typing import Generic, Iterable, Literal, Optional, Type, overload + +from weaviate.collections.classes.grpc import METADATA, PROPERTIES, REFERENCES, Sorting +from weaviate.collections.classes.internal import ( + QueryReturn, + CrossReferences, + ReturnProperties, + ReturnReferences, + QueryReturnType, +) +from weaviate.collections.classes.types import Properties, TProperties, References, TReferences +from weaviate.collections.queries.base import _Base +from weaviate.types import UUID, INCLUDE_VECTOR + +class _FetchObjectsByIDsQueryAsync(Generic[Properties, References], _Base[Properties, References]): + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[PROPERTIES] = None, + return_references: Literal[None] = None + ) -> QueryReturn[Properties, References]: ... + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[PROPERTIES] = None, + return_references: REFERENCES + ) -> QueryReturn[Properties, CrossReferences]: ... + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[PROPERTIES] = None, + return_references: Type[TReferences] + ) -> QueryReturn[Properties, TReferences]: ... + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Type[TProperties], + return_references: Literal[None] = None + ) -> QueryReturn[TProperties, References]: ... + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Type[TProperties], + return_references: REFERENCES + ) -> QueryReturn[TProperties, CrossReferences]: ... + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Type[TProperties], + return_references: Type[TReferences] + ) -> QueryReturn[TProperties, TReferences]: ... + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[ReturnProperties[TProperties]] = None, + return_references: Optional[ReturnReferences[TReferences]] = None + ) -> QueryReturnType[Properties, References, TProperties, TReferences]: ... + +class _FetchObjectsByIDsQuery(Generic[Properties, References], _Base[Properties, References]): + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[PROPERTIES] = None, + return_references: Literal[None] = None + ) -> QueryReturn[Properties, References]: ... + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[PROPERTIES] = None, + return_references: REFERENCES + ) -> QueryReturn[Properties, CrossReferences]: ... + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[PROPERTIES] = None, + return_references: Type[TReferences] + ) -> QueryReturn[Properties, TReferences]: ... + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Type[TProperties], + return_references: Literal[None] = None + ) -> QueryReturn[TProperties, References]: ... + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Type[TProperties], + return_references: REFERENCES + ) -> QueryReturn[TProperties, CrossReferences]: ... + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Type[TProperties], + return_references: Type[TReferences] + ) -> QueryReturn[TProperties, TReferences]: ... + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[ReturnProperties[TProperties]] = None, + return_references: Optional[ReturnReferences[TReferences]] = None + ) -> QueryReturnType[Properties, References, TProperties, TReferences]: ... diff --git a/weaviate/collections/query.py b/weaviate/collections/query.py index b3a9a3f4f..5e4875e09 100644 --- a/weaviate/collections/query.py +++ b/weaviate/collections/query.py @@ -7,6 +7,10 @@ _FetchObjectByIDQueryAsync, _FetchObjectByIDQuery, ) +from weaviate.collections.queries.fetch_objects_by_ids import ( + _FetchObjectsByIDsQueryAsync, + _FetchObjectsByIDsQuery, +) from weaviate.collections.queries.fetch_objects import _FetchObjectsQueryAsync, _FetchObjectsQuery from weaviate.collections.queries.hybrid import _HybridQueryAsync, _HybridQuery from weaviate.collections.queries.near_image import _NearImageQueryAsync, _NearImageQuery @@ -20,6 +24,7 @@ class _QueryCollectionAsync( Generic[TProperties, References], _BM25QueryAsync[TProperties, References], _FetchObjectByIDQueryAsync[TProperties, References], + _FetchObjectsByIDsQueryAsync[TProperties, References], _FetchObjectsQueryAsync[TProperties, References], _HybridQueryAsync[TProperties, References], _NearImageQueryAsync[TProperties, References], @@ -35,6 +40,7 @@ class _QueryCollection( Generic[TProperties, References], _BM25Query[TProperties, References], _FetchObjectByIDQuery[TProperties, References], + _FetchObjectsByIDsQuery[TProperties, References], _FetchObjectsQuery[TProperties, References], _HybridQuery[TProperties, References], _NearImageQuery[TProperties, References], From ea851be47cf5b6878ebab6be351f56635b5946b8 Mon Sep 17 00:00:00 2001 From: Tibor Reiss Date: Wed, 14 Aug 2024 20:52:21 +0200 Subject: [PATCH 2/7] Remove generate.fetch_objects_by_ids --- integration/test_collection_async.py | 37 --- weaviate/collections/generate.py | 6 - .../queries/fetch_objects_by_ids/__init__.py | 3 - .../queries/fetch_objects_by_ids/generate.py | 73 ----- .../queries/fetch_objects_by_ids/generate.pyi | 260 ------------------ 5 files changed, 379 deletions(-) delete mode 100644 weaviate/collections/queries/fetch_objects_by_ids/generate.py delete mode 100644 weaviate/collections/queries/fetch_objects_by_ids/generate.pyi diff --git a/integration/test_collection_async.py b/integration/test_collection_async.py index 711208ad7..fcc3a3bf3 100644 --- a/integration/test_collection_async.py +++ b/integration/test_collection_async.py @@ -242,40 +242,3 @@ async def test_generate(async_openai_collection: AsyncOpenAICollectionFactory) - assert len(res.objects) == 2 for obj in res.objects: assert obj.generated is not None - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "ids, expected_len, expected", - [ - ([], 0, set()), - ((), 0, set()), - ([UUID3, ], 1, {UUID3, }), - ([UUID1, UUID2], 2, {UUID1, UUID2}), - ((UUID1, UUID3), 2, {UUID1, UUID3}), - ((UUID1, UUID3, UUID3), 2, {UUID1, UUID3}), - ], -) -async def test_generate_by_ids( - async_collection_factory: AsyncCollectionFactory, - ids: Iterable[UUID], - expected_len: int, - expected: set, -) -> None: - collection = await async_collection_factory( - properties=[ - Property(name="name", data_type=DataType.TEXT), - ], - vectorizer_config=wvc.config.Configure.Vectorizer.none(), - ) - await collection.data.insert_many( - [ - DataObject(properties={"name": "first"}, uuid=UUID1), - DataObject(properties={"name": "second"}, uuid=UUID2), - DataObject(properties={"name": "third"}, uuid=UUID3), - ] - ) - res = await collection.generate.fetch_objects_by_ids(ids) - assert res is not None - assert len(res.objects) == expected_len - assert {o.uuid for o in res.objects} == expected diff --git a/weaviate/collections/generate.py b/weaviate/collections/generate.py index e8122c19c..b818e86c0 100644 --- a/weaviate/collections/generate.py +++ b/weaviate/collections/generate.py @@ -7,10 +7,6 @@ _FetchObjectsGenerateAsync, _FetchObjectsGenerate, ) -from weaviate.collections.queries.fetch_objects_by_ids import ( - _FetchObjectsByIDsGenerateAsync, - _FetchObjectsByIDsGenerate, -) from weaviate.collections.queries.hybrid import _HybridGenerateAsync, _HybridGenerate from weaviate.collections.queries.near_image import _NearImageGenerateAsync, _NearImageGenerate from weaviate.collections.queries.near_media import _NearMediaGenerateAsync, _NearMediaGenerate @@ -23,7 +19,6 @@ class _GenerateCollectionAsync( Generic[TProperties, References], _BM25GenerateAsync[TProperties, References], _FetchObjectsGenerateAsync[TProperties, References], - _FetchObjectsByIDsGenerateAsync[TProperties, References], _HybridGenerateAsync[TProperties, References], _NearImageGenerateAsync[TProperties, References], _NearMediaGenerateAsync[TProperties, References], @@ -38,7 +33,6 @@ class _GenerateCollection( Generic[TProperties, References], _BM25Generate[TProperties, References], _FetchObjectsGenerate[TProperties, References], - _FetchObjectsByIDsGenerate[TProperties, References], _HybridGenerate[TProperties, References], _NearImageGenerate[TProperties, References], _NearMediaGenerate[TProperties, References], diff --git a/weaviate/collections/queries/fetch_objects_by_ids/__init__.py b/weaviate/collections/queries/fetch_objects_by_ids/__init__.py index 4cf61115f..8724f0aa8 100644 --- a/weaviate/collections/queries/fetch_objects_by_ids/__init__.py +++ b/weaviate/collections/queries/fetch_objects_by_ids/__init__.py @@ -1,9 +1,6 @@ -from .generate import _FetchObjectsByIDsGenerateAsync, _FetchObjectsByIDsGenerate from .query import _FetchObjectsByIDsQueryAsync, _FetchObjectsByIDsQuery __all__ = [ - "_FetchObjectsByIDsGenerate", - "_FetchObjectsByIDsGenerateAsync", "_FetchObjectsByIDsQuery", "_FetchObjectsByIDsQueryAsync", ] diff --git a/weaviate/collections/queries/fetch_objects_by_ids/generate.py b/weaviate/collections/queries/fetch_objects_by_ids/generate.py deleted file mode 100644 index 658320ca6..000000000 --- a/weaviate/collections/queries/fetch_objects_by_ids/generate.py +++ /dev/null @@ -1,73 +0,0 @@ -from typing import Generic, Iterable, List, Optional - -from weaviate import syncify -from weaviate.collections.classes.filters import Filter -from weaviate.collections.classes.grpc import METADATA, Sorting -from weaviate.collections.classes.internal import ( - GenerativeReturnType, - _Generative, - ReturnProperties, - ReturnReferences, - _QueryOptions, -) -from weaviate.collections.classes.types import Properties, TProperties, References, TReferences -from weaviate.collections.queries.base import _Base -from weaviate.proto.v1 import search_get_pb2 -from weaviate.types import UUID, INCLUDE_VECTOR - - -class _FetchObjectsByIDsGenerateAsync(Generic[Properties, References], _Base[Properties, References]): - async def fetch_objects_by_ids( - self, - ids: Iterable[UUID], - *, - single_prompt: Optional[str] = None, - grouped_task: Optional[str] = None, - grouped_properties: Optional[List[str]] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - after: Optional[UUID] = None, - sort: Optional[Sorting] = None, - include_vector: INCLUDE_VECTOR = False, - return_metadata: Optional[METADATA] = None, - return_properties: Optional[ReturnProperties[TProperties]] = None, - return_references: Optional[ReturnReferences[TReferences]] = None - ) -> GenerativeReturnType[Properties, References, TProperties, TReferences]: - """Special case of fetch_objects based on filters on uuid""" - if not ids: - res = search_get_pb2.SearchReply(results=None) - else: - res = await self._query.get( - limit=limit, - offset=offset, - after=after, - filters=Filter.any_of([Filter.by_id().equal(uuid) for uuid in ids]), - sort=sort, - return_metadata=self._parse_return_metadata(return_metadata, include_vector), - return_properties=self._parse_return_properties(return_properties), - return_references=self._parse_return_references(return_references), - generative=_Generative( - single=single_prompt, - grouped=grouped_task, - grouped_properties=grouped_properties, - ), - ) - return self._result_to_generative_query_return( - res, - _QueryOptions.from_input( - return_metadata, - return_properties, - include_vector, - self._references, - return_references, - ), - return_properties, - return_references, - ) - - -@syncify.convert -class _FetchObjectsByIDsGenerate( - Generic[Properties, References], _FetchObjectsByIDsGenerateAsync[Properties, References] -): - pass diff --git a/weaviate/collections/queries/fetch_objects_by_ids/generate.pyi b/weaviate/collections/queries/fetch_objects_by_ids/generate.pyi deleted file mode 100644 index a15d86fdb..000000000 --- a/weaviate/collections/queries/fetch_objects_by_ids/generate.pyi +++ /dev/null @@ -1,260 +0,0 @@ -from typing import Generic, Iterable, List, Literal, Optional, Type, overload - -from weaviate.collections.classes.grpc import ( - METADATA, - PROPERTIES, - REFERENCES, - Sorting, -) -from weaviate.collections.classes.internal import ( - GenerativeReturn, - CrossReferences, - ReturnProperties, - ReturnReferences, - GenerativeReturnType, -) -from weaviate.collections.classes.types import Properties, TProperties, References, TReferences -from weaviate.collections.queries.base import _Base -from weaviate.types import UUID, INCLUDE_VECTOR - -class _FetchObjectsByIDsGenerateAsync(Generic[Properties, References], _Base[Properties, References]): - @overload - async def fetch_objects_by_ids( - self, - ids: Iterable[UUID], - *, - single_prompt: Optional[str] = None, - grouped_task: Optional[str] = None, - grouped_properties: Optional[List[str]] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - after: Optional[UUID] = None, - sort: Optional[Sorting] = None, - include_vector: INCLUDE_VECTOR = False, - return_metadata: Optional[METADATA] = None, - return_properties: Optional[PROPERTIES] = None, - return_references: Literal[None] = None - ) -> GenerativeReturn[Properties, References]: ... - @overload - async def fetch_objects_by_ids( - self, - ids: Iterable[UUID], - *, - single_prompt: Optional[str] = None, - grouped_task: Optional[str] = None, - grouped_properties: Optional[List[str]] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - after: Optional[UUID] = None, - sort: Optional[Sorting] = None, - include_vector: INCLUDE_VECTOR = False, - return_metadata: Optional[METADATA] = None, - return_properties: Optional[PROPERTIES] = None, - return_references: REFERENCES - ) -> GenerativeReturn[Properties, CrossReferences]: ... - @overload - async def fetch_objects_by_ids( - self, - ids: Iterable[UUID], - *, - single_prompt: Optional[str] = None, - grouped_task: Optional[str] = None, - grouped_properties: Optional[List[str]] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - after: Optional[UUID] = None, - sort: Optional[Sorting] = None, - include_vector: INCLUDE_VECTOR = False, - return_metadata: Optional[METADATA] = None, - return_properties: Optional[PROPERTIES] = None, - return_references: Type[TReferences] - ) -> GenerativeReturn[Properties, TReferences]: ... - @overload - async def fetch_objects_by_ids( - self, - ids: Iterable[UUID], - *, - single_prompt: Optional[str] = None, - grouped_task: Optional[str] = None, - grouped_properties: Optional[List[str]] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - after: Optional[UUID] = None, - sort: Optional[Sorting] = None, - include_vector: INCLUDE_VECTOR = False, - return_metadata: Optional[METADATA] = None, - return_properties: Type[TProperties], - return_references: Literal[None] = None - ) -> GenerativeReturn[TProperties, References]: ... - @overload - async def fetch_objects_by_ids( - self, - ids: Iterable[UUID], - *, - single_prompt: Optional[str] = None, - grouped_task: Optional[str] = None, - grouped_properties: Optional[List[str]] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - after: Optional[UUID] = None, - sort: Optional[Sorting] = None, - include_vector: INCLUDE_VECTOR = False, - return_metadata: Optional[METADATA] = None, - return_properties: Type[TProperties], - return_references: REFERENCES - ) -> GenerativeReturn[TProperties, CrossReferences]: ... - @overload - async def fetch_objects_by_ids( - self, - ids: Iterable[UUID], - *, - single_prompt: Optional[str] = None, - grouped_task: Optional[str] = None, - grouped_properties: Optional[List[str]] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - after: Optional[UUID] = None, - sort: Optional[Sorting] = None, - include_vector: INCLUDE_VECTOR = False, - return_metadata: Optional[METADATA] = None, - return_properties: Type[TProperties], - return_references: Type[TReferences] - ) -> GenerativeReturn[TProperties, TReferences]: ... - @overload - async def fetch_objects_by_ids( - self, - ids: Iterable[UUID], - *, - single_prompt: Optional[str] = None, - grouped_task: Optional[str] = None, - grouped_properties: Optional[List[str]] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - after: Optional[UUID] = None, - sort: Optional[Sorting] = None, - include_vector: INCLUDE_VECTOR = False, - return_metadata: Optional[METADATA] = None, - return_properties: Optional[ReturnProperties[TProperties]] = None, - return_references: Optional[ReturnReferences[TReferences]] = None - ) -> GenerativeReturnType[Properties, References, TProperties, TReferences]: ... - -class _FetchObjectsGenerate(Generic[Properties, References], _Base[Properties, References]): - @overload - def fetch_objects_by_ids( - self, - ids: Iterable[UUID], - *, - single_prompt: Optional[str] = None, - grouped_task: Optional[str] = None, - grouped_properties: Optional[List[str]] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - after: Optional[UUID] = None, - sort: Optional[Sorting] = None, - include_vector: INCLUDE_VECTOR = False, - return_metadata: Optional[METADATA] = None, - return_properties: Optional[PROPERTIES] = None, - return_references: Literal[None] = None - ) -> GenerativeReturn[Properties, References]: ... - @overload - def fetch_objects_by_ids( - self, - ids: Iterable[UUID], - *, - single_prompt: Optional[str] = None, - grouped_task: Optional[str] = None, - grouped_properties: Optional[List[str]] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - after: Optional[UUID] = None, - sort: Optional[Sorting] = None, - include_vector: INCLUDE_VECTOR = False, - return_metadata: Optional[METADATA] = None, - return_properties: Optional[PROPERTIES] = None, - return_references: REFERENCES - ) -> GenerativeReturn[Properties, CrossReferences]: ... - @overload - def fetch_objects_by_ids( - self, - ids: Iterable[UUID], - *, - single_prompt: Optional[str] = None, - grouped_task: Optional[str] = None, - grouped_properties: Optional[List[str]] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - after: Optional[UUID] = None, - sort: Optional[Sorting] = None, - include_vector: INCLUDE_VECTOR = False, - return_metadata: Optional[METADATA] = None, - return_properties: Optional[PROPERTIES] = None, - return_references: Type[TReferences] - ) -> GenerativeReturn[Properties, TReferences]: ... - @overload - def fetch_objects_by_ids( - self, - ids: Iterable[UUID], - *, - single_prompt: Optional[str] = None, - grouped_task: Optional[str] = None, - grouped_properties: Optional[List[str]] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - after: Optional[UUID] = None, - sort: Optional[Sorting] = None, - include_vector: INCLUDE_VECTOR = False, - return_metadata: Optional[METADATA] = None, - return_properties: Type[TProperties], - return_references: Literal[None] = None - ) -> GenerativeReturn[TProperties, References]: ... - @overload - def fetch_objects_by_ids( - self, - ids: Iterable[UUID], - *, - single_prompt: Optional[str] = None, - grouped_task: Optional[str] = None, - grouped_properties: Optional[List[str]] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - after: Optional[UUID] = None, - sort: Optional[Sorting] = None, - include_vector: INCLUDE_VECTOR = False, - return_metadata: Optional[METADATA] = None, - return_properties: Type[TProperties], - return_references: REFERENCES - ) -> GenerativeReturn[TProperties, CrossReferences]: ... - @overload - def fetch_objects_by_ids( - self, - ids: Iterable[UUID], - *, - single_prompt: Optional[str] = None, - grouped_task: Optional[str] = None, - grouped_properties: Optional[List[str]] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - after: Optional[UUID] = None, - sort: Optional[Sorting] = None, - include_vector: INCLUDE_VECTOR = False, - return_metadata: Optional[METADATA] = None, - return_properties: Type[TProperties], - return_references: Type[TReferences] - ) -> GenerativeReturn[TProperties, TReferences]: ... - @overload - def fetch_objects_by_ids( - self, - ids: Iterable[UUID], - *, - single_prompt: Optional[str] = None, - grouped_task: Optional[str] = None, - grouped_properties: Optional[List[str]] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - after: Optional[UUID] = None, - sort: Optional[Sorting] = None, - include_vector: INCLUDE_VECTOR = False, - return_metadata: Optional[METADATA] = None, - return_properties: Optional[ReturnProperties[TProperties]] = None, - return_references: Optional[ReturnReferences[TReferences]] = None - ) -> GenerativeReturnType[Properties, References, TProperties, TReferences]: ... From 368adf09806ad4fcf845e6b70405a9080b8e59ff Mon Sep 17 00:00:00 2001 From: Tibor Reiss Date: Thu, 15 Aug 2024 13:39:19 +0200 Subject: [PATCH 3/7] Linting --- integration/test_collection_async.py | 10 +++++++++- integration/test_collection_filter.py | 10 +++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/integration/test_collection_async.py b/integration/test_collection_async.py index fcc3a3bf3..7333e0afa 100644 --- a/integration/test_collection_async.py +++ b/integration/test_collection_async.py @@ -43,7 +43,15 @@ async def test_fetch_objects(async_collection_factory: AsyncCollectionFactory) - [ ([], 0, set()), ((), 0, set()), - ([UUID3, ], 1, {UUID3, }), + ( + [ + UUID3, + ], + 1, + { + UUID3, + }, + ), ([UUID1, UUID2], 2, {UUID1, UUID2}), ((UUID1, UUID3), 2, {UUID1, UUID3}), ((UUID1, UUID3, UUID3), 2, {UUID1, UUID3}), diff --git a/integration/test_collection_filter.py b/integration/test_collection_filter.py index 814838537..01fc7fe9f 100644 --- a/integration/test_collection_filter.py +++ b/integration/test_collection_filter.py @@ -554,7 +554,15 @@ def test_filter_id(collection_factory: CollectionFactory, weav_filter: _FilterVa [ ([], 0, set()), ((), 0, set()), - ([UUID3, ], 1, {UUID3, }), + ( + [ + UUID3, + ], + 1, + { + UUID3, + }, + ), ([UUID1, UUID2], 2, {UUID1, UUID2}), ((UUID1, UUID3), 2, {UUID1, UUID3}), ((UUID1, UUID3, UUID3), 2, {UUID1, UUID3}), From 0fe59a471eef75e0432ff31904231870c22d21df Mon Sep 17 00:00:00 2001 From: Tibor Reiss Date: Thu, 15 Aug 2024 13:41:34 +0200 Subject: [PATCH 4/7] Revert "Remove generate.fetch_objects_by_ids" This reverts commit ea851be47cf5b6878ebab6be351f56635b5946b8. --- integration/test_collection_async.py | 37 +++ weaviate/collections/generate.py | 6 + .../queries/fetch_objects_by_ids/__init__.py | 3 + .../queries/fetch_objects_by_ids/generate.py | 73 +++++ .../queries/fetch_objects_by_ids/generate.pyi | 260 ++++++++++++++++++ 5 files changed, 379 insertions(+) create mode 100644 weaviate/collections/queries/fetch_objects_by_ids/generate.py create mode 100644 weaviate/collections/queries/fetch_objects_by_ids/generate.pyi diff --git a/integration/test_collection_async.py b/integration/test_collection_async.py index 7333e0afa..ac73dff46 100644 --- a/integration/test_collection_async.py +++ b/integration/test_collection_async.py @@ -250,3 +250,40 @@ async def test_generate(async_openai_collection: AsyncOpenAICollectionFactory) - assert len(res.objects) == 2 for obj in res.objects: assert obj.generated is not None + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "ids, expected_len, expected", + [ + ([], 0, set()), + ((), 0, set()), + ([UUID3, ], 1, {UUID3, }), + ([UUID1, UUID2], 2, {UUID1, UUID2}), + ((UUID1, UUID3), 2, {UUID1, UUID3}), + ((UUID1, UUID3, UUID3), 2, {UUID1, UUID3}), + ], +) +async def test_generate_by_ids( + async_collection_factory: AsyncCollectionFactory, + ids: Iterable[UUID], + expected_len: int, + expected: set, +) -> None: + collection = await async_collection_factory( + properties=[ + Property(name="name", data_type=DataType.TEXT), + ], + vectorizer_config=wvc.config.Configure.Vectorizer.none(), + ) + await collection.data.insert_many( + [ + DataObject(properties={"name": "first"}, uuid=UUID1), + DataObject(properties={"name": "second"}, uuid=UUID2), + DataObject(properties={"name": "third"}, uuid=UUID3), + ] + ) + res = await collection.generate.fetch_objects_by_ids(ids) + assert res is not None + assert len(res.objects) == expected_len + assert {o.uuid for o in res.objects} == expected diff --git a/weaviate/collections/generate.py b/weaviate/collections/generate.py index b818e86c0..e8122c19c 100644 --- a/weaviate/collections/generate.py +++ b/weaviate/collections/generate.py @@ -7,6 +7,10 @@ _FetchObjectsGenerateAsync, _FetchObjectsGenerate, ) +from weaviate.collections.queries.fetch_objects_by_ids import ( + _FetchObjectsByIDsGenerateAsync, + _FetchObjectsByIDsGenerate, +) from weaviate.collections.queries.hybrid import _HybridGenerateAsync, _HybridGenerate from weaviate.collections.queries.near_image import _NearImageGenerateAsync, _NearImageGenerate from weaviate.collections.queries.near_media import _NearMediaGenerateAsync, _NearMediaGenerate @@ -19,6 +23,7 @@ class _GenerateCollectionAsync( Generic[TProperties, References], _BM25GenerateAsync[TProperties, References], _FetchObjectsGenerateAsync[TProperties, References], + _FetchObjectsByIDsGenerateAsync[TProperties, References], _HybridGenerateAsync[TProperties, References], _NearImageGenerateAsync[TProperties, References], _NearMediaGenerateAsync[TProperties, References], @@ -33,6 +38,7 @@ class _GenerateCollection( Generic[TProperties, References], _BM25Generate[TProperties, References], _FetchObjectsGenerate[TProperties, References], + _FetchObjectsByIDsGenerate[TProperties, References], _HybridGenerate[TProperties, References], _NearImageGenerate[TProperties, References], _NearMediaGenerate[TProperties, References], diff --git a/weaviate/collections/queries/fetch_objects_by_ids/__init__.py b/weaviate/collections/queries/fetch_objects_by_ids/__init__.py index 8724f0aa8..4cf61115f 100644 --- a/weaviate/collections/queries/fetch_objects_by_ids/__init__.py +++ b/weaviate/collections/queries/fetch_objects_by_ids/__init__.py @@ -1,6 +1,9 @@ +from .generate import _FetchObjectsByIDsGenerateAsync, _FetchObjectsByIDsGenerate from .query import _FetchObjectsByIDsQueryAsync, _FetchObjectsByIDsQuery __all__ = [ + "_FetchObjectsByIDsGenerate", + "_FetchObjectsByIDsGenerateAsync", "_FetchObjectsByIDsQuery", "_FetchObjectsByIDsQueryAsync", ] diff --git a/weaviate/collections/queries/fetch_objects_by_ids/generate.py b/weaviate/collections/queries/fetch_objects_by_ids/generate.py new file mode 100644 index 000000000..658320ca6 --- /dev/null +++ b/weaviate/collections/queries/fetch_objects_by_ids/generate.py @@ -0,0 +1,73 @@ +from typing import Generic, Iterable, List, Optional + +from weaviate import syncify +from weaviate.collections.classes.filters import Filter +from weaviate.collections.classes.grpc import METADATA, Sorting +from weaviate.collections.classes.internal import ( + GenerativeReturnType, + _Generative, + ReturnProperties, + ReturnReferences, + _QueryOptions, +) +from weaviate.collections.classes.types import Properties, TProperties, References, TReferences +from weaviate.collections.queries.base import _Base +from weaviate.proto.v1 import search_get_pb2 +from weaviate.types import UUID, INCLUDE_VECTOR + + +class _FetchObjectsByIDsGenerateAsync(Generic[Properties, References], _Base[Properties, References]): + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[ReturnProperties[TProperties]] = None, + return_references: Optional[ReturnReferences[TReferences]] = None + ) -> GenerativeReturnType[Properties, References, TProperties, TReferences]: + """Special case of fetch_objects based on filters on uuid""" + if not ids: + res = search_get_pb2.SearchReply(results=None) + else: + res = await self._query.get( + limit=limit, + offset=offset, + after=after, + filters=Filter.any_of([Filter.by_id().equal(uuid) for uuid in ids]), + sort=sort, + return_metadata=self._parse_return_metadata(return_metadata, include_vector), + return_properties=self._parse_return_properties(return_properties), + return_references=self._parse_return_references(return_references), + generative=_Generative( + single=single_prompt, + grouped=grouped_task, + grouped_properties=grouped_properties, + ), + ) + return self._result_to_generative_query_return( + res, + _QueryOptions.from_input( + return_metadata, + return_properties, + include_vector, + self._references, + return_references, + ), + return_properties, + return_references, + ) + + +@syncify.convert +class _FetchObjectsByIDsGenerate( + Generic[Properties, References], _FetchObjectsByIDsGenerateAsync[Properties, References] +): + pass diff --git a/weaviate/collections/queries/fetch_objects_by_ids/generate.pyi b/weaviate/collections/queries/fetch_objects_by_ids/generate.pyi new file mode 100644 index 000000000..a15d86fdb --- /dev/null +++ b/weaviate/collections/queries/fetch_objects_by_ids/generate.pyi @@ -0,0 +1,260 @@ +from typing import Generic, Iterable, List, Literal, Optional, Type, overload + +from weaviate.collections.classes.grpc import ( + METADATA, + PROPERTIES, + REFERENCES, + Sorting, +) +from weaviate.collections.classes.internal import ( + GenerativeReturn, + CrossReferences, + ReturnProperties, + ReturnReferences, + GenerativeReturnType, +) +from weaviate.collections.classes.types import Properties, TProperties, References, TReferences +from weaviate.collections.queries.base import _Base +from weaviate.types import UUID, INCLUDE_VECTOR + +class _FetchObjectsByIDsGenerateAsync(Generic[Properties, References], _Base[Properties, References]): + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[PROPERTIES] = None, + return_references: Literal[None] = None + ) -> GenerativeReturn[Properties, References]: ... + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[PROPERTIES] = None, + return_references: REFERENCES + ) -> GenerativeReturn[Properties, CrossReferences]: ... + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[PROPERTIES] = None, + return_references: Type[TReferences] + ) -> GenerativeReturn[Properties, TReferences]: ... + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Type[TProperties], + return_references: Literal[None] = None + ) -> GenerativeReturn[TProperties, References]: ... + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Type[TProperties], + return_references: REFERENCES + ) -> GenerativeReturn[TProperties, CrossReferences]: ... + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Type[TProperties], + return_references: Type[TReferences] + ) -> GenerativeReturn[TProperties, TReferences]: ... + @overload + async def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[ReturnProperties[TProperties]] = None, + return_references: Optional[ReturnReferences[TReferences]] = None + ) -> GenerativeReturnType[Properties, References, TProperties, TReferences]: ... + +class _FetchObjectsGenerate(Generic[Properties, References], _Base[Properties, References]): + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[PROPERTIES] = None, + return_references: Literal[None] = None + ) -> GenerativeReturn[Properties, References]: ... + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[PROPERTIES] = None, + return_references: REFERENCES + ) -> GenerativeReturn[Properties, CrossReferences]: ... + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[PROPERTIES] = None, + return_references: Type[TReferences] + ) -> GenerativeReturn[Properties, TReferences]: ... + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Type[TProperties], + return_references: Literal[None] = None + ) -> GenerativeReturn[TProperties, References]: ... + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Type[TProperties], + return_references: REFERENCES + ) -> GenerativeReturn[TProperties, CrossReferences]: ... + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Type[TProperties], + return_references: Type[TReferences] + ) -> GenerativeReturn[TProperties, TReferences]: ... + @overload + def fetch_objects_by_ids( + self, + ids: Iterable[UUID], + *, + single_prompt: Optional[str] = None, + grouped_task: Optional[str] = None, + grouped_properties: Optional[List[str]] = None, + limit: Optional[int] = None, + offset: Optional[int] = None, + after: Optional[UUID] = None, + sort: Optional[Sorting] = None, + include_vector: INCLUDE_VECTOR = False, + return_metadata: Optional[METADATA] = None, + return_properties: Optional[ReturnProperties[TProperties]] = None, + return_references: Optional[ReturnReferences[TReferences]] = None + ) -> GenerativeReturnType[Properties, References, TProperties, TReferences]: ... From 75e010f366f305cfe73a5a470f8cf4fa12bcd9d6 Mon Sep 17 00:00:00 2001 From: Tibor Reiss Date: Thu, 15 Aug 2024 13:44:13 +0200 Subject: [PATCH 5/7] Linting --- integration/test_collection_async.py | 10 +++++++++- .../queries/fetch_objects_by_ids/generate.py | 4 +++- .../queries/fetch_objects_by_ids/generate.pyi | 4 +++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/integration/test_collection_async.py b/integration/test_collection_async.py index ac73dff46..6cfe540a6 100644 --- a/integration/test_collection_async.py +++ b/integration/test_collection_async.py @@ -258,7 +258,15 @@ async def test_generate(async_openai_collection: AsyncOpenAICollectionFactory) - [ ([], 0, set()), ((), 0, set()), - ([UUID3, ], 1, {UUID3, }), + ( + [ + UUID3, + ], + 1, + { + UUID3, + }, + ), ([UUID1, UUID2], 2, {UUID1, UUID2}), ((UUID1, UUID3), 2, {UUID1, UUID3}), ((UUID1, UUID3, UUID3), 2, {UUID1, UUID3}), diff --git a/weaviate/collections/queries/fetch_objects_by_ids/generate.py b/weaviate/collections/queries/fetch_objects_by_ids/generate.py index 658320ca6..15ae18ea1 100644 --- a/weaviate/collections/queries/fetch_objects_by_ids/generate.py +++ b/weaviate/collections/queries/fetch_objects_by_ids/generate.py @@ -16,7 +16,9 @@ from weaviate.types import UUID, INCLUDE_VECTOR -class _FetchObjectsByIDsGenerateAsync(Generic[Properties, References], _Base[Properties, References]): +class _FetchObjectsByIDsGenerateAsync( + Generic[Properties, References], _Base[Properties, References] +): async def fetch_objects_by_ids( self, ids: Iterable[UUID], diff --git a/weaviate/collections/queries/fetch_objects_by_ids/generate.pyi b/weaviate/collections/queries/fetch_objects_by_ids/generate.pyi index a15d86fdb..f5b686980 100644 --- a/weaviate/collections/queries/fetch_objects_by_ids/generate.pyi +++ b/weaviate/collections/queries/fetch_objects_by_ids/generate.pyi @@ -17,7 +17,9 @@ from weaviate.collections.classes.types import Properties, TProperties, Referenc from weaviate.collections.queries.base import _Base from weaviate.types import UUID, INCLUDE_VECTOR -class _FetchObjectsByIDsGenerateAsync(Generic[Properties, References], _Base[Properties, References]): +class _FetchObjectsByIDsGenerateAsync( + Generic[Properties, References], _Base[Properties, References] +): @overload async def fetch_objects_by_ids( self, From 90f19816f9520a095cf3b56f2bdb41aac9326732 Mon Sep 17 00:00:00 2001 From: Tibor Reiss Date: Thu, 15 Aug 2024 13:52:04 +0200 Subject: [PATCH 6/7] Adjust generate test to use openai --- integration/test_collection_async.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/integration/test_collection_async.py b/integration/test_collection_async.py index 6cfe540a6..5dcaad634 100644 --- a/integration/test_collection_async.py +++ b/integration/test_collection_async.py @@ -273,25 +273,29 @@ async def test_generate(async_openai_collection: AsyncOpenAICollectionFactory) - ], ) async def test_generate_by_ids( - async_collection_factory: AsyncCollectionFactory, + async_openai_collection: AsyncOpenAICollectionFactory, ids: Iterable[UUID], expected_len: int, expected: set, ) -> None: - collection = await async_collection_factory( - properties=[ - Property(name="name", data_type=DataType.TEXT), - ], + collection = await async_openai_collection( vectorizer_config=wvc.config.Configure.Vectorizer.none(), ) await collection.data.insert_many( [ - DataObject(properties={"name": "first"}, uuid=UUID1), - DataObject(properties={"name": "second"}, uuid=UUID2), - DataObject(properties={"name": "third"}, uuid=UUID3), + DataObject(properties={"text": "John Doe"}, uuid=UUID1), + DataObject(properties={"text": "Jane Doe"}, uuid=UUID2), + DataObject(properties={"text": "J. Doe"}, uuid=UUID3), ] ) - res = await collection.generate.fetch_objects_by_ids(ids) + res = await collection.generate.fetch_objects_by_ids( + ids, + single_prompt="Who is this? {text}", + grouped_task="Who are these people?", + ) assert res is not None + assert res.generated is not None assert len(res.objects) == expected_len assert {o.uuid for o in res.objects} == expected + for obj in res.objects: + assert obj.generated is not None From dcfb8b2312de34f6f5cd7de81611bc3976114b4f Mon Sep 17 00:00:00 2001 From: Tibor Reiss Date: Thu, 15 Aug 2024 14:05:15 +0200 Subject: [PATCH 7/7] Fix typo --- weaviate/collections/queries/fetch_objects_by_ids/generate.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weaviate/collections/queries/fetch_objects_by_ids/generate.pyi b/weaviate/collections/queries/fetch_objects_by_ids/generate.pyi index f5b686980..13d01ee8d 100644 --- a/weaviate/collections/queries/fetch_objects_by_ids/generate.pyi +++ b/weaviate/collections/queries/fetch_objects_by_ids/generate.pyi @@ -140,7 +140,7 @@ class _FetchObjectsByIDsGenerateAsync( return_references: Optional[ReturnReferences[TReferences]] = None ) -> GenerativeReturnType[Properties, References, TProperties, TReferences]: ... -class _FetchObjectsGenerate(Generic[Properties, References], _Base[Properties, References]): +class _FetchObjectsByIDsGenerate(Generic[Properties, References], _Base[Properties, References]): @overload def fetch_objects_by_ids( self,