From ac5d45f11572a0c68df44258f2cfe3dd79174d0f Mon Sep 17 00:00:00 2001 From: fern-api <115122769+fern-api[bot]@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:29:33 +0000 Subject: [PATCH] Release 0.2.38 --- poetry.lock | 19 +- pyproject.toml | 3 +- reference.md | 369 +++++++++++--- src/vectara/__init__.py | 16 + src/vectara/chats/client.py | 15 +- src/vectara/core/client_wrapper.py | 2 +- src/vectara/corpora/client.py | 93 ++-- src/vectara/documents/client.py | 32 +- src/vectara/index/__init__.py | 2 + src/vectara/index/client.py | 459 ++++++++++++++++++ src/vectara/types/__init__.py | 14 + src/vectara/types/app_client.py | 4 +- src/vectara/types/cell.py | 51 ++ src/vectara/types/chat_full_response.py | 4 +- src/vectara/types/citation_parameters.py | 9 +- src/vectara/types/context_configuration.py | 16 +- src/vectara/types/core_document.py | 6 + src/vectara/types/core_document_part.py | 5 + src/vectara/types/corpus.py | 2 +- .../types/customer_specific_reranker.py | 8 +- src/vectara/types/data.py | 33 ++ src/vectara/types/document.py | 6 + src/vectara/types/generation_info.py | 2 +- src/vectara/types/generation_parameters.py | 11 +- src/vectara/types/header.py | 6 + src/vectara/types/individual_search_result.py | 8 +- src/vectara/types/mmr_reranker.py | 8 +- src/vectara/types/none_reranker.py | 7 +- src/vectara/types/row.py | 6 + .../types/structured_document_section.py | 10 +- src/vectara/types/table.py | 42 ++ src/vectara/types/table_extraction_config.py | 27 ++ src/vectara/types/update_document_request.py | 29 ++ src/vectara/types/user_function_reranker.py | 8 +- src/vectara/upload/client.py | 49 +- 35 files changed, 1178 insertions(+), 203 deletions(-) create mode 100644 src/vectara/index/__init__.py create mode 100644 src/vectara/index/client.py create mode 100644 src/vectara/types/cell.py create mode 100644 src/vectara/types/data.py create mode 100644 src/vectara/types/header.py create mode 100644 src/vectara/types/row.py create mode 100644 
src/vectara/types/table.py create mode 100644 src/vectara/types/table_extraction_config.py create mode 100644 src/vectara/types/update_document_request.py diff --git a/poetry.lock b/poetry.lock index 514f6a4..15fa9e2 100644 --- a/poetry.lock +++ b/poetry.lock @@ -38,13 +38,13 @@ trio = ["trio (>=0.26.1)"] [[package]] name = "certifi" -version = "2024.8.30" +version = "2024.12.14" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" files = [ - {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"}, - {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, + {file = "certifi-2024.12.14-py3-none-any.whl", hash = "sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56"}, + {file = "certifi-2024.12.14.tar.gz", hash = "sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db"}, ] [[package]] @@ -128,6 +128,17 @@ http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] zstd = ["zstandard (>=0.18.0)"] +[[package]] +name = "httpx-sse" +version = "0.4.0" +description = "Consume Server-Sent Event (SSE) messages with HTTPX." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "httpx-sse-0.4.0.tar.gz", hash = "sha256:1e81a3a3070ce322add1d3529ed42eb5f70817f45ed6ec915ab753f961139721"}, + {file = "httpx_sse-0.4.0-py3-none-any.whl", hash = "sha256:f329af6eae57eaa2bdfd962b42524764af68075ea87370a2de920af5341e318f"}, +] + [[package]] name = "idna" version = "3.10" @@ -610,4 +621,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "5b7b4ffb565a0c697e7bb8a94d3facab68a75f50bf298682266a2f61c1f3d46c" +content-hash = "80481ae3d8e01d3969da1a60c74bff2c2d94f9d4cc5947cf2f5aa67bbe1aaeb2" diff --git a/pyproject.toml b/pyproject.toml index 96bd968..5ca45e9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "vectara" -version = "0.2.37" +version = "0.2.38" description = "" readme = "README.md" authors = [] @@ -34,6 +34,7 @@ Repository = 'https://github.com/vectara/python-sdk' python = "^3.8" PyYAML = "6.0.2" httpx = ">=0.21.2" +httpx-sse = "0.4.0" pydantic = ">= 1.9.2" pydantic-core = "^2.18.2" typing_extensions = ">= 4.0.0" diff --git a/reference.md b/reference.md index 35b7068..932445d 100644 --- a/reference.md +++ b/reference.md @@ -13,13 +13,13 @@ Perform a multipurpose query across to retrieve relevant information from one or more corpora and generate a response using Retrieval Augmented Generation (RAG). -- Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. 
For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). -- Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition) -- Leverage advanced search capabilities like reranking (`reranker`) and opt-in Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response - will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization) -- Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm) -- Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-customization-options) -- Customize citation formats in summaries using the `citations` object to include numeric, HTML, or Markdown links. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#citation-format-in-summary) +* Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. 
This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). +* Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition) +* Leverage advanced search capabilities like reranking (`reranker`) and opt-in Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response +will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization) +* Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm) +* Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-customization-options) +* Customize citation formats in summaries using the `citations` object to include numeric, HTML, or Markdown links. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#citation-format-in-summary) For more detailed information, see this [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search). @@ -166,13 +166,13 @@ for chunk in response: Perform a multipurpose query across to retrieve relevant information from one or more corpora and generate a response using Retrieval Augmented Generation (RAG). 
-- Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). -- Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition) -- Leverage advanced search capabilities like reranking (`reranker`) and opt-in Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response - will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization) -- Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm) -- Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-customization-options) -- Customize citation formats in summaries using the `citations` object to include numeric, HTML, or Markdown links. 
[Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#citation-format-in-summary) +* Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). +* Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition) +* Leverage advanced search capabilities like reranking (`reranker`) and opt-in Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response +will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization) +* Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm) +* Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. 
[Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-customization-options) +* Customize citation formats in summaries using the `citations` object to include numeric, HTML, or Markdown links. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#citation-format-in-summary) For more detailed information, see this [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search). @@ -642,10 +642,10 @@ for page in response.iter_pages():
-Create a corpus, which is a container to store documents and associated metadata. Here, you -define the unique `corpus_key` that identifies the corpus. The `corpus_key` can be custom-defined -following your preferred naming convention, allowing you to easily manage the corpus's data and -reference it in queries. For more information, see +Create a corpus, which is a container to store documents and associated metadata. Here, you +define the unique `corpus_key` that identifies the corpus. The `corpus_key` can be custom-defined +following your preferred naming convention, allowing you to easily manage the corpus's data and +reference it in queries. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition).
@@ -809,10 +809,10 @@ This feature is only enabled for Pro and Enterprise customers.
-Get metadata about a corpus. This operation does not search the corpus contents. -Specify the `corpus_key` to identify the corpus whose metadata you want to +Get metadata about a corpus. This operation does not search the corpus contents. +Specify the `corpus_key` to identify the corpus whose metadata you want to retrieve. The `corpus_key` is created when the corpus is set up, either through -the Vectara Console UI or the Create Corpus API. For more information, +the Vectara Console UI or the Create Corpus API. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition).
@@ -901,7 +901,7 @@ client.corpora.get(
-Permanently delete a corpus and all its associated data. The `corpus_key` uniquely identifies +Permanently delete a corpus and all its associated data. The `corpus_key` uniquely identifies the corpus. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition).
@@ -991,10 +991,10 @@ client.corpora.delete(
Enable, disable, or update the name and description of a corpus. This lets you -manage data availability without deleting the corpus, which is useful for -maintenance and security purposes. The `corpus_key` uniquely identifies the corpus. -For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). -Consider updating the name and description of a corpus dynamically to help keep your data +manage data availability without deleting the corpus, which is useful for +maintenance and security purposes. The `corpus_key` uniquely identifies the corpus. +For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). +Consider updating the name and description of a corpus dynamically to help keep your data aligned with changing business needs.
@@ -1107,8 +1107,8 @@ client.corpora.update(
-Resets a corpus, which removes all documents and data from the specified corpus, -while keeping the corpus itself. The `corpus_key` uniquely identifies the corpus. +Resets a corpus, which removes all documents and data from the specified corpus, +while keeping the corpus itself. The `corpus_key` uniquely identifies the corpus. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition).
@@ -1198,11 +1198,11 @@ client.corpora.reset(
Replace the filter attributes of a corpus. This does not happen immediately, as -this operation creates a job that completes asynchronously. These new filter +this operation creates a job that completes asynchronously. These new filter attributes will not work until the job completes. -You can monitor the status of the filter change using the returned job ID. The -`corpus_key` uniquely identifies the corpus. For more information, see +You can monitor the status of the filter change using the returned job ID. The +`corpus_key` uniquely identifies the corpus. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition).
@@ -1307,12 +1307,11 @@ client.corpora.replace_filter_attributes(
Search a single corpus with a straightforward query request, specifying the corpus key and query parameters. - -- Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is - [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). -- Enter the search `query` string for the corpus, which is the question you want to ask. -- Set the maximum number of results (`limit`) to return. **Default**: 10, **minimum**: 1 -- Define the `offset` position from which to start in the result set. +* Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is +[created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). +* Enter the search `query` string for the corpus, which is the question you want to ask. +* Set the maximum number of results (`limit`) to return. **Default**: 10, **minimum**: 1 +* Define the `offset` position from which to start in the result set. 
For more detailed information, see this [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search).
@@ -1437,13 +1436,13 @@ client.corpora.search( Perform an advanced query on a specific corpus to find relevant results, highlight relevant snippets, and use Retrieval Augmented Generation: -- Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). -- Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition) -- Leverage advanced search capabilities like reranking (`reranker`) and Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response - will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization). -- Use hybrid search to achieve optimal results by setting different values for `lexical_interpolation` (e.g., `0.025`). [Learn more](https://docs.vectara.com/docs/learn/hybrid-search) -- Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. 
[Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm) -- Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-options) +* Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). +* Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition) +* Leverage advanced search capabilities like reranking (`reranker`) and Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response +will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization). +* Use hybrid search to achieve optimal results by setting different values for `lexical_interpolation` (e.g., `0.025`). [Learn more](https://docs.vectara.com/docs/learn/hybrid-search) +* Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. 
[Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm) +* Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-options) For more detailed information, see [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search). @@ -1570,13 +1569,13 @@ for chunk in response: Perform an advanced query on a specific corpus to find relevant results, highlight relevant snippets, and use Retrieval Augmented Generation: -- Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). -- Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition) -- Leverage advanced search capabilities like reranking (`reranker`) and Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response - will not include generation. 
[Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization). -- Use hybrid search to achieve optimal results by setting different values for `lexical_interpolation` (e.g., `0.025`). [Learn more](https://docs.vectara.com/docs/learn/hybrid-search) -- Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm) -- Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-options) +* Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). +* Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition) +* Leverage advanced search capabilities like reranking (`reranker`) and Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response +will not include generation. 
[Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization). +* Use hybrid search to achieve optimal results by setting different values for `lexical_interpolation` (e.g., `0.025`). [Learn more](https://docs.vectara.com/docs/learn/hybrid-search) +* Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm) +* Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-options) For more detailed information, see [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search). @@ -1702,11 +1701,11 @@ client.corpora.query( Upload files such as PDFs and Word Documents for automatic text extraction and metadata parsing. The request expects a `multipart/form-data` format containing the following parts: - -- `metadata` - (Optional) Specifies a JSON object representing any additional metadata to be associated with the extracted document. For example, `'metadata={"key": "value"};type=application/json'` -- `chunking_strategy` - (Optional) Specifies the chunking strategy for the platform to use. If you do not set this option, the platform uses the default strategy, which creates one chunk per sentence. For example, `'chunking_strategy={"type":"max_chars_chunking_strategy","max_chars_per_chunk":200};type=application/json'` -- `file` - Specifies the file that you want to upload. -- `filename` - Specified as part of the file field with the file name that you want to associate with the uploaded file. 
For a curl example, use the following syntax: `'file=@/path/to/file/file.pdf;filename=desired_filename.pdf'` +* `metadata` - (Optional) Specifies a JSON object representing any additional metadata to be associated with the extracted document. For example, `'metadata={"key": "value"};type=application/json'` +* `chunking_strategy` - (Optional) Specifies the chunking strategy for the platform to use. If you do not set this option, the platform uses the default strategy, which creates one chunk per sentence. For example, `'chunking_strategy={"type":"max_chars_chunking_strategy","max_chars_per_chunk":200};type=application/json'` +* `table_extraction_config` - (Optional) Specifies whether to extract table data from the uploaded file. If you do not set this option, the platform does not extract tables from PDF files. For example, `'table_extraction_config={"extract_tables":true};type=application/json'` +* `file` - Specifies the file that you want to upload. +* `filename` - Specified as part of the file field with the file name that you want to associate with the uploaded file. For a curl example, use the following syntax: `'file=@/path/to/file/file.pdf;filename=desired_filename.pdf'` For more detailed information, see this [File Upload API guide.](https://docs.vectara.com/docs/api-reference/indexing-apis/file-upload/file-upload) @@ -1798,6 +1797,14 @@ core.File` — See core.File for more documentation
+**table_extraction_config:** `typing.Optional[TableExtractionConfig]` + +
+
+ +
+
+ **filename:** `typing.Optional[str]` — Optional multipart section to override the filename.
@@ -1831,8 +1838,8 @@ core.File` — See core.File for more documentation
-Retrieve a list of documents stored in a specifi corpus. This endpoint -provides an overview of document metadata without returning the full content of +Retrieve a list of documents stored in a specific corpus. This endpoint +provides an overview of document metadata without returning the full content of each document.
@@ -1955,15 +1962,15 @@ allows filtering on document metadata. Add a document to a corpus. This endpoint supports two document formats, structured and core. -- **Structured** documents have a more conventional structure that provide document sections - and parts in a format created by Vectara's proprietary strategy automatically. You provide - a logical document structure, and Vectara handles the partitioning. -- **Core** documents differ in that they follow an advanced, granular structure that - explicitly defines each document part in an array. Each part becomes a distinct, - searchable item in query results. You have precise control over the document structure - and content. +* **Structured** documents have a more conventional structure that provide document sections +and parts in a format created by Vectara's proprietary strategy automatically. You provide +a logical document structure, and Vectara handles the partitioning. +* **Core** documents differ in that they follow an advanced, granular structure that +explicitly defines each document part in an array. Each part becomes a distinct, +searchable item in query results. You have precise control over the document structure +and content. -For more details, see [Indexing](https://docs.vectara.com/docs/learn/select-ideal-indexing-api). +For more details, see [Indexing](https://docs.vectara.com/docs/learn/select-ideal-indexing-api).
@@ -2067,7 +2074,7 @@ client.documents.create(
-Retrieve the content and metadata of a specific document, identified by its +Retrieve the content and metadata of a specific document, identified by its unique `document_id` from a specific corpus.
@@ -2168,7 +2175,7 @@ This `document_id` must be percent encoded.
-Permanently delete a document identified by its unique `document_id` from a specific +Permanently delete a document identified by its unique `document_id` from a specific corpus. This operation cannot be undone, so use it with caution.
@@ -2253,6 +2260,232 @@ This `document_id` must be percent encoded. + + + + +## Index +
client.index.update_corpus_document(...) +
+
+ +#### 📝 Description + +
+
+ +
+
+ +Updates the document identified by its unique `document_id` in a specific +corpus. The request body metadata is merged with the existing metadata, +adding or modifying only the specified fields. +
+
+
+
+ +#### 🔌 Usage + +
+
+ +
+
+ +```python +from vectara import Vectara + +client = Vectara( + api_key="YOUR_API_KEY", + client_id="YOUR_CLIENT_ID", + client_secret="YOUR_CLIENT_SECRET", +) +client.index.update_corpus_document( + corpus_key="my-corpus", + document_id="document_id", +) + +``` +
+
+
+
+ +#### ⚙️ Parameters + +
+
+ +
+
+ +**corpus_key:** `CorpusKey` — The unique key identifying the corpus with the document to update. + +
+
+ +
+
+ +**document_id:** `str` + +The document ID of the document to update. +This `document_id` must be percent encoded. + +
+
+ +
+
+ +**request_timeout:** `typing.Optional[int]` — The API will make a best effort to complete the request in the specified seconds or time out. + +
+
+ +
+
+ +**request_timeout_millis:** `typing.Optional[int]` — The API will make a best effort to complete the request in the specified milliseconds or time out. + +
+
+ +
+
+ +**metadata:** `typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]` + +The metadata for a document as an arbitrary object. Properties of this object +can be used by document level filter attributes. + +
+
+ +
+
+ +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. + +
+
+
+
+ + +
+
+
+ +
client.index.replace_corpus_document_metadata(...) +
+
+ +#### 📝 Description + +
+
+ +
+
+ +Replaces the metadata of a document identified by its unique `document_id` +in a specific corpus. +
+
+
+
+ +#### 🔌 Usage + +
+
+ +
+
+ +```python +from vectara import Vectara + +client = Vectara( + api_key="YOUR_API_KEY", + client_id="YOUR_CLIENT_ID", + client_secret="YOUR_CLIENT_SECRET", +) +client.index.replace_corpus_document_metadata( + corpus_key="my-corpus", + document_id="document_id", +) + +``` +
+
+
+
+ +#### ⚙️ Parameters + +
+
+ +
+
+ +**corpus_key:** `CorpusKey` — The unique key identifying the corpus with the document to update. + +
+
+ +
+
+ +**document_id:** `str` + +The document ID of the document to update. +This `document_id` must be percent encoded. + +
+
+ +
+
+ +**request_timeout:** `typing.Optional[int]` — The API will make a best effort to complete the request in the specified seconds or time out. + +
+
+ +
+
+ +**request_timeout_millis:** `typing.Optional[int]` — The API will make a best effort to complete the request in the specified milliseconds or time out. + +
+
+ +
+
+ +**metadata:** `typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]]` + +The metadata for a document as an arbitrary object. Properties of this object +can be used by document level filter attributes. + +
+
+ +
+
+ +**request_options:** `typing.Optional[RequestOptions]` — Request-specific configuration. + +
+
+
+
+ +
diff --git a/src/vectara/__init__.py b/src/vectara/__init__.py index b8628c4..041c8d1 100644 --- a/src/vectara/__init__.py +++ b/src/vectara/__init__.py @@ -8,6 +8,7 @@ ApiRole, AppClient, BadRequestErrorBody, + Cell, ChainReranker, Chat, ChatFullResponse, @@ -29,6 +30,7 @@ CreateDocumentRequest, CustomDimensions, CustomerSpecificReranker, + Data, Document, DocumentPart, DocumentStorageUsage, @@ -43,6 +45,7 @@ GenerationParameters, GenerationPreset, GenerationSpan, + Header, IndividualSearchResult, Job, JobState, @@ -81,6 +84,7 @@ RerankSpan, RerankedSearchResult, Reranker, + Row, SearchCorporaParameters, SearchCorpus, SearchParameters, @@ -94,7 +98,10 @@ StreamSearchResponse, StructuredDocument, StructuredDocumentSection, + Table, + TableExtractionConfig, Turn, + UpdateDocumentRequest, User, UserFunctionReranker, ) @@ -108,6 +115,7 @@ documents, encoders, generation_presets, + index, jobs, llms, query_history, @@ -131,6 +139,7 @@ "AsyncVectara", "BadRequestError", "BadRequestErrorBody", + "Cell", "ChainReranker", "Chat", "ChatFullResponse", @@ -152,6 +161,7 @@ "CreateDocumentRequest", "CustomDimensions", "CustomerSpecificReranker", + "Data", "Document", "DocumentPart", "DocumentStorageUsage", @@ -168,6 +178,7 @@ "GenerationPreset", "GenerationSpan", "GetTokenResponse", + "Header", "IndividualSearchResult", "Job", "JobState", @@ -207,6 +218,7 @@ "RerankSpan", "RerankedSearchResult", "Reranker", + "Row", "SearchCorporaParameters", "SearchCorpus", "SearchCorpusParameters", @@ -221,7 +233,10 @@ "StreamSearchResponse", "StructuredDocument", "StructuredDocumentSection", + "Table", + "TableExtractionConfig", "Turn", + "UpdateDocumentRequest", "User", "UserFunctionReranker", "Vectara", @@ -235,6 +250,7 @@ "documents", "encoders", "generation_presets", + "index", "jobs", "llms", "query_history", diff --git a/src/vectara/chats/client.py b/src/vectara/chats/client.py index 337be6c..dd344c3 100644 --- a/src/vectara/chats/client.py +++ b/src/vectara/chats/client.py @@ -20,6 
+20,7 @@ from ..types.chat_parameters import ChatParameters from ..types.chat_streamed_response import ChatStreamedResponse from ..core.serialization import convert_and_respect_annotation_metadata +import httpx_sse import json from ..errors.bad_request_error import BadRequestError from ..types.bad_request_error_body import BadRequestErrorBody @@ -493,15 +494,14 @@ def create_turns_stream( ) as _response: try: if 200 <= _response.status_code < 300: - for _text in _response.iter_lines(): + _event_source = httpx_sse.EventSource(_response) + for _sse in _event_source.iter_sse(): try: - if len(_text) == 0: - continue yield typing.cast( ChatStreamedResponse, parse_obj_as( type_=ChatStreamedResponse, # type: ignore - object_=json.loads(_text), + object_=json.loads(_sse.data), ), ) except: @@ -1455,15 +1455,14 @@ async def main() -> None: ) as _response: try: if 200 <= _response.status_code < 300: - async for _text in _response.aiter_lines(): + _event_source = httpx_sse.EventSource(_response) + async for _sse in _event_source.aiter_sse(): try: - if len(_text) == 0: - continue yield typing.cast( ChatStreamedResponse, parse_obj_as( type_=ChatStreamedResponse, # type: ignore - object_=json.loads(_text), + object_=json.loads(_sse.data), ), ) except: diff --git a/src/vectara/core/client_wrapper.py b/src/vectara/core/client_wrapper.py index 204ce18..997c046 100644 --- a/src/vectara/core/client_wrapper.py +++ b/src/vectara/core/client_wrapper.py @@ -25,7 +25,7 @@ def get_headers(self) -> typing.Dict[str, str]: headers: typing.Dict[str, str] = { "X-Fern-Language": "Python", "X-Fern-SDK-Name": "vectara", - "X-Fern-SDK-Version": "0.2.37", + "X-Fern-SDK-Version": "0.2.38", } if self._api_key is not None: headers["x-api-key"] = self._api_key diff --git a/src/vectara/corpora/client.py b/src/vectara/corpora/client.py index 68b9bcf..4034160 100644 --- a/src/vectara/corpora/client.py +++ b/src/vectara/corpora/client.py @@ -25,6 +25,7 @@ from .types.search_corpus_parameters import 
SearchCorpusParameters from ..types.generation_parameters import GenerationParameters from ..types.query_streamed_response import QueryStreamedResponse +import httpx_sse import json from ..core.client_wrapper import AsyncClientWrapper from ..core.pagination import AsyncPager @@ -779,12 +780,11 @@ def search( ) -> QueryFullResponse: """ Search a single corpus with a straightforward query request, specifying the corpus key and query parameters. - - - Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is - [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). - - Enter the search `query` string for the corpus, which is the question you want to ask. - - Set the maximum number of results (`limit`) to return. **Default**: 10, **minimum**: 1 - - Define the `offset` position from which to start in the result set. + * Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is + [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. 
For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). + * Enter the search `query` string for the corpus, which is the question you want to ask. + * Set the maximum number of results (`limit`) to return. **Default**: 10, **minimum**: 1 + * Define the `offset` position from which to start in the result set. For more detailed information, see this [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search). @@ -908,13 +908,13 @@ def query_stream( """ Perform an advanced query on a specific corpus to find relevant results, highlight relevant snippets, and use Retrieval Augmented Generation: - - Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). - - Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition) - - Leverage advanced search capabilities like reranking (`reranker`) and Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response - will not include generation. 
[Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization). - - Use hybrid search to achieve optimal results by setting different values for `lexical_interpolation` (e.g., `0.025`). [Learn more](https://docs.vectara.com/docs/learn/hybrid-search) - - Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm) - - Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-options) + * Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). + * Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition) + * Leverage advanced search capabilities like reranking (`reranker`) and Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. 
By excluding the property or by setting it to null, the response + will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization). + * Use hybrid search to achieve optimal results by setting different values for `lexical_interpolation` (e.g., `0.025`). [Learn more](https://docs.vectara.com/docs/learn/hybrid-search) + * Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm) + * Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-options) For more detailed information, see [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search). @@ -988,15 +988,14 @@ def query_stream( ) as _response: try: if 200 <= _response.status_code < 300: - for _text in _response.iter_lines(): + _event_source = httpx_sse.EventSource(_response) + for _sse in _event_source.iter_sse(): try: - if len(_text) == 0: - continue yield typing.cast( QueryStreamedResponse, parse_obj_as( type_=QueryStreamedResponse, # type: ignore - object_=json.loads(_text), + object_=json.loads(_sse.data), ), ) except: @@ -1053,13 +1052,13 @@ def query( """ Perform an advanced query on a specific corpus to find relevant results, highlight relevant snippets, and use Retrieval Augmented Generation: - - Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). 
When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). - - Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition) - - Leverage advanced search capabilities like reranking (`reranker`) and Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response - will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization). - - Use hybrid search to achieve optimal results by setting different values for `lexical_interpolation` (e.g., `0.025`). [Learn more](https://docs.vectara.com/docs/learn/hybrid-search) - - Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm) - - Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-options) + * Specify the unique `corpus_key` identifying the corpus to query. 
The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). + * Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition) + * Leverage advanced search capabilities like reranking (`reranker`) and Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response + will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization). + * Use hybrid search to achieve optimal results by setting different values for `lexical_interpolation` (e.g., `0.025`). [Learn more](https://docs.vectara.com/docs/learn/hybrid-search) + * Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm) + * Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. 
[Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-options) For more detailed information, see [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search). @@ -1976,12 +1975,11 @@ async def search( ) -> QueryFullResponse: """ Search a single corpus with a straightforward query request, specifying the corpus key and query parameters. - - - Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is - [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). - - Enter the search `query` string for the corpus, which is the question you want to ask. - - Set the maximum number of results (`limit`) to return. **Default**: 10, **minimum**: 1 - - Define the `offset` position from which to start in the result set. + * Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is + [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). 
+ * Enter the search `query` string for the corpus, which is the question you want to ask. + * Set the maximum number of results (`limit`) to return. **Default**: 10, **minimum**: 1 + * Define the `offset` position from which to start in the result set. For more detailed information, see this [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search). @@ -2113,13 +2111,13 @@ async def query_stream( """ Perform an advanced query on a specific corpus to find relevant results, highlight relevant snippets, and use Retrieval Augmented Generation: - - Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). - - Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition) - - Leverage advanced search capabilities like reranking (`reranker`) and Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response - will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization). 
- - Use hybrid search to achieve optimal results by setting different values for `lexical_interpolation` (e.g., `0.025`). [Learn more](https://docs.vectara.com/docs/learn/hybrid-search) - - Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm) - - Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-options) + * Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). + * Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition) + * Leverage advanced search capabilities like reranking (`reranker`) and Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response + will not include generation. 
[Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization). + * Use hybrid search to achieve optimal results by setting different values for `lexical_interpolation` (e.g., `0.025`). [Learn more](https://docs.vectara.com/docs/learn/hybrid-search) + * Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm) + * Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-options) For more detailed information, see [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search). @@ -2201,15 +2199,14 @@ async def main() -> None: ) as _response: try: if 200 <= _response.status_code < 300: - async for _text in _response.aiter_lines(): + _event_source = httpx_sse.EventSource(_response) + async for _sse in _event_source.aiter_sse(): try: - if len(_text) == 0: - continue yield typing.cast( QueryStreamedResponse, parse_obj_as( type_=QueryStreamedResponse, # type: ignore - object_=json.loads(_text), + object_=json.loads(_sse.data), ), ) except: @@ -2266,13 +2263,13 @@ async def query( """ Perform an advanced query on a specific corpus to find relevant results, highlight relevant snippets, and use Retrieval Augmented Generation: - - Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. 
This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). - - Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition) - - Leverage advanced search capabilities like reranking (`reranker`) and Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response - will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization). - - Use hybrid search to achieve optimal results by setting different values for `lexical_interpolation` (e.g., `0.025`). [Learn more](https://docs.vectara.com/docs/learn/hybrid-search) - - Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm) - - Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-options) + * Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). 
When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition). + * Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition) + * Leverage advanced search capabilities like reranking (`reranker`) and Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response + will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization). + * Use hybrid search to achieve optimal results by setting different values for `lexical_interpolation` (e.g., `0.025`). [Learn more](https://docs.vectara.com/docs/learn/hybrid-search) + * Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm) + * Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-options) For more detailed information, see [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search). 
diff --git a/src/vectara/documents/client.py b/src/vectara/documents/client.py index 394ba18..d551bc5 100644 --- a/src/vectara/documents/client.py +++ b/src/vectara/documents/client.py @@ -42,7 +42,7 @@ def list( request_options: typing.Optional[RequestOptions] = None, ) -> SyncPager[Document]: """ - Retrieve a list of documents stored in a specifi corpus. This endpoint + Retrieve a list of documents stored in a specific corpus. This endpoint provides an overview of document metadata without returning the full content of each document. @@ -170,13 +170,13 @@ def create( """ Add a document to a corpus. This endpoint supports two document formats, structured and core. - - **Structured** documents have a more conventional structure that provide document sections - and parts in a format created by Vectara's proprietary strategy automatically. You provide - a logical document structure, and Vectara handles the partitioning. - - **Core** documents differ in that they follow an advanced, granular structure that - explicitly defines each document part in an array. Each part becomes a distinct, - searchable item in query results. You have precise control over the document structure - and content. + * **Structured** documents have a more conventional structure that provide document sections + and parts in a format created by Vectara's proprietary strategy automatically. You provide + a logical document structure, and Vectara handles the partitioning. + * **Core** documents differ in that they follow an advanced, granular structure that + explicitly defines each document part in an array. Each part becomes a distinct, + searchable item in query results. You have precise control over the document structure + and content. For more details, see [Indexing](https://docs.vectara.com/docs/learn/select-ideal-indexing-api). 
@@ -478,7 +478,7 @@ async def list( request_options: typing.Optional[RequestOptions] = None, ) -> AsyncPager[Document]: """ - Retrieve a list of documents stored in a specifi corpus. This endpoint + Retrieve a list of documents stored in a specific corpus. This endpoint provides an overview of document metadata without returning the full content of each document. @@ -614,13 +614,13 @@ async def create( """ Add a document to a corpus. This endpoint supports two document formats, structured and core. - - **Structured** documents have a more conventional structure that provide document sections - and parts in a format created by Vectara's proprietary strategy automatically. You provide - a logical document structure, and Vectara handles the partitioning. - - **Core** documents differ in that they follow an advanced, granular structure that - explicitly defines each document part in an array. Each part becomes a distinct, - searchable item in query results. You have precise control over the document structure - and content. + * **Structured** documents have a more conventional structure that provides document sections + and parts in a format created by Vectara's proprietary strategy automatically. You provide + a logical document structure, and Vectara handles the partitioning. + * **Core** documents differ in that they follow an advanced, granular structure that + explicitly defines each document part in an array. Each part becomes a distinct, + searchable item in query results. You have precise control over the document structure + and content. For more details, see [Indexing](https://docs.vectara.com/docs/learn/select-ideal-indexing-api). diff --git a/src/vectara/index/__init__.py b/src/vectara/index/__init__.py new file mode 100644 index 0000000..f3ea265 --- /dev/null +++ b/src/vectara/index/__init__.py @@ -0,0 +1,2 @@ +# This file was auto-generated by Fern from our API Definition. 
+ diff --git a/src/vectara/index/client.py b/src/vectara/index/client.py new file mode 100644 index 0000000..3132844 --- /dev/null +++ b/src/vectara/index/client.py @@ -0,0 +1,459 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing +from ..core.client_wrapper import SyncClientWrapper +from ..types.corpus_key import CorpusKey +from ..core.request_options import RequestOptions +from ..types.document import Document +from ..core.jsonable_encoder import jsonable_encoder +from ..core.pydantic_utilities import parse_obj_as +from ..errors.forbidden_error import ForbiddenError +from ..types.error import Error +from ..errors.not_found_error import NotFoundError +from ..types.not_found_error_body import NotFoundErrorBody +from json.decoder import JSONDecodeError +from ..core.api_error import ApiError +from ..core.client_wrapper import AsyncClientWrapper + +# this is used as the default value for optional parameters +OMIT = typing.cast(typing.Any, ...) + + +class IndexClient: + def __init__(self, *, client_wrapper: SyncClientWrapper): + self._client_wrapper = client_wrapper + + def update_corpus_document( + self, + corpus_key: CorpusKey, + document_id: str, + *, + request_timeout: typing.Optional[int] = None, + request_timeout_millis: typing.Optional[int] = None, + metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> Document: + """ + Updates a document identified by its unique `document_id` in a specific + corpus. The request body metadata is merged with the existing metadata, + adding or modifying only the specified fields. + + Parameters + ---------- + corpus_key : CorpusKey + The unique key identifying the corpus with the document to update. + + document_id : str + The document ID of the document to update. + This `document_id` must be percent encoded. 
+ + request_timeout : typing.Optional[int] + The API will make a best effort to complete the request in the specified seconds or time out. + + request_timeout_millis : typing.Optional[int] + The API will make a best effort to complete the request in the specified milliseconds or time out. + + metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] + The metadata for a document as an arbitrary object. Properties of this object + can be used by document level filter attributes. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + Document + Successfully updated the document. + + Examples + -------- + from vectara import Vectara + + client = Vectara( + api_key="YOUR_API_KEY", + client_id="YOUR_CLIENT_ID", + client_secret="YOUR_CLIENT_SECRET", + ) + client.index.update_corpus_document( + corpus_key="my-corpus", + document_id="document_id", + ) + """ + _response = self._client_wrapper.httpx_client.request( + f"v2/corpora/{jsonable_encoder(corpus_key)}/documents/{jsonable_encoder(document_id)}", + base_url=self._client_wrapper.get_environment().default, + method="PATCH", + json={ + "metadata": metadata, + }, + headers={ + "Request-Timeout": str(request_timeout) if request_timeout is not None else None, + "Request-Timeout-Millis": str(request_timeout_millis) if request_timeout_millis is not None else None, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + Document, + parse_obj_as( + type_=Document, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + Error, + parse_obj_as( + type_=Error, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + NotFoundErrorBody, + parse_obj_as( + type_=NotFoundErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + 
_response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + def replace_corpus_document_metadata( + self, + corpus_key: CorpusKey, + document_id: str, + *, + request_timeout: typing.Optional[int] = None, + request_timeout_millis: typing.Optional[int] = None, + metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> Document: + """ + Replaces metadata of a document identified by its unique `document_id` + from a specific corpus. + + Parameters + ---------- + corpus_key : CorpusKey + The unique key identifying the corpus with the document to update. + + document_id : str + The document ID of the document to update. + This `document_id` must be percent encoded. + + request_timeout : typing.Optional[int] + The API will make a best effort to complete the request in the specified seconds or time out. + + request_timeout_millis : typing.Optional[int] + The API will make a best effort to complete the request in the specified milliseconds or time out. + + metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] + The metadata for a document as an arbitrary object. Properties of this object + can be used by document level filter attributes. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + Document + Successfully updated the document. 
+ + Examples + -------- + from vectara import Vectara + + client = Vectara( + api_key="YOUR_API_KEY", + client_id="YOUR_CLIENT_ID", + client_secret="YOUR_CLIENT_SECRET", + ) + client.index.replace_corpus_document_metadata( + corpus_key="my-corpus", + document_id="document_id", + ) + """ + _response = self._client_wrapper.httpx_client.request( + f"v2/corpora/{jsonable_encoder(corpus_key)}/documents/{jsonable_encoder(document_id)}/metadata", + base_url=self._client_wrapper.get_environment().default, + method="PUT", + json={ + "metadata": metadata, + }, + headers={ + "Request-Timeout": str(request_timeout) if request_timeout is not None else None, + "Request-Timeout-Millis": str(request_timeout_millis) if request_timeout_millis is not None else None, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + Document, + parse_obj_as( + type_=Document, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + Error, + parse_obj_as( + type_=Error, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + NotFoundErrorBody, + parse_obj_as( + type_=NotFoundErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + +class AsyncIndexClient: + def __init__(self, *, client_wrapper: AsyncClientWrapper): + self._client_wrapper = client_wrapper + + async def update_corpus_document( + self, + corpus_key: CorpusKey, + document_id: str, + *, + request_timeout: typing.Optional[int] = None, + request_timeout_millis: typing.Optional[int] = None, + metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT, + request_options: 
typing.Optional[RequestOptions] = None, + ) -> Document: + """ + Updates a document identified by its unique `document_id` in a specific + corpus. The request body metadata is merged with the existing metadata, + adding or modifying only the specified fields. + + Parameters + ---------- + corpus_key : CorpusKey + The unique key identifying the corpus with the document to update. + + document_id : str + The document ID of the document to update. + This `document_id` must be percent encoded. + + request_timeout : typing.Optional[int] + The API will make a best effort to complete the request in the specified seconds or time out. + + request_timeout_millis : typing.Optional[int] + The API will make a best effort to complete the request in the specified milliseconds or time out. + + metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] + The metadata for a document as an arbitrary object. Properties of this object + can be used by document level filter attributes. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + Document + Successfully updated the document. 
+ + Examples + -------- + import asyncio + + from vectara import AsyncVectara + + client = AsyncVectara( + api_key="YOUR_API_KEY", + client_id="YOUR_CLIENT_ID", + client_secret="YOUR_CLIENT_SECRET", + ) + + + async def main() -> None: + await client.index.update_corpus_document( + corpus_key="my-corpus", + document_id="document_id", + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + f"v2/corpora/{jsonable_encoder(corpus_key)}/documents/{jsonable_encoder(document_id)}", + base_url=self._client_wrapper.get_environment().default, + method="PATCH", + json={ + "metadata": metadata, + }, + headers={ + "Request-Timeout": str(request_timeout) if request_timeout is not None else None, + "Request-Timeout-Millis": str(request_timeout_millis) if request_timeout_millis is not None else None, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + Document, + parse_obj_as( + type_=Document, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + Error, + parse_obj_as( + type_=Error, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + NotFoundErrorBody, + parse_obj_as( + type_=NotFoundErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) + + async def replace_corpus_document_metadata( + self, + corpus_key: CorpusKey, + document_id: str, + *, + request_timeout: typing.Optional[int] = None, + request_timeout_millis: typing.Optional[int] = None, + metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT, + request_options: typing.Optional[RequestOptions] = None, + ) -> 
Document: + """ + Replaces metadata of a document identified by its unique `document_id` + from a specific corpus. + + Parameters + ---------- + corpus_key : CorpusKey + The unique key identifying the corpus with the document to update. + + document_id : str + The document ID of the document to update. + This `document_id` must be percent encoded. + + request_timeout : typing.Optional[int] + The API will make a best effort to complete the request in the specified seconds or time out. + + request_timeout_millis : typing.Optional[int] + The API will make a best effort to complete the request in the specified milliseconds or time out. + + metadata : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] + The metadata for a document as an arbitrary object. Properties of this object + can be used by document level filter attributes. + + request_options : typing.Optional[RequestOptions] + Request-specific configuration. + + Returns + ------- + Document + Successfully updated the document. 
+ + Examples + -------- + import asyncio + + from vectara import AsyncVectara + + client = AsyncVectara( + api_key="YOUR_API_KEY", + client_id="YOUR_CLIENT_ID", + client_secret="YOUR_CLIENT_SECRET", + ) + + + async def main() -> None: + await client.index.replace_corpus_document_metadata( + corpus_key="my-corpus", + document_id="document_id", + ) + + + asyncio.run(main()) + """ + _response = await self._client_wrapper.httpx_client.request( + f"v2/corpora/{jsonable_encoder(corpus_key)}/documents/{jsonable_encoder(document_id)}/metadata", + base_url=self._client_wrapper.get_environment().default, + method="PUT", + json={ + "metadata": metadata, + }, + headers={ + "Request-Timeout": str(request_timeout) if request_timeout is not None else None, + "Request-Timeout-Millis": str(request_timeout_millis) if request_timeout_millis is not None else None, + }, + request_options=request_options, + omit=OMIT, + ) + try: + if 200 <= _response.status_code < 300: + return typing.cast( + Document, + parse_obj_as( + type_=Document, # type: ignore + object_=_response.json(), + ), + ) + if _response.status_code == 403: + raise ForbiddenError( + typing.cast( + Error, + parse_obj_as( + type_=Error, # type: ignore + object_=_response.json(), + ), + ) + ) + if _response.status_code == 404: + raise NotFoundError( + typing.cast( + NotFoundErrorBody, + parse_obj_as( + type_=NotFoundErrorBody, # type: ignore + object_=_response.json(), + ), + ) + ) + _response_json = _response.json() + except JSONDecodeError: + raise ApiError(status_code=_response.status_code, body=_response.text) + raise ApiError(status_code=_response.status_code, body=_response_json) diff --git a/src/vectara/types/__init__.py b/src/vectara/types/__init__.py index c551c93..8752f86 100644 --- a/src/vectara/types/__init__.py +++ b/src/vectara/types/__init__.py @@ -7,6 +7,7 @@ from .api_role import ApiRole from .app_client import AppClient from .bad_request_error_body import BadRequestErrorBody +from .cell import Cell from 
.chain_reranker import ChainReranker from .chat import Chat from .chat_full_response import ChatFullResponse @@ -28,6 +29,7 @@ from .create_document_request import CreateDocumentRequest from .custom_dimensions import CustomDimensions from .customer_specific_reranker import CustomerSpecificReranker +from .data import Data from .document import Document from .document_part import DocumentPart from .document_storage_usage import DocumentStorageUsage @@ -42,6 +44,7 @@ from .generation_parameters import GenerationParameters from .generation_preset import GenerationPreset from .generation_span import GenerationSpan +from .header import Header from .individual_search_result import IndividualSearchResult from .job import Job from .job_state import JobState @@ -80,6 +83,7 @@ from .rerank_span import RerankSpan from .reranked_search_result import RerankedSearchResult from .reranker import Reranker +from .row import Row from .search_corpora_parameters import SearchCorporaParameters from .search_corpus import SearchCorpus from .search_parameters import SearchParameters @@ -93,7 +97,10 @@ from .stream_search_response import StreamSearchResponse from .structured_document import StructuredDocument from .structured_document_section import StructuredDocumentSection +from .table import Table +from .table_extraction_config import TableExtractionConfig from .turn import Turn +from .update_document_request import UpdateDocumentRequest from .user import User from .user_function_reranker import UserFunctionReranker @@ -105,6 +112,7 @@ "ApiRole", "AppClient", "BadRequestErrorBody", + "Cell", "ChainReranker", "Chat", "ChatFullResponse", @@ -126,6 +134,7 @@ "CreateDocumentRequest", "CustomDimensions", "CustomerSpecificReranker", + "Data", "Document", "DocumentPart", "DocumentStorageUsage", @@ -140,6 +149,7 @@ "GenerationParameters", "GenerationPreset", "GenerationSpan", + "Header", "IndividualSearchResult", "Job", "JobState", @@ -178,6 +188,7 @@ "RerankSpan", "RerankedSearchResult", 
"Reranker", + "Row", "SearchCorporaParameters", "SearchCorpus", "SearchParameters", @@ -191,7 +202,10 @@ "StreamSearchResponse", "StructuredDocument", "StructuredDocumentSection", + "Table", + "TableExtractionConfig", "Turn", + "UpdateDocumentRequest", "User", "UserFunctionReranker", ] diff --git a/src/vectara/types/app_client.py b/src/vectara/types/app_client.py index 39b8b25..dfd9810 100644 --- a/src/vectara/types/app_client.py +++ b/src/vectara/types/app_client.py @@ -12,7 +12,7 @@ class AppClient(UniversalBaseModel): id: typing.Optional[str] = pydantic.Field(default=None) """ The Vectara App Client ID. This ID is not used during an OAuth - flow. However, the ID used within the Vectara API. + flow. However, the ID used within the Vectara API. """ name: typing.Optional[str] = pydantic.Field(default=None) @@ -32,7 +32,7 @@ class AppClient(UniversalBaseModel): client_secret: typing.Optional[str] = pydantic.Field(default=None) """ - The client secret used in API requests. The secret should be kept secure. + The client secret used in API requests. The secret should be kept secure. """ api_roles: typing.Optional[typing.List[ApiRole]] = pydantic.Field(default=None) diff --git a/src/vectara/types/cell.py b/src/vectara/types/cell.py new file mode 100644 index 0000000..7ca451d --- /dev/null +++ b/src/vectara/types/cell.py @@ -0,0 +1,51 @@ +# This file was auto-generated by Fern from our API Definition. + +from ..core.pydantic_utilities import UniversalBaseModel +import typing +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 + + +class Cell(UniversalBaseModel): + """ + A cell in a table. + """ + + text_value: typing.Optional[str] = pydantic.Field(default=None) + """ + A text value. + """ + + int_value: typing.Optional[int] = pydantic.Field(default=None) + """ + A signed 64-bit integer value. + """ + + float_value: typing.Optional[float] = pydantic.Field(default=None) + """ + A floating-point value with double precision. 
+ """ + + bool_value: typing.Optional[bool] = pydantic.Field(default=None) + """ + A boolean value. + """ + + colspan: typing.Optional[int] = pydantic.Field(default=None) + """ + (Optional) The number of columns the cell spans. Default is 1. Must be greater than 0. + """ + + rowspan: typing.Optional[int] = pydantic.Field(default=None) + """ + (Optional) The number of rows the cell spans. Default is 1. Must be greater than 0. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/vectara/types/chat_full_response.py b/src/vectara/types/chat_full_response.py index 02d794d..ca20d3d 100644 --- a/src/vectara/types/chat_full_response.py +++ b/src/vectara/types/chat_full_response.py @@ -45,12 +45,12 @@ class ChatFullResponse(UniversalBaseModel): rendered_prompt: typing.Optional[str] = pydantic.Field(default=None) """ - The rendered prompt sent to the LLM. Useful when creating customer `prompt_template` templates. + The rendered prompt sent to the LLM. Useful when creating customer `prompt_template` templates. """ rephrased_query: typing.Optional[str] = pydantic.Field(default=None) """ - View the actual query made to backend that was rephrased + View the actual query made to backend that was rephrased by the LLM from the input query. """ diff --git a/src/vectara/types/citation_parameters.py b/src/vectara/types/citation_parameters.py index e1ba700..40dc3d8 100644 --- a/src/vectara/types/citation_parameters.py +++ b/src/vectara/types/citation_parameters.py @@ -16,11 +16,10 @@ class CitationParameters(UniversalBaseModel): """ The citation style to be used in summary. Can be one of: - - - `numeric` - Citations formatted as simple numerals: \[1\], \[2\] ... - - `none` - Citations removed from text. - - `html` - Citation formatted as a URL like `text_pattern`. 
- - `markdown` - Formatted as `[text_pattern](url_pattern)`. + * `numeric` - Citations formatted as simple numerals: \[1\], \[2\] ... + * `none` - Citations removed from text. + * `html` - Citation formatted as a URL like `text_pattern`. + * `markdown` - Formatted as `[text_pattern](url_pattern)`. """ url_pattern: typing.Optional[str] = pydantic.Field(default=None) diff --git a/src/vectara/types/context_configuration.py b/src/vectara/types/context_configuration.py index 98c62f3..060b778 100644 --- a/src/vectara/types/context_configuration.py +++ b/src/vectara/types/context_configuration.py @@ -22,7 +22,7 @@ class ContextConfiguration(UniversalBaseModel): characters_after: typing.Optional[int] = pydantic.Field(default=None) """ - The number of characters that are shown after the matching document part. + The number of characters that are shown after the matching document part. This is useful to show the context of the document part in the wider document. Ignored if `sentences_after` is set. Vectara will capture the full sentence that contains the captured characters, @@ -37,23 +37,23 @@ class ContextConfiguration(UniversalBaseModel): sentences_after: typing.Optional[int] = pydantic.Field(default=None) """ - The number of sentences that are shown after the matching document part. + The number of sentences that are shown after the matching document part. This is useful to show the context of the document part in the wider document. """ start_tag: typing.Optional[str] = pydantic.Field(default=None) """ - The tag that wraps the document part at the start. This is often used to - provide a start HTML/XML tag or some other delimiter you can use in an - application to understand where to provide highlighting in your UI and + The tag that wraps the document part at the start. 
This is often used to + provide a start HTML/XML tag or some other delimiter you can use in an + application to understand where to provide highlighting in your UI and understand where the context before ends and the document part begins. """ end_tag: typing.Optional[str] = pydantic.Field(default=None) """ - The tag that wraps the document part at the end. This is often used to - provide a start HTML/XML tag or some other delimiter you can use in an - application to understand where to provide highlighting in your UI and + The tag that wraps the document part at the end. This is often used to + provide an end HTML/XML tag or some other delimiter you can use in an + application to understand where to provide highlighting in your UI and understand where the context before ends and the document part begins. """ diff --git a/src/vectara/types/core_document.py b/src/vectara/types/core_document.py index 13b191d..3292fe8 100644 --- a/src/vectara/types/core_document.py +++ b/src/vectara/types/core_document.py @@ -3,6 +3,7 @@ from ..core.pydantic_utilities import UniversalBaseModel import pydantic import typing +from .table import Table from .core_document_part import CoreDocumentPart from ..core.pydantic_utilities import IS_PYDANTIC_V2 @@ -24,6 +25,11 @@ class CoreDocument(UniversalBaseModel): can be used by document filters if defined as a corpus filter attribute. """ + tables: typing.Optional[typing.List[Table]] = pydantic.Field(default=None) + """ + The tables that this document contains. + """ + document_parts: typing.List[CoreDocumentPart] = pydantic.Field() """ Parts of the document that make up the document. diff --git a/src/vectara/types/core_document_part.py b/src/vectara/types/core_document_part.py index b59f1ac..819329c 100644 --- a/src/vectara/types/core_document_part.py +++ b/src/vectara/types/core_document_part.py @@ -22,6 +22,11 @@ class CoreDocumentPart(UniversalBaseModel): The metadata for a document part. 
These may be used in metadata filters at query time if filter attributes are configured on the corpus. """ + table_id: typing.Optional[str] = pydantic.Field(default=None) + """ + The ID of the table that this document part belongs to. + """ + context: typing.Optional[str] = pydantic.Field(default=None) """ The context text for the document part. diff --git a/src/vectara/types/corpus.py b/src/vectara/types/corpus.py index 1798241..3437f80 100644 --- a/src/vectara/types/corpus.py +++ b/src/vectara/types/corpus.py @@ -53,7 +53,7 @@ class Corpus(UniversalBaseModel): encoder_id: typing.Optional[str] = pydantic.Field(default=None) """ The encoder used by the corpus. - _Deprecated_: Use `encoder_name` instead + *Deprecated*: Use `encoder_name` instead """ encoder_name: typing.Optional[str] = pydantic.Field(default=None) diff --git a/src/vectara/types/customer_specific_reranker.py b/src/vectara/types/customer_specific_reranker.py index a21d37a..e61f45d 100644 --- a/src/vectara/types/customer_specific_reranker.py +++ b/src/vectara/types/customer_specific_reranker.py @@ -26,15 +26,14 @@ class CustomerSpecificReranker(UniversalBaseModel): limit: typing.Optional[int] = pydantic.Field(default=None) """ - Specifies the maximum number of results to be returned after the reranking process. + Specifies the maximum number of results to be returned after the reranking process. When a reranker is applied, it performs the following steps: - 1. Reranks all input results according to its algorithm. 2. Sorts the reranked results based on their new scores. 3. Returns the top N results, where N is the value specified by this limit. - Note: This limit is applied per reranking stage. In a chain of rerankers, - each reranker can have its own limit, potentially reducing the number of + Note: This limit is applied per reranking stage. In a chain of rerankers, + each reranker can have its own limit, potentially reducing the number of results at each stage. 
""" @@ -42,7 +41,6 @@ class CustomerSpecificReranker(UniversalBaseModel): """ Specifies the minimum score threshold for results to be included after the reranking process. When a reranker is applied with a cutoff, it performs the following steps: - 1. Reranks all input results according to its algorithm. 2. Applies the cutoff, removing any results with scores below the specified threshold. 3. Returns the remaining results, sorted by their new scores. diff --git a/src/vectara/types/data.py b/src/vectara/types/data.py new file mode 100644 index 0000000..e406a61 --- /dev/null +++ b/src/vectara/types/data.py @@ -0,0 +1,33 @@ +# This file was auto-generated by Fern from our API Definition. + +from ..core.pydantic_utilities import UniversalBaseModel +import typing +from .header import Header +import pydantic +from .row import Row +from ..core.pydantic_utilities import IS_PYDANTIC_V2 + + +class Data(UniversalBaseModel): + """ + The data of a table. + """ + + headers: typing.Optional[typing.List[Header]] = pydantic.Field(default=None) + """ + The headers of the table. + """ + + rows: typing.Optional[typing.List[Row]] = pydantic.Field(default=None) + """ + The rows in the data. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/vectara/types/document.py b/src/vectara/types/document.py index 8c26f55..0c11eb4 100644 --- a/src/vectara/types/document.py +++ b/src/vectara/types/document.py @@ -3,6 +3,7 @@ from ..core.pydantic_utilities import UniversalBaseModel import typing import pydantic +from .table import Table from .document_part import DocumentPart from .document_storage_usage import DocumentStorageUsage from ..core.pydantic_utilities import IS_PYDANTIC_V2 @@ -19,6 +20,11 @@ class Document(UniversalBaseModel): The document metadata. 
""" + tables: typing.Optional[typing.List[Table]] = pydantic.Field(default=None) + """ + The tables that this document contains. Tables are not available when table extraction is not enabled. + """ + parts: typing.Optional[typing.List[DocumentPart]] = pydantic.Field(default=None) """ Parts of the document that make up the document. However, parts are not available when diff --git a/src/vectara/types/generation_info.py b/src/vectara/types/generation_info.py index d32c2a1..0db1e04 100644 --- a/src/vectara/types/generation_info.py +++ b/src/vectara/types/generation_info.py @@ -19,7 +19,7 @@ class GenerationInfo(UniversalBaseModel): rephrased_query: typing.Optional[str] = pydantic.Field(default=None) """ - View the actual query made to backend that was rephrased + View the actual query made to backend that was rephrased by the LLM from the input query. """ diff --git a/src/vectara/types/generation_parameters.py b/src/vectara/types/generation_parameters.py index 6754415..3695745 100644 --- a/src/vectara/types/generation_parameters.py +++ b/src/vectara/types/generation_parameters.py @@ -19,13 +19,12 @@ class GenerationParameters(UniversalBaseModel): The preset values to use to feed the query results and other context to the model. A `generation_preset` is an object with a bundle of properties that specifies: - - - The `prompt_template` that is rendered and then sent to the LLM. - - The LLM used. - - `model_parameter`s such as temperature. - + * The `prompt_template` that is rendered and then sent to the LLM. + * The LLM used. + * `model_parameter`s such as temperature. + All of these properties except the model can be overridden by setting them in this - object. Even when a `prompt_template` is set, the `generation_preset_name` is used to set + object. Even when a `prompt_template` is set, the `generation_preset_name` is used to set the model used. 
If `generation_preset_name` is not set, the Vectara platform will use the default model and diff --git a/src/vectara/types/header.py b/src/vectara/types/header.py new file mode 100644 index 0000000..7769c37 --- /dev/null +++ b/src/vectara/types/header.py @@ -0,0 +1,6 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing +from .cell import Cell + +Header = typing.List[Cell] diff --git a/src/vectara/types/individual_search_result.py b/src/vectara/types/individual_search_result.py index 4be9e56..c49dd17 100644 --- a/src/vectara/types/individual_search_result.py +++ b/src/vectara/types/individual_search_result.py @@ -3,6 +3,7 @@ from ..core.pydantic_utilities import UniversalBaseModel import typing import pydantic +from .table import Table from ..core.pydantic_utilities import IS_PYDANTIC_V2 @@ -36,9 +37,14 @@ class IndividualSearchResult(UniversalBaseModel): The ID of the document that contains the document part. """ + table: typing.Optional[Table] = pydantic.Field(default=None) + """ + The table that the document part is from. + """ + request_corpora_index: typing.Optional[int] = pydantic.Field(default=None) """ - A query request can search over multiple corpora at a time. This property + A query request can search over multiple corpora at a time. This property is set to the index in the list of corpora in the original search request that this search result originated from. diff --git a/src/vectara/types/mmr_reranker.py b/src/vectara/types/mmr_reranker.py index 4e6d43e..7ea0e34 100644 --- a/src/vectara/types/mmr_reranker.py +++ b/src/vectara/types/mmr_reranker.py @@ -15,15 +15,14 @@ class MmrReranker(UniversalBaseModel): limit: typing.Optional[int] = pydantic.Field(default=None) """ - Specifies the maximum number of results to be returned after the reranking process. + Specifies the maximum number of results to be returned after the reranking process. When a reranker is applied, it performs the following steps: - 1. 
Reranks all input results according to its algorithm. 2. Sorts the reranked results based on their new scores. 3. Returns the top N results, where N is the value specified by this limit. - Note: This limit is applied per reranking stage. In a chain of rerankers, - each reranker can have its own limit, potentially reducing the number of + Note: This limit is applied per reranking stage. In a chain of rerankers, + each reranker can have its own limit, potentially reducing the number of results at each stage. """ @@ -31,7 +30,6 @@ class MmrReranker(UniversalBaseModel): """ Specifies the minimum score threshold for results to be included after the reranking process. When a reranker is applied with a cutoff, it performs the following steps: - 1. Reranks all input results according to its algorithm. 2. Applies the cutoff, removing any results with scores below the specified threshold. 3. Returns the remaining results, sorted by their new scores. diff --git a/src/vectara/types/none_reranker.py b/src/vectara/types/none_reranker.py index cf1671c..1e387cf 100644 --- a/src/vectara/types/none_reranker.py +++ b/src/vectara/types/none_reranker.py @@ -10,15 +10,14 @@ class NoneReranker(UniversalBaseModel): type: typing.Literal["none"] = "none" limit: typing.Optional[int] = pydantic.Field(default=None) """ - Specifies the maximum number of results to be returned after the reranking process. + Specifies the maximum number of results to be returned after the reranking process. When a reranker is applied, it performs the following steps: - 1. Reranks all input results according to its algorithm. 2. Sorts the reranked results based on their new scores. 3. Returns the top N results, where N is the value specified by this limit. - Note: This limit is applied per reranking stage. In a chain of rerankers, - each reranker can have its own limit, potentially reducing the number of + Note: This limit is applied per reranking stage. 
In a chain of rerankers, + each reranker can have its own limit, potentially reducing the number of results at each stage. """ diff --git a/src/vectara/types/row.py b/src/vectara/types/row.py new file mode 100644 index 0000000..3c43ba6 --- /dev/null +++ b/src/vectara/types/row.py @@ -0,0 +1,6 @@ +# This file was auto-generated by Fern from our API Definition. + +import typing +from .cell import Cell + +Row = typing.List[Cell] diff --git a/src/vectara/types/structured_document_section.py b/src/vectara/types/structured_document_section.py index af7758a..1c241e8 100644 --- a/src/vectara/types/structured_document_section.py +++ b/src/vectara/types/structured_document_section.py @@ -4,6 +4,7 @@ from ..core.pydantic_utilities import UniversalBaseModel import typing import pydantic +from .table import Table from ..core.pydantic_utilities import IS_PYDANTIC_V2 from ..core.pydantic_utilities import update_forward_refs @@ -30,11 +31,16 @@ class StructuredDocumentSection(UniversalBaseModel): metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = pydantic.Field(default=None) """ - Arbitrary object that becomes document part level metadata on any document part created - by this section. Properties of this object can be used by document part level + Arbitrary object that becomes document part level metadata on any document part created + by this section. Properties of this object can be used by document part level filters if defined as a corpus filter attribute. """ + tables: typing.Optional[typing.List[Table]] = pydantic.Field(default=None) + """ + The tables that this section contains. + """ + sections: typing.Optional[typing.List["StructuredDocumentSection"]] = pydantic.Field(default=None) """ The sections that this section contains. 
diff --git a/src/vectara/types/table.py b/src/vectara/types/table.py new file mode 100644 index 0000000..0640ae6 --- /dev/null +++ b/src/vectara/types/table.py @@ -0,0 +1,42 @@ +# This file was auto-generated by Fern from our API Definition. + +from ..core.pydantic_utilities import UniversalBaseModel +import typing +import pydantic +from .data import Data +from ..core.pydantic_utilities import IS_PYDANTIC_V2 + + +class Table(UniversalBaseModel): + """ + A table in a document. + """ + + id: typing.Optional[str] = pydantic.Field(default=None) + """ + The unique ID of the table within the document. + """ + + title: typing.Optional[str] = pydantic.Field(default=None) + """ + The title of the table. + """ + + data: typing.Optional[Data] = pydantic.Field(default=None) + """ + The data of the table. + """ + + description: typing.Optional[str] = pydantic.Field(default=None) + """ + The description of the table. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/vectara/types/table_extraction_config.py b/src/vectara/types/table_extraction_config.py new file mode 100644 index 0000000..fac3553 --- /dev/null +++ b/src/vectara/types/table_extraction_config.py @@ -0,0 +1,27 @@ +# This file was auto-generated by Fern from our API Definition. + +from ..core.pydantic_utilities import UniversalBaseModel +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 +import typing + + +class TableExtractionConfig(UniversalBaseModel): + """ + (Optional) Configuration for table extraction from the document. + """ + + extract_tables: bool = pydantic.Field() + """ + If set to true, the platform will attempt to extract tables from the document. + The tables will be indexed as separate document parts. 
+ """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/vectara/types/update_document_request.py b/src/vectara/types/update_document_request.py new file mode 100644 index 0000000..af2dd23 --- /dev/null +++ b/src/vectara/types/update_document_request.py @@ -0,0 +1,29 @@ +# This file was auto-generated by Fern from our API Definition. + +from ..core.pydantic_utilities import UniversalBaseModel +import typing +import pydantic +from ..core.pydantic_utilities import IS_PYDANTIC_V2 + + +class UpdateDocumentRequest(UniversalBaseModel): + """ + Schema for updating the document. For PUT requests, the request body metadata replaces the existing + metadata. For PATCH requests, the request body metadata is merged with the existing metadata, + adding or modifying only the specified fields. + """ + + metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = pydantic.Field(default=None) + """ + The metadata for a document as an arbitrary object. Properties of this object + can be used by document level filter attributes. + """ + + if IS_PYDANTIC_V2: + model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2 + else: + + class Config: + frozen = True + smart_union = True + extra = pydantic.Extra.allow diff --git a/src/vectara/types/user_function_reranker.py b/src/vectara/types/user_function_reranker.py index fe58c92..f5bee2f 100644 --- a/src/vectara/types/user_function_reranker.py +++ b/src/vectara/types/user_function_reranker.py @@ -15,15 +15,14 @@ class UserFunctionReranker(UniversalBaseModel): limit: typing.Optional[int] = pydantic.Field(default=None) """ - Specifies the maximum number of results to be returned after the reranking process. 
+ Specifies the maximum number of results to be returned after the reranking process. When a reranker is applied, it performs the following steps: - 1. Reranks all input results according to its algorithm. 2. Sorts the reranked results based on their new scores. 3. Returns the top N results, where N is the value specified by this limit. - Note: This limit is applied per reranking stage. In a chain of rerankers, - each reranker can have its own limit, potentially reducing the number of + Note: This limit is applied per reranking stage. In a chain of rerankers, + each reranker can have its own limit, potentially reducing the number of results at each stage. """ @@ -31,7 +30,6 @@ class UserFunctionReranker(UniversalBaseModel): """ Specifies the minimum score threshold for results to be included after the reranking process. When a reranker is applied with a cutoff, it performs the following steps: - 1. Reranks all input results according to its algorithm. 2. Applies the cutoff, removing any results with scores below the specified threshold. 3. Returns the remaining results, sorted by their new scores. diff --git a/src/vectara/upload/client.py b/src/vectara/upload/client.py index 8e1c02a..835b245 100644 --- a/src/vectara/upload/client.py +++ b/src/vectara/upload/client.py @@ -5,6 +5,7 @@ from ..types.corpus_key import CorpusKey from .. 
import core from ..types.components_schemas_max_chars_chunking_strategy import ComponentsSchemasMaxCharsChunkingStrategy +from ..types.table_extraction_config import TableExtractionConfig from ..core.request_options import RequestOptions from ..types.document import Document from ..core.jsonable_encoder import jsonable_encoder @@ -37,17 +38,18 @@ def file( request_timeout_millis: typing.Optional[int] = None, metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT, chunking_strategy: typing.Optional[ComponentsSchemasMaxCharsChunkingStrategy] = OMIT, + table_extraction_config: typing.Optional[TableExtractionConfig] = OMIT, filename: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> Document: """ Upload files such as PDFs and Word Documents for automatic text extraction and metadata parsing. The request expects a `multipart/form-data` format containing the following parts: - - - `metadata` - (Optional) Specifies a JSON object representing any additional metadata to be associated with the extracted document. For example, `'metadata={"key": "value"};type=application/json'` - - `chunking_strategy` - (Optional) Specifies the chunking strategy for the platform to use. If you do not set this option, the platform uses the default strategy, which creates one chunk per sentence. For example, `'chunking_strategy={"type":"max_chars_chunking_strategy","max_chars_per_chunk":200};type=application/json'` - - `file` - Specifies the file that you want to upload. - - `filename` - Specified as part of the file field with the file name that you want to associate with the uploaded file. For a curl example, use the following syntax: `'file=@/path/to/file/file.pdf;filename=desired_filename.pdf'` + * `metadata` - (Optional) Specifies a JSON object representing any additional metadata to be associated with the extracted document. 
For example, `'metadata={"key": "value"};type=application/json'` + * `chunking_strategy` - (Optional) Specifies the chunking strategy for the platform to use. If you do not set this option, the platform uses the default strategy, which creates one chunk per sentence. For example, `'chunking_strategy={"type":"max_chars_chunking_strategy","max_chars_per_chunk":200};type=application/json'` + * `table_extraction_config` - (Optional) Specifies whether to extract table data from the uploaded file. If you do not set this option, the platform does not extract tables from PDF files. Example config, `'table_extraction_config={"extract_tables":true};type=application/json'` + * `file` - Specifies the file that you want to upload. + * `filename` - Specified as part of the file field with the file name that you want to associate with the uploaded file. For a curl example, use the following syntax: `'file=@/path/to/file/file.pdf;filename=desired_filename.pdf'` For more detailed information, see this [File Upload API guide.](https://docs.vectara.com/docs/api-reference/indexing-apis/file-upload/file-upload) @@ -70,6 +72,8 @@ def file( chunking_strategy : typing.Optional[ComponentsSchemasMaxCharsChunkingStrategy] + table_extraction_config : typing.Optional[TableExtractionConfig] + filename : typing.Optional[str] Optional multipart section to override the filename. 
@@ -110,6 +114,17 @@ def file( if chunking_strategy is not OMIT else {} ), + **( + { + "table_extraction_config": ( + None, + json.dumps(jsonable_encoder(table_extraction_config)), + "application/json", + ) + } + if table_extraction_config is not OMIT + else {} + ), **( {"filename": (None, json.dumps(jsonable_encoder(filename)), "text/plain")} if filename is not OMIT @@ -182,17 +197,18 @@ async def file( request_timeout_millis: typing.Optional[int] = None, metadata: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT, chunking_strategy: typing.Optional[ComponentsSchemasMaxCharsChunkingStrategy] = OMIT, + table_extraction_config: typing.Optional[TableExtractionConfig] = OMIT, filename: typing.Optional[str] = OMIT, request_options: typing.Optional[RequestOptions] = None, ) -> Document: """ Upload files such as PDFs and Word Documents for automatic text extraction and metadata parsing. The request expects a `multipart/form-data` format containing the following parts: - - - `metadata` - (Optional) Specifies a JSON object representing any additional metadata to be associated with the extracted document. For example, `'metadata={"key": "value"};type=application/json'` - - `chunking_strategy` - (Optional) Specifies the chunking strategy for the platform to use. If you do not set this option, the platform uses the default strategy, which creates one chunk per sentence. For example, `'chunking_strategy={"type":"max_chars_chunking_strategy","max_chars_per_chunk":200};type=application/json'` - - `file` - Specifies the file that you want to upload. - - `filename` - Specified as part of the file field with the file name that you want to associate with the uploaded file. For a curl example, use the following syntax: `'file=@/path/to/file/file.pdf;filename=desired_filename.pdf'` + * `metadata` - (Optional) Specifies a JSON object representing any additional metadata to be associated with the extracted document. 
For example, `'metadata={"key": "value"};type=application/json'` + * `chunking_strategy` - (Optional) Specifies the chunking strategy for the platform to use. If you do not set this option, the platform uses the default strategy, which creates one chunk per sentence. For example, `'chunking_strategy={"type":"max_chars_chunking_strategy","max_chars_per_chunk":200};type=application/json'` + * `table_extraction_config` - (Optional) Specifies whether to extract table data from the uploaded file. If you do not set this option, the platform does not extract tables from PDF files. Example config, `'table_extraction_config={"extract_tables":true};type=application/json'` + * `file` - Specifies the file that you want to upload. + * `filename` - Specified as part of the file field with the file name that you want to associate with the uploaded file. For a curl example, use the following syntax: `'file=@/path/to/file/file.pdf;filename=desired_filename.pdf'` For more detailed information, see this [File Upload API guide.](https://docs.vectara.com/docs/api-reference/indexing-apis/file-upload/file-upload) @@ -215,6 +231,8 @@ async def file( chunking_strategy : typing.Optional[ComponentsSchemasMaxCharsChunkingStrategy] + table_extraction_config : typing.Optional[TableExtractionConfig] + filename : typing.Optional[str] Optional multipart section to override the filename. @@ -263,6 +281,17 @@ async def main() -> None: if chunking_strategy is not OMIT else {} ), + **( + { + "table_extraction_config": ( + None, + json.dumps(jsonable_encoder(table_extraction_config)), + "application/json", + ) + } + if table_extraction_config is not OMIT + else {} + ), **( {"filename": (None, json.dumps(jsonable_encoder(filename)), "text/plain")} if filename is not OMIT