diff --git a/src/vectara/base_client.py b/src/vectara/base_client.py
index a606e87..bbe33ba 100644
--- a/src/vectara/base_client.py
+++ b/src/vectara/base_client.py
@@ -10,6 +10,7 @@
from .corpora.client import CorporaClient
from .upload.client import UploadClient
from .documents.client import DocumentsClient
+from .index.client import IndexClient
from .chats.client import ChatsClient
from .llms.client import LlmsClient
from .generation_presets.client import GenerationPresetsClient
@@ -19,12 +20,14 @@
from .users.client import UsersClient
from .api_keys.client import ApiKeysClient
from .app_clients.client import AppClientsClient
+from .query_history.client import QueryHistoryClient
from .auth.client import AuthClient
from .types.search_corpora_parameters import SearchCorporaParameters
from .types.generation_parameters import GenerationParameters
from .core.request_options import RequestOptions
from .types.query_streamed_response import QueryStreamedResponse
from .core.serialization import convert_and_respect_annotation_metadata
+import httpx_sse
from .core.pydantic_utilities import parse_obj_as
import json
from .errors.bad_request_error import BadRequestError
@@ -42,6 +45,7 @@
from .corpora.client import AsyncCorporaClient
from .upload.client import AsyncUploadClient
from .documents.client import AsyncDocumentsClient
+from .index.client import AsyncIndexClient
from .chats.client import AsyncChatsClient
from .llms.client import AsyncLlmsClient
from .generation_presets.client import AsyncGenerationPresetsClient
@@ -51,6 +55,7 @@
from .users.client import AsyncUsersClient
from .api_keys.client import AsyncApiKeysClient
from .app_clients.client import AsyncAppClientsClient
+from .query_history.client import AsyncQueryHistoryClient
from .auth.client import AsyncAuthClient
# this is used as the default value for optional parameters
@@ -151,6 +156,7 @@ def __init__(
self.corpora = CorporaClient(client_wrapper=self._client_wrapper)
self.upload = UploadClient(client_wrapper=self._client_wrapper)
self.documents = DocumentsClient(client_wrapper=self._client_wrapper)
+ self.index = IndexClient(client_wrapper=self._client_wrapper)
self.chats = ChatsClient(client_wrapper=self._client_wrapper)
self.llms = LlmsClient(client_wrapper=self._client_wrapper)
self.generation_presets = GenerationPresetsClient(client_wrapper=self._client_wrapper)
@@ -160,6 +166,7 @@ def __init__(
self.users = UsersClient(client_wrapper=self._client_wrapper)
self.api_keys = ApiKeysClient(client_wrapper=self._client_wrapper)
self.app_clients = AppClientsClient(client_wrapper=self._client_wrapper)
+ self.query_history = QueryHistoryClient(client_wrapper=self._client_wrapper)
self.auth = AuthClient(client_wrapper=self._client_wrapper)
def query_stream(
@@ -170,17 +177,19 @@ def query_stream(
request_timeout: typing.Optional[int] = None,
request_timeout_millis: typing.Optional[int] = None,
generation: typing.Optional[GenerationParameters] = OMIT,
+ save_history: typing.Optional[bool] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> typing.Iterator[QueryStreamedResponse]:
"""
- Perform a multi-purpose query to retrieve relevant information from one or more corpora and generate a response using Retrieval Augmented Generation (RAG).
+    Perform a multipurpose query across one or more corpora to retrieve relevant information and generate a response using Retrieval Augmented Generation (RAG).
- - Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition)
- - Leverage advanced search capabilities like reranking (`reranker`) and opt-in Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response
- will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization)
- - Specify a RAG-specific LLM like Mockingbird (`mockingbird-1.0-2024-07-16`) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm)
- - Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-customization-options)
- - Customize citation formats in summaries using the `citations` object to include numeric, HTML, or Markdown links. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#citation-format-in-summary)
+ * Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition).
+ * Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition)
+ * Leverage advanced search capabilities like reranking (`reranker`) and opt-in Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response
+ will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization)
+ * Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm)
+ * Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-customization-options)
+ * Customize citation formats in summaries using the `citations` object to include numeric, HTML, or Markdown links. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#citation-format-in-summary)
For more detailed information, see this [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search).
@@ -199,6 +208,9 @@ def query_stream(
generation : typing.Optional[GenerationParameters]
+ save_history : typing.Optional[bool]
+ Indicates whether to save the query in the query history.
+
request_options : typing.Optional[RequestOptions]
Request-specific configuration.
@@ -212,10 +224,8 @@ def query_stream(
from vectara import (
CitationParameters,
ContextConfiguration,
- CustomerSpecificReranker,
GenerationParameters,
KeyedSearchCorpus,
- ModelParameters,
SearchCorporaParameters,
Vectara,
)
@@ -226,53 +236,28 @@ def query_stream(
client_secret="YOUR_CLIENT_SECRET",
)
response = client.query_stream(
- request_timeout=1,
- request_timeout_millis=1,
- query="string",
+ query="hello, world?",
search=SearchCorporaParameters(
corpora=[
KeyedSearchCorpus(
- custom_dimensions={"string": 1.1},
- metadata_filter="string",
- lexical_interpolation=1.1,
- semantics="default",
+ lexical_interpolation=0.005,
)
],
- offset=1,
- limit=1,
+ offset=0,
+ limit=10,
context_configuration=ContextConfiguration(
- characters_before=1,
- characters_after=1,
- sentences_before=1,
- sentences_after=1,
- start_tag="string",
- end_tag="string",
- ),
- reranker=CustomerSpecificReranker(
- reranker_id="string",
- reranker_name="string",
+ sentences_before=2,
+ sentences_after=2,
+ start_tag="",
+ end_tag="",
),
),
generation=GenerationParameters(
- generation_preset_name="string",
- prompt_name="string",
- max_used_search_results=1,
- prompt_template="string",
- prompt_text="string",
- max_response_characters=1,
- response_language="auto",
- model_parameters=ModelParameters(
- max_tokens=1,
- temperature=1.1,
- frequency_penalty=1.1,
- presence_penalty=1.1,
- ),
+ max_used_search_results=5,
citations=CitationParameters(
style="none",
- url_pattern="string",
- text_pattern="string",
),
- enable_factual_consistency_score=True,
+ response_language="auto",
),
)
for chunk in response:
@@ -290,6 +275,7 @@ def query_stream(
"generation": convert_and_respect_annotation_metadata(
object_=generation, annotation=GenerationParameters, direction="write"
),
+ "save_history": save_history,
"stream_response": True,
},
headers={
@@ -301,15 +287,14 @@ def query_stream(
) as _response:
try:
if 200 <= _response.status_code < 300:
- for _text in _response.iter_lines():
+ _event_source = httpx_sse.EventSource(_response)
+ for _sse in _event_source.iter_sse():
try:
- if len(_text) == 0:
- continue
yield typing.cast(
QueryStreamedResponse,
parse_obj_as(
type_=QueryStreamedResponse, # type: ignore
- object_=json.loads(_text),
+ object_=json.loads(_sse.data),
),
)
except:
@@ -359,17 +344,19 @@ def query(
request_timeout: typing.Optional[int] = None,
request_timeout_millis: typing.Optional[int] = None,
generation: typing.Optional[GenerationParameters] = OMIT,
+ save_history: typing.Optional[bool] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> QueryFullResponse:
"""
- Perform a multi-purpose query to retrieve relevant information from one or more corpora and generate a response using Retrieval Augmented Generation (RAG).
+    Perform a multipurpose query across one or more corpora to retrieve relevant information and generate a response using Retrieval Augmented Generation (RAG).
- - Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition)
- - Leverage advanced search capabilities like reranking (`reranker`) and opt-in Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response
- will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization)
- - Specify a RAG-specific LLM like Mockingbird (`mockingbird-1.0-2024-07-16`) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm)
- - Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-customization-options)
- - Customize citation formats in summaries using the `citations` object to include numeric, HTML, or Markdown links. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#citation-format-in-summary)
+ * Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition).
+ * Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition)
+ * Leverage advanced search capabilities like reranking (`reranker`) and opt-in Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response
+ will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization)
+ * Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm)
+ * Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-customization-options)
+ * Customize citation formats in summaries using the `citations` object to include numeric, HTML, or Markdown links. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#citation-format-in-summary)
For more detailed information, see this [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search).
@@ -388,6 +375,9 @@ def query(
generation : typing.Optional[GenerationParameters]
+ save_history : typing.Optional[bool]
+ Indicates whether to save the query in the query history.
+
request_options : typing.Optional[RequestOptions]
Request-specific configuration.
@@ -422,6 +412,7 @@ def query(
"generation": convert_and_respect_annotation_metadata(
object_=generation, annotation=GenerationParameters, direction="write"
),
+ "save_history": save_history,
"stream_response": False,
},
headers={
@@ -484,6 +475,7 @@ def chat_stream(
request_timeout_millis: typing.Optional[int] = None,
generation: typing.Optional[GenerationParameters] = OMIT,
chat: typing.Optional[ChatParameters] = OMIT,
+ save_history: typing.Optional[bool] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> typing.Iterator[ChatStreamedResponse]:
"""
@@ -506,6 +498,9 @@ def chat_stream(
chat : typing.Optional[ChatParameters]
+ save_history : typing.Optional[bool]
+ Indicates whether to save the chat in both the chat and query history. This overrides `chat.store`.
+
request_options : typing.Optional[RequestOptions]
Request-specific configuration.
@@ -516,17 +511,7 @@ def chat_stream(
Examples
--------
- from vectara import (
- ChatParameters,
- CitationParameters,
- ContextConfiguration,
- CustomerSpecificReranker,
- GenerationParameters,
- KeyedSearchCorpus,
- ModelParameters,
- SearchCorporaParameters,
- Vectara,
- )
+ from vectara import SearchCorporaParameters, Vectara
client = Vectara(
api_key="YOUR_API_KEY",
@@ -534,57 +519,8 @@ def chat_stream(
client_secret="YOUR_CLIENT_SECRET",
)
response = client.chat_stream(
- request_timeout=1,
- request_timeout_millis=1,
- query="string",
- search=SearchCorporaParameters(
- corpora=[
- KeyedSearchCorpus(
- custom_dimensions={"string": 1.1},
- metadata_filter="string",
- lexical_interpolation=1.1,
- semantics="default",
- )
- ],
- offset=1,
- limit=1,
- context_configuration=ContextConfiguration(
- characters_before=1,
- characters_after=1,
- sentences_before=1,
- sentences_after=1,
- start_tag="string",
- end_tag="string",
- ),
- reranker=CustomerSpecificReranker(
- reranker_id="string",
- reranker_name="string",
- ),
- ),
- generation=GenerationParameters(
- generation_preset_name="string",
- prompt_name="string",
- max_used_search_results=1,
- prompt_template="string",
- prompt_text="string",
- max_response_characters=1,
- response_language="auto",
- model_parameters=ModelParameters(
- max_tokens=1,
- temperature=1.1,
- frequency_penalty=1.1,
- presence_penalty=1.1,
- ),
- citations=CitationParameters(
- style="none",
- url_pattern="string",
- text_pattern="string",
- ),
- enable_factual_consistency_score=True,
- ),
- chat=ChatParameters(
- store=True,
- ),
+ query="How can I use the Vectara platform?",
+ search=SearchCorporaParameters(),
)
for chunk in response:
yield chunk
@@ -604,6 +540,7 @@ def chat_stream(
"chat": convert_and_respect_annotation_metadata(
object_=chat, annotation=ChatParameters, direction="write"
),
+ "save_history": save_history,
"stream_response": True,
},
headers={
@@ -615,15 +552,14 @@ def chat_stream(
) as _response:
try:
if 200 <= _response.status_code < 300:
- for _text in _response.iter_lines():
+ _event_source = httpx_sse.EventSource(_response)
+ for _sse in _event_source.iter_sse():
try:
- if len(_text) == 0:
- continue
yield typing.cast(
ChatStreamedResponse,
parse_obj_as(
type_=ChatStreamedResponse, # type: ignore
- object_=json.loads(_text),
+ object_=json.loads(_sse.data),
),
)
except:
@@ -674,6 +610,7 @@ def chat(
request_timeout_millis: typing.Optional[int] = None,
generation: typing.Optional[GenerationParameters] = OMIT,
chat: typing.Optional[ChatParameters] = OMIT,
+ save_history: typing.Optional[bool] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> ChatFullResponse:
"""
@@ -696,6 +633,9 @@ def chat(
chat : typing.Optional[ChatParameters]
+ save_history : typing.Optional[bool]
+ Indicates whether to save the chat in both the chat and query history. This overrides `chat.store`.
+
request_options : typing.Optional[RequestOptions]
Request-specific configuration.
@@ -733,6 +673,7 @@ def chat(
"chat": convert_and_respect_annotation_metadata(
object_=chat, annotation=ChatParameters, direction="write"
),
+ "save_history": save_history,
"stream_response": False,
},
headers={
@@ -881,6 +822,7 @@ def __init__(
self.corpora = AsyncCorporaClient(client_wrapper=self._client_wrapper)
self.upload = AsyncUploadClient(client_wrapper=self._client_wrapper)
self.documents = AsyncDocumentsClient(client_wrapper=self._client_wrapper)
+ self.index = AsyncIndexClient(client_wrapper=self._client_wrapper)
self.chats = AsyncChatsClient(client_wrapper=self._client_wrapper)
self.llms = AsyncLlmsClient(client_wrapper=self._client_wrapper)
self.generation_presets = AsyncGenerationPresetsClient(client_wrapper=self._client_wrapper)
@@ -890,6 +832,7 @@ def __init__(
self.users = AsyncUsersClient(client_wrapper=self._client_wrapper)
self.api_keys = AsyncApiKeysClient(client_wrapper=self._client_wrapper)
self.app_clients = AsyncAppClientsClient(client_wrapper=self._client_wrapper)
+ self.query_history = AsyncQueryHistoryClient(client_wrapper=self._client_wrapper)
self.auth = AsyncAuthClient(client_wrapper=self._client_wrapper)
async def query_stream(
@@ -900,17 +843,19 @@ async def query_stream(
request_timeout: typing.Optional[int] = None,
request_timeout_millis: typing.Optional[int] = None,
generation: typing.Optional[GenerationParameters] = OMIT,
+ save_history: typing.Optional[bool] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> typing.AsyncIterator[QueryStreamedResponse]:
"""
- Perform a multi-purpose query to retrieve relevant information from one or more corpora and generate a response using Retrieval Augmented Generation (RAG).
+    Perform a multipurpose query across one or more corpora to retrieve relevant information and generate a response using Retrieval Augmented Generation (RAG).
- - Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition)
- - Leverage advanced search capabilities like reranking (`reranker`) and opt-in Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response
- will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization)
- - Specify a RAG-specific LLM like Mockingbird (`mockingbird-1.0-2024-07-16`) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm)
- - Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-customization-options)
- - Customize citation formats in summaries using the `citations` object to include numeric, HTML, or Markdown links. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#citation-format-in-summary)
+ * Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition).
+ * Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition)
+ * Leverage advanced search capabilities like reranking (`reranker`) and opt-in Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response
+ will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization)
+ * Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm)
+ * Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-customization-options)
+ * Customize citation formats in summaries using the `citations` object to include numeric, HTML, or Markdown links. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#citation-format-in-summary)
For more detailed information, see this [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search).
@@ -929,6 +874,9 @@ async def query_stream(
generation : typing.Optional[GenerationParameters]
+ save_history : typing.Optional[bool]
+ Indicates whether to save the query in the query history.
+
request_options : typing.Optional[RequestOptions]
Request-specific configuration.
@@ -945,10 +893,8 @@ async def query_stream(
AsyncVectara,
CitationParameters,
ContextConfiguration,
- CustomerSpecificReranker,
GenerationParameters,
KeyedSearchCorpus,
- ModelParameters,
SearchCorporaParameters,
)
@@ -961,53 +907,28 @@ async def query_stream(
async def main() -> None:
response = await client.query_stream(
- request_timeout=1,
- request_timeout_millis=1,
- query="string",
+ query="hello, world?",
search=SearchCorporaParameters(
corpora=[
KeyedSearchCorpus(
- custom_dimensions={"string": 1.1},
- metadata_filter="string",
- lexical_interpolation=1.1,
- semantics="default",
+ lexical_interpolation=0.005,
)
],
- offset=1,
- limit=1,
+ offset=0,
+ limit=10,
context_configuration=ContextConfiguration(
- characters_before=1,
- characters_after=1,
- sentences_before=1,
- sentences_after=1,
- start_tag="string",
- end_tag="string",
- ),
- reranker=CustomerSpecificReranker(
- reranker_id="string",
- reranker_name="string",
+ sentences_before=2,
+ sentences_after=2,
+ start_tag="",
+ end_tag="",
),
),
generation=GenerationParameters(
- generation_preset_name="string",
- prompt_name="string",
- max_used_search_results=1,
- prompt_template="string",
- prompt_text="string",
- max_response_characters=1,
- response_language="auto",
- model_parameters=ModelParameters(
- max_tokens=1,
- temperature=1.1,
- frequency_penalty=1.1,
- presence_penalty=1.1,
- ),
+ max_used_search_results=5,
citations=CitationParameters(
style="none",
- url_pattern="string",
- text_pattern="string",
),
- enable_factual_consistency_score=True,
+ response_language="auto",
),
)
async for chunk in response:
@@ -1028,6 +949,7 @@ async def main() -> None:
"generation": convert_and_respect_annotation_metadata(
object_=generation, annotation=GenerationParameters, direction="write"
),
+ "save_history": save_history,
"stream_response": True,
},
headers={
@@ -1039,15 +961,14 @@ async def main() -> None:
) as _response:
try:
if 200 <= _response.status_code < 300:
- async for _text in _response.aiter_lines():
+ _event_source = httpx_sse.EventSource(_response)
+ async for _sse in _event_source.aiter_sse():
try:
- if len(_text) == 0:
- continue
yield typing.cast(
QueryStreamedResponse,
parse_obj_as(
type_=QueryStreamedResponse, # type: ignore
- object_=json.loads(_text),
+ object_=json.loads(_sse.data),
),
)
except:
@@ -1097,17 +1018,19 @@ async def query(
request_timeout: typing.Optional[int] = None,
request_timeout_millis: typing.Optional[int] = None,
generation: typing.Optional[GenerationParameters] = OMIT,
+ save_history: typing.Optional[bool] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> QueryFullResponse:
"""
- Perform a multi-purpose query to retrieve relevant information from one or more corpora and generate a response using Retrieval Augmented Generation (RAG).
+    Perform a multipurpose query across one or more corpora to retrieve relevant information and generate a response using Retrieval Augmented Generation (RAG).
- - Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition)
- - Leverage advanced search capabilities like reranking (`reranker`) and opt-in Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response
- will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization)
- - Specify a RAG-specific LLM like Mockingbird (`mockingbird-1.0-2024-07-16`) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm)
- - Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-customization-options)
- - Customize citation formats in summaries using the `citations` object to include numeric, HTML, or Markdown links. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#citation-format-in-summary)
+ * Specify the unique `corpus_key` identifying the corpus to query. The `corpus_key` is [created in the Vectara Console UI](https://docs.vectara.com/docs/console-ui/creating-a-corpus) or the [Create Corpus API definition](https://docs.vectara.com/docs/api-reference/admin-apis/create-corpus). When creating a new corpus, you have the option to assign a custom `corpus_key` following your preferred naming convention. This key serves as a unique identifier for the corpus, allowing it to be referenced in search requests. For more information, see [Corpus Key Definition](https://docs.vectara.com/docs/api-reference/search-apis/search#corpus-key-definition).
+ * Customize your search by specifying the query text (`query`), pagination details (`offset` and `limit`), and metadata filters (`metadata_filter`) to tailor your search results. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#query-definition)
+ * Leverage advanced search capabilities like reranking (`reranker`) and opt-in Retrieval Augmented Generation (RAG) (`generation`) for enhanced query performance. Generation is opt in by setting the `generation` property. By excluding the property or by setting it to null, the response
+ will not include generation. [Learn more](https://docs.vectara.com/docs/learn/grounded-generation/configure-query-summarization)
+ * Specify Vectara's RAG-focused LLM (Mockingbird) for the `generation_preset_name`. [Learn more](https://docs.vectara.com/docs/learn/mockingbird-llm)
+ * Use advanced summarization options that utilize detailed summarization parameters such as `max_response_characters`, `temperature`, and `frequency_penalty` for generating precise and relevant summaries. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#advanced-summarization-customization-options)
+ * Customize citation formats in summaries using the `citations` object to include numeric, HTML, or Markdown links. [Learn more](https://docs.vectara.com/docs/api-reference/search-apis/search#citation-format-in-summary)
For more detailed information, see this [Query API guide](https://docs.vectara.com/docs/api-reference/search-apis/search).
@@ -1126,6 +1049,9 @@ async def query(
generation : typing.Optional[GenerationParameters]
+ save_history : typing.Optional[bool]
+ Indicates whether to save the query in the query history.
+
request_options : typing.Optional[RequestOptions]
Request-specific configuration.
@@ -1168,6 +1094,7 @@ async def main() -> None:
"generation": convert_and_respect_annotation_metadata(
object_=generation, annotation=GenerationParameters, direction="write"
),
+ "save_history": save_history,
"stream_response": False,
},
headers={
@@ -1230,6 +1157,7 @@ async def chat_stream(
request_timeout_millis: typing.Optional[int] = None,
generation: typing.Optional[GenerationParameters] = OMIT,
chat: typing.Optional[ChatParameters] = OMIT,
+ save_history: typing.Optional[bool] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> typing.AsyncIterator[ChatStreamedResponse]:
"""
@@ -1252,6 +1180,9 @@ async def chat_stream(
chat : typing.Optional[ChatParameters]
+ save_history : typing.Optional[bool]
+ Indicates whether to save the chat in both the chat and query history. This overrides `chat.store`.
+
request_options : typing.Optional[RequestOptions]
Request-specific configuration.
@@ -1264,17 +1195,7 @@ async def chat_stream(
--------
import asyncio
- from vectara import (
- AsyncVectara,
- ChatParameters,
- CitationParameters,
- ContextConfiguration,
- CustomerSpecificReranker,
- GenerationParameters,
- KeyedSearchCorpus,
- ModelParameters,
- SearchCorporaParameters,
- )
+ from vectara import AsyncVectara, SearchCorporaParameters
client = AsyncVectara(
api_key="YOUR_API_KEY",
@@ -1285,57 +1206,8 @@ async def chat_stream(
async def main() -> None:
response = await client.chat_stream(
- request_timeout=1,
- request_timeout_millis=1,
- query="string",
- search=SearchCorporaParameters(
- corpora=[
- KeyedSearchCorpus(
- custom_dimensions={"string": 1.1},
- metadata_filter="string",
- lexical_interpolation=1.1,
- semantics="default",
- )
- ],
- offset=1,
- limit=1,
- context_configuration=ContextConfiguration(
- characters_before=1,
- characters_after=1,
- sentences_before=1,
- sentences_after=1,
- start_tag="string",
- end_tag="string",
- ),
- reranker=CustomerSpecificReranker(
- reranker_id="string",
- reranker_name="string",
- ),
- ),
- generation=GenerationParameters(
- generation_preset_name="string",
- prompt_name="string",
- max_used_search_results=1,
- prompt_template="string",
- prompt_text="string",
- max_response_characters=1,
- response_language="auto",
- model_parameters=ModelParameters(
- max_tokens=1,
- temperature=1.1,
- frequency_penalty=1.1,
- presence_penalty=1.1,
- ),
- citations=CitationParameters(
- style="none",
- url_pattern="string",
- text_pattern="string",
- ),
- enable_factual_consistency_score=True,
- ),
- chat=ChatParameters(
- store=True,
- ),
+ query="How can I use the Vectara platform?",
+ search=SearchCorporaParameters(),
)
async for chunk in response:
yield chunk
@@ -1358,6 +1230,7 @@ async def main() -> None:
"chat": convert_and_respect_annotation_metadata(
object_=chat, annotation=ChatParameters, direction="write"
),
+ "save_history": save_history,
"stream_response": True,
},
headers={
@@ -1369,15 +1242,14 @@ async def main() -> None:
) as _response:
try:
if 200 <= _response.status_code < 300:
- async for _text in _response.aiter_lines():
+ _event_source = httpx_sse.EventSource(_response)
+ async for _sse in _event_source.aiter_sse():
try:
- if len(_text) == 0:
- continue
yield typing.cast(
ChatStreamedResponse,
parse_obj_as(
type_=ChatStreamedResponse, # type: ignore
- object_=json.loads(_text),
+ object_=json.loads(_sse.data),
),
)
except:
@@ -1428,6 +1300,7 @@ async def chat(
request_timeout_millis: typing.Optional[int] = None,
generation: typing.Optional[GenerationParameters] = OMIT,
chat: typing.Optional[ChatParameters] = OMIT,
+ save_history: typing.Optional[bool] = OMIT,
request_options: typing.Optional[RequestOptions] = None,
) -> ChatFullResponse:
"""
@@ -1450,6 +1323,9 @@ async def chat(
chat : typing.Optional[ChatParameters]
+ save_history : typing.Optional[bool]
+ Indicates whether to save the chat in both the chat and query history. This overrides `chat.store`.
+
request_options : typing.Optional[RequestOptions]
Request-specific configuration.
@@ -1495,6 +1371,7 @@ async def main() -> None:
"chat": convert_and_respect_annotation_metadata(
object_=chat, annotation=ChatParameters, direction="write"
),
+ "save_history": save_history,
"stream_response": False,
},
headers={