diff --git a/code/backend/batch/utilities/helpers/env_helper.py b/code/backend/batch/utilities/helpers/env_helper.py index b860a79c2..7319c0499 100644 --- a/code/backend/batch/utilities/helpers/env_helper.py +++ b/code/backend/batch/utilities/helpers/env_helper.py @@ -36,8 +36,8 @@ def __load_config(self, **kwargs) -> None: # Azure Search self.AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE", "") self.AZURE_SEARCH_INDEX = os.getenv("AZURE_SEARCH_INDEX", "") - self.AZURE_SEARCH_USE_SEMANTIC_SEARCH = ( - os.getenv("AZURE_SEARCH_USE_SEMANTIC_SEARCH", "False").lower() == "true" + self.AZURE_SEARCH_USE_SEMANTIC_SEARCH = self.get_env_var_bool( + "AZURE_SEARCH_USE_SEMANTIC_SEARCH", "False" ) self.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG = os.getenv( "AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG", "default" diff --git a/code/backend/batch/utilities/search/azure_search_handler.py b/code/backend/batch/utilities/search/azure_search_handler.py index 064454747..815e6a89f 100644 --- a/code/backend/batch/utilities/search/azure_search_handler.py +++ b/code/backend/batch/utilities/search/azure_search_handler.py @@ -85,7 +85,7 @@ def _semantic_search(self, question: str, tokenised_question: list[int]): ], filter=self.env_helper.AZURE_SEARCH_FILTER, query_type="semantic", - semantic_configuration_name=self.env_helper.AZURE_SEARCH_SEMANTIC_CONFIG_NAME, + semantic_configuration_name=self.env_helper.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG, query_caption="extractive", query_answer="extractive", top=self.env_helper.AZURE_SEARCH_TOP_K, diff --git a/code/backend/batch/utilities/search/integrated_vectorization_search_handler.py b/code/backend/batch/utilities/search/integrated_vectorization_search_handler.py index a85b6e44c..bba5c4106 100644 --- a/code/backend/batch/utilities/search/integrated_vectorization_search_handler.py +++ b/code/backend/batch/utilities/search/integrated_vectorization_search_handler.py @@ -71,19 +71,43 @@ def delete_files(self, files): def query_search(self, question) -> List[SourceDocument]: if self._check_index_exists(): - vector_query = VectorizableTextQuery( - text=question, - k_nearest_neighbors=self.env_helper.AZURE_SEARCH_TOP_K, - fields="content_vector", - exhaustive=True, - ) - search_results = self.search_client.search( - search_text=question, - vector_queries=[vector_query], - top=self.env_helper.AZURE_SEARCH_TOP_K, - ) + if self.env_helper.AZURE_SEARCH_USE_SEMANTIC_SEARCH: + search_results = self._semantic_search(question) + else: + search_results = self._hybrid_search(question) return self._convert_to_source_documents(search_results) + def _hybrid_search(self, question: str): + vector_query = VectorizableTextQuery( + text=question, + k_nearest_neighbors=self.env_helper.AZURE_SEARCH_TOP_K, + fields="content_vector", + exhaustive=True, + ) + return self.search_client.search( + search_text=question, + vector_queries=[vector_query], + top=self.env_helper.AZURE_SEARCH_TOP_K, + ) + + def _semantic_search(self, question: str): + vector_query = VectorizableTextQuery( + text=question, + k_nearest_neighbors=self.env_helper.AZURE_SEARCH_TOP_K, + fields="content_vector", + exhaustive=True, + ) + return self.search_client.search( + search_text=question, + vector_queries=[vector_query], + filter=self.env_helper.AZURE_SEARCH_FILTER, + query_type="semantic", + semantic_configuration_name=self.env_helper.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG, + query_caption="extractive", + query_answer="extractive", + top=self.env_helper.AZURE_SEARCH_TOP_K, + ) + def _convert_to_source_documents(self, search_results) -> List[SourceDocument]: source_documents = [] for source in search_results: diff --git a/code/tests/search_utilities/test_azure_search_handler.py b/code/tests/search_utilities/test_azure_search_handler.py index 1841d8dfa..fa23780ae 100644 --- a/code/tests/search_utilities/test_azure_search_handler.py +++ b/code/tests/search_utilities/test_azure_search_handler.py @@ -209,7 +209,7 @@ def test_query_search_performs_semantic_search( ], filter=handler.env_helper.AZURE_SEARCH_FILTER, query_type="semantic", - semantic_configuration_name=handler.env_helper.AZURE_SEARCH_SEMANTIC_CONFIG_NAME, + semantic_configuration_name=handler.env_helper.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG, query_caption="extractive", query_answer="extractive", top=handler.env_helper.AZURE_SEARCH_TOP_K, diff --git a/code/tests/search_utilities/test_integrated_vectorization_search_handler.py b/code/tests/search_utilities/test_integrated_vectorization_search_handler.py index c85e67107..b721bc4a1 100644 --- a/code/tests/search_utilities/test_integrated_vectorization_search_handler.py +++ b/code/tests/search_utilities/test_integrated_vectorization_search_handler.py @@ -161,9 +161,10 @@ def test_get_files(handler, search_client_mock): ) -def test_query_search_performs_search(handler, env_helper_mock): +def test_query_search_performs_search_hybrid(handler, env_helper_mock): # given question = "test question" + env_helper_mock.AZURE_SEARCH_USE_SEMANTIC_SEARCH = False vector_query = VectorizableTextQuery( text=question, k_nearest_neighbors=env_helper_mock.AZURE_SEARCH_TOP_K, @@ -182,6 +183,34 @@ def test_query_search_performs_search(handler, env_helper_mock): ) +def test_query_search_performs_search_semantic(handler, env_helper_mock): + # given + question = "test question" + env_helper_mock.AZURE_SEARCH_USE_SEMANTIC_SEARCH = True + env_helper_mock.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG = "some-semantic-config" + vector_query = VectorizableTextQuery( + text=question, + k_nearest_neighbors=env_helper_mock.AZURE_SEARCH_TOP_K, + fields="content_vector", + exhaustive=True, + ) + + # when + handler.query_search(question) + + # then + handler.search_client.search.assert_called_once_with( + search_text=question, + vector_queries=[vector_query], + filter=env_helper_mock.AZURE_SEARCH_FILTER, + query_type="semantic", + semantic_configuration_name=env_helper_mock.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG, + query_caption="extractive", + query_answer="extractive", + top=env_helper_mock.AZURE_SEARCH_TOP_K, + ) + + def test_query_search_converts_results_to_source_documents(handler): # given question = "test question" diff --git a/infra/main.bicep b/infra/main.bicep index 461fb0816..81a59711b 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -55,7 +55,7 @@ param applicationInsightsName string = 'appinsights-${resourceToken}' param workbookDisplayName string = 'workbook-${resourceToken}' @description('Use semantic search') -param azureSearchUseSemanticSearch string = 'false' +param azureSearchUseSemanticSearch bool = false @description('Semantic search config') param azureSearchSemanticSearchConfig string = 'default' @@ -458,6 +458,7 @@ module search './core/search/search-services.bicep' = { aadAuthFailureMode: 'http403' } } + semanticSearch: azureSearchUseSemanticSearch ? 'free' : null } } @@ -1027,7 +1028,7 @@ output AZURE_OPENAI_API_KEY string = useKeyVault ? storekeys.outputs.OPENAI_KEY_ output AZURE_RESOURCE_GROUP string = rgName output AZURE_SEARCH_KEY string = useKeyVault ? storekeys.outputs.SEARCH_KEY_NAME : '' output AZURE_SEARCH_SERVICE string = search.outputs.endpoint -output AZURE_SEARCH_USE_SEMANTIC_SEARCH string = azureSearchUseSemanticSearch +output AZURE_SEARCH_USE_SEMANTIC_SEARCH bool = azureSearchUseSemanticSearch output AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG string = azureSearchSemanticSearchConfig output AZURE_SEARCH_INDEX_IS_PRECHUNKED string = azureSearchIndexIsPrechunked output AZURE_SEARCH_TOP_K string = azureSearchTopK diff --git a/infra/main.bicepparam b/infra/main.bicepparam index e19c2656e..01dc369ab 100644 --- a/infra/main.bicepparam +++ b/infra/main.bicepparam @@ -13,7 +13,7 @@ param hostingModel = readEnvironmentVariable('AZURE_APP_SERVICE_HOSTING_MODEL', // Feature flags param azureSearchUseIntegratedVectorization = bool(readEnvironmentVariable('AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION', 'false')) -param azureSearchUseSemanticSearch = readEnvironmentVariable('AZURE_SEARCH_USE_SEMANTIC_SEARCH', 'false') +param azureSearchUseSemanticSearch = bool(readEnvironmentVariable('AZURE_SEARCH_USE_SEMANTIC_SEARCH', 'false')) param orchestrationStrategy = readEnvironmentVariable('ORCHESTRATION_STRATEGY', 'openai_function') param logLevel = readEnvironmentVariable('LOGLEVEL', 'INFO') param recognizedLanguages = readEnvironmentVariable('AZURE_SPEECH_RECOGNIZER_LANGUAGES', 'en-US,fr-FR,de-DE,it-IT') diff --git a/infra/main.json b/infra/main.json index 9404b20cc..4939310fd 100644 --- a/infra/main.json +++ b/infra/main.json @@ -5,7 +5,7 @@ "_generator": { "name": "bicep", "version": "0.27.1.19265", - "templateHash": "9021391279672164541" + "templateHash": "5735866947841378196" } }, "parameters": { @@ -100,8 +100,8 @@ } }, "azureSearchUseSemanticSearch": { - "type": "string", - "defaultValue": "false", + "type": "bool", + "defaultValue": false, "metadata": { "description": "Use semantic search" } @@ -1668,7 +1668,8 @@ "aadAuthFailureMode": "http403" } } - } + }, + "semanticSearch": "[if(parameters('azureSearchUseSemanticSearch'), createObject('value', 'free'), createObject('value', null()))]" }, "template": { "$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#", @@ -10913,7 +10914,7 @@ "value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('azureAISearchName')), '2022-09-01').outputs.endpoint.value]" }, "AZURE_SEARCH_USE_SEMANTIC_SEARCH": { - "type": "string", + "type": "bool", "value": "[parameters('azureSearchUseSemanticSearch')]" }, "AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG": {