Skip to content

Commit

Permalink
feat: [IV] Enable semantic search when semantic search true (#942)
Browse files Browse the repository at this point in the history
  • Loading branch information
komalg1 authored May 21, 2024
1 parent 5feca88 commit 0d1f2e0
Show file tree
Hide file tree
Showing 8 changed files with 79 additions and 24 deletions.
4 changes: 2 additions & 2 deletions code/backend/batch/utilities/helpers/env_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ def __load_config(self, **kwargs) -> None:
# Azure Search
self.AZURE_SEARCH_SERVICE = os.getenv("AZURE_SEARCH_SERVICE", "")
self.AZURE_SEARCH_INDEX = os.getenv("AZURE_SEARCH_INDEX", "")
self.AZURE_SEARCH_USE_SEMANTIC_SEARCH = (
os.getenv("AZURE_SEARCH_USE_SEMANTIC_SEARCH", "False").lower() == "true"
self.AZURE_SEARCH_USE_SEMANTIC_SEARCH = self.get_env_var_bool(
"AZURE_SEARCH_USE_SEMANTIC_SEARCH", "False"
)
self.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG = os.getenv(
"AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG", "default"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def _semantic_search(self, question: str, tokenised_question: list[int]):
],
filter=self.env_helper.AZURE_SEARCH_FILTER,
query_type="semantic",
semantic_configuration_name=self.env_helper.AZURE_SEARCH_SEMANTIC_CONFIG_NAME,
semantic_configuration_name=self.env_helper.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG,
query_caption="extractive",
query_answer="extractive",
top=self.env_helper.AZURE_SEARCH_TOP_K,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,19 +71,43 @@ def delete_files(self, files):

def query_search(self, question) -> List[SourceDocument]:
    """Search the index for ``question`` and return the converted results.

    The query strategy is selected by
    ``env_helper.AZURE_SEARCH_USE_SEMANTIC_SEARCH``: ``_semantic_search`` is
    used when the flag is truthy, ``_hybrid_search`` otherwise. The raw
    results are then passed through ``_convert_to_source_documents``.

    Exactly one search request is issued per call. No query is sent before
    the strategy is chosen, because any such result would be overwritten by
    the selected strategy and only cost an extra round trip to the service.

    Args:
        question: The user's question, forwarded unchanged to the selected
            search strategy.

    Returns:
        The value produced by ``_convert_to_source_documents``. When
        ``_check_index_exists()`` is falsy no search is made and the method
        falls through, returning ``None``.
    """
    # NOTE(review): returning None for a missing index does not match the
    # declared return type; confirm whether callers would prefer [].
    if self._check_index_exists():
        if self.env_helper.AZURE_SEARCH_USE_SEMANTIC_SEARCH:
            search_results = self._semantic_search(question)
        else:
            search_results = self._hybrid_search(question)
        return self._convert_to_source_documents(search_results)

def _hybrid_search(self, question: str):
    """Run a combined text and vector query for ``question``.

    The question is sent both as ``search_text`` and as a
    ``VectorizableTextQuery`` over the ``content_vector`` field.
    ``env_helper.AZURE_SEARCH_TOP_K`` is used for the neighbour count of the
    vector query and for ``top``.

    Args:
        question: The text to search for.

    Returns:
        Whatever ``self.search_client.search`` returns for this request.
    """
    top_k = self.env_helper.AZURE_SEARCH_TOP_K
    text_vector = VectorizableTextQuery(
        text=question,
        k_nearest_neighbors=top_k,
        fields="content_vector",
        exhaustive=True,
    )
    return self.search_client.search(
        search_text=question,
        vector_queries=[text_vector],
        top=top_k,
    )

def _semantic_search(self, question: str):
    """Query the index for ``question`` with ``query_type="semantic"``.

    The request carries the question as ``search_text`` and as a
    ``VectorizableTextQuery`` over ``content_vector``, applies
    ``env_helper.AZURE_SEARCH_FILTER``, names the semantic configuration
    from ``env_helper.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG`` and asks for
    extractive captions and answers.

    Args:
        question: The text to search for.

    Returns:
        Whatever ``self.search_client.search`` returns for this request.
    """
    settings = self.env_helper
    request = {
        "search_text": question,
        "vector_queries": [
            VectorizableTextQuery(
                text=question,
                k_nearest_neighbors=settings.AZURE_SEARCH_TOP_K,
                fields="content_vector",
                exhaustive=True,
            )
        ],
        "filter": settings.AZURE_SEARCH_FILTER,
        "query_type": "semantic",
        "semantic_configuration_name": settings.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG,
        "query_caption": "extractive",
        "query_answer": "extractive",
        "top": settings.AZURE_SEARCH_TOP_K,
    }
    return self.search_client.search(**request)

def _convert_to_source_documents(self, search_results) -> List[SourceDocument]:
source_documents = []
for source in search_results:
Expand Down
2 changes: 1 addition & 1 deletion code/tests/search_utilities/test_azure_search_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def test_query_search_performs_semantic_search(
],
filter=handler.env_helper.AZURE_SEARCH_FILTER,
query_type="semantic",
semantic_configuration_name=handler.env_helper.AZURE_SEARCH_SEMANTIC_CONFIG_NAME,
semantic_configuration_name=handler.env_helper.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG,
query_caption="extractive",
query_answer="extractive",
top=handler.env_helper.AZURE_SEARCH_TOP_K,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,10 @@ def test_get_files(handler, search_client_mock):
)


def test_query_search_performs_search(handler, env_helper_mock):
def test_query_search_performs_search_hybrid(handler, env_helper_mock):
# given
question = "test question"
env_helper_mock.AZURE_SEARCH_USE_SEMANTIC_SEARCH = False
vector_query = VectorizableTextQuery(
text=question,
k_nearest_neighbors=env_helper_mock.AZURE_SEARCH_TOP_K,
Expand All @@ -182,6 +183,34 @@ def test_query_search_performs_search(handler, env_helper_mock):
)


def test_query_search_performs_search_semantic(handler, env_helper_mock):
    """With the semantic flag on, query_search sends one semantic request."""
    # given
    env_helper_mock.AZURE_SEARCH_USE_SEMANTIC_SEARCH = True
    env_helper_mock.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG = "some-semantic-config"
    question = "test question"
    expected_request = dict(
        search_text=question,
        vector_queries=[
            VectorizableTextQuery(
                text=question,
                k_nearest_neighbors=env_helper_mock.AZURE_SEARCH_TOP_K,
                fields="content_vector",
                exhaustive=True,
            )
        ],
        filter=env_helper_mock.AZURE_SEARCH_FILTER,
        query_type="semantic",
        semantic_configuration_name=env_helper_mock.AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG,
        query_caption="extractive",
        query_answer="extractive",
        top=env_helper_mock.AZURE_SEARCH_TOP_K,
    )

    # when
    handler.query_search(question)

    # then
    handler.search_client.search.assert_called_once_with(**expected_request)


def test_query_search_converts_results_to_source_documents(handler):
# given
question = "test question"
Expand Down
5 changes: 3 additions & 2 deletions infra/main.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ param applicationInsightsName string = 'appinsights-${resourceToken}'
param workbookDisplayName string = 'workbook-${resourceToken}'

@description('Use semantic search')
param azureSearchUseSemanticSearch string = 'false'
param azureSearchUseSemanticSearch bool = false

@description('Semantic search config')
param azureSearchSemanticSearchConfig string = 'default'
Expand Down Expand Up @@ -458,6 +458,7 @@ module search './core/search/search-services.bicep' = {
aadAuthFailureMode: 'http403'
}
}
semanticSearch: azureSearchUseSemanticSearch ? 'free' : null
}
}

Expand Down Expand Up @@ -1027,7 +1028,7 @@ output AZURE_OPENAI_API_KEY string = useKeyVault ? storekeys.outputs.OPENAI_KEY_
output AZURE_RESOURCE_GROUP string = rgName
output AZURE_SEARCH_KEY string = useKeyVault ? storekeys.outputs.SEARCH_KEY_NAME : ''
output AZURE_SEARCH_SERVICE string = search.outputs.endpoint
output AZURE_SEARCH_USE_SEMANTIC_SEARCH string = azureSearchUseSemanticSearch
output AZURE_SEARCH_USE_SEMANTIC_SEARCH bool = azureSearchUseSemanticSearch
output AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG string = azureSearchSemanticSearchConfig
output AZURE_SEARCH_INDEX_IS_PRECHUNKED string = azureSearchIndexIsPrechunked
output AZURE_SEARCH_TOP_K string = azureSearchTopK
Expand Down
2 changes: 1 addition & 1 deletion infra/main.bicepparam
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ param hostingModel = readEnvironmentVariable('AZURE_APP_SERVICE_HOSTING_MODEL',

// Feature flags
param azureSearchUseIntegratedVectorization = bool(readEnvironmentVariable('AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION', 'false'))
param azureSearchUseSemanticSearch = readEnvironmentVariable('AZURE_SEARCH_USE_SEMANTIC_SEARCH', 'false')
param azureSearchUseSemanticSearch = bool(readEnvironmentVariable('AZURE_SEARCH_USE_SEMANTIC_SEARCH', 'false'))
param orchestrationStrategy = readEnvironmentVariable('ORCHESTRATION_STRATEGY', 'openai_function')
param logLevel = readEnvironmentVariable('LOGLEVEL', 'INFO')
param recognizedLanguages = readEnvironmentVariable('AZURE_SPEECH_RECOGNIZER_LANGUAGES', 'en-US,fr-FR,de-DE,it-IT')
Expand Down
11 changes: 6 additions & 5 deletions infra/main.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"_generator": {
"name": "bicep",
"version": "0.27.1.19265",
"templateHash": "9021391279672164541"
"templateHash": "5735866947841378196"
}
},
"parameters": {
Expand Down Expand Up @@ -100,8 +100,8 @@
}
},
"azureSearchUseSemanticSearch": {
"type": "string",
"defaultValue": "false",
"type": "bool",
"defaultValue": false,
"metadata": {
"description": "Use semantic search"
}
Expand Down Expand Up @@ -1668,7 +1668,8 @@
"aadAuthFailureMode": "http403"
}
}
}
},
"semanticSearch": "[if(parameters('azureSearchUseSemanticSearch'), createObject('value', 'free'), createObject('value', null()))]"
},
"template": {
"$schema": "https://schema.management.azure.com/schemas/2019-04-01/deploymentTemplate.json#",
Expand Down Expand Up @@ -10913,7 +10914,7 @@
"value": "[reference(extensionResourceId(format('/subscriptions/{0}/resourceGroups/{1}', subscription().subscriptionId, variables('rgName')), 'Microsoft.Resources/deployments', parameters('azureAISearchName')), '2022-09-01').outputs.endpoint.value]"
},
"AZURE_SEARCH_USE_SEMANTIC_SEARCH": {
"type": "string",
"type": "bool",
"value": "[parameters('azureSearchUseSemanticSearch')]"
},
"AZURE_SEARCH_SEMANTIC_SEARCH_CONFIG": {
Expand Down

0 comments on commit 0d1f2e0

Please sign in to comment.