diff --git a/sdk/ai/azure-ai-inference/_meta.json b/sdk/ai/azure-ai-inference/_meta.json new file mode 100644 index 000000000000..e62f3588ea70 --- /dev/null +++ b/sdk/ai/azure-ai-inference/_meta.json @@ -0,0 +1,6 @@ +{ + "commit": "7dca60dfb7fda9a5e4aaeb4494db564b992c5c43", + "repository_url": "https://github.com/Azure/azure-rest-api-specs", + "typespec_src": "specification/ai/ModelClient", + "@azure-tools/typespec-python": "0.38.1" +} \ No newline at end of file diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_client.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_client.py index 5e73e91ea2b2..0cde08ffa7cc 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_client.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_client.py @@ -36,8 +36,8 @@ class ChatCompletionsClient(ChatCompletionsClientOperationsMixin): :param endpoint: Service host. Required. :type endpoint: str - :param credential: Credential used to authenticate requests to the service. Is one of the - following types: AzureKeyCredential, AzureKeyCredential, TokenCredential Required. + :param credential: Credential used to authenticate requests to the service. Is either a key + credential type or a token credential type. Required. :type credential: ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential :keyword api_version: The API version to use for this operation. Default value is @@ -114,8 +114,8 @@ class EmbeddingsClient(EmbeddingsClientOperationsMixin): :param endpoint: Service host. Required. :type endpoint: str - :param credential: Credential used to authenticate requests to the service. Is one of the - following types: AzureKeyCredential, AzureKeyCredential, TokenCredential Required. + :param credential: Credential used to authenticate requests to the service. Is either a key + credential type or a token credential type. Required. :type credential: ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential :keyword api_version: The API version to use for this operation. Default value is @@ -192,8 +192,8 @@ class ImageEmbeddingsClient(ImageEmbeddingsClientOperationsMixin): :param endpoint: Service host. Required. :type endpoint: str - :param credential: Credential used to authenticate requests to the service. Is one of the - following types: AzureKeyCredential, AzureKeyCredential, TokenCredential Required. + :param credential: Credential used to authenticate requests to the service. Is either a key + credential type or a token credential type. Required. :type credential: ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential :keyword api_version: The API version to use for this operation. Default value is diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_configuration.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_configuration.py index 8158dd310196..894ec657140f 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_configuration.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_configuration.py @@ -25,8 +25,8 @@ class ChatCompletionsClientConfiguration: # pylint: disable=too-many-instance-a :param endpoint: Service host. Required. :type endpoint: str - :param credential: Credential used to authenticate requests to the service. Is one of the - following types: AzureKeyCredential, AzureKeyCredential, TokenCredential Required. + :param credential: Credential used to authenticate requests to the service. Is either a key + credential type or a token credential type. Required. :type credential: ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential :keyword api_version: The API version to use for this operation. Default value is @@ -82,8 +82,8 @@ class EmbeddingsClientConfiguration: # pylint: disable=too-many-instance-attrib :param endpoint: Service host. Required. :type endpoint: str - :param credential: Credential used to authenticate requests to the service. Is one of the - following types: AzureKeyCredential, AzureKeyCredential, TokenCredential Required. + :param credential: Credential used to authenticate requests to the service. Is either a key + credential type or a token credential type. Required. :type credential: ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential :keyword api_version: The API version to use for this operation. Default value is @@ -139,8 +139,8 @@ class ImageEmbeddingsClientConfiguration: # pylint: disable=too-many-instance-a :param endpoint: Service host. Required. :type endpoint: str - :param credential: Credential used to authenticate requests to the service. Is one of the - following types: AzureKeyCredential, AzureKeyCredential, TokenCredential Required. + :param credential: Credential used to authenticate requests to the service. Is either a key + credential type or a token credential type. Required. :type credential: ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.AzureKeyCredential or ~azure.core.credentials.TokenCredential :keyword api_version: The API version to use for this operation. Default value is diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_model_base.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_model_base.py index 4122dde84de9..7f73b97b23ef 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_model_base.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_model_base.py @@ -1,4 +1,4 @@ -# pylint: disable=too-many-lines,arguments-differ,signature-differs,no-member +# pylint: disable=too-many-lines # coding=utf-8 # -------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_operations/_operations.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_operations/_operations.py index 9c41e5e3f0d8..d79af04a49bf 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_operations/_operations.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_operations/_operations.py @@ -1,4 +1,3 @@ -# pylint: disable=too-many-locals # coding=utf-8 # -------------------------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. @@ -194,7 +193,7 @@ def _complete( def _complete( self, *, - messages: List[_models._models.ChatRequestMessage], + messages: List[_models.ChatRequestMessage], extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, content_type: str = "application/json", frequency_penalty: Optional[float] = None, @@ -203,7 +202,7 @@ def _complete( temperature: Optional[float] = None, top_p: Optional[float] = None, max_tokens: Optional[int] = None, - response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None, + response_format: Optional[_models.ChatCompletionsResponseFormat] = None, stop: Optional[List[str]] = None, tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, tool_choice: Optional[ @@ -228,7 +227,7 @@ def _complete( self, body: Union[JSON, IO[bytes]] = _Unset, *, - messages: List[_models._models.ChatRequestMessage] = _Unset, + messages: List[_models.ChatRequestMessage] = _Unset, extra_params: Optional[Union[str, _models._enums.ExtraParameters]] = None, frequency_penalty: Optional[float] = None, stream_parameter: Optional[bool] = None, @@ -236,7 +235,7 @@ def _complete( temperature: Optional[float] = None, top_p: Optional[float] = None, max_tokens: Optional[int] = None, - response_format: Optional[_models._models.ChatCompletionsResponseFormat] = None, + response_format: Optional[_models.ChatCompletionsResponseFormat] = None, stop: Optional[List[str]] = None, tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, tool_choice: Optional[ @@ -259,7 +258,7 @@ def _complete( Typical usage begins with a chat message for the System role that provides instructions for the behavior of the assistant, followed by alternating messages between the User and Assistant roles. Required. - :paramtype messages: list[~azure.ai.inference.models._models.ChatRequestMessage] + :paramtype messages: list[~azure.ai.inference.models.ChatRequestMessage] :keyword extra_params: Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload. This sets the HTTP request header ``extra-parameters``. Known values are: "error", "drop", and @@ -316,7 +315,7 @@ def _complete( seemingly "stuck" request. Also note that the message content may be partially cut off if ``finish_reason="length"``\\ , which indicates the generation exceeded ``max_tokens`` or the conversation exceeded the max context length. Default value is None. - :paramtype response_format: ~azure.ai.inference.models._models.ChatCompletionsResponseFormat + :paramtype response_format: ~azure.ai.inference.models.ChatCompletionsResponseFormat :keyword stop: A collection of textual sequences that will end completions generation. Default value is None. :paramtype stop: list[str] diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py index 0862989beef2..f7dd32510333 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_patch.py @@ -1,4 +1,3 @@ -# pylint: disable=too-many-lines # ------------------------------------ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. @@ -6,1350 +5,10 @@ """Customize generated code here. Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize - -Why do we patch auto-generated code? Below is a summary of the changes made in all _patch files (not just this one): -1. Add support for input argument `model_extras` (all clients) -2. Add support for function load_client -3. Add support for setting sticky chat completions/embeddings input arguments in the client constructor -4. Add support for get_model_info, while caching the result (all clients) -5. Add support for chat completion streaming (ChatCompletionsClient client only) -6. Add support for friendly print of result objects (__str__ method) (all clients) -7. Add support for load() method in ImageUrl class (see /models/_patch.py) -8. Add support for sending two auth headers for api-key auth (all clients) -9. Simplify how chat completions "response_format" is set. Define "response_format" as a flat Union of strings and - JsonSchemaFormat object, instead of using auto-generated base/derived classes named - ChatCompletionsResponseFormatXxxInternal. -10. Allow UserMessage("my message") in addition to UserMessage(content="my message"). Same applies to -AssistantMessage, SystemMessage and ToolMessage. - """ -import json -import logging -import sys - -from io import IOBase -from typing import Any, Dict, Union, IO, List, Literal, Optional, overload, Type, TYPE_CHECKING, Iterable - -from azure.core.pipeline import PipelineResponse -from azure.core.credentials import AzureKeyCredential -from azure.core.tracing.decorator import distributed_trace -from azure.core.utils import case_insensitive_dict -from azure.core.exceptions import ( - ClientAuthenticationError, - HttpResponseError, - map_error, - ResourceExistsError, - ResourceNotFoundError, - ResourceNotModifiedError, -) -from . import models as _models -from ._model_base import SdkJSONEncoder, _deserialize -from ._serialization import Serializer -from ._operations._operations import ( - build_chat_completions_complete_request, - build_embeddings_embed_request, - build_image_embeddings_embed_request, -) -from ._client import ChatCompletionsClient as ChatCompletionsClientGenerated -from ._client import EmbeddingsClient as EmbeddingsClientGenerated -from ._client import ImageEmbeddingsClient as ImageEmbeddingsClientGenerated - -if sys.version_info >= (3, 9): - from collections.abc import MutableMapping -else: - from typing import MutableMapping # type: ignore # pylint: disable=ungrouped-imports - -if TYPE_CHECKING: - # pylint: disable=unused-import,ungrouped-imports - from azure.core.credentials import TokenCredential - -JSON = MutableMapping[str, Any] # pylint: disable=unsubscriptable-object -_Unset: Any = object() - -_SERIALIZER = Serializer() -_SERIALIZER.client_side_validation = False - -_LOGGER = logging.getLogger(__name__) - - -def _get_internal_response_format( - response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] -) -> Optional[_models._models.ChatCompletionsResponseFormat]: - """ - Internal helper method to convert between the public response format type that's supported in the `complete` method, - and the internal response format type that's used in the generated code. - - :param response_format: Response format. Required. - :type response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] - :return: Internal response format. - :rtype: ~azure.ai.inference._models._models.ChatCompletionsResponseFormat - """ - if response_format is not None: - - # To make mypy tool happy, start by declaring the type as the base class - internal_response_format: _models._models.ChatCompletionsResponseFormat - - if isinstance(response_format, str) and response_format == "text": - internal_response_format = ( - _models._models.ChatCompletionsResponseFormatText() # pylint: disable=protected-access - ) - elif isinstance(response_format, str) and response_format == "json_object": - internal_response_format = ( - _models._models.ChatCompletionsResponseFormatJsonObject() # pylint: disable=protected-access - ) - elif isinstance(response_format, _models.JsonSchemaFormat): - internal_response_format = _models._models.ChatCompletionsResponseFormatJsonSchema( # pylint: disable=protected-access - json_schema=response_format - ) - else: - raise ValueError(f"Unsupported `response_format` {response_format}") - - return internal_response_format - - return None - - -def load_client( - endpoint: str, credential: Union[AzureKeyCredential, "TokenCredential"], **kwargs: Any -) -> Union["ChatCompletionsClient", "EmbeddingsClient", "ImageEmbeddingsClient"]: - """ - Load a client from a given endpoint URL. The method makes a REST API call to the `/info` route - on the given endpoint, to determine the model type and therefore which client to instantiate. - Keyword arguments are passed to the appropriate client's constructor, so if you need to set things like - `api_version`, `logging_enable`, `user_agent`, etc., you can do so here. - This method will only work when using Serverless API or Managed Compute endpoint. - It will not work for GitHub Models endpoint or Azure OpenAI endpoint. - - :param endpoint: Service host. Required. - :type endpoint: str - :param credential: Credential used to authenticate requests to the service. Is either a - AzureKeyCredential type or a TokenCredential type. Required. - :type credential: ~azure.core.credentials.AzureKeyCredential or - ~azure.core.credentials.TokenCredential - :return: The appropriate synchronous client associated with the given endpoint - :rtype: ~azure.ai.inference.ChatCompletionsClient or ~azure.ai.inference.EmbeddingsClient - or ~azure.ai.inference.ImageEmbeddingsClient - :raises ~azure.core.exceptions.HttpResponseError: - """ - - with ChatCompletionsClient( - endpoint, credential, **kwargs - ) as client: # Pick any of the clients, it does not matter. - model_info = client.get_model_info() # type: ignore - - _LOGGER.info("model_info=%s", model_info) - if not model_info.model_type: - raise ValueError( - "The AI model information is missing a value for `model type`. Cannot create an appropriate client." - ) - - # TODO: Remove "completions", "chat-comletions" and "embedding" once Mistral Large and Cohere fixes their model type - if model_info.model_type in ( - _models.ModelType.CHAT_COMPLETION, - "chat_completions", - "chat", - "completion", - "chat-completion", - "chat-completions", - "chat completion", - "chat completions", - ): - chat_completion_client = ChatCompletionsClient(endpoint, credential, **kwargs) - chat_completion_client._model_info = ( # pylint: disable=protected-access,attribute-defined-outside-init - model_info - ) - return chat_completion_client - - if model_info.model_type in ( - _models.ModelType.EMBEDDINGS, - "embedding", - "text_embedding", - "text-embeddings", - "text embedding", - "text embeddings", - ): - embedding_client = EmbeddingsClient(endpoint, credential, **kwargs) - embedding_client._model_info = model_info # pylint: disable=protected-access,attribute-defined-outside-init - return embedding_client - - if model_info.model_type in ( - _models.ModelType.IMAGE_EMBEDDINGS, - "image_embedding", - "image-embeddings", - "image-embedding", - "image embedding", - "image embeddings", - ): - image_embedding_client = ImageEmbeddingsClient(endpoint, credential, **kwargs) - image_embedding_client._model_info = ( # pylint: disable=protected-access,attribute-defined-outside-init - model_info - ) - return image_embedding_client - - raise ValueError(f"No client available to support AI model type `{model_info.model_type}`") - - -class ChatCompletionsClient(ChatCompletionsClientGenerated): # pylint: disable=too-many-instance-attributes - """ChatCompletionsClient. - - :param endpoint: Service host. Required. - :type endpoint: str - :param credential: Credential used to authenticate requests to the service. Is either a - AzureKeyCredential type or a TokenCredential type. Required. - :type credential: ~azure.core.credentials.AzureKeyCredential or - ~azure.core.credentials.TokenCredential - :keyword frequency_penalty: A value that influences the probability of generated tokens - appearing based on their cumulative frequency in generated text. - Positive values will make tokens less likely to appear as their frequency increases and - decrease the likelihood of the model repeating the same statements verbatim. - Supported range is [-2, 2]. - Default value is None. - :paramtype frequency_penalty: float - :keyword presence_penalty: A value that influences the probability of generated tokens - appearing based on their existing - presence in generated text. - Positive values will make tokens less likely to appear when they already exist and increase - the model's likelihood to output new topics. - Supported range is [-2, 2]. - Default value is None. - :paramtype presence_penalty: float - :keyword temperature: The sampling temperature to use that controls the apparent creativity of - generated completions. - Higher values will make output more random while lower values will make results more focused - and deterministic. - It is not recommended to modify temperature and top_p for the same completions request as the - interaction of these two settings is difficult to predict. - Supported range is [0, 1]. - Default value is None. - :paramtype temperature: float - :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value - causes the - model to consider the results of tokens with the provided probability mass. As an example, a - value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be - considered. - It is not recommended to modify temperature and top_p for the same completions request as the - interaction of these two settings is difficult to predict. - Supported range is [0, 1]. - Default value is None. - :paramtype top_p: float - :keyword max_tokens: The maximum number of tokens to generate. Default value is None. - :paramtype max_tokens: int - :keyword response_format: The format that the AI model must output. AI chat completions models typically output - unformatted text by default. This is equivalent to setting "text" as the response_format. - To output JSON format, without adhering to any schema, set to "json_object". - To output JSON format adhering to a provided schema, set this to an object of the class - ~azure.ai.inference.models.JsonSchemaFormat. Default value is None. - :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat] - :keyword stop: A collection of textual sequences that will end completions generation. Default - value is None. - :paramtype stop: list[str] - :keyword tools: The available tool definitions that the chat completions request can use, - including caller-defined functions. Default value is None. - :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition] - :keyword tool_choice: If specified, the model will configure which of the provided tools it can - use for the chat completions response. Is either a Union[str, - "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type. - Default value is None. - :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or - ~azure.ai.inference.models.ChatCompletionsNamedToolChoice - :keyword seed: If specified, the system will make a best effort to sample deterministically - such that repeated requests with the - same seed and parameters should return the same result. Determinism is not guaranteed. - Default value is None. - :paramtype seed: int - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str - :keyword model_extras: Additional, model-specific parameters that are not in the - standard request payload. They will be added as-is to the root of the JSON in the request body. - How the service handles these extra parameters depends on the value of the - ``extra-parameters`` request header. Default value is None. - :paramtype model_extras: dict[str, Any] - :keyword api_version: The API version to use for this operation. Default value is - "2024-05-01-preview". Note that overriding this default value may result in unsupported - behavior. - :paramtype api_version: str - """ - - def __init__( - self, - endpoint: str, - credential: Union[AzureKeyCredential, "TokenCredential"], - *, - frequency_penalty: Optional[float] = None, - presence_penalty: Optional[float] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - max_tokens: Optional[int] = None, - response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None, - stop: Optional[List[str]] = None, - tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, - tool_choice: Optional[ - Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] - ] = None, - seed: Optional[int] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> None: - - self._model_info: Optional[_models.ModelInfo] = None - - # Store default chat completions settings, to be applied in all future service calls - # unless overridden by arguments in the `complete` method. - self._frequency_penalty = frequency_penalty - self._presence_penalty = presence_penalty - self._temperature = temperature - self._top_p = top_p - self._max_tokens = max_tokens - self._internal_response_format = _get_internal_response_format(response_format) - self._stop = stop - self._tools = tools - self._tool_choice = tool_choice - self._seed = seed - self._model = model - self._model_extras = model_extras - - # For Key auth, we need to send these two auth HTTP request headers simultaneously: - # 1. "Authorization: Bearer " - # 2. "api-key: " - # This is because Serverless API, Managed Compute and GitHub endpoints support the first header, - # and Azure OpenAI and the new Unified Inference endpoints support the second header. - # The first header will be taken care of by auto-generated code. - # The second one is added here. - if isinstance(credential, AzureKeyCredential): - headers = kwargs.pop("headers", {}) - if "api-key" not in headers: - headers["api-key"] = credential.key - kwargs["headers"] = headers - - super().__init__(endpoint, credential, **kwargs) - - @overload - def complete( - self, - *, - messages: Union[List[_models.ChatRequestMessage], List[Dict[str, Any]]], - stream: Literal[False] = False, - frequency_penalty: Optional[float] = None, - presence_penalty: Optional[float] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - max_tokens: Optional[int] = None, - response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None, - stop: Optional[List[str]] = None, - tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, - tool_choice: Optional[ - Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] - ] = None, - seed: Optional[int] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> _models.ChatCompletions: ... - - @overload - def complete( - self, - *, - messages: Union[List[_models.ChatRequestMessage], List[Dict[str, Any]]], - stream: Literal[True], - frequency_penalty: Optional[float] = None, - presence_penalty: Optional[float] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - max_tokens: Optional[int] = None, - response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None, - stop: Optional[List[str]] = None, - tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, - tool_choice: Optional[ - Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] - ] = None, - seed: Optional[int] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> Iterable[_models.StreamingChatCompletionsUpdate]: ... - - @overload - def complete( - self, - *, - messages: Union[List[_models.ChatRequestMessage], List[Dict[str, Any]]], - stream: Optional[bool] = None, - frequency_penalty: Optional[float] = None, - presence_penalty: Optional[float] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - max_tokens: Optional[int] = None, - response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None, - stop: Optional[List[str]] = None, - tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, - tool_choice: Optional[ - Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] - ] = None, - seed: Optional[int] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> Union[Iterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]: - # pylint: disable=line-too-long - """Gets chat completions for the provided chat messages. - Completions support a wide variety of tasks and generate text that continues from or - "completes" provided prompt data. The method makes a REST API call to the `/chat/completions` route - on the given endpoint. - When using this method with `stream=True`, the response is streamed - back to the client. Iterate over the resulting StreamingChatCompletions - object to get content updates as they arrive. By default, the response is a ChatCompletions object - (non-streaming). - - :keyword messages: The collection of context messages associated with this chat completions - request. - Typical usage begins with a chat message for the System role that provides instructions for - the behavior of the assistant, followed by alternating messages between the User and - Assistant roles. Required. - :paramtype messages: list[~azure.ai.inference.models.ChatRequestMessage] or list[dict[str, Any]] - :keyword stream: A value indicating whether chat completions should be streamed for this request. - Default value is False. If streaming is enabled, the response will be a StreamingChatCompletions. - Otherwise the response will be a ChatCompletions. - :paramtype stream: bool - :keyword frequency_penalty: A value that influences the probability of generated tokens - appearing based on their cumulative frequency in generated text. - Positive values will make tokens less likely to appear as their frequency increases and - decrease the likelihood of the model repeating the same statements verbatim. - Supported range is [-2, 2]. - Default value is None. - :paramtype frequency_penalty: float - :keyword presence_penalty: A value that influences the probability of generated tokens - appearing based on their existing - presence in generated text. - Positive values will make tokens less likely to appear when they already exist and increase - the model's likelihood to output new topics. - Supported range is [-2, 2]. - Default value is None. - :paramtype presence_penalty: float - :keyword temperature: The sampling temperature to use that controls the apparent creativity of - generated completions. - Higher values will make output more random while lower values will make results more focused - and deterministic. - It is not recommended to modify temperature and top_p for the same completions request as the - interaction of these two settings is difficult to predict. - Supported range is [0, 1]. - Default value is None. - :paramtype temperature: float - :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value - causes the - model to consider the results of tokens with the provided probability mass. As an example, a - value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be - considered. - It is not recommended to modify temperature and top_p for the same completions request as the - interaction of these two settings is difficult to predict. - Supported range is [0, 1]. - Default value is None. - :paramtype top_p: float - :keyword max_tokens: The maximum number of tokens to generate. Default value is None. - :paramtype max_tokens: int - :keyword response_format: The format that the AI model must output. AI chat completions models typically output - unformatted text by default. This is equivalent to setting "text" as the response_format. - To output JSON format, without adhering to any schema, set to "json_object". - To output JSON format adhering to a provided schema, set this to an object of the class - ~azure.ai.inference.models.JsonSchemaFormat. Default value is None. - :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat] - :keyword stop: A collection of textual sequences that will end completions generation. Default - value is None. - :paramtype stop: list[str] - :keyword tools: The available tool definitions that the chat completions request can use, - including caller-defined functions. Default value is None. - :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition] - :keyword tool_choice: If specified, the model will configure which of the provided tools it can - use for the chat completions response. Is either a Union[str, - "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type. - Default value is None. - :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or - ~azure.ai.inference.models.ChatCompletionsNamedToolChoice - :keyword seed: If specified, the system will make a best effort to sample deterministically - such that repeated requests with the - same seed and parameters should return the same result. Determinism is not guaranteed. - Default value is None. - :paramtype seed: int - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str - :keyword model_extras: Additional, model-specific parameters that are not in the - standard request payload. They will be added as-is to the root of the JSON in the request body. - How the service handles these extra parameters depends on the value of the - ``extra-parameters`` request header. Default value is None. - :paramtype model_extras: dict[str, Any] - :return: ChatCompletions for non-streaming, or Iterable[StreamingChatCompletionsUpdate] for streaming. - :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.StreamingChatCompletions - :raises ~azure.core.exceptions.HttpResponseError: - """ - - @overload - def complete( - self, - body: JSON, - *, - content_type: str = "application/json", - **kwargs: Any, - ) -> Union[Iterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]: - # pylint: disable=line-too-long - """Gets chat completions for the provided chat messages. - Completions support a wide variety of tasks and generate text that continues from or - "completes" provided prompt data. - - :param body: An object of type MutableMapping[str, Any], such as a dictionary, that - specifies the full request payload. Required. - :type body: JSON - :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. - Default value is "application/json". - :paramtype content_type: str - :return: ChatCompletions for non-streaming, or Iterable[StreamingChatCompletionsUpdate] for streaming. - :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.StreamingChatCompletions - :raises ~azure.core.exceptions.HttpResponseError: - """ - - @overload - def complete( - self, - body: IO[bytes], - *, - content_type: str = "application/json", - **kwargs: Any, - ) -> Union[Iterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]: - # pylint: disable=line-too-long - # pylint: disable=too-many-locals - """Gets chat completions for the provided chat messages. - Completions support a wide variety of tasks and generate text that continues from or - "completes" provided prompt data. - - :param body: Specifies the full request payload. Required. - :type body: IO[bytes] - :keyword content_type: Body Parameter content-type. Content type parameter for binary body. - Default value is "application/json". - :paramtype content_type: str - :return: ChatCompletions for non-streaming, or Iterable[StreamingChatCompletionsUpdate] for streaming. - :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.StreamingChatCompletions - :raises ~azure.core.exceptions.HttpResponseError: - """ - - # pylint:disable=client-method-missing-tracing-decorator - def complete( - self, - body: Union[JSON, IO[bytes]] = _Unset, - *, - messages: Union[List[_models.ChatRequestMessage], List[Dict[str, Any]]] = _Unset, - stream: Optional[bool] = None, - frequency_penalty: Optional[float] = None, - presence_penalty: Optional[float] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - max_tokens: Optional[int] = None, - response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None, - stop: Optional[List[str]] = None, - tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, - tool_choice: Optional[ - Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] - ] = None, - seed: Optional[int] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> Union[Iterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]: - # pylint: disable=line-too-long - # pylint: disable=too-many-locals - """Gets chat completions for the provided chat messages. - Completions support a wide variety of tasks and generate text that continues from or - "completes" provided prompt data. When using this method with `stream=True`, the response is streamed - back to the client. Iterate over the resulting :class:`~azure.ai.inference.models.StreamingChatCompletions` - object to get content updates as they arrive. - - :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or a IO[bytes] type - that specifies the full request payload. Required. - :type body: JSON or IO[bytes] - :keyword messages: The collection of context messages associated with this chat completions - request. - Typical usage begins with a chat message for the System role that provides instructions for - the behavior of the assistant, followed by alternating messages between the User and - Assistant roles. Required. - :paramtype messages: list[~azure.ai.inference.models.ChatRequestMessage] or list[dict[str, Any]] - :keyword stream: A value indicating whether chat completions should be streamed for this request. - Default value is False. If streaming is enabled, the response will be a StreamingChatCompletions. - Otherwise the response will be a ChatCompletions. - :paramtype stream: bool - :keyword frequency_penalty: A value that influences the probability of generated tokens - appearing based on their cumulative frequency in generated text. - Positive values will make tokens less likely to appear as their frequency increases and - decrease the likelihood of the model repeating the same statements verbatim. - Supported range is [-2, 2]. - Default value is None. - :paramtype frequency_penalty: float - :keyword presence_penalty: A value that influences the probability of generated tokens - appearing based on their existing - presence in generated text. - Positive values will make tokens less likely to appear when they already exist and increase - the model's likelihood to output new topics. - Supported range is [-2, 2]. - Default value is None. - :paramtype presence_penalty: float - :keyword temperature: The sampling temperature to use that controls the apparent creativity of - generated completions. - Higher values will make output more random while lower values will make results more focused - and deterministic. - It is not recommended to modify temperature and top_p for the same completions request as the - interaction of these two settings is difficult to predict. - Supported range is [0, 1]. - Default value is None. - :paramtype temperature: float - :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value - causes the - model to consider the results of tokens with the provided probability mass. As an example, a - value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be - considered. - It is not recommended to modify temperature and top_p for the same completions request as the - interaction of these two settings is difficult to predict. - Supported range is [0, 1]. - Default value is None. - :paramtype top_p: float - :keyword max_tokens: The maximum number of tokens to generate. Default value is None. - :paramtype max_tokens: int - :keyword response_format: The format that the AI model must output. AI chat completions models typically output - unformatted text by default. This is equivalent to setting "text" as the response_format. - To output JSON format, without adhering to any schema, set to "json_object". - To output JSON format adhering to a provided schema, set this to an object of the class - ~azure.ai.inference.models.JsonSchemaFormat. Default value is None. - :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat] - :keyword stop: A collection of textual sequences that will end completions generation. Default - value is None. - :paramtype stop: list[str] - :keyword tools: The available tool definitions that the chat completions request can use, - including caller-defined functions. Default value is None. - :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition] - :keyword tool_choice: If specified, the model will configure which of the provided tools it can - use for the chat completions response. Is either a Union[str, - "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type. - Default value is None. - :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or - ~azure.ai.inference.models.ChatCompletionsNamedToolChoice - :keyword seed: If specified, the system will make a best effort to sample deterministically - such that repeated requests with the - same seed and parameters should return the same result. Determinism is not guaranteed. - Default value is None. - :paramtype seed: int - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str - :keyword model_extras: Additional, model-specific parameters that are not in the - standard request payload. They will be added as-is to the root of the JSON in the request body. - How the service handles these extra parameters depends on the value of the - ``extra-parameters`` request header. Default value is None. - :paramtype model_extras: dict[str, Any] - :return: ChatCompletions for non-streaming, or Iterable[StreamingChatCompletionsUpdate] for streaming. - :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.StreamingChatCompletions - :raises ~azure.core.exceptions.HttpResponseError: - """ - error_map = { - 401: ClientAuthenticationError, - 404: ResourceNotFoundError, - 409: ResourceExistsError, - 304: ResourceNotModifiedError, - } - error_map.update(kwargs.pop("error_map", {}) or {}) - - _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) - _params = kwargs.pop("params", {}) or {} - _extra_parameters: Union[_models._enums.ExtraParameters, None] = None - - content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) - - internal_response_format = _get_internal_response_format(response_format) - - if body is _Unset: - if messages is _Unset: - raise TypeError("missing required argument: messages") - body = { - "messages": messages, - "stream": stream, - "frequency_penalty": frequency_penalty if frequency_penalty is not None else self._frequency_penalty, - "max_tokens": max_tokens if max_tokens is not None else self._max_tokens, - "model": model if model is not None else self._model, - "presence_penalty": presence_penalty if presence_penalty is not None else self._presence_penalty, - "response_format": ( - internal_response_format if internal_response_format is not None else self._internal_response_format - ), - "seed": seed if seed is not None else self._seed, - "stop": stop if stop is not None else self._stop, - "temperature": temperature if temperature is not None else self._temperature, - "tool_choice": tool_choice if tool_choice is not None else self._tool_choice, - "tools": tools if tools is not None else self._tools, - "top_p": top_p if top_p is not None else self._top_p, - } - if model_extras is not None and bool(model_extras): - body.update(model_extras) - _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access - elif self._model_extras is not None and bool(self._model_extras): - body.update(self._model_extras) - _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access - body = {k: v for k, v in body.items() if v is not None} - elif isinstance(body, dict) and "stream" in body and isinstance(body["stream"], bool): - stream = body["stream"] - content_type = content_type or "application/json" - _content = None - if isinstance(body, (IOBase, bytes)): - _content = body - else: - _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore - - _request = build_chat_completions_complete_request( - extra_params=_extra_parameters, - content_type=content_type, - api_version=self._config.api_version, - content=_content, - headers=_headers, - params=_params, - ) - path_format_arguments = { - "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), - } - _request.url = self._client.format_url(_request.url, **path_format_arguments) - - _stream = stream or False - pipeline_response: PipelineResponse = self._client._pipeline.run( # pylint: disable=protected-access - _request, stream=_stream, **kwargs - ) - - response = pipeline_response.http_response - - if response.status_code not in [200]: - if _stream: - response.read() # Load the body in memory and close the socket - map_error(status_code=response.status_code, response=response, error_map=error_map) - raise HttpResponseError(response=response) - - if _stream: - return _models.StreamingChatCompletions(response) - - return _deserialize(_models._patch.ChatCompletions, response.json()) # pylint: disable=protected-access - - @distributed_trace - def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: - # pylint: disable=line-too-long - """Returns information about the AI model. - The method makes a REST API call to the ``/info`` route on the given endpoint. - This method will only work when using Serverless API or Managed Compute endpoint. - It will not work for GitHub Models endpoint or Azure OpenAI endpoint. - - :return: ModelInfo. The ModelInfo is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.ModelInfo - :raises ~azure.core.exceptions.HttpResponseError: - """ - if not self._model_info: - self._model_info = self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init - return self._model_info - - def __str__(self) -> str: - # pylint: disable=client-method-name-no-double-underscore - return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__() - - -class EmbeddingsClient(EmbeddingsClientGenerated): - """EmbeddingsClient. - - :param endpoint: Service host. Required. - :type endpoint: str - :param credential: Credential used to authenticate requests to the service. Is either a - AzureKeyCredential type or a TokenCredential type. Required. - :type credential: ~azure.core.credentials.AzureKeyCredential or - ~azure.core.credentials.TokenCredential - :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should - have. Default value is None. - :paramtype dimensions: int - :keyword encoding_format: Optional. The desired format for the returned embeddings. - Known values are: - "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. - :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat - :keyword input_type: Optional. The type of the input. Known values are: - "text", "query", and "document". Default value is None. - :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str - :keyword model_extras: Additional, model-specific parameters that are not in the - standard request payload. They will be added as-is to the root of the JSON in the request body. - How the service handles these extra parameters depends on the value of the - ``extra-parameters`` request header. Default value is None. - :paramtype model_extras: dict[str, Any] - :keyword api_version: The API version to use for this operation. Default value is - "2024-05-01-preview". Note that overriding this default value may result in unsupported - behavior. - :paramtype api_version: str - """ - - def __init__( - self, - endpoint: str, - credential: Union[AzureKeyCredential, "TokenCredential"], - *, - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> None: - - self._model_info: Optional[_models.ModelInfo] = None - - # Store default embeddings settings, to be applied in all future service calls - # unless overridden by arguments in the `embed` method. - self._dimensions = dimensions - self._encoding_format = encoding_format - self._input_type = input_type - self._model = model - self._model_extras = model_extras - - # For Key auth, we need to send these two auth HTTP request headers simultaneously: - # 1. "Authorization: Bearer " - # 2. "api-key: " - # This is because Serverless API, Managed Compute and GitHub endpoints support the first header, - # and Azure OpenAI and the new Unified Inference endpoints support the second header. - # The first header will be taken care of by auto-generated code. - # The second one is added here. - if isinstance(credential, AzureKeyCredential): - headers = kwargs.pop("headers", {}) - if "api-key" not in headers: - headers["api-key"] = credential.key - kwargs["headers"] = headers - - super().__init__(endpoint, credential, **kwargs) - - @overload - def embed( - self, - *, - input: List[str], - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> _models.EmbeddingsResult: - """Return the embedding vectors for given text prompts. - The method makes a REST API call to the `/embeddings` route on the given endpoint. - - :keyword input: Input text to embed, encoded as a string or array of tokens. - To embed multiple inputs in a single request, pass an array - of strings or array of token arrays. Required. - :paramtype input: list[str] - :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should - have. Default value is None. - :paramtype dimensions: int - :keyword encoding_format: Optional. The desired format for the returned embeddings. - Known values are: - "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. - :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat - :keyword input_type: Optional. The type of the input. Known values are: - "text", "query", and "document". Default value is None. - :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str - :keyword model_extras: Additional, model-specific parameters that are not in the - standard request payload. They will be added as-is to the root of the JSON in the request body. - How the service handles these extra parameters depends on the value of the - ``extra-parameters`` request header. Default value is None. - :paramtype model_extras: dict[str, Any] - :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.EmbeddingsResult - :raises ~azure.core.exceptions.HttpResponseError: - """ - - @overload - def embed( - self, - body: JSON, - *, - content_type: str = "application/json", - **kwargs: Any, - ) -> _models.EmbeddingsResult: - """Return the embedding vectors for given text prompts. - The method makes a REST API call to the `/embeddings` route on the given endpoint. - - :param body: An object of type MutableMapping[str, Any], such as a dictionary, that - specifies the full request payload. Required. - :type body: JSON - :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. - Default value is "application/json". - :paramtype content_type: str - :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.EmbeddingsResult - :raises ~azure.core.exceptions.HttpResponseError: - """ - - @overload - def embed( - self, - body: IO[bytes], - *, - content_type: str = "application/json", - **kwargs: Any, - ) -> _models.EmbeddingsResult: - """Return the embedding vectors for given text prompts. - The method makes a REST API call to the `/embeddings` route on the given endpoint. - - :param body: Specifies the full request payload. Required. - :type body: IO[bytes] - :keyword content_type: Body Parameter content-type. Content type parameter for binary body. - Default value is "application/json". - :paramtype content_type: str - :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.EmbeddingsResult - :raises ~azure.core.exceptions.HttpResponseError: - """ - - @distributed_trace - def embed( - self, - body: Union[JSON, IO[bytes]] = _Unset, - *, - input: List[str] = _Unset, - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> _models.EmbeddingsResult: - # pylint: disable=line-too-long - """Return the embedding vectors for given text prompts. - The method makes a REST API call to the `/embeddings` route on the given endpoint. - - :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or a IO[bytes] type - that specifies the full request payload. Required. - :type body: JSON or IO[bytes] - :keyword input: Input text to embed, encoded as a string or array of tokens. - To embed multiple inputs in a single request, pass an array - of strings or array of token arrays. Required. - :paramtype input: list[str] - :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should - have. Default value is None. - :paramtype dimensions: int - :keyword encoding_format: Optional. The desired format for the returned embeddings. - Known values are: - "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. - :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat - :keyword input_type: Optional. The type of the input. Known values are: - "text", "query", and "document". Default value is None. - :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str - :keyword model_extras: Additional, model-specific parameters that are not in the - standard request payload. They will be added as-is to the root of the JSON in the request body. - How the service handles these extra parameters depends on the value of the - ``extra-parameters`` request header. Default value is None. - :paramtype model_extras: dict[str, Any] - :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.EmbeddingsResult - :raises ~azure.core.exceptions.HttpResponseError: - """ - error_map: MutableMapping[int, Type[HttpResponseError]] = { - 401: ClientAuthenticationError, - 404: ResourceNotFoundError, - 409: ResourceExistsError, - 304: ResourceNotModifiedError, - } - error_map.update(kwargs.pop("error_map", {}) or {}) - - _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) - _params = kwargs.pop("params", {}) or {} - _extra_parameters: Union[_models._enums.ExtraParameters, None] = None - - content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) - - if body is _Unset: - if input is _Unset: - raise TypeError("missing required argument: input") - body = { - "input": input, - "dimensions": dimensions if dimensions is not None else self._dimensions, - "encoding_format": encoding_format if encoding_format is not None else self._encoding_format, - "input_type": input_type if input_type is not None else self._input_type, - "model": model if model is not None else self._model, - } - if model_extras is not None and bool(model_extras): - body.update(model_extras) - _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access - elif self._model_extras is not None and bool(self._model_extras): - body.update(self._model_extras) - _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access - body = {k: v for k, v in body.items() if v is not None} - content_type = content_type or "application/json" - _content = None - if isinstance(body, (IOBase, bytes)): - _content = body - else: - _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore - - _request = build_embeddings_embed_request( - extra_params=_extra_parameters, - content_type=content_type, - api_version=self._config.api_version, - content=_content, - headers=_headers, - params=_params, - ) - path_format_arguments = { - "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), - } - _request.url = self._client.format_url(_request.url, **path_format_arguments) - - _stream = kwargs.pop("stream", False) - pipeline_response: PipelineResponse = self._client._pipeline.run( # pylint: disable=protected-access - _request, stream=_stream, **kwargs - ) - - response = pipeline_response.http_response - - if response.status_code not in [200]: - if _stream: - response.read() # Load the body in memory and close the socket - map_error(status_code=response.status_code, response=response, error_map=error_map) - raise HttpResponseError(response=response) - - if _stream: - deserialized = response.iter_bytes() - else: - deserialized = _deserialize( - _models._patch.EmbeddingsResult, response.json() # pylint: disable=protected-access - ) - - return deserialized # type: ignore - - @distributed_trace - def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: - # pylint: disable=line-too-long - """Returns information about the AI model. - The method makes a REST API call to the ``/info`` route on the given endpoint. - This method will only work when using Serverless API or Managed Compute endpoint. - It will not work for GitHub Models endpoint or Azure OpenAI endpoint. - - :return: ModelInfo. The ModelInfo is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.ModelInfo - :raises ~azure.core.exceptions.HttpResponseError: - """ - if not self._model_info: - self._model_info = self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init - return self._model_info - - def __str__(self) -> str: - # pylint: disable=client-method-name-no-double-underscore - return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__() - - -class ImageEmbeddingsClient(ImageEmbeddingsClientGenerated): - """ImageEmbeddingsClient. - - :param endpoint: Service host. Required. - :type endpoint: str - :param credential: Credential used to authenticate requests to the service. Is either a - AzureKeyCredential type or a TokenCredential type. Required. - :type credential: ~azure.core.credentials.AzureKeyCredential or - ~azure.core.credentials.TokenCredential - :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should - have. Default value is None. - :paramtype dimensions: int - :keyword encoding_format: Optional. The desired format for the returned embeddings. - Known values are: - "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. - :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat - :keyword input_type: Optional. The type of the input. Known values are: - "text", "query", and "document". Default value is None. - :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str - :keyword model_extras: Additional, model-specific parameters that are not in the - standard request payload. They will be added as-is to the root of the JSON in the request body. - How the service handles these extra parameters depends on the value of the - ``extra-parameters`` request header. Default value is None. - :paramtype model_extras: dict[str, Any] - :keyword api_version: The API version to use for this operation. Default value is - "2024-05-01-preview". Note that overriding this default value may result in unsupported - behavior. - :paramtype api_version: str - """ - - def __init__( - self, - endpoint: str, - credential: Union[AzureKeyCredential, "TokenCredential"], - *, - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> None: - - self._model_info: Optional[_models.ModelInfo] = None - - # Store default embeddings settings, to be applied in all future service calls - # unless overridden by arguments in the `embed` method. - self._dimensions = dimensions - self._encoding_format = encoding_format - self._input_type = input_type - self._model = model - self._model_extras = model_extras - - # For Key auth, we need to send these two auth HTTP request headers simultaneously: - # 1. "Authorization: Bearer " - # 2. "api-key: " - # This is because Serverless API, Managed Compute and GitHub endpoints support the first header, - # and Azure OpenAI and the new Unified Inference endpoints support the second header. - # The first header will be taken care of by auto-generated code. - # The second one is added here. - if isinstance(credential, AzureKeyCredential): - headers = kwargs.pop("headers", {}) - if "api-key" not in headers: - headers["api-key"] = credential.key - kwargs["headers"] = headers - - super().__init__(endpoint, credential, **kwargs) - - @overload - def embed( - self, - *, - input: List[_models.ImageEmbeddingInput], - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> _models.EmbeddingsResult: - """Return the embedding vectors for given images. - The method makes a REST API call to the `/images/embeddings` route on the given endpoint. - - :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an - array. - The input must not exceed the max input tokens for the model. Required. - :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput] - :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should - have. Default value is None. - :paramtype dimensions: int - :keyword encoding_format: Optional. The desired format for the returned embeddings. - Known values are: - "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. - :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat - :keyword input_type: Optional. The type of the input. Known values are: - "text", "query", and "document". Default value is None. - :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str - :keyword model_extras: Additional, model-specific parameters that are not in the - standard request payload. They will be added as-is to the root of the JSON in the request body. - How the service handles these extra parameters depends on the value of the - ``extra-parameters`` request header. Default value is None. - :paramtype model_extras: dict[str, Any] - :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.EmbeddingsResult - :raises ~azure.core.exceptions.HttpResponseError: - """ - - @overload - def embed( - self, - body: JSON, - *, - content_type: str = "application/json", - **kwargs: Any, - ) -> _models.EmbeddingsResult: - """Return the embedding vectors for given images. - The method makes a REST API call to the `/images/embeddings` route on the given endpoint. - - :param body: An object of type MutableMapping[str, Any], such as a dictionary, that - specifies the full request payload. Required. - :type body: JSON - :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. - Default value is "application/json". - :paramtype content_type: str - :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.EmbeddingsResult - :raises ~azure.core.exceptions.HttpResponseError: - """ - - @overload - def embed( - self, - body: IO[bytes], - *, - content_type: str = "application/json", - **kwargs: Any, - ) -> _models.EmbeddingsResult: - """Return the embedding vectors for given images. - The method makes a REST API call to the `/images/embeddings` route on the given endpoint. - - :param body: Specifies the full request payload. Required. - :type body: IO[bytes] - :keyword content_type: Body Parameter content-type. Content type parameter for binary body. - Default value is "application/json". - :paramtype content_type: str - :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.EmbeddingsResult - :raises ~azure.core.exceptions.HttpResponseError: - """ - - @distributed_trace - def embed( - self, - body: Union[JSON, IO[bytes]] = _Unset, - *, - input: List[_models.ImageEmbeddingInput] = _Unset, - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> _models.EmbeddingsResult: - # pylint: disable=line-too-long - """Return the embedding vectors for given images. - The method makes a REST API call to the `/images/embeddings` route on the given endpoint. - - :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or a IO[bytes] type - that specifies the full request payload. Required. - :type body: JSON or IO[bytes] - :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an - array. - The input must not exceed the max input tokens for the model. Required. - :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput] - :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should - have. Default value is None. - :paramtype dimensions: int - :keyword encoding_format: Optional. The desired format for the returned embeddings. - Known values are: - "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. - :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat - :keyword input_type: Optional. The type of the input. Known values are: - "text", "query", and "document". Default value is None. - :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str - :keyword model_extras: Additional, model-specific parameters that are not in the - standard request payload. They will be added as-is to the root of the JSON in the request body. - How the service handles these extra parameters depends on the value of the - ``extra-parameters`` request header. Default value is None. - :paramtype model_extras: dict[str, Any] - :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.EmbeddingsResult - :raises ~azure.core.exceptions.HttpResponseError: - """ - error_map: MutableMapping[int, Type[HttpResponseError]] = { - 401: ClientAuthenticationError, - 404: ResourceNotFoundError, - 409: ResourceExistsError, - 304: ResourceNotModifiedError, - } - error_map.update(kwargs.pop("error_map", {}) or {}) - - _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) - _params = kwargs.pop("params", {}) or {} - _extra_parameters: Union[_models._enums.ExtraParameters, None] = None - - content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) - - if body is _Unset: - if input is _Unset: - raise TypeError("missing required argument: input") - body = { - "input": input, - "dimensions": dimensions if dimensions is not None else self._dimensions, - "encoding_format": encoding_format if encoding_format is not None else self._encoding_format, - "input_type": input_type if input_type is not None else self._input_type, - "model": model if model is not None else self._model, - } - if model_extras is not None and bool(model_extras): - body.update(model_extras) - _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access - elif self._model_extras is not None and bool(self._model_extras): - body.update(self._model_extras) - _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access - body = {k: v for k, v in body.items() if v is not None} - content_type = content_type or "application/json" - _content = None - if isinstance(body, (IOBase, bytes)): - _content = body - else: - _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore - - _request = build_image_embeddings_embed_request( - extra_params=_extra_parameters, - content_type=content_type, - api_version=self._config.api_version, - content=_content, - headers=_headers, - params=_params, - ) - path_format_arguments = { - "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), - } - _request.url = self._client.format_url(_request.url, **path_format_arguments) - - _stream = kwargs.pop("stream", False) - pipeline_response: PipelineResponse = self._client._pipeline.run( # pylint: disable=protected-access - _request, stream=_stream, **kwargs - ) - - response = pipeline_response.http_response - - if response.status_code not in [200]: - if _stream: - response.read() # Load the body in memory and close the socket - map_error(status_code=response.status_code, response=response, error_map=error_map) - raise HttpResponseError(response=response) - - if _stream: - deserialized = response.iter_bytes() - else: - deserialized = _deserialize( - _models._patch.EmbeddingsResult, response.json() # pylint: disable=protected-access - ) - - return deserialized # type: ignore - - @distributed_trace - def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: - # pylint: disable=line-too-long - """Returns information about the AI model. - The method makes a REST API call to the ``/info`` route on the given endpoint. - This method will only work when using Serverless API or Managed Compute endpoint. - It will not work for GitHub Models endpoint or Azure OpenAI endpoint. - - :return: ModelInfo. The ModelInfo is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.ModelInfo - :raises ~azure.core.exceptions.HttpResponseError: - """ - if not self._model_info: - self._model_info = self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init - return self._model_info - - def __str__(self) -> str: - # pylint: disable=client-method-name-no-double-underscore - return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__() - +from typing import List -__all__: List[str] = [ - "load_client", - "ChatCompletionsClient", - "EmbeddingsClient", - "ImageEmbeddingsClient", -] # Add all objects you want publicly available to users at this package level +__all__: List[str] = [] # Add all objects you want publicly available to users at this package level def patch_sdk(): diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/_serialization.py b/sdk/ai/azure-ai-inference/azure/ai/inference/_serialization.py index b24ab2885450..670738f0789c 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/_serialization.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/_serialization.py @@ -185,73 +185,7 @@ def deserialize_from_http_generics(cls, body_bytes: Optional[Union[AnyStr, IO]], except NameError: _long_type = int - -class UTC(datetime.tzinfo): - """Time Zone info for handling UTC""" - - def utcoffset(self, dt): - """UTF offset for UTC is 0. - - :param datetime.datetime dt: The datetime - :returns: The offset - :rtype: datetime.timedelta - """ - return datetime.timedelta(0) - - def tzname(self, dt): - """Timestamp representation. - - :param datetime.datetime dt: The datetime - :returns: The timestamp representation - :rtype: str - """ - return "Z" - - def dst(self, dt): - """No daylight saving for UTC. - - :param datetime.datetime dt: The datetime - :returns: The daylight saving time - :rtype: datetime.timedelta - """ - return datetime.timedelta(hours=1) - - -try: - from datetime import timezone as _FixedOffset # type: ignore -except ImportError: # Python 2.7 - - class _FixedOffset(datetime.tzinfo): # type: ignore - """Fixed offset in minutes east from UTC. - Copy/pasted from Python doc - :param datetime.timedelta offset: offset in timedelta format - """ - - def __init__(self, offset) -> None: - self.__offset = offset - - def utcoffset(self, dt): - return self.__offset - - def tzname(self, dt): - return str(self.__offset.total_seconds() / 3600) - - def __repr__(self): - return "".format(self.tzname(None)) - - def dst(self, dt): - return datetime.timedelta(0) - - def __getinitargs__(self): - return (self.__offset,) - - -try: - from datetime import timezone - - TZ_UTC = timezone.utc -except ImportError: - TZ_UTC = UTC() # type: ignore +TZ_UTC = datetime.timezone.utc _FLATTEN = re.compile(r"(?= (3, 9): - from collections.abc import MutableMapping -else: - from typing import MutableMapping # type: ignore # pylint: disable=ungrouped-imports - -JSON = MutableMapping[str, Any] # pylint: disable=unsubscriptable-object -_Unset: Any = object() -_LOGGER = logging.getLogger(__name__) - - -async def load_client( - endpoint: str, credential: Union[AzureKeyCredential, "AsyncTokenCredential"], **kwargs: Any -) -> Union["ChatCompletionsClient", "EmbeddingsClient", "ImageEmbeddingsClient"]: - """ - Load a client from a given endpoint URL. The method makes a REST API call to the `/info` route - on the given endpoint, to determine the model type and therefore which client to instantiate. - Keyword arguments are passed to the appropriate client constructor, so if you need to set things like - `api_version`, `logging_enable`, `user_agent`, etc., you can do so here. - This method will only work when using Serverless API or Managed Compute endpoint. - It will not work for GitHub Models endpoint or Azure OpenAI endpoint. - - :param endpoint: Service host. Required. - :type endpoint: str - :param credential: Credential used to authenticate requests to the service. Is either a - AzureKeyCredential type or a AsyncTokenCredential type. Required. - :type credential: ~azure.core.credentials.AzureKeyCredential or - ~azure.core.credentials_async.AsyncTokenCredential - :return: The appropriate asynchronous client associated with the given endpoint - :rtype: ~azure.ai.inference.aio.ChatCompletionsClient or ~azure.ai.inference.aio.EmbeddingsClient - or ~azure.ai.inference.aio.ImageEmbeddingsClient - :raises ~azure.core.exceptions.HttpResponseError: - """ - - async with ChatCompletionsClient( - endpoint, credential, **kwargs - ) as client: # Pick any of the clients, it does not matter. - model_info = await client.get_model_info() # type: ignore - - _LOGGER.info("model_info=%s", model_info) - if not model_info.model_type: - raise ValueError( - "The AI model information is missing a value for `model type`. Cannot create an appropriate client." - ) - - # TODO: Remove "completions", "chat-comletions" and "embedding" once Mistral Large and Cohere fixes their model type - if model_info.model_type in ( - _models.ModelType.CHAT_COMPLETION, - "chat_completions", - "chat", - "completion", - "chat-completion", - "chat-completions", - "chat completion", - "chat completions", - ): - chat_completion_client = ChatCompletionsClient(endpoint, credential, **kwargs) - chat_completion_client._model_info = ( # pylint: disable=protected-access,attribute-defined-outside-init - model_info - ) - return chat_completion_client - - if model_info.model_type in ( - _models.ModelType.EMBEDDINGS, - "embedding", - "text_embedding", - "text-embeddings", - "text embedding", - "text embeddings", - ): - embedding_client = EmbeddingsClient(endpoint, credential, **kwargs) - embedding_client._model_info = model_info # pylint: disable=protected-access,attribute-defined-outside-init - return embedding_client - - if model_info.model_type in ( - _models.ModelType.IMAGE_EMBEDDINGS, - "image_embedding", - "image-embeddings", - "image-embedding", - "image embedding", - "image embeddings", - ): - image_embedding_client = ImageEmbeddingsClient(endpoint, credential, **kwargs) - image_embedding_client._model_info = ( # pylint: disable=protected-access,attribute-defined-outside-init - model_info - ) - return image_embedding_client - - raise ValueError(f"No client available to support AI model type `{model_info.model_type}`") - - -class ChatCompletionsClient(ChatCompletionsClientGenerated): # pylint: disable=too-many-instance-attributes - """ChatCompletionsClient. - - :param endpoint: Service host. Required. - :type endpoint: str - :param credential: Credential used to authenticate requests to the service. Is either a - AzureKeyCredential type or a AsyncTokenCredential type. Required. - :type credential: ~azure.core.credentials.AzureKeyCredential or - ~azure.core.credentials_async.AsyncTokenCredential - :keyword frequency_penalty: A value that influences the probability of generated tokens - appearing based on their cumulative frequency in generated text. - Positive values will make tokens less likely to appear as their frequency increases and - decrease the likelihood of the model repeating the same statements verbatim. - Supported range is [-2, 2]. - Default value is None. - :paramtype frequency_penalty: float - :keyword presence_penalty: A value that influences the probability of generated tokens - appearing based on their existing - presence in generated text. - Positive values will make tokens less likely to appear when they already exist and increase - the model's likelihood to output new topics. - Supported range is [-2, 2]. - Default value is None. - :paramtype presence_penalty: float - :keyword temperature: The sampling temperature to use that controls the apparent creativity of - generated completions. - Higher values will make output more random while lower values will make results more focused - and deterministic. - It is not recommended to modify temperature and top_p for the same completions request as the - interaction of these two settings is difficult to predict. - Supported range is [0, 1]. - Default value is None. - :paramtype temperature: float - :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value - causes the - model to consider the results of tokens with the provided probability mass. As an example, a - value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be - considered. - It is not recommended to modify temperature and top_p for the same completions request as the - interaction of these two settings is difficult to predict. - Supported range is [0, 1]. - Default value is None. - :paramtype top_p: float - :keyword max_tokens: The maximum number of tokens to generate. Default value is None. - :paramtype max_tokens: int - :keyword response_format: The format that the AI model must output. AI chat completions models typically output - unformatted text by default. This is equivalent to setting "text" as the response_format. - To output JSON format, without adhering to any schema, set to "json_object". - To output JSON format adhering to a provided schema, set this to an object of the class - ~azure.ai.inference.models.JsonSchemaFormat. Default value is None. - :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat] - :keyword stop: A collection of textual sequences that will end completions generation. Default - value is None. - :paramtype stop: list[str] - :keyword tools: The available tool definitions that the chat completions request can use, - including caller-defined functions. Default value is None. - :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition] - :keyword tool_choice: If specified, the model will configure which of the provided tools it can - use for the chat completions response. Is either a Union[str, - "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type. - Default value is None. - :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or - ~azure.ai.inference.models.ChatCompletionsNamedToolChoice - :keyword seed: If specified, the system will make a best effort to sample deterministically - such that repeated requests with the - same seed and parameters should return the same result. Determinism is not guaranteed. - Default value is None. - :paramtype seed: int - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str - :keyword model_extras: Additional, model-specific parameters that are not in the - standard request payload. They will be added as-is to the root of the JSON in the request body. - How the service handles these extra parameters depends on the value of the - ``extra-parameters`` request header. Default value is None. - :paramtype model_extras: dict[str, Any] - :keyword api_version: The API version to use for this operation. Default value is - "2024-05-01-preview". Note that overriding this default value may result in unsupported - behavior. - :paramtype api_version: str - """ - - def __init__( - self, - endpoint: str, - credential: Union[AzureKeyCredential, "AsyncTokenCredential"], - *, - frequency_penalty: Optional[float] = None, - presence_penalty: Optional[float] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - max_tokens: Optional[int] = None, - response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None, - stop: Optional[List[str]] = None, - tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, - tool_choice: Optional[ - Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] - ] = None, - seed: Optional[int] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> None: - - self._model_info: Optional[_models.ModelInfo] = None - - # Store default chat completions settings, to be applied in all future service calls - # unless overridden by arguments in the `complete` method. - self._frequency_penalty = frequency_penalty - self._presence_penalty = presence_penalty - self._temperature = temperature - self._top_p = top_p - self._max_tokens = max_tokens - self._internal_response_format = _get_internal_response_format(response_format) - self._stop = stop - self._tools = tools - self._tool_choice = tool_choice - self._seed = seed - self._model = model - self._model_extras = model_extras - - # For Key auth, we need to send these two auth HTTP request headers simultaneously: - # 1. "Authorization: Bearer " - # 2. "api-key: " - # This is because Serverless API, Managed Compute and GitHub endpoints support the first header, - # and Azure OpenAI and the new Unified Inference endpoints support the second header. - # The first header will be taken care of by auto-generated code. - # The second one is added here. - if isinstance(credential, AzureKeyCredential): - headers = kwargs.pop("headers", {}) - if "api-key" not in headers: - headers["api-key"] = credential.key - kwargs["headers"] = headers - - super().__init__(endpoint, credential, **kwargs) - - @overload - async def complete( - self, - *, - messages: List[_models.ChatRequestMessage], - stream: Literal[False] = False, - frequency_penalty: Optional[float] = None, - presence_penalty: Optional[float] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - max_tokens: Optional[int] = None, - response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None, - stop: Optional[List[str]] = None, - tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, - tool_choice: Optional[ - Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] - ] = None, - seed: Optional[int] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> _models.ChatCompletions: ... - - @overload - async def complete( - self, - *, - messages: List[_models.ChatRequestMessage], - stream: Literal[True], - frequency_penalty: Optional[float] = None, - presence_penalty: Optional[float] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - max_tokens: Optional[int] = None, - response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None, - stop: Optional[List[str]] = None, - tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, - tool_choice: Optional[ - Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] - ] = None, - seed: Optional[int] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> AsyncIterable[_models.StreamingChatCompletionsUpdate]: ... - - @overload - async def complete( - self, - *, - messages: List[_models.ChatRequestMessage], - stream: Optional[bool] = None, - frequency_penalty: Optional[float] = None, - presence_penalty: Optional[float] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - max_tokens: Optional[int] = None, - response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None, - stop: Optional[List[str]] = None, - tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, - tool_choice: Optional[ - Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] - ] = None, - seed: Optional[int] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]: - # pylint: disable=line-too-long - """Gets chat completions for the provided chat messages. - Completions support a wide variety of tasks and generate text that continues from or - "completes" provided prompt data. The method makes a REST API call to the `/chat/completions` route - on the given endpoint. - When using this method with `stream=True`, the response is streamed - back to the client. Iterate over the resulting StreamingChatCompletions - object to get content updates as they arrive. By default, the response is a ChatCompletions object - (non-streaming). - - :keyword messages: The collection of context messages associated with this chat completions - request. - Typical usage begins with a chat message for the System role that provides instructions for - the behavior of the assistant, followed by alternating messages between the User and - Assistant roles. Required. - :paramtype messages: list[~azure.ai.inference.models.ChatRequestMessage] - :keyword stream: A value indicating whether chat completions should be streamed for this request. - Default value is False. If streaming is enabled, the response will be a StreamingChatCompletions. - Otherwise the response will be a ChatCompletions. - :paramtype stream: bool - :keyword frequency_penalty: A value that influences the probability of generated tokens - appearing based on their cumulative frequency in generated text. - Positive values will make tokens less likely to appear as their frequency increases and - decrease the likelihood of the model repeating the same statements verbatim. - Supported range is [-2, 2]. - Default value is None. - :paramtype frequency_penalty: float - :keyword presence_penalty: A value that influences the probability of generated tokens - appearing based on their existing - presence in generated text. - Positive values will make tokens less likely to appear when they already exist and increase - the model's likelihood to output new topics. - Supported range is [-2, 2]. - Default value is None. - :paramtype presence_penalty: float - :keyword temperature: The sampling temperature to use that controls the apparent creativity of - generated completions. - Higher values will make output more random while lower values will make results more focused - and deterministic. - It is not recommended to modify temperature and top_p for the same completions request as the - interaction of these two settings is difficult to predict. - Supported range is [0, 1]. - Default value is None. - :paramtype temperature: float - :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value - causes the - model to consider the results of tokens with the provided probability mass. As an example, a - value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be - considered. - It is not recommended to modify temperature and top_p for the same completions request as the - interaction of these two settings is difficult to predict. - Supported range is [0, 1]. - Default value is None. - :paramtype top_p: float - :keyword max_tokens: The maximum number of tokens to generate. Default value is None. - :paramtype max_tokens: int - :keyword response_format: The format that the AI model must output. AI chat completions models typically output - unformatted text by default. This is equivalent to setting "text" as the response_format. - To output JSON format, without adhering to any schema, set to "json_object". - To output JSON format adhering to a provided schema, set this to an object of the class - ~azure.ai.inference.models.JsonSchemaFormat. Default value is None. - :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat] - :keyword stop: A collection of textual sequences that will end completions generation. Default - value is None. - :paramtype stop: list[str] - :keyword tools: The available tool definitions that the chat completions request can use, - including caller-defined functions. Default value is None. - :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition] - :keyword tool_choice: If specified, the model will configure which of the provided tools it can - use for the chat completions response. Is either a Union[str, - "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type. - Default value is None. - :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or - ~azure.ai.inference.models.ChatCompletionsNamedToolChoice - :keyword seed: If specified, the system will make a best effort to sample deterministically - such that repeated requests with the - same seed and parameters should return the same result. Determinism is not guaranteed. - Default value is None. - :paramtype seed: int - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str - :keyword model_extras: Additional, model-specific parameters that are not in the - standard request payload. They will be added as-is to the root of the JSON in the request body. - How the service handles these extra parameters depends on the value of the - ``extra-parameters`` request header. Default value is None. - :paramtype model_extras: dict[str, Any] - :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming. - :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions - :raises ~azure.core.exceptions.HttpResponseError: - """ - - @overload - async def complete( - self, - body: JSON, - *, - content_type: str = "application/json", - **kwargs: Any, - ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]: - # pylint: disable=line-too-long - """Gets chat completions for the provided chat messages. - Completions support a wide variety of tasks and generate text that continues from or - "completes" provided prompt data. - - :param body: An object of type MutableMapping[str, Any], such as a dictionary, that - specifies the full request payload. Required. - :type body: JSON - :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. - Default value is "application/json". - :paramtype content_type: str - :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming. - :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions - :raises ~azure.core.exceptions.HttpResponseError: - """ - - @overload - async def complete( - self, - body: IO[bytes], - *, - content_type: str = "application/json", - **kwargs: Any, - ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]: - # pylint: disable=line-too-long - """Gets chat completions for the provided chat messages. - Completions support a wide variety of tasks and generate text that continues from or - "completes" provided prompt data. - - :param body: Specifies the full request payload. Required. - :type body: IO[bytes] - :keyword content_type: Body Parameter content-type. Content type parameter for binary body. - Default value is "application/json". - :paramtype content_type: str - :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming. - :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions - :raises ~azure.core.exceptions.HttpResponseError: - """ - - # pylint:disable=client-method-missing-tracing-decorator-async - async def complete( - self, - body: Union[JSON, IO[bytes]] = _Unset, - *, - messages: List[_models.ChatRequestMessage] = _Unset, - stream: Optional[bool] = None, - frequency_penalty: Optional[float] = None, - presence_penalty: Optional[float] = None, - temperature: Optional[float] = None, - top_p: Optional[float] = None, - max_tokens: Optional[int] = None, - response_format: Optional[Union[Literal["text", "json_object"], _models.JsonSchemaFormat]] = None, - stop: Optional[List[str]] = None, - tools: Optional[List[_models.ChatCompletionsToolDefinition]] = None, - tool_choice: Optional[ - Union[str, _models.ChatCompletionsToolChoicePreset, _models.ChatCompletionsNamedToolChoice] - ] = None, - seed: Optional[int] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> Union[AsyncIterable[_models.StreamingChatCompletionsUpdate], _models.ChatCompletions]: - # pylint: disable=line-too-long - # pylint: disable=too-many-locals - """Gets chat completions for the provided chat messages. - Completions support a wide variety of tasks and generate text that continues from or - "completes" provided prompt data. When using this method with `stream=True`, the response is streamed - back to the client. Iterate over the resulting :class:`~azure.ai.inference.models.StreamingChatCompletions` - object to get content updates as they arrive. - - :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or a IO[bytes] type - that specifies the full request payload. Required. - :type body: JSON or IO[bytes] - :keyword messages: The collection of context messages associated with this chat completions - request. - Typical usage begins with a chat message for the System role that provides instructions for - the behavior of the assistant, followed by alternating messages between the User and - Assistant roles. Required. - :paramtype messages: list[~azure.ai.inference.models.ChatRequestMessage] - :keyword stream: A value indicating whether chat completions should be streamed for this request. - Default value is False. If streaming is enabled, the response will be a StreamingChatCompletions. - Otherwise the response will be a ChatCompletions. - :paramtype stream: bool - :keyword frequency_penalty: A value that influences the probability of generated tokens - appearing based on their cumulative frequency in generated text. - Positive values will make tokens less likely to appear as their frequency increases and - decrease the likelihood of the model repeating the same statements verbatim. - Supported range is [-2, 2]. - Default value is None. - :paramtype frequency_penalty: float - :keyword presence_penalty: A value that influences the probability of generated tokens - appearing based on their existing - presence in generated text. - Positive values will make tokens less likely to appear when they already exist and increase - the model's likelihood to output new topics. - Supported range is [-2, 2]. - Default value is None. - :paramtype presence_penalty: float - :keyword temperature: The sampling temperature to use that controls the apparent creativity of - generated completions. - Higher values will make output more random while lower values will make results more focused - and deterministic. - It is not recommended to modify temperature and top_p for the same completions request as the - interaction of these two settings is difficult to predict. - Supported range is [0, 1]. - Default value is None. - :paramtype temperature: float - :keyword top_p: An alternative to sampling with temperature called nucleus sampling. This value - causes the - model to consider the results of tokens with the provided probability mass. As an example, a - value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be - considered. - It is not recommended to modify temperature and top_p for the same completions request as the - interaction of these two settings is difficult to predict. - Supported range is [0, 1]. - Default value is None. - :paramtype top_p: float - :keyword max_tokens: The maximum number of tokens to generate. Default value is None. - :paramtype max_tokens: int - :keyword response_format: The format that the AI model must output. AI chat completions models typically output - unformatted text by default. This is equivalent to setting "text" as the response_format. - To output JSON format, without adhering to any schema, set to "json_object". - To output JSON format adhering to a provided schema, set this to an object of the class - ~azure.ai.inference.models.JsonSchemaFormat. Default value is None. - :paramtype response_format: Union[Literal['text', 'json_object'], ~azure.ai.inference.models.JsonSchemaFormat] - :keyword stop: A collection of textual sequences that will end completions generation. Default - value is None. - :paramtype stop: list[str] - :keyword tools: The available tool definitions that the chat completions request can use, - including caller-defined functions. Default value is None. - :paramtype tools: list[~azure.ai.inference.models.ChatCompletionsToolDefinition] - :keyword tool_choice: If specified, the model will configure which of the provided tools it can - use for the chat completions response. Is either a Union[str, - "_models.ChatCompletionsToolChoicePreset"] type or a ChatCompletionsNamedToolChoice type. - Default value is None. - :paramtype tool_choice: str or ~azure.ai.inference.models.ChatCompletionsToolChoicePreset or - ~azure.ai.inference.models.ChatCompletionsNamedToolChoice - :keyword seed: If specified, the system will make a best effort to sample deterministically - such that repeated requests with the - same seed and parameters should return the same result. Determinism is not guaranteed. - Default value is None. - :paramtype seed: int - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str - :keyword model_extras: Additional, model-specific parameters that are not in the - standard request payload. They will be added as-is to the root of the JSON in the request body. - How the service handles these extra parameters depends on the value of the - ``extra-parameters`` request header. Default value is None. - :paramtype model_extras: dict[str, Any] - :return: ChatCompletions for non-streaming, or AsyncIterable[StreamingChatCompletionsUpdate] for streaming. - :rtype: ~azure.ai.inference.models.ChatCompletions or ~azure.ai.inference.models.AsyncStreamingChatCompletions - :raises ~azure.core.exceptions.HttpResponseError: - """ - error_map = { - 401: ClientAuthenticationError, - 404: ResourceNotFoundError, - 409: ResourceExistsError, - 304: ResourceNotModifiedError, - } - error_map.update(kwargs.pop("error_map", {}) or {}) - - _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) - _params = kwargs.pop("params", {}) or {} - _extra_parameters: Union[_models._enums.ExtraParameters, None] = None - - content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) - - internal_response_format = _get_internal_response_format(response_format) - - if body is _Unset: - if messages is _Unset: - raise TypeError("missing required argument: messages") - body = { - "messages": messages, - "stream": stream, - "frequency_penalty": frequency_penalty if frequency_penalty is not None else self._frequency_penalty, - "max_tokens": max_tokens if max_tokens is not None else self._max_tokens, - "model": model if model is not None else self._model, - "presence_penalty": presence_penalty if presence_penalty is not None else self._presence_penalty, - "response_format": ( - internal_response_format if internal_response_format is not None else self._internal_response_format - ), - "seed": seed if seed is not None else self._seed, - "stop": stop if stop is not None else self._stop, - "temperature": temperature if temperature is not None else self._temperature, - "tool_choice": tool_choice if tool_choice is not None else self._tool_choice, - "tools": tools if tools is not None else self._tools, - "top_p": top_p if top_p is not None else self._top_p, - } - if model_extras is not None and bool(model_extras): - body.update(model_extras) - _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access - elif self._model_extras is not None and bool(self._model_extras): - body.update(self._model_extras) - _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access - body = {k: v for k, v in body.items() if v is not None} - elif isinstance(body, dict) and "stream" in body and isinstance(body["stream"], bool): - stream = body["stream"] - content_type = content_type or "application/json" - _content = None - if isinstance(body, (IOBase, bytes)): - _content = body - else: - _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore - - _request = build_chat_completions_complete_request( - extra_params=_extra_parameters, - content_type=content_type, - api_version=self._config.api_version, - content=_content, - headers=_headers, - params=_params, - ) - path_format_arguments = { - "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), - } - _request.url = self._client.format_url(_request.url, **path_format_arguments) - - _stream = stream or False - pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access - _request, stream=_stream, **kwargs - ) - - response = pipeline_response.http_response - - if response.status_code not in [200]: - if _stream: - await response.read() # Load the body in memory and close the socket - map_error(status_code=response.status_code, response=response, error_map=error_map) - raise HttpResponseError(response=response) - - if _stream: - return _models.AsyncStreamingChatCompletions(response) - - return _deserialize(_models._patch.ChatCompletions, response.json()) # pylint: disable=protected-access - - @distributed_trace_async - async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: - # pylint: disable=line-too-long - """Returns information about the AI model. - The method makes a REST API call to the ``/info`` route on the given endpoint. - This method will only work when using Serverless API or Managed Compute endpoint. - It will not work for GitHub Models endpoint or Azure OpenAI endpoint. - - :return: ModelInfo. The ModelInfo is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.ModelInfo - :raises ~azure.core.exceptions.HttpResponseError: - """ - if not self._model_info: - self._model_info = await self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init - return self._model_info - - def __str__(self) -> str: - # pylint: disable=client-method-name-no-double-underscore - return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__() - - -class EmbeddingsClient(EmbeddingsClientGenerated): - """EmbeddingsClient. - - :param endpoint: Service host. Required. - :type endpoint: str - :param credential: Credential used to authenticate requests to the service. Is either a - AzureKeyCredential type or a AsyncTokenCredential type. Required. - :type credential: ~azure.core.credentials.AzureKeyCredential or - ~azure.core.credentials_async.AsyncTokenCredential - :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should - have. Default value is None. - :paramtype dimensions: int - :keyword encoding_format: Optional. The desired format for the returned embeddings. - Known values are: - "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. - :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat - :keyword input_type: Optional. The type of the input. Known values are: - "text", "query", and "document". Default value is None. - :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str - :keyword model_extras: Additional, model-specific parameters that are not in the - standard request payload. They will be added as-is to the root of the JSON in the request body. - How the service handles these extra parameters depends on the value of the - ``extra-parameters`` request header. Default value is None. - :paramtype model_extras: dict[str, Any] - :keyword api_version: The API version to use for this operation. Default value is - "2024-05-01-preview". Note that overriding this default value may result in unsupported - behavior. - :paramtype api_version: str - """ - - def __init__( - self, - endpoint: str, - credential: Union[AzureKeyCredential, "AsyncTokenCredential"], - *, - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> None: - - self._model_info: Optional[_models.ModelInfo] = None - - # Store default embeddings settings, to be applied in all future service calls - # unless overridden by arguments in the `embed` method. - self._dimensions = dimensions - self._encoding_format = encoding_format - self._input_type = input_type - self._model = model - self._model_extras = model_extras - - # For Key auth, we need to send these two auth HTTP request headers simultaneously: - # 1. "Authorization: Bearer " - # 2. "api-key: " - # This is because Serverless API, Managed Compute and GitHub endpoints support the first header, - # and Azure OpenAI and the new Unified Inference endpoints support the second header. - # The first header will be taken care of by auto-generated code. - # The second one is added here. - if isinstance(credential, AzureKeyCredential): - headers = kwargs.pop("headers", {}) - if "api-key" not in headers: - headers["api-key"] = credential.key - kwargs["headers"] = headers - - super().__init__(endpoint, credential, **kwargs) - - @overload - async def embed( - self, - *, - input: List[str], - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> _models.EmbeddingsResult: - """Return the embedding vectors for given text prompts. - The method makes a REST API call to the `/embeddings` route on the given endpoint. - - :keyword input: Input text to embed, encoded as a string or array of tokens. - To embed multiple inputs in a single request, pass an array - of strings or array of token arrays. Required. - :paramtype input: list[str] - :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should - have. Default value is None. - :paramtype dimensions: int - :keyword encoding_format: Optional. The desired format for the returned embeddings. - Known values are: - "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. - :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat - :keyword input_type: Optional. The type of the input. Known values are: - "text", "query", and "document". Default value is None. - :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str - :keyword model_extras: Additional, model-specific parameters that are not in the - standard request payload. They will be added as-is to the root of the JSON in the request body. - How the service handles these extra parameters depends on the value of the - ``extra-parameters`` request header. Default value is None. - :paramtype model_extras: dict[str, Any] - :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.EmbeddingsResult - :raises ~azure.core.exceptions.HttpResponseError: - """ - - @overload - async def embed( - self, - body: JSON, - *, - content_type: str = "application/json", - **kwargs: Any, - ) -> _models.EmbeddingsResult: - """Return the embedding vectors for given text prompts. - The method makes a REST API call to the `/embeddings` route on the given endpoint. - - :param body: An object of type MutableMapping[str, Any], such as a dictionary, that - specifies the full request payload. Required. - :type body: JSON - :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. - Default value is "application/json". - :paramtype content_type: str - :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.EmbeddingsResult - :raises ~azure.core.exceptions.HttpResponseError: - """ - - @overload - async def embed( - self, - body: IO[bytes], - *, - content_type: str = "application/json", - **kwargs: Any, - ) -> _models.EmbeddingsResult: - """Return the embedding vectors for given text prompts. - The method makes a REST API call to the `/embeddings` route on the given endpoint. - - :param body: Specifies the full request payload. Required. - :type body: IO[bytes] - :keyword content_type: Body Parameter content-type. Content type parameter for binary body. - Default value is "application/json". - :paramtype content_type: str - :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.EmbeddingsResult - :raises ~azure.core.exceptions.HttpResponseError: - """ - - @distributed_trace_async - async def embed( - self, - body: Union[JSON, IO[bytes]] = _Unset, - *, - input: List[str] = _Unset, - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> _models.EmbeddingsResult: - # pylint: disable=line-too-long - """Return the embedding vectors for given text prompts. - The method makes a REST API call to the `/embeddings` route on the given endpoint. - - :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or a IO[bytes] type - that specifies the full request payload. Required. - :type body: JSON or IO[bytes] - :keyword input: Input text to embed, encoded as a string or array of tokens. - To embed multiple inputs in a single request, pass an array - of strings or array of token arrays. Required. - :paramtype input: list[str] - :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should - have. Default value is None. - :paramtype dimensions: int - :keyword encoding_format: Optional. The desired format for the returned embeddings. - Known values are: - "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. - :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat - :keyword input_type: Optional. The type of the input. Known values are: - "text", "query", and "document". Default value is None. - :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str - :keyword model_extras: Additional, model-specific parameters that are not in the - standard request payload. They will be added as-is to the root of the JSON in the request body. - How the service handles these extra parameters depends on the value of the - ``extra-parameters`` request header. Default value is None. - :paramtype model_extras: dict[str, Any] - :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.EmbeddingsResult - :raises ~azure.core.exceptions.HttpResponseError: - """ - error_map: MutableMapping[int, Type[HttpResponseError]] = { - 401: ClientAuthenticationError, - 404: ResourceNotFoundError, - 409: ResourceExistsError, - 304: ResourceNotModifiedError, - } - error_map.update(kwargs.pop("error_map", {}) or {}) - - _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) - _params = kwargs.pop("params", {}) or {} - _extra_parameters: Union[_models._enums.ExtraParameters, None] = None - - content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) - - if body is _Unset: - if input is _Unset: - raise TypeError("missing required argument: input") - body = { - "input": input, - "dimensions": dimensions if dimensions is not None else self._dimensions, - "encoding_format": encoding_format if encoding_format is not None else self._encoding_format, - "input_type": input_type if input_type is not None else self._input_type, - "model": model if model is not None else self._model, - } - if model_extras is not None and bool(model_extras): - body.update(model_extras) - _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access - elif self._model_extras is not None and bool(self._model_extras): - body.update(self._model_extras) - _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access - body = {k: v for k, v in body.items() if v is not None} - content_type = content_type or "application/json" - _content = None - if isinstance(body, (IOBase, bytes)): - _content = body - else: - _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore - - _request = build_embeddings_embed_request( - extra_params=_extra_parameters, - content_type=content_type, - api_version=self._config.api_version, - content=_content, - headers=_headers, - params=_params, - ) - path_format_arguments = { - "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), - } - _request.url = self._client.format_url(_request.url, **path_format_arguments) - - _stream = kwargs.pop("stream", False) - pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access - _request, stream=_stream, **kwargs - ) - - response = pipeline_response.http_response - - if response.status_code not in [200]: - if _stream: - await response.read() # Load the body in memory and close the socket - map_error(status_code=response.status_code, response=response, error_map=error_map) - raise HttpResponseError(response=response) - - if _stream: - deserialized = response.iter_bytes() - else: - deserialized = _deserialize( - _models._patch.EmbeddingsResult, response.json() # pylint: disable=protected-access - ) - - return deserialized # type: ignore - - @distributed_trace_async - async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: - # pylint: disable=line-too-long - """Returns information about the AI model. - The method makes a REST API call to the ``/info`` route on the given endpoint. - This method will only work when using Serverless API or Managed Compute endpoint. - It will not work for GitHub Models endpoint or Azure OpenAI endpoint. - - :return: ModelInfo. The ModelInfo is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.ModelInfo - :raises ~azure.core.exceptions.HttpResponseError: - """ - if not self._model_info: - self._model_info = await self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init - return self._model_info - - def __str__(self) -> str: - # pylint: disable=client-method-name-no-double-underscore - return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__() - - -class ImageEmbeddingsClient(ImageEmbeddingsClientGenerated): - """ImageEmbeddingsClient. - - :param endpoint: Service host. Required. - :type endpoint: str - :param credential: Credential used to authenticate requests to the service. Is either a - AzureKeyCredential type or a AsyncTokenCredential type. Required. - :type credential: ~azure.core.credentials.AzureKeyCredential or - ~azure.core.credentials_async.AsyncTokenCredential - :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should - have. Default value is None. - :paramtype dimensions: int - :keyword encoding_format: Optional. The desired format for the returned embeddings. - Known values are: - "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. - :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat - :keyword input_type: Optional. The type of the input. Known values are: - "text", "query", and "document". Default value is None. - :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str - :keyword model_extras: Additional, model-specific parameters that are not in the - standard request payload. They will be added as-is to the root of the JSON in the request body. - How the service handles these extra parameters depends on the value of the - ``extra-parameters`` request header. Default value is None. - :paramtype model_extras: dict[str, Any] - :keyword api_version: The API version to use for this operation. Default value is - "2024-05-01-preview". Note that overriding this default value may result in unsupported - behavior. - :paramtype api_version: str - """ - - def __init__( - self, - endpoint: str, - credential: Union[AzureKeyCredential, "AsyncTokenCredential"], - *, - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> None: - - self._model_info: Optional[_models.ModelInfo] = None - - # Store default embeddings settings, to be applied in all future service calls - # unless overridden by arguments in the `embed` method. - self._dimensions = dimensions - self._encoding_format = encoding_format - self._input_type = input_type - self._model = model - self._model_extras = model_extras - - # For Key auth, we need to send these two auth HTTP request headers simultaneously: - # 1. "Authorization: Bearer " - # 2. "api-key: " - # This is because Serverless API, Managed Compute and GitHub endpoints support the first header, - # and Azure OpenAI and the new Unified Inference endpoints support the second header. - # The first header will be taken care of by auto-generated code. - # The second one is added here. - if isinstance(credential, AzureKeyCredential): - headers = kwargs.pop("headers", {}) - if "api-key" not in headers: - headers["api-key"] = credential.key - kwargs["headers"] = headers - - super().__init__(endpoint, credential, **kwargs) - - @overload - async def embed( - self, - *, - input: List[_models.ImageEmbeddingInput], - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> _models.EmbeddingsResult: - """Return the embedding vectors for given images. - The method makes a REST API call to the `/images/embeddings` route on the given endpoint. - - :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an - array. - The input must not exceed the max input tokens for the model. Required. - :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput] - :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should - have. Default value is None. - :paramtype dimensions: int - :keyword encoding_format: Optional. The desired format for the returned embeddings. - Known values are: - "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. - :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat - :keyword input_type: Optional. Known values are: - "text", "query", and "document". Default value is None. - :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str - :keyword model_extras: Additional, model-specific parameters that are not in the - standard request payload. They will be added as-is to the root of the JSON in the request body. - How the service handles these extra parameters depends on the value of the - ``extra-parameters`` request header. Default value is None. - :paramtype model_extras: dict[str, Any] - :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.EmbeddingsResult - :raises ~azure.core.exceptions.HttpResponseError: - """ - - @overload - async def embed( - self, - body: JSON, - *, - content_type: str = "application/json", - **kwargs: Any, - ) -> _models.EmbeddingsResult: - """Return the embedding vectors for given images. - The method makes a REST API call to the `/images/embeddings` route on the given endpoint. - - :param body: An object of type MutableMapping[str, Any], such as a dictionary, that - specifies the full request payload. Required. - :type body: JSON - :keyword content_type: Body Parameter content-type. Content type parameter for JSON body. - Default value is "application/json". - :paramtype content_type: str - :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.EmbeddingsResult - :raises ~azure.core.exceptions.HttpResponseError: - """ - - @overload - async def embed( - self, - body: IO[bytes], - *, - content_type: str = "application/json", - **kwargs: Any, - ) -> _models.EmbeddingsResult: - """Return the embedding vectors for given images. - The method makes a REST API call to the `/images/embeddings` route on the given endpoint. - - :param body: Specifies the full request payload. Required. - :type body: IO[bytes] - :keyword content_type: Body Parameter content-type. Content type parameter for binary body. - Default value is "application/json". - :paramtype content_type: str - :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.EmbeddingsResult - :raises ~azure.core.exceptions.HttpResponseError: - """ - - @distributed_trace_async - async def embed( - self, - body: Union[JSON, IO[bytes]] = _Unset, - *, - input: List[_models.ImageEmbeddingInput] = _Unset, - dimensions: Optional[int] = None, - encoding_format: Optional[Union[str, _models.EmbeddingEncodingFormat]] = None, - input_type: Optional[Union[str, _models.EmbeddingInputType]] = None, - model: Optional[str] = None, - model_extras: Optional[Dict[str, Any]] = None, - **kwargs: Any, - ) -> _models.EmbeddingsResult: - # pylint: disable=line-too-long - """Return the embedding vectors for given images. - The method makes a REST API call to the `/images/embeddings` route on the given endpoint. - - :param body: Is either a MutableMapping[str, Any] type (like a dictionary) or a IO[bytes] type - that specifies the full request payload. Required. - :type body: JSON or IO[bytes] - :keyword input: Input image to embed. To embed multiple inputs in a single request, pass an - array. - The input must not exceed the max input tokens for the model. Required. - :paramtype input: list[~azure.ai.inference.models.ImageEmbeddingInput] - :keyword dimensions: Optional. The number of dimensions the resulting output embeddings should - have. Default value is None. - :paramtype dimensions: int - :keyword encoding_format: Optional. The desired format for the returned embeddings. - Known values are: - "base64", "binary", "float", "int8", "ubinary", and "uint8". Default value is None. - :paramtype encoding_format: str or ~azure.ai.inference.models.EmbeddingEncodingFormat - :keyword input_type: Optional. The type of the input. Known values are: - "text", "query", and "document". Default value is None. - :paramtype input_type: str or ~azure.ai.inference.models.EmbeddingInputType - :keyword model: ID of the specific AI model to use, if more than one model is available on the - endpoint. Default value is None. - :paramtype model: str - :keyword model_extras: Additional, model-specific parameters that are not in the - standard request payload. They will be added as-is to the root of the JSON in the request body. - How the service handles these extra parameters depends on the value of the - ``extra-parameters`` request header. Default value is None. - :paramtype model_extras: dict[str, Any] - :return: EmbeddingsResult. The EmbeddingsResult is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.EmbeddingsResult - :raises ~azure.core.exceptions.HttpResponseError: - """ - error_map: MutableMapping[int, Type[HttpResponseError]] = { - 401: ClientAuthenticationError, - 404: ResourceNotFoundError, - 409: ResourceExistsError, - 304: ResourceNotModifiedError, - } - error_map.update(kwargs.pop("error_map", {}) or {}) - - _headers = case_insensitive_dict(kwargs.pop("headers", {}) or {}) - _params = kwargs.pop("params", {}) or {} - _extra_parameters: Union[_models._enums.ExtraParameters, None] = None - - content_type: Optional[str] = kwargs.pop("content_type", _headers.pop("Content-Type", None)) - - if body is _Unset: - if input is _Unset: - raise TypeError("missing required argument: input") - body = { - "input": input, - "dimensions": dimensions if dimensions is not None else self._dimensions, - "encoding_format": encoding_format if encoding_format is not None else self._encoding_format, - "input_type": input_type if input_type is not None else self._input_type, - "model": model if model is not None else self._model, - } - if model_extras is not None and bool(model_extras): - body.update(model_extras) - _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access - elif self._model_extras is not None and bool(self._model_extras): - body.update(self._model_extras) - _extra_parameters = _models._enums.ExtraParameters.PASS_THROUGH # pylint: disable=protected-access - body = {k: v for k, v in body.items() if v is not None} - content_type = content_type or "application/json" - _content = None - if isinstance(body, (IOBase, bytes)): - _content = body - else: - _content = json.dumps(body, cls=SdkJSONEncoder, exclude_readonly=True) # type: ignore - - _request = build_image_embeddings_embed_request( - extra_params=_extra_parameters, - content_type=content_type, - api_version=self._config.api_version, - content=_content, - headers=_headers, - params=_params, - ) - path_format_arguments = { - "endpoint": self._serialize.url("self._config.endpoint", self._config.endpoint, "str", skip_quote=True), - } - _request.url = self._client.format_url(_request.url, **path_format_arguments) - - _stream = kwargs.pop("stream", False) - pipeline_response: PipelineResponse = await self._client._pipeline.run( # type: ignore # pylint: disable=protected-access - _request, stream=_stream, **kwargs - ) - - response = pipeline_response.http_response - - if response.status_code not in [200]: - if _stream: - await response.read() # Load the body in memory and close the socket - map_error(status_code=response.status_code, response=response, error_map=error_map) - raise HttpResponseError(response=response) - - if _stream: - deserialized = response.iter_bytes() - else: - deserialized = _deserialize( - _models._patch.EmbeddingsResult, response.json() # pylint: disable=protected-access - ) - - return deserialized # type: ignore - - @distributed_trace_async - async def get_model_info(self, **kwargs: Any) -> _models.ModelInfo: - # pylint: disable=line-too-long - """Returns information about the AI model. - The method makes a REST API call to the ``/info`` route on the given endpoint. - This method will only work when using Serverless API or Managed Compute endpoint. - It will not work for GitHub Models endpoint or Azure OpenAI endpoint. - - :return: ModelInfo. The ModelInfo is compatible with MutableMapping - :rtype: ~azure.ai.inference.models.ModelInfo - :raises ~azure.core.exceptions.HttpResponseError: - """ - if not self._model_info: - self._model_info = await self._get_model_info(**kwargs) # pylint: disable=attribute-defined-outside-init - return self._model_info - - def __str__(self) -> str: - # pylint: disable=client-method-name-no-double-underscore - return super().__str__() + f"\n{self._model_info}" if self._model_info else super().__str__() - - -__all__: List[str] = [ - "load_client", - "ChatCompletionsClient", - "EmbeddingsClient", - "ImageEmbeddingsClient", -] # Add all objects you want publicly available to users at this package level +__all__: List[str] = [] # Add all objects you want publicly available to users at this package level def patch_sdk(): diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/models/__init__.py b/sdk/ai/azure-ai-inference/azure/ai/inference/models/__init__.py index 8c21439455ca..413f4c7b6e47 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/models/__init__.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/models/__init__.py @@ -14,12 +14,18 @@ from ._models import ( # type: ignore + AssistantMessage, ChatChoice, ChatCompletions, ChatCompletionsNamedToolChoice, ChatCompletionsNamedToolChoiceFunction, + ChatCompletionsResponseFormat, + ChatCompletionsResponseFormatJsonObject, + ChatCompletionsResponseFormatJsonSchema, + ChatCompletionsResponseFormatText, ChatCompletionsToolCall, ChatCompletionsToolDefinition, + ChatRequestMessage, ChatResponseMessage, CompletionsUsage, ContentItem, @@ -37,7 +43,10 @@ StreamingChatCompletionsUpdate, StreamingChatResponseMessageUpdate, StreamingChatResponseToolCallUpdate, + SystemMessage, TextContentItem, + ToolMessage, + UserMessage, ) from ._enums import ( # type: ignore @@ -54,12 +63,18 @@ from ._patch import patch_sdk as _patch_sdk __all__ = [ + "AssistantMessage", "ChatChoice", "ChatCompletions", "ChatCompletionsNamedToolChoice", "ChatCompletionsNamedToolChoiceFunction", + "ChatCompletionsResponseFormat", + "ChatCompletionsResponseFormatJsonObject", + "ChatCompletionsResponseFormatJsonSchema", + "ChatCompletionsResponseFormatText", "ChatCompletionsToolCall", "ChatCompletionsToolDefinition", + "ChatRequestMessage", "ChatResponseMessage", "CompletionsUsage", "ContentItem", @@ -77,7 +92,10 @@ "StreamingChatCompletionsUpdate", "StreamingChatResponseMessageUpdate", "StreamingChatResponseToolCallUpdate", + "SystemMessage", "TextContentItem", + "ToolMessage", + "UserMessage", "ChatCompletionsToolChoicePreset", "ChatRole", "CompletionsFinishReason", diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py b/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py index dd9c123b8ab9..f1b97e0c1747 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/models/_models.py @@ -19,6 +19,88 @@ from .. import models as _models +class ChatRequestMessage(_model_base.Model): + """An abstract representation of a chat message as provided in a request. + + You probably want to use the sub-classes and not this class directly. Known sub-classes are: + AssistantMessage, SystemMessage, ToolMessage, UserMessage + + All required parameters must be populated in order to send to server. + + :ivar role: The chat role associated with this message. Required. Known values are: "system", + "user", "assistant", and "tool". + :vartype role: str or ~azure.ai.inference.models.ChatRole + """ + + __mapping__: Dict[str, _model_base.Model] = {} + role: str = rest_discriminator(name="role") + """The chat role associated with this message. Required. Known values are: \"system\", \"user\", + \"assistant\", and \"tool\".""" + + @overload + def __init__( + self, + *, + role: str, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + + +class AssistantMessage(ChatRequestMessage, discriminator="assistant"): + """A request chat message representing response or action from the assistant. + + All required parameters must be populated in order to send to server. + + :ivar role: The chat role associated with this message, which is always 'assistant' for + assistant messages. Required. The role that provides responses to system-instructed, + user-prompted input. + :vartype role: str or ~azure.ai.inference.models.ASSISTANT + :ivar content: The content of the message. + :vartype content: str + :ivar tool_calls: The tool calls that must be resolved and have their outputs appended to + subsequent input messages for the chat + completions request to resolve as configured. + :vartype tool_calls: list[~azure.ai.inference.models.ChatCompletionsToolCall] + """ + + role: Literal[ChatRole.ASSISTANT] = rest_discriminator(name="role") # type: ignore + """The chat role associated with this message, which is always 'assistant' for assistant messages. + Required. The role that provides responses to system-instructed, user-prompted input.""" + content: Optional[str] = rest_field() + """The content of the message.""" + tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = rest_field() + """The tool calls that must be resolved and have their outputs appended to subsequent input + messages for the chat + completions request to resolve as configured.""" + + @overload + def __init__( + self, + *, + content: Optional[str] = None, + tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, role=ChatRole.ASSISTANT, **kwargs) + + class ChatChoice(_model_base.Model): """The representation of a single prompt completion as part of an overall chat completions request. @@ -78,13 +160,13 @@ class ChatCompletions(_model_base.Model): :vartype created: ~datetime.datetime :ivar model: The model used for the chat completion. Required. :vartype model: str + :ivar usage: Usage information for tokens processed and generated as part of this completions + operation. Required. + :vartype usage: ~azure.ai.inference.models.CompletionsUsage :ivar choices: The collection of completions choices associated with this completions response. Generally, ``n`` choices are generated per provided prompt with a default value of 1. Token limits and other settings may limit the number of choices generated. Required. :vartype choices: list[~azure.ai.inference.models.ChatChoice] - :ivar usage: Usage information for tokens processed and generated as part of this completions - operation. Required. - :vartype usage: ~azure.ai.inference.models.CompletionsUsage """ id: str = rest_field() @@ -94,13 +176,13 @@ class ChatCompletions(_model_base.Model): represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970. Required.""" model: str = rest_field() """The model used for the chat completion. Required.""" + usage: "_models.CompletionsUsage" = rest_field() + """Usage information for tokens processed and generated as part of this completions operation. + Required.""" choices: List["_models.ChatChoice"] = rest_field() """The collection of completions choices associated with this completions response. Generally, ``n`` choices are generated per provided prompt with a default value of 1. Token limits and other settings may limit the number of choices generated. Required.""" - usage: "_models.CompletionsUsage" = rest_field() - """Usage information for tokens processed and generated as part of this completions operation. - Required.""" @overload def __init__( @@ -109,8 +191,8 @@ def __init__( id: str, # pylint: disable=redefined-builtin created: datetime.datetime, model: str, - choices: List["_models.ChatChoice"], usage: "_models.CompletionsUsage", + choices: List["_models.ChatChoice"], ) -> None: ... @overload @@ -130,6 +212,8 @@ class ChatCompletionsNamedToolChoice(_model_base.Model): Readonly variables are only populated by the server, and will be ignored when sending a request. + All required parameters must be populated in order to send to server. + :ivar type: The type of the tool. Currently, only ``function`` is supported. Required. Default value is "function". :vartype type: str @@ -166,6 +250,8 @@ class ChatCompletionsNamedToolChoiceFunction(_model_base.Model): """A tool selection of a specific, named function tool that will limit chat completions to using the named function. + All required parameters must be populated in order to send to server. + :ivar name: The name of the function that should be called. Required. :vartype name: str """ @@ -202,6 +288,8 @@ class ChatCompletionsResponseFormat(_model_base.Model): ChatCompletionsResponseFormatJsonObject, ChatCompletionsResponseFormatJsonSchema, ChatCompletionsResponseFormatText + All required parameters must be populated in order to send to server. + :ivar type: The response format type to use for chat completions. Required. Default value is None. :vartype type: str @@ -235,6 +323,8 @@ class ChatCompletionsResponseFormatJsonObject(ChatCompletionsResponseFormat, dis produce JSON via a system or user message. + All required parameters must be populated in order to send to server. + :ivar type: Response format type: always 'json_object' for this object. Required. Default value is "json_object". :vartype type: str @@ -265,6 +355,8 @@ class ChatCompletionsResponseFormatJsonSchema(ChatCompletionsResponseFormat, dis with a JSON schema specified by the caller. + All required parameters must be populated in order to send to server. + :ivar type: The type of response format being defined: ``json_schema``. Required. Default value is "json_schema". :vartype type: str @@ -301,6 +393,8 @@ class ChatCompletionsResponseFormatText(ChatCompletionsResponseFormat, discrimin """A response format for Chat Completions that emits text responses. This is the default response format. + All required parameters must be populated in order to send to server. + :ivar type: Response format type: always 'text' for this object. Required. Default value is "text". :vartype type: str @@ -373,6 +467,8 @@ class ChatCompletionsToolDefinition(_model_base.Model): Readonly variables are only populated by the server, and will be ignored when sending a request. + All required parameters must be populated in order to send to server. + :ivar type: The type of the tool. Currently, only ``function`` is supported. Required. Default value is "function". :vartype type: str @@ -405,197 +501,6 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self.type: Literal["function"] = "function" -class ChatRequestMessage(_model_base.Model): - """An abstract representation of a chat message as provided in a request. - - You probably want to use the sub-classes and not this class directly. Known sub-classes are: - ChatRequestAssistantMessage, ChatRequestSystemMessage, ChatRequestToolMessage, - ChatRequestUserMessage - - :ivar role: The chat role associated with this message. Required. Known values are: "system", - "user", "assistant", and "tool". - :vartype role: str or ~azure.ai.inference.models.ChatRole - """ - - __mapping__: Dict[str, _model_base.Model] = {} - role: str = rest_discriminator(name="role") - """The chat role associated with this message. Required. Known values are: \"system\", \"user\", - \"assistant\", and \"tool\".""" - - @overload - def __init__( - self, - *, - role: str, - ) -> None: ... - - @overload - def __init__(self, mapping: Mapping[str, Any]) -> None: - """ - :param mapping: raw JSON to initialize the model. - :type mapping: Mapping[str, Any] - """ - - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, **kwargs) - - -class ChatRequestAssistantMessage(ChatRequestMessage, discriminator="assistant"): - """A request chat message representing response or action from the assistant. - - :ivar role: The chat role associated with this message, which is always 'assistant' for - assistant messages. Required. The role that provides responses to system-instructed, - user-prompted input. - :vartype role: str or ~azure.ai.inference.models.ASSISTANT - :ivar content: The content of the message. - :vartype content: str - :ivar tool_calls: The tool calls that must be resolved and have their outputs appended to - subsequent input messages for the chat - completions request to resolve as configured. - :vartype tool_calls: list[~azure.ai.inference.models.ChatCompletionsToolCall] - """ - - role: Literal[ChatRole.ASSISTANT] = rest_discriminator(name="role") # type: ignore - """The chat role associated with this message, which is always 'assistant' for assistant messages. - Required. The role that provides responses to system-instructed, user-prompted input.""" - content: Optional[str] = rest_field() - """The content of the message.""" - tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = rest_field() - """The tool calls that must be resolved and have their outputs appended to subsequent input - messages for the chat - completions request to resolve as configured.""" - - @overload - def __init__( - self, - *, - content: Optional[str] = None, - tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = None, - ) -> None: ... - - @overload - def __init__(self, mapping: Mapping[str, Any]) -> None: - """ - :param mapping: raw JSON to initialize the model. - :type mapping: Mapping[str, Any] - """ - - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, role=ChatRole.ASSISTANT, **kwargs) - - -class ChatRequestSystemMessage(ChatRequestMessage, discriminator="system"): - """A request chat message containing system instructions that influence how the model will - generate a chat completions - response. - - :ivar role: The chat role associated with this message, which is always 'system' for system - messages. Required. The role that instructs or sets the behavior of the assistant. - :vartype role: str or ~azure.ai.inference.models.SYSTEM - :ivar content: The contents of the system message. Required. - :vartype content: str - """ - - role: Literal[ChatRole.SYSTEM] = rest_discriminator(name="role") # type: ignore - """The chat role associated with this message, which is always 'system' for system messages. - Required. The role that instructs or sets the behavior of the assistant.""" - content: str = rest_field() - """The contents of the system message. Required.""" - - @overload - def __init__( - self, - *, - content: str, - ) -> None: ... - - @overload - def __init__(self, mapping: Mapping[str, Any]) -> None: - """ - :param mapping: raw JSON to initialize the model. - :type mapping: Mapping[str, Any] - """ - - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, role=ChatRole.SYSTEM, **kwargs) - - -class ChatRequestToolMessage(ChatRequestMessage, discriminator="tool"): - """A request chat message representing requested output from a configured tool. - - :ivar role: The chat role associated with this message, which is always 'tool' for tool - messages. Required. The role that represents extension tool activity within a chat completions - operation. - :vartype role: str or ~azure.ai.inference.models.TOOL - :ivar content: The content of the message. - :vartype content: str - :ivar tool_call_id: The ID of the tool call resolved by the provided content. Required. - :vartype tool_call_id: str - """ - - role: Literal[ChatRole.TOOL] = rest_discriminator(name="role") # type: ignore - """The chat role associated with this message, which is always 'tool' for tool messages. Required. - The role that represents extension tool activity within a chat completions operation.""" - content: Optional[str] = rest_field() - """The content of the message.""" - tool_call_id: str = rest_field() - """The ID of the tool call resolved by the provided content. Required.""" - - @overload - def __init__( - self, - *, - tool_call_id: str, - content: Optional[str] = None, - ) -> None: ... - - @overload - def __init__(self, mapping: Mapping[str, Any]) -> None: - """ - :param mapping: raw JSON to initialize the model. - :type mapping: Mapping[str, Any] - """ - - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, role=ChatRole.TOOL, **kwargs) - - -class ChatRequestUserMessage(ChatRequestMessage, discriminator="user"): - """A request chat message representing user input to the assistant. - - :ivar role: The chat role associated with this message, which is always 'user' for user - messages. Required. The role that provides input for chat completions. - :vartype role: str or ~azure.ai.inference.models.USER - :ivar content: The contents of the user message, with available input types varying by selected - model. Required. Is either a str type or a [ContentItem] type. - :vartype content: str or list[~azure.ai.inference.models.ContentItem] - """ - - role: Literal[ChatRole.USER] = rest_discriminator(name="role") # type: ignore - """The chat role associated with this message, which is always 'user' for user messages. Required. - The role that provides input for chat completions.""" - content: Union["str", List["_models.ContentItem"]] = rest_field() - """The contents of the user message, with available input types varying by selected model. - Required. Is either a str type or a [ContentItem] type.""" - - @overload - def __init__( - self, - *, - content: Union[str, List["_models.ContentItem"]], - ) -> None: ... - - @overload - def __init__(self, mapping: Mapping[str, Any]) -> None: - """ - :param mapping: raw JSON to initialize the model. - :type mapping: Mapping[str, Any] - """ - - def __init__(self, *args: Any, **kwargs: Any) -> None: - super().__init__(*args, role=ChatRole.USER, **kwargs) - - class ChatResponseMessage(_model_base.Model): """A representation of a chat message as received in a response. @@ -691,6 +596,8 @@ class ContentItem(_model_base.Model): You probably want to use the sub-classes and not this class directly. Known sub-classes are: ImageContentItem, TextContentItem + All required parameters must be populated in order to send to server. + :ivar type: The discriminated object type. Required. Default value is None. :vartype type: str """ @@ -730,7 +637,7 @@ class EmbeddingItem(_model_base.Model): :vartype index: int """ - embedding: Union["str", List[float]] = rest_field() + embedding: Union[str, List[float]] = rest_field() """List of embedding values for the input prompt. These represent a measurement of the vector-based relatedness of the provided input. Or a base64 encoded string of the embedding vector. Required. Is either a str type or a [float] type.""" @@ -885,6 +792,8 @@ class FunctionDefinition(_model_base.Model): """The definition of a caller-specified function that chat completions may invoke in response to matching user input. + All required parameters must be populated in order to send to server. + :ivar name: The name of the function to be called. Required. :vartype name: str :ivar description: A description of what the function does. The model will use this description @@ -927,6 +836,8 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: class ImageContentItem(ContentItem, discriminator="image_url"): """A structured chat content item containing an image reference. + All required parameters must be populated in order to send to server. + :ivar type: The discriminated object type: always 'image_url' for this type. Required. Default value is "image_url". :vartype type: str @@ -963,6 +874,8 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: class ImageEmbeddingInput(_model_base.Model): """Represents an image with optional text. + All required parameters must be populated in order to send to server. + :ivar image: The input image encoded in base64 string as a data URL. Example: ``data:image/{format};base64,{data}``. Required. :vartype image: str @@ -1000,6 +913,8 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: class ImageUrl(_model_base.Model): """An internet location from which the model may retrieve an image. + All required parameters must be populated in order to send to server. + :ivar url: The URL of the image. Required. :vartype url: str :ivar detail: The evaluation quality setting to use, which controls relative prioritization of @@ -1035,8 +950,10 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: class JsonSchemaFormat(_model_base.Model): - """Defines the response format for chat completions as JSON with a given schema. - The AI model will need to adhere to this schema when generating completions. + """Defines the response format for chat completions as JSON with a given schema. The AI model + will need to adhere to this schema when generating completions. + + All required parameters must be populated in order to send to server. :ivar name: A name that labels this JSON schema. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64. Required. @@ -1044,7 +961,8 @@ class JsonSchemaFormat(_model_base.Model): :ivar schema: The definition of the JSON schema. See https://json-schema.org/overview/what-is-jsonschema. Note that AI models usually only support a subset of the keywords defined by JSON schema. - Consult your AI model documentation to determine what is supported. Required. + Consult your AI model documentation + to determine what is supported. Required. :vartype schema: dict[str, any] :ivar description: A description of the response format, used by the AI model to determine how to generate responses in this format. @@ -1053,8 +971,9 @@ class JsonSchemaFormat(_model_base.Model): keywords not supported by the AI model. An example of such keyword may be ``maxLength`` for JSON type ``string``. - If false, and the provided JSON schema contains keywords not supported by the AI model, - the AI model will not error out. Instead it will ignore the unsupported keywords. + If false, and the provided JSON schema contains keywords not supported + by the AI model, the AI model will not error out. Instead it will ignore the unsupported + keywords. :vartype strict: bool """ @@ -1064,7 +983,8 @@ class JsonSchemaFormat(_model_base.Model): schema: Dict[str, Any] = rest_field() """The definition of the JSON schema. See https://json-schema.org/overview/what-is-jsonschema. Note that AI models usually only support a subset of the keywords defined by JSON schema. - Consult your AI model documentation to determine what is supported. Required.""" + Consult your AI model documentation + to determine what is supported. Required.""" description: Optional[str] = rest_field() """A description of the response format, used by the AI model to determine how to generate responses in this format.""" @@ -1072,8 +992,9 @@ class JsonSchemaFormat(_model_base.Model): """If set to true, the service will error out if the provided JSON schema contains keywords not supported by the AI model. An example of such keyword may be ``maxLength`` for JSON type ``string``. - If false, and the provided JSON schema contains keywords not supported by the AI model, - the AI model will not error out. Instead it will ignore the unsupported keywords.""" + If false, and the provided JSON schema contains keywords not supported + by the AI model, the AI model will not error out. Instead it will ignore the unsupported + keywords.""" @overload def __init__( @@ -1201,14 +1122,14 @@ class StreamingChatCompletionsUpdate(_model_base.Model): :vartype created: ~datetime.datetime :ivar model: The model used for the chat completion. Required. :vartype model: str + :ivar usage: Usage information for tokens processed and generated as part of this completions + operation. Required. + :vartype usage: ~azure.ai.inference.models.CompletionsUsage :ivar choices: An update to the collection of completion choices associated with this completions response. Generally, ``n`` choices are generated per provided prompt with a default value of 1. Token limits and other settings may limit the number of choices generated. Required. :vartype choices: list[~azure.ai.inference.models.StreamingChatChoiceUpdate] - :ivar usage: Usage information for tokens processed and generated as part of this completions - operation. - :vartype usage: ~azure.ai.inference.models.CompletionsUsage """ id: str = rest_field() @@ -1218,12 +1139,13 @@ class StreamingChatCompletionsUpdate(_model_base.Model): represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970. Required.""" model: str = rest_field() """The model used for the chat completion. Required.""" + usage: "_models.CompletionsUsage" = rest_field() + """Usage information for tokens processed and generated as part of this completions operation. + Required.""" choices: List["_models.StreamingChatChoiceUpdate"] = rest_field() """An update to the collection of completion choices associated with this completions response. Generally, ``n`` choices are generated per provided prompt with a default value of 1. Token limits and other settings may limit the number of choices generated. Required.""" - usage: Optional["_models.CompletionsUsage"] = rest_field() - """Usage information for tokens processed and generated as part of this completions operation.""" @overload def __init__( @@ -1232,8 +1154,8 @@ def __init__( id: str, # pylint: disable=redefined-builtin created: datetime.datetime, model: str, + usage: "_models.CompletionsUsage", choices: List["_models.StreamingChatChoiceUpdate"], - usage: Optional["_models.CompletionsUsage"] = None, ) -> None: ... @overload @@ -1325,9 +1247,49 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, **kwargs) +class SystemMessage(ChatRequestMessage, discriminator="system"): + """A request chat message containing system instructions that influence how the model will + generate a chat completions + response. + + All required parameters must be populated in order to send to server. + + :ivar role: The chat role associated with this message, which is always 'system' for system + messages. Required. The role that instructs or sets the behavior of the assistant. + :vartype role: str or ~azure.ai.inference.models.SYSTEM + :ivar content: The contents of the system message. Required. + :vartype content: str + """ + + role: Literal[ChatRole.SYSTEM] = rest_discriminator(name="role") # type: ignore + """The chat role associated with this message, which is always 'system' for system messages. + Required. The role that instructs or sets the behavior of the assistant.""" + content: str = rest_field() + """The contents of the system message. Required.""" + + @overload + def __init__( + self, + *, + content: str, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, role=ChatRole.SYSTEM, **kwargs) + + class TextContentItem(ContentItem, discriminator="text"): """A structured chat content item containing plain text. + All required parameters must be populated in order to send to server. + :ivar type: The discriminated object type: always 'text' for this type. Required. Default value is "text". :vartype type: str @@ -1357,3 +1319,83 @@ def __init__(self, mapping: Mapping[str, Any]) -> None: def __init__(self, *args: Any, **kwargs: Any) -> None: super().__init__(*args, type="text", **kwargs) + + +class ToolMessage(ChatRequestMessage, discriminator="tool"): + """A request chat message representing requested output from a configured tool. + + All required parameters must be populated in order to send to server. + + :ivar role: The chat role associated with this message, which is always 'tool' for tool + messages. Required. The role that represents extension tool activity within a chat completions + operation. + :vartype role: str or ~azure.ai.inference.models.TOOL + :ivar content: The content of the message. + :vartype content: str + :ivar tool_call_id: The ID of the tool call resolved by the provided content. Required. + :vartype tool_call_id: str + """ + + role: Literal[ChatRole.TOOL] = rest_discriminator(name="role") # type: ignore + """The chat role associated with this message, which is always 'tool' for tool messages. Required. + The role that represents extension tool activity within a chat completions operation.""" + content: Optional[str] = rest_field() + """The content of the message.""" + tool_call_id: str = rest_field() + """The ID of the tool call resolved by the provided content. Required.""" + + @overload + def __init__( + self, + *, + tool_call_id: str, + content: Optional[str] = None, + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, role=ChatRole.TOOL, **kwargs) + + +class UserMessage(ChatRequestMessage, discriminator="user"): + """A request chat message representing user input to the assistant. + + All required parameters must be populated in order to send to server. + + :ivar role: The chat role associated with this message, which is always 'user' for user + messages. Required. The role that provides input for chat completions. + :vartype role: str or ~azure.ai.inference.models.USER + :ivar content: The contents of the user message, with available input types varying by selected + model. Required. Is either a str type or a [ContentItem] type. + :vartype content: str or list[~azure.ai.inference.models.ContentItem] + """ + + role: Literal[ChatRole.USER] = rest_discriminator(name="role") # type: ignore + """The chat role associated with this message, which is always 'user' for user messages. Required. + The role that provides input for chat completions.""" + content: Union[str, List["_models.ContentItem"]] = rest_field() + """The contents of the user message, with available input types varying by selected model. + Required. Is either a str type or a [ContentItem] type.""" + + @overload + def __init__( + self, + *, + content: Union[str, List["_models.ContentItem"]], + ) -> None: ... + + @overload + def __init__(self, mapping: Mapping[str, Any]) -> None: + """ + :param mapping: raw JSON to initialize the model. + :type mapping: Mapping[str, Any] + """ + + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, role=ChatRole.USER, **kwargs) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/models/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/models/_patch.py index d44b32e99019..f7dd32510333 100644 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/models/_patch.py +++ b/sdk/ai/azure-ai-inference/azure/ai/inference/models/_patch.py @@ -6,483 +6,9 @@ Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize """ -import asyncio -import base64 -import json -import logging -import queue -import re -import sys +from typing import List -from typing import Mapping, Literal, Any, List, AsyncIterator, Iterator, Optional, Union, overload -from azure.core.rest import HttpResponse, AsyncHttpResponse -from ._enums import ChatRole -from .._model_base import rest_discriminator, rest_field -from ._models import ChatRequestMessage -from ._models import ImageUrl as ImageUrlGenerated -from ._models import ChatCompletions as ChatCompletionsGenerated -from ._models import EmbeddingsResult as EmbeddingsResultGenerated -from ._models import ImageEmbeddingInput as EmbeddingInputGenerated -from .. import models as _models - -if sys.version_info >= (3, 11): - from typing import Self -else: - from typing_extensions import Self - -logger = logging.getLogger(__name__) - - -class UserMessage(ChatRequestMessage, discriminator="user"): - """A request chat message representing user input to the assistant. - - :ivar role: The chat role associated with this message, which is always 'user' for user - messages. Required. The role that provides input for chat completions. - :vartype role: str or ~azure.ai.inference.models.USER - :ivar content: The contents of the user message, with available input types varying by selected - model. Required. Is either a str type or a [ContentItem] type. - :vartype content: str or list[~azure.ai.inference.models.ContentItem] - """ - - role: Literal[ChatRole.USER] = rest_discriminator(name="role") # type: ignore - """The chat role associated with this message, which is always 'user' for user messages. Required. - The role that provides input for chat completions.""" - content: Union["str", List["_models.ContentItem"]] = rest_field() - """The contents of the user message, with available input types varying by selected model. - Required. Is either a str type or a [ContentItem] type.""" - - @overload - def __init__( - self, - content: Union[str, List["_models.ContentItem"]], - ) -> None: ... - - @overload - def __init__(self, mapping: Mapping[str, Any]) -> None: - """ - :param mapping: raw JSON to initialize the model. - :type mapping: Mapping[str, Any] - """ - - def __init__(self, *args: Any, **kwargs: Any) -> None: - if len(args) == 1 and isinstance(args[0], (List, str)): - if kwargs.get("content") is not None: - raise ValueError("content cannot be provided as positional and keyword arguments") - kwargs["content"] = args[0] - args = tuple() - super().__init__(*args, role=ChatRole.USER, **kwargs) - - -class SystemMessage(ChatRequestMessage, discriminator="system"): - """A request chat message containing system instructions that influence how the model will - generate a chat completions - response. - - :ivar role: The chat role associated with this message, which is always 'system' for system - messages. Required. The role that instructs or sets the behavior of the assistant. - :vartype role: str or ~azure.ai.inference.models.SYSTEM - :ivar content: The contents of the system message. Required. - :vartype content: str - """ - - role: Literal[ChatRole.SYSTEM] = rest_discriminator(name="role") # type: ignore - """The chat role associated with this message, which is always 'system' for system messages. - Required. The role that instructs or sets the behavior of the assistant.""" - content: str = rest_field() - """The contents of the system message. Required.""" - - @overload - def __init__( - self, - content: str, - ) -> None: ... - - @overload - def __init__(self, mapping: Mapping[str, Any]) -> None: - """ - :param mapping: raw JSON to initialize the model. - :type mapping: Mapping[str, Any] - """ - - def __init__(self, *args: Any, **kwargs: Any) -> None: - if len(args) == 1 and isinstance(args[0], str): - if kwargs.get("content") is not None: - raise ValueError("content cannot be provided as positional and keyword arguments") - kwargs["content"] = args[0] - args = tuple() - super().__init__(*args, role=ChatRole.SYSTEM, **kwargs) - - -class AssistantMessage(ChatRequestMessage, discriminator="assistant"): - """A request chat message representing response or action from the assistant. - - :ivar role: The chat role associated with this message, which is always 'assistant' for - assistant messages. Required. The role that provides responses to system-instructed, - user-prompted input. - :vartype role: str or ~azure.ai.inference.models.ASSISTANT - :ivar content: The content of the message. - :vartype content: str - :ivar tool_calls: The tool calls that must be resolved and have their outputs appended to - subsequent input messages for the chat - completions request to resolve as configured. - :vartype tool_calls: list[~azure.ai.inference.models.ChatCompletionsToolCall] - """ - - role: Literal[ChatRole.ASSISTANT] = rest_discriminator(name="role") # type: ignore - """The chat role associated with this message, which is always 'assistant' for assistant messages. - Required. The role that provides responses to system-instructed, user-prompted input.""" - content: Optional[str] = rest_field() - """The content of the message.""" - tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = rest_field() - """The tool calls that must be resolved and have their outputs appended to subsequent input - messages for the chat - completions request to resolve as configured.""" - - @overload - def __init__( - self, - content: Optional[str] = None, - *, - tool_calls: Optional[List["_models.ChatCompletionsToolCall"]] = None, - ) -> None: ... - - @overload - def __init__(self, mapping: Mapping[str, Any]) -> None: - """ - :param mapping: raw JSON to initialize the model. - :type mapping: Mapping[str, Any] - """ - - def __init__(self, *args: Any, **kwargs: Any) -> None: - if len(args) == 1 and isinstance(args[0], str): - if kwargs.get("content") is not None: - raise ValueError("content cannot be provided as positional and keyword arguments") - kwargs["content"] = args[0] - args = tuple() - super().__init__(*args, role=ChatRole.ASSISTANT, **kwargs) - - -class ToolMessage(ChatRequestMessage, discriminator="tool"): - """A request chat message representing requested output from a configured tool. - - :ivar role: The chat role associated with this message, which is always 'tool' for tool - messages. Required. The role that represents extension tool activity within a chat completions - operation. - :vartype role: str or ~azure.ai.inference.models.TOOL - :ivar content: The content of the message. - :vartype content: str - :ivar tool_call_id: The ID of the tool call resolved by the provided content. Required. - :vartype tool_call_id: str - """ - - role: Literal[ChatRole.TOOL] = rest_discriminator(name="role") # type: ignore - """The chat role associated with this message, which is always 'tool' for tool messages. Required. - The role that represents extension tool activity within a chat completions operation.""" - content: Optional[str] = rest_field() - """The content of the message.""" - tool_call_id: str = rest_field() - """The ID of the tool call resolved by the provided content. Required.""" - - @overload - def __init__( - self, - content: Optional[str] = None, - *, - tool_call_id: str, - ) -> None: ... - - @overload - def __init__(self, mapping: Mapping[str, Any]) -> None: - """ - :param mapping: raw JSON to initialize the model. - :type mapping: Mapping[str, Any] - """ - - def __init__(self, *args: Any, **kwargs: Any) -> None: - if len(args) == 1 and isinstance(args[0], str): - if kwargs.get("content") is not None: - raise ValueError("content cannot be provided as positional and keyword arguments") - kwargs["content"] = args[0] - args = tuple() - super().__init__(*args, role=ChatRole.TOOL, **kwargs) - - -class ChatCompletions(ChatCompletionsGenerated): - """Representation of the response data from a chat completions request. - Completions support a wide variety of tasks and generate text that continues from or - "completes" - provided prompt data. - - - :ivar id: A unique identifier associated with this chat completions response. Required. - :vartype id: str - :ivar created: The first timestamp associated with generation activity for this completions - response, - represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970. Required. - :vartype created: ~datetime.datetime - :ivar model: The model used for the chat completion. Required. - :vartype model: str - :ivar usage: Usage information for tokens processed and generated as part of this completions - operation. Required. - :vartype usage: ~azure.ai.inference.models.CompletionsUsage - :ivar choices: The collection of completions choices associated with this completions response. - Generally, ``n`` choices are generated per provided prompt with a default value of 1. - Token limits and other settings may limit the number of choices generated. Required. - :vartype choices: list[~azure.ai.inference.models.ChatChoice] - """ - - def __str__(self) -> str: - # pylint: disable=client-method-name-no-double-underscore - return json.dumps(self.as_dict(), indent=2) - - -class EmbeddingsResult(EmbeddingsResultGenerated): - """Representation of the response data from an embeddings request. - Embeddings measure the relatedness of text strings and are commonly used for search, - clustering, - recommendations, and other similar scenarios. - - - :ivar data: Embedding values for the prompts submitted in the request. Required. - :vartype data: list[~azure.ai.inference.models.EmbeddingItem] - :ivar usage: Usage counts for tokens input using the embeddings API. Required. - :vartype usage: ~azure.ai.inference.models.EmbeddingsUsage - :ivar model: The model ID used to generate this result. Required. - :vartype model: str - """ - - def __str__(self) -> str: - # pylint: disable=client-method-name-no-double-underscore - return json.dumps(self.as_dict(), indent=2) - - -class ImageUrl(ImageUrlGenerated): - - @classmethod - def load( - cls, *, image_file: str, image_format: str, detail: Optional[Union[str, "_models.ImageDetailLevel"]] = None - ) -> Self: - """ - Create an ImageUrl object from a local image file. The method reads the image - file and encodes it as a base64 string, which together with the image format - is then used to format the JSON `url` value passed in the request payload. - - :keyword image_file: The name of the local image file to load. Required. - :paramtype image_file: str - :keyword image_format: The MIME type format of the image. For example: "jpeg", "png". Required. - :paramtype image_format: str - :keyword detail: The evaluation quality setting to use, which controls relative prioritization of - speed, token consumption, and accuracy. Known values are: "auto", "low", and "high". - :paramtype detail: str or ~azure.ai.inference.models.ImageDetailLevel - :return: An ImageUrl object with the image data encoded as a base64 string. - :rtype: ~azure.ai.inference.models.ImageUrl - :raises FileNotFoundError: when the image file could not be opened. - """ - with open(image_file, "rb") as f: - image_data = base64.b64encode(f.read()).decode("utf-8") - url = f"data:image/{image_format};base64,{image_data}" - return cls(url=url, detail=detail) - - -class ImageEmbeddingInput(EmbeddingInputGenerated): - - @classmethod - def load(cls, *, image_file: str, image_format: str, text: Optional[str] = None) -> Self: - """ - Create an ImageEmbeddingInput object from a local image file. The method reads the image - file and encodes it as a base64 string, which together with the image format - is then used to format the JSON `url` value passed in the request payload. - - :keyword image_file: The name of the local image file to load. Required. - :paramtype image_file: str - :keyword image_format: The MIME type format of the image. For example: "jpeg", "png". Required. - :paramtype image_format: str - :keyword text: Optional. The text input to feed into the model (like DINO, CLIP). - Returns a 422 error if the model doesn't support the value or parameter. - :paramtype text: str - :return: An ImageEmbeddingInput object with the image data encoded as a base64 string. - :rtype: ~azure.ai.inference.models.EmbeddingsInput - :raises FileNotFoundError: when the image file could not be opened. - """ - with open(image_file, "rb") as f: - image_data = base64.b64encode(f.read()).decode("utf-8") - image_uri = f"data:image/{image_format};base64,{image_data}" - return cls(image=image_uri, text=text) - - -class BaseStreamingChatCompletions: - """A base class for the sync and async streaming chat completions responses, holding any common code - to deserializes the Server Sent Events (SSE) response stream into chat completions updates, each one - represented by a StreamingChatCompletionsUpdate object. - """ - - # Enable detailed logs of SSE parsing. For development only, should be `False` by default. - _ENABLE_CLASS_LOGS = False - - # The prefix of each line in the SSE stream that contains a JSON string - # to deserialize into a StreamingChatCompletionsUpdate object - _SSE_DATA_EVENT_PREFIX = "data: " - - # The line indicating the end of the SSE stream - _SSE_DATA_EVENT_DONE = "data: [DONE]" - - def __init__(self): - self._queue: "queue.Queue[_models.StreamingChatCompletionsUpdate]" = queue.Queue() - self._incomplete_json = "" - self._done = False # Will be set to True when reading 'data: [DONE]' line - - def _deserialize_and_add_to_queue(self, element: bytes) -> bool: - - # Clear the queue of StreamingChatCompletionsUpdate before processing the next block - self._queue.queue.clear() - - # Convert `bytes` to string and split the string by newline, while keeping the new line char. - # the last may be a partial "line" that does not contain a newline char at the end. - line_list: List[str] = re.split(r"(?<=\n)", element.decode("utf-8")) - for index, line in enumerate(line_list): - - if self._ENABLE_CLASS_LOGS: - logger.debug("[Original line] %s", repr(line)) - - if index == 0: - line = self._incomplete_json + line - self._incomplete_json = "" - - if index == len(line_list) - 1 and not line.endswith("\n"): - self._incomplete_json = line - return False - - if self._ENABLE_CLASS_LOGS: - logger.debug("[Modified line] %s", repr(line)) - - if line == "\n": # Empty line, indicating flush output to client - continue - - if not line.startswith(self._SSE_DATA_EVENT_PREFIX): - raise ValueError(f"SSE event not supported (line `{line}`)") - - if line.startswith(self._SSE_DATA_EVENT_DONE): - if self._ENABLE_CLASS_LOGS: - logger.debug("[Done]") - return True - - # If you reached here, the line should contain `data: {...}\n` - # where the curly braces contain a valid JSON object. - # Deserialize it into a StreamingChatCompletionsUpdate object - # and add it to the queue. - # pylint: disable=W0212 # Access to a protected member _deserialize of a client class - update = _models.StreamingChatCompletionsUpdate._deserialize( - json.loads(line[len(self._SSE_DATA_EVENT_PREFIX) : -1]), [] - ) - - # We skip any update that has a None or empty choices list - # (this is what OpenAI Python SDK does) - if update.choices: - - # We update all empty content strings to None - # (this is what OpenAI Python SDK does) - # for choice in update.choices: - # if not choice.delta.content: - # choice.delta.content = None - - self._queue.put(update) - - if self._ENABLE_CLASS_LOGS: - logger.debug("[Added to queue]") - - return False - - -class StreamingChatCompletions(BaseStreamingChatCompletions): - """Represents an interator over StreamingChatCompletionsUpdate objects. It can be used for either synchronous or - asynchronous iterations. The class deserializes the Server Sent Events (SSE) response stream - into chat completions updates, each one represented by a StreamingChatCompletionsUpdate object. - """ - - def __init__(self, response: HttpResponse): - super().__init__() - self._response = response - self._bytes_iterator: Iterator[bytes] = response.iter_bytes() - - def __iter__(self) -> Any: - return self - - def __next__(self) -> "_models.StreamingChatCompletionsUpdate": - while self._queue.empty() and not self._done: - self._done = self._read_next_block() - if self._queue.empty(): - raise StopIteration - return self._queue.get() - - def _read_next_block(self) -> bool: - if self._ENABLE_CLASS_LOGS: - logger.debug("[Reading next block]") - try: - element = self._bytes_iterator.__next__() - except StopIteration: - self.close() - return True - return self._deserialize_and_add_to_queue(element) - - def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: # type: ignore - self.close() - - def close(self) -> None: - self._response.close() - - -class AsyncStreamingChatCompletions(BaseStreamingChatCompletions): - """Represents an async interator over StreamingChatCompletionsUpdate objects. - It can be used for either synchronous or asynchronous iterations. The class - deserializes the Server Sent Events (SSE) response stream into chat - completions updates, each one represented by a StreamingChatCompletionsUpdate object. - """ - - def __init__(self, response: AsyncHttpResponse): - super().__init__() - self._response = response - self._bytes_iterator: AsyncIterator[bytes] = response.iter_bytes() - - def __aiter__(self) -> Any: - return self - - async def __anext__(self) -> "_models.StreamingChatCompletionsUpdate": - while self._queue.empty() and not self._done: - self._done = await self._read_next_block_async() - if self._queue.empty(): - raise StopAsyncIteration - return self._queue.get() - - async def _read_next_block_async(self) -> bool: - if self._ENABLE_CLASS_LOGS: - logger.debug("[Reading next block]") - try: - element = await self._bytes_iterator.__anext__() - except StopAsyncIteration: - await self.aclose() - return True - return self._deserialize_and_add_to_queue(element) - - def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None: # type: ignore - asyncio.run(self.aclose()) - - async def aclose(self) -> None: - await self._response.close() - - -__all__: List[str] = [ - "AssistantMessage", - "AsyncStreamingChatCompletions", - "ChatCompletions", - "ChatRequestMessage", - "EmbeddingsResult", - "ImageEmbeddingInput", - "ImageUrl", - "StreamingChatCompletions", - "SystemMessage", - "ToolMessage", - "UserMessage", -] # Add all objects you want publicly available to users at this package level +__all__: List[str] = [] # Add all objects you want publicly available to users at this package level def patch_sdk(): diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py deleted file mode 100644 index 2e11b31cb6a4..000000000000 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -# ------------------------------------ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# ------------------------------------ -# pylint: disable=unused-import -from ._patch import patch_sdk as _patch_sdk, PromptTemplate - -_patch_sdk() diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py deleted file mode 100644 index ec6702995149..000000000000 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_core.py +++ /dev/null @@ -1,312 +0,0 @@ -# ------------------------------------ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# ------------------------------------ -# mypy: disable-error-code="assignment,attr-defined,index,arg-type" -# pylint: disable=line-too-long,R,consider-iterating-dictionary,raise-missing-from,dangerous-default-value -from __future__ import annotations -import os -from dataclasses import dataclass, field, asdict -from pathlib import Path -from typing import Any, AsyncIterator, Dict, Iterator, List, Literal, Union -from ._tracer import Tracer, to_dict -from ._utils import load_json - - -@dataclass -class ToolCall: - id: str - name: str - arguments: str - - -@dataclass -class PropertySettings: - """PropertySettings class to define the properties of the model - - Attributes - ---------- - type : str - The type of the property - default : Any - The default value of the property - description : str - The description of the property - """ - - type: Literal["string", "number", "array", "object", "boolean"] - default: Union[str, int, float, List, Dict, bool, None] = field(default=None) - description: str = field(default="") - - -@dataclass -class ModelSettings: - """ModelSettings class to define the model of the prompty - - Attributes - ---------- - api : str - The api of the model - configuration : Dict - The configuration of the model - parameters : Dict - The parameters of the model - response : Dict - The response of the model - """ - - api: str = field(default="") - configuration: Dict = field(default_factory=dict) - parameters: Dict = field(default_factory=dict) - response: Dict = field(default_factory=dict) - - -@dataclass -class TemplateSettings: - """TemplateSettings class to define the template of the prompty - - Attributes - ---------- - type : str - The type of the template - parser : str - The parser of the template - """ - - type: str = field(default="mustache") - parser: str = field(default="") - - -@dataclass -class Prompty: - """Prompty class to define the prompty - - Attributes - ---------- - name : str - The name of the prompty - description : str - The description of the prompty - authors : List[str] - The authors of the prompty - tags : List[str] - The tags of the prompty - version : str - The version of the prompty - base : str - The base of the prompty - basePrompty : Prompty - The base prompty - model : ModelSettings - The model of the prompty - sample : Dict - The sample of the prompty - inputs : Dict[str, PropertySettings] - The inputs of the prompty - outputs : Dict[str, PropertySettings] - The outputs of the prompty - template : TemplateSettings - The template of the prompty - file : FilePath - The file of the prompty - content : Union[str, List[str], Dict] - The content of the prompty - """ - - # metadata - name: str = field(default="") - description: str = field(default="") - authors: List[str] = field(default_factory=list) - tags: List[str] = field(default_factory=list) - version: str = field(default="") - base: str = field(default="") - basePrompty: Union[Prompty, None] = field(default=None) - # model - model: ModelSettings = field(default_factory=ModelSettings) - - # sample - sample: Dict = field(default_factory=dict) - - # input / output - inputs: Dict[str, PropertySettings] = field(default_factory=dict) - outputs: Dict[str, PropertySettings] = field(default_factory=dict) - - # template - template: TemplateSettings = field(default_factory=TemplateSettings) - - file: Union[Path, str] = field(default="") - content: Union[str, List[str], Dict] = field(default="") - - def to_safe_dict(self) -> Dict[str, Any]: - d = {} - if self.model: - d["model"] = asdict(self.model) - _mask_secrets(d, ["model", "configuration"]) - if self.template: - d["template"] = asdict(self.template) - if self.inputs: - d["inputs"] = {k: asdict(v) for k, v in self.inputs.items()} - if self.outputs: - d["outputs"] = {k: asdict(v) for k, v in self.outputs.items()} - if self.file: - d["file"] = str(self.file.as_posix()) if isinstance(self.file, Path) else self.file - return d - - @staticmethod - def hoist_base_prompty(top: Prompty, base: Prompty) -> Prompty: - top.name = base.name if top.name == "" else top.name - top.description = base.description if top.description == "" else top.description - top.authors = list(set(base.authors + top.authors)) - top.tags = list(set(base.tags + top.tags)) - top.version = base.version if top.version == "" else top.version - - top.model.api = base.model.api if top.model.api == "" else top.model.api - top.model.configuration = param_hoisting(top.model.configuration, base.model.configuration) - top.model.parameters = param_hoisting(top.model.parameters, base.model.parameters) - top.model.response = param_hoisting(top.model.response, base.model.response) - - top.sample = param_hoisting(top.sample, base.sample) - - top.basePrompty = base - - return top - - @staticmethod - def _process_file(file: str, parent: Path) -> Any: - file_path = Path(parent / Path(file)).resolve().absolute() - if file_path.exists(): - items = load_json(file_path) - if isinstance(items, list): - return [Prompty.normalize(value, parent) for value in items] - elif isinstance(items, Dict): - return {key: Prompty.normalize(value, parent) for key, value in items.items()} - else: - return items - else: - raise FileNotFoundError(f"File {file} not found") - - @staticmethod - def _process_env(variable: str, env_error=True, default: Union[str, None] = None) -> Any: - if variable in os.environ.keys(): - return os.environ[variable] - else: - if default: - return default - if env_error: - raise ValueError(f"Variable {variable} not found in environment") - - return "" - - @staticmethod - def normalize(attribute: Any, parent: Path, env_error=True) -> Any: - if isinstance(attribute, str): - attribute = attribute.strip() - if attribute.startswith("${") and attribute.endswith("}"): - # check if env or file - variable = attribute[2:-1].split(":") - if variable[0] == "env" and len(variable) > 1: - return Prompty._process_env( - variable[1], - env_error, - variable[2] if len(variable) > 2 else None, - ) - elif variable[0] == "file" and len(variable) > 1: - return Prompty._process_file(variable[1], parent) - else: - raise ValueError(f"Invalid attribute format ({attribute})") - else: - return attribute - elif isinstance(attribute, list): - return [Prompty.normalize(value, parent) for value in attribute] - elif isinstance(attribute, Dict): - return {key: Prompty.normalize(value, parent) for key, value in attribute.items()} - else: - return attribute - - -def param_hoisting(top: Dict[str, Any], bottom: Dict[str, Any], top_key: Union[str, None] = None) -> Dict[str, Any]: - if top_key: - new_dict = {**top[top_key]} if top_key in top else {} - else: - new_dict = {**top} - for key, value in bottom.items(): - if not key in new_dict: - new_dict[key] = value - return new_dict - - -class PromptyStream(Iterator): - """PromptyStream class to iterate over LLM stream. - Necessary for Prompty to handle streaming data when tracing.""" - - def __init__(self, name: str, iterator: Iterator): - self.name = name - self.iterator = iterator - self.items: List[Any] = [] - self.__name__ = "PromptyStream" - - def __iter__(self): - return self - - def __next__(self): - try: - # enumerate but add to list - o = self.iterator.__next__() - self.items.append(o) - return o - - except StopIteration: - # StopIteration is raised - # contents are exhausted - if len(self.items) > 0: - with Tracer.start("PromptyStream") as trace: - trace("signature", f"{self.name}.PromptyStream") - trace("inputs", "None") - trace("result", [to_dict(s) for s in self.items]) - - raise StopIteration - - -class AsyncPromptyStream(AsyncIterator): - """AsyncPromptyStream class to iterate over LLM stream. - Necessary for Prompty to handle streaming data when tracing.""" - - def __init__(self, name: str, iterator: AsyncIterator): - self.name = name - self.iterator = iterator - self.items: List[Any] = [] - self.__name__ = "AsyncPromptyStream" - - def __aiter__(self): - return self - - async def __anext__(self): - try: - # enumerate but add to list - o = await self.iterator.__anext__() - self.items.append(o) - return o - - except StopAsyncIteration: - # StopIteration is raised - # contents are exhausted - if len(self.items) > 0: - with Tracer.start("AsyncPromptyStream") as trace: - trace("signature", f"{self.name}.AsyncPromptyStream") - trace("inputs", "None") - trace("result", [to_dict(s) for s in self.items]) - - raise StopAsyncIteration - - -def _mask_secrets(d: Dict[str, Any], path: list[str], patterns: list[str] = ["key", "secret"]) -> bool: - sub_d = d - for key in path: - if key not in sub_d: - return False - sub_d = sub_d[key] - - for k, v in sub_d.items(): - if any([pattern in k.lower() for pattern in patterns]): - sub_d[k] = "*" * len(v) - return True diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py deleted file mode 100644 index d682662e7b01..000000000000 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_invoker.py +++ /dev/null @@ -1,295 +0,0 @@ -# ------------------------------------ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# ------------------------------------ -# mypy: disable-error-code="return-value,operator" -# pylint: disable=line-too-long,R,docstring-missing-param,docstring-missing-return,docstring-missing-rtype,unnecessary-pass -import abc -from typing import Any, Callable, Dict, Literal -from ._tracer import trace -from ._core import Prompty - - -class Invoker(abc.ABC): - """Abstract class for Invoker - - Attributes - ---------- - prompty : Prompty - The prompty object - name : str - The name of the invoker - - """ - - def __init__(self, prompty: Prompty) -> None: - self.prompty = prompty - self.name = self.__class__.__name__ - - @abc.abstractmethod - def invoke(self, data: Any) -> Any: - """Abstract method to invoke the invoker - - Parameters - ---------- - data : Any - The data to be invoked - - Returns - ------- - Any - The invoked - """ - pass - - @abc.abstractmethod - async def invoke_async(self, data: Any) -> Any: - """Abstract method to invoke the invoker asynchronously - - Parameters - ---------- - data : Any - The data to be invoked - - Returns - ------- - Any - The invoked - """ - pass - - @trace - def run(self, data: Any) -> Any: - """Method to run the invoker - - Parameters - ---------- - data : Any - The data to be invoked - - Returns - ------- - Any - The invoked - """ - return self.invoke(data) - - @trace - async def run_async(self, data: Any) -> Any: - """Method to run the invoker asynchronously - - Parameters - ---------- - data : Any - The data to be invoked - - Returns - ------- - Any - The invoked - """ - return await self.invoke_async(data) - - -class InvokerFactory: - """Factory class for Invoker""" - - _renderers: Dict[str, Invoker] = {} - _parsers: Dict[str, Invoker] = {} - _executors: Dict[str, Invoker] = {} - _processors: Dict[str, Invoker] = {} - - @classmethod - def add_renderer(cls, name: str, invoker: Invoker) -> None: - cls._renderers[name] = invoker - - @classmethod - def add_parser(cls, name: str, invoker: Invoker) -> None: - cls._parsers[name] = invoker - - @classmethod - def add_executor(cls, name: str, invoker: Invoker) -> None: - cls._executors[name] = invoker - - @classmethod - def add_processor(cls, name: str, invoker: Invoker) -> None: - cls._processors[name] = invoker - - @classmethod - def register_renderer(cls, name: str) -> Callable: - def inner_wrapper(wrapped_class: Invoker) -> Callable: - cls._renderers[name] = wrapped_class - return wrapped_class # type: ignore - - return inner_wrapper - - @classmethod - def register_parser(cls, name: str) -> Callable: - def inner_wrapper(wrapped_class: Invoker) -> Callable: - cls._parsers[name] = wrapped_class - return wrapped_class # type: ignore - - return inner_wrapper - - @classmethod - def register_executor(cls, name: str) -> Callable: - def inner_wrapper(wrapped_class: Invoker) -> Callable: - cls._executors[name] = wrapped_class - return wrapped_class # type: ignore - - return inner_wrapper - - @classmethod - def register_processor(cls, name: str) -> Callable: - def inner_wrapper(wrapped_class: Invoker) -> Callable: - cls._processors[name] = wrapped_class - return wrapped_class # type: ignore - - return inner_wrapper - - @classmethod - def _get_name( - cls, - type: Literal["renderer", "parser", "executor", "processor"], - prompty: Prompty, - ) -> str: - if type == "renderer": - return prompty.template.type - elif type == "parser": - return f"{prompty.template.parser}.{prompty.model.api}" - elif type == "executor": - return prompty.model.configuration["type"] - elif type == "processor": - return prompty.model.configuration["type"] - else: - raise ValueError(f"Type {type} not found") - - @classmethod - def _get_invoker( - cls, - type: Literal["renderer", "parser", "executor", "processor"], - prompty: Prompty, - ) -> Invoker: - if type == "renderer": - name = prompty.template.type - if name not in cls._renderers: - raise ValueError(f"Renderer {name} not found") - - return cls._renderers[name](prompty) # type: ignore - - elif type == "parser": - name = f"{prompty.template.parser}.{prompty.model.api}" - if name not in cls._parsers: - raise ValueError(f"Parser {name} not found") - - return cls._parsers[name](prompty) # type: ignore - - elif type == "executor": - name = prompty.model.configuration["type"] - if name not in cls._executors: - raise ValueError(f"Executor {name} not found") - - return cls._executors[name](prompty) # type: ignore - - elif type == "processor": - name = prompty.model.configuration["type"] - if name not in cls._processors: - raise ValueError(f"Processor {name} not found") - - return cls._processors[name](prompty) # type: ignore - - else: - raise ValueError(f"Type {type} not found") - - @classmethod - def run( - cls, - type: Literal["renderer", "parser", "executor", "processor"], - prompty: Prompty, - data: Any, - default: Any = None, - ): - name = cls._get_name(type, prompty) - if name.startswith("NOOP") and default is not None: - return default - elif name.startswith("NOOP"): - return data - - invoker = cls._get_invoker(type, prompty) - value = invoker.run(data) - return value - - @classmethod - async def run_async( - cls, - type: Literal["renderer", "parser", "executor", "processor"], - prompty: Prompty, - data: Any, - default: Any = None, - ): - name = cls._get_name(type, prompty) - if name.startswith("NOOP") and default is not None: - return default - elif name.startswith("NOOP"): - return data - invoker = cls._get_invoker(type, prompty) - value = await invoker.run_async(data) - return value - - @classmethod - def run_renderer(cls, prompty: Prompty, data: Any, default: Any = None) -> Any: - return cls.run("renderer", prompty, data, default) - - @classmethod - async def run_renderer_async(cls, prompty: Prompty, data: Any, default: Any = None) -> Any: - return await cls.run_async("renderer", prompty, data, default) - - @classmethod - def run_parser(cls, prompty: Prompty, data: Any, default: Any = None) -> Any: - return cls.run("parser", prompty, data, default) - - @classmethod - async def run_parser_async(cls, prompty: Prompty, data: Any, default: Any = None) -> Any: - return await cls.run_async("parser", prompty, data, default) - - @classmethod - def run_executor(cls, prompty: Prompty, data: Any, default: Any = None) -> Any: - return cls.run("executor", prompty, data, default) - - @classmethod - async def run_executor_async(cls, prompty: Prompty, data: Any, default: Any = None) -> Any: - return await cls.run_async("executor", prompty, data, default) - - @classmethod - def run_processor(cls, prompty: Prompty, data: Any, default: Any = None) -> Any: - return cls.run("processor", prompty, data, default) - - @classmethod - async def run_processor_async(cls, prompty: Prompty, data: Any, default: Any = None) -> Any: - return await cls.run_async("processor", prompty, data, default) - - -class InvokerException(Exception): - """Exception class for Invoker""" - - def __init__(self, message: str, type: str) -> None: - super().__init__(message) - self.type = type - - def __str__(self) -> str: - return f"{super().__str__()}. Make sure to pip install any necessary package extras (i.e. could be something like `pip install prompty[{self.type}]`) for {self.type} as well as import the appropriate invokers (i.e. could be something like `import prompty.{self.type}`)." - - -@InvokerFactory.register_renderer("NOOP") -@InvokerFactory.register_parser("NOOP") -@InvokerFactory.register_executor("NOOP") -@InvokerFactory.register_processor("NOOP") -@InvokerFactory.register_parser("prompty.embedding") -@InvokerFactory.register_parser("prompty.image") -@InvokerFactory.register_parser("prompty.completion") -class NoOp(Invoker): - def invoke(self, data: Any) -> Any: - return data - - async def invoke_async(self, data: str) -> Any: - return self.invoke(data) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py deleted file mode 100644 index f7a0c21d8bb8..000000000000 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_mustache.py +++ /dev/null @@ -1,671 +0,0 @@ -# ------------------------------------ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# ------------------------------------ -# pylint: disable=line-too-long,R,consider-using-dict-items,docstring-missing-return,docstring-missing-rtype,docstring-missing-param,global-statement,unused-argument,global-variable-not-assigned,protected-access,logging-fstring-interpolation,deprecated-method -from __future__ import annotations -import logging -from collections.abc import Iterator, Sequence -from types import MappingProxyType -from typing import ( - Any, - Dict, - List, - Literal, - Mapping, - Optional, - Union, - cast, -) -from typing_extensions import TypeAlias - -logger = logging.getLogger(__name__) - - -Scopes: TypeAlias = List[Union[Literal[False, 0], Mapping[str, Any]]] - - -# Globals -_CURRENT_LINE = 1 -_LAST_TAG_LINE = None - - -class ChevronError(SyntaxError): - """Custom exception for Chevron errors.""" - - -# -# Helper functions -# - - -def grab_literal(template: str, l_del: str) -> tuple[str, str]: - """Parse a literal from the template. - - Args: - template: The template to parse. - l_del: The left delimiter. - - Returns: - Tuple[str, str]: The literal and the template. - """ - - global _CURRENT_LINE - - try: - # Look for the next tag and move the template to it - literal, template = template.split(l_del, 1) - _CURRENT_LINE += literal.count("\n") - return (literal, template) - - # There are no more tags in the template? - except ValueError: - # Then the rest of the template is a literal - return (template, "") - - -def l_sa_check(template: str, literal: str, is_standalone: bool) -> bool: - """Do a preliminary check to see if a tag could be a standalone. - - Args: - template: The template. (Not used.) - literal: The literal. - is_standalone: Whether the tag is standalone. - - Returns: - bool: Whether the tag could be a standalone. - """ - - # If there is a newline, or the previous tag was a standalone - if literal.find("\n") != -1 or is_standalone: - padding = literal.split("\n")[-1] - - # If all the characters since the last newline are spaces - # Then the next tag could be a standalone - # Otherwise it can't be - return padding.isspace() or padding == "" - else: - return False - - -def r_sa_check(template: str, tag_type: str, is_standalone: bool) -> bool: - """Do a final check to see if a tag could be a standalone. - - Args: - template: The template. - tag_type: The type of the tag. - is_standalone: Whether the tag is standalone. - - Returns: - bool: Whether the tag could be a standalone. - """ - - # Check right side if we might be a standalone - if is_standalone and tag_type not in ["variable", "no escape"]: - on_newline = template.split("\n", 1) - - # If the stuff to the right of us are spaces we're a standalone - return on_newline[0].isspace() or not on_newline[0] - - # If we're a tag can't be a standalone - else: - return False - - -def parse_tag(template: str, l_del: str, r_del: str) -> tuple[tuple[str, str], str]: - """Parse a tag from a template. - - Args: - template: The template. - l_del: The left delimiter. - r_del: The right delimiter. - - Returns: - Tuple[Tuple[str, str], str]: The tag and the template. - - Raises: - ChevronError: If the tag is unclosed. - ChevronError: If the set delimiter tag is unclosed. - """ - global _CURRENT_LINE - global _LAST_TAG_LINE - - tag_types = { - "!": "comment", - "#": "section", - "^": "inverted section", - "/": "end", - ">": "partial", - "=": "set delimiter?", - "{": "no escape?", - "&": "no escape", - } - - # Get the tag - try: - tag, template = template.split(r_del, 1) - except ValueError as e: - msg = "unclosed tag " f"at line {_CURRENT_LINE}" - raise ChevronError(msg) from e - - # Find the type meaning of the first character - tag_type = tag_types.get(tag[0], "variable") - - # If the type is not a variable - if tag_type != "variable": - # Then that first character is not needed - tag = tag[1:] - - # If we might be a set delimiter tag - if tag_type == "set delimiter?": - # Double check to make sure we are - if tag.endswith("="): - tag_type = "set delimiter" - # Remove the equal sign - tag = tag[:-1] - - # Otherwise we should complain - else: - msg = "unclosed set delimiter tag\n" f"at line {_CURRENT_LINE}" - raise ChevronError(msg) - - elif ( - # If we might be a no html escape tag - tag_type == "no escape?" - # And we have a third curly brace - # (And are using curly braces as delimiters) - and l_del == "{{" - and r_del == "}}" - and template.startswith("}") - ): - # Then we are a no html escape tag - template = template[1:] - tag_type = "no escape" - - # Strip the whitespace off the key and return - return ((tag_type, tag.strip()), template) - - -# -# The main tokenizing function -# - - -def tokenize(template: str, def_ldel: str = "{{", def_rdel: str = "}}") -> Iterator[tuple[str, str]]: - """Tokenize a mustache template. - - Tokenizes a mustache template in a generator fashion, - using file-like objects. It also accepts a string containing - the template. - - - Arguments: - - template -- a file-like object, or a string of a mustache template - - def_ldel -- The default left delimiter - ("{{" by default, as in spec compliant mustache) - - def_rdel -- The default right delimiter - ("}}" by default, as in spec compliant mustache) - - - Returns: - - A generator of mustache tags in the form of a tuple - - -- (tag_type, tag_key) - - Where tag_type is one of: - * literal - * section - * inverted section - * end - * partial - * no escape - - And tag_key is either the key or in the case of a literal tag, - the literal itself. - """ - - global _CURRENT_LINE, _LAST_TAG_LINE - _CURRENT_LINE = 1 - _LAST_TAG_LINE = None - - is_standalone = True - open_sections = [] - l_del = def_ldel - r_del = def_rdel - - while template: - literal, template = grab_literal(template, l_del) - - # If the template is completed - if not template: - # Then yield the literal and leave - yield ("literal", literal) - break - - # Do the first check to see if we could be a standalone - is_standalone = l_sa_check(template, literal, is_standalone) - - # Parse the tag - tag, template = parse_tag(template, l_del, r_del) - tag_type, tag_key = tag - - # Special tag logic - - # If we are a set delimiter tag - if tag_type == "set delimiter": - # Then get and set the delimiters - dels = tag_key.strip().split(" ") - l_del, r_del = dels[0], dels[-1] - - # If we are a section tag - elif tag_type in ["section", "inverted section"]: - # Then open a new section - open_sections.append(tag_key) - _LAST_TAG_LINE = _CURRENT_LINE - - # If we are an end tag - elif tag_type == "end": - # Then check to see if the last opened section - # is the same as us - try: - last_section = open_sections.pop() - except IndexError as e: - msg = f'Trying to close tag "{tag_key}"\n' "Looks like it was not opened.\n" f"line {_CURRENT_LINE + 1}" - raise ChevronError(msg) from e - if tag_key != last_section: - # Otherwise we need to complain - msg = ( - f'Trying to close tag "{tag_key}"\n' - f'last open tag is "{last_section}"\n' - f"line {_CURRENT_LINE + 1}" - ) - raise ChevronError(msg) - - # Do the second check to see if we're a standalone - is_standalone = r_sa_check(template, tag_type, is_standalone) - - # Which if we are - if is_standalone: - # Remove the stuff before the newline - template = template.split("\n", 1)[-1] - - # Partials need to keep the spaces on their left - if tag_type != "partial": - # But other tags don't - literal = literal.rstrip(" ") - - # Start yielding - # Ignore literals that are empty - if literal != "": - yield ("literal", literal) - - # Ignore comments and set delimiters - if tag_type not in ["comment", "set delimiter?"]: - yield (tag_type, tag_key) - - # If there are any open sections when we're done - if open_sections: - # Then we need to complain - msg = ( - "Unexpected EOF\n" - f'the tag "{open_sections[-1]}" was never closed\n' - f"was opened at line {_LAST_TAG_LINE}" - ) - raise ChevronError(msg) - - -# -# Helper functions -# - - -def _html_escape(string: str) -> str: - """HTML escape all of these " & < >""" - - html_codes = { - '"': """, - "<": "<", - ">": ">", - } - - # & must be handled first - string = string.replace("&", "&") - for char in html_codes: - string = string.replace(char, html_codes[char]) - return string - - -def _get_key( - key: str, - scopes: Scopes, - warn: bool, - keep: bool, - def_ldel: str, - def_rdel: str, -) -> Any: - """Get a key from the current scope""" - - # If the key is a dot - if key == ".": - # Then just return the current scope - return scopes[0] - - # Loop through the scopes - for scope in scopes: - try: - # Return an empty string if falsy, with two exceptions - # 0 should return 0, and False should return False - if scope in (0, False): - return scope - - # For every dot separated key - for child in key.split("."): - # Return an empty string if falsy, with two exceptions - # 0 should return 0, and False should return False - if scope in (0, False): - return scope - # Move into the scope - try: - # Try subscripting (Normal dictionaries) - scope = cast(Dict[str, Any], scope)[child] - except (TypeError, AttributeError): - try: - scope = getattr(scope, child) - except (TypeError, AttributeError): - # Try as a list - scope = scope[int(child)] # type: ignore - - try: - # This allows for custom falsy data types - # https://github.com/noahmorrison/chevron/issues/35 - if scope._CHEVRON_return_scope_when_falsy: # type: ignore - return scope - except AttributeError: - if scope in (0, False): - return scope - return scope or "" - except (AttributeError, KeyError, IndexError, ValueError): - # We couldn't find the key in the current scope - # We'll try again on the next pass - pass - - # We couldn't find the key in any of the scopes - - if warn: - logger.warn(f"Could not find key '{key}'") - - if keep: - return f"{def_ldel} {key} {def_rdel}" - - return "" - - -def _get_partial(name: str, partials_dict: Mapping[str, str]) -> str: - """Load a partial""" - try: - # Maybe the partial is in the dictionary - return partials_dict[name] - except KeyError: - return "" - - -# -# The main rendering function -# -g_token_cache: Dict[str, List[tuple[str, str]]] = {} - -EMPTY_DICT: MappingProxyType[str, str] = MappingProxyType({}) - - -def render( - template: Union[str, List[tuple[str, str]]] = "", - data: Mapping[str, Any] = EMPTY_DICT, - partials_dict: Mapping[str, str] = EMPTY_DICT, - padding: str = "", - def_ldel: str = "{{", - def_rdel: str = "}}", - scopes: Optional[Scopes] = None, - warn: bool = False, - keep: bool = False, -) -> str: - """Render a mustache template. - - Renders a mustache template with a data scope and inline partial capability. - - Arguments: - - template -- A file-like object or a string containing the template. - - data -- A python dictionary with your data scope. - - partials_path -- The path to where your partials are stored. - If set to None, then partials won't be loaded from the file system - (defaults to '.'). - - partials_ext -- The extension that you want the parser to look for - (defaults to 'mustache'). - - partials_dict -- A python dictionary which will be search for partials - before the filesystem is. {'include': 'foo'} is the same - as a file called include.mustache - (defaults to {}). - - padding -- This is for padding partials, and shouldn't be used - (but can be if you really want to). - - def_ldel -- The default left delimiter - ("{{" by default, as in spec compliant mustache). - - def_rdel -- The default right delimiter - ("}}" by default, as in spec compliant mustache). - - scopes -- The list of scopes that get_key will look through. - - warn -- Log a warning when a template substitution isn't found in the data - - keep -- Keep unreplaced tags when a substitution isn't found in the data. - - - Returns: - - A string containing the rendered template. - """ - - # If the template is a sequence but not derived from a string - if isinstance(template, Sequence) and not isinstance(template, str): - # Then we don't need to tokenize it - # But it does need to be a generator - tokens: Iterator[tuple[str, str]] = (token for token in template) - else: - if template in g_token_cache: - tokens = (token for token in g_token_cache[template]) - else: - # Otherwise make a generator - tokens = tokenize(template, def_ldel, def_rdel) - - output = "" - - if scopes is None: - scopes = [data] - - # Run through the tokens - for tag, key in tokens: - # Set the current scope - current_scope = scopes[0] - - # If we're an end tag - if tag == "end": - # Pop out of the latest scope - del scopes[0] - - # If the current scope is falsy and not the only scope - elif not current_scope and len(scopes) != 1: - if tag in ["section", "inverted section"]: - # Set the most recent scope to a falsy value - scopes.insert(0, False) - - # If we're a literal tag - elif tag == "literal": - # Add padding to the key and add it to the output - output += key.replace("\n", "\n" + padding) - - # If we're a variable tag - elif tag == "variable": - # Add the html escaped key to the output - thing = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel) - if thing is True and key == ".": - # if we've coerced into a boolean by accident - # (inverted tags do this) - # then get the un-coerced object (next in the stack) - thing = scopes[1] - if not isinstance(thing, str): - thing = str(thing) - output += _html_escape(thing) - - # If we're a no html escape tag - elif tag == "no escape": - # Just lookup the key and add it - thing = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel) - if not isinstance(thing, str): - thing = str(thing) - output += thing - - # If we're a section tag - elif tag == "section": - # Get the sections scope - scope = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel) - - # If the scope is a callable (as described in - # https://mustache.github.io/mustache.5.html) - if callable(scope): - # Generate template text from tags - text = "" - tags: List[tuple[str, str]] = [] - for token in tokens: - if token == ("end", key): - break - - tags.append(token) - tag_type, tag_key = token - if tag_type == "literal": - text += tag_key - elif tag_type == "no escape": - text += f"{def_ldel}& {tag_key} {def_rdel}" - else: - text += "{}{} {}{}".format( - def_ldel, - { - "comment": "!", - "section": "#", - "inverted section": "^", - "end": "/", - "partial": ">", - "set delimiter": "=", - "no escape": "&", - "variable": "", - }[tag_type], - tag_key, - def_rdel, - ) - - g_token_cache[text] = tags - - rend = scope( - text, - lambda template, data=None: render( - template, - data={}, - partials_dict=partials_dict, - padding=padding, - def_ldel=def_ldel, - def_rdel=def_rdel, - scopes=data and [data] + scopes or scopes, - warn=warn, - keep=keep, - ), - ) - - output += rend # type: ignore[reportOperatorIssue] - - # If the scope is a sequence, an iterator or generator but not - # derived from a string - elif isinstance(scope, (Sequence, Iterator)) and not isinstance(scope, str): - # Then we need to do some looping - - # Gather up all the tags inside the section - # (And don't be tricked by nested end tags with the same key) - # TODO: This feels like it still has edge cases, no? - tags = [] - tags_with_same_key = 0 - for token in tokens: - if token == ("section", key): - tags_with_same_key += 1 - if token == ("end", key): - tags_with_same_key -= 1 - if tags_with_same_key < 0: - break - tags.append(token) - - # For every item in the scope - for thing in scope: - # Append it as the most recent scope and render - new_scope = [thing] + scopes - rend = render( - template=tags, - scopes=new_scope, - padding=padding, - partials_dict=partials_dict, - def_ldel=def_ldel, - def_rdel=def_rdel, - warn=warn, - keep=keep, - ) - - output += rend - - else: - # Otherwise we're just a scope section - scopes.insert(0, scope) # type: ignore[reportArgumentType] - - # If we're an inverted section - elif tag == "inverted section": - # Add the flipped scope to the scopes - scope = _get_key(key, scopes, warn=warn, keep=keep, def_ldel=def_ldel, def_rdel=def_rdel) - scopes.insert(0, cast(Literal[False], not scope)) - - # If we're a partial - elif tag == "partial": - # Load the partial - partial = _get_partial(key, partials_dict) - - # Find what to pad the partial with - left = output.rpartition("\n")[2] - part_padding = padding - if left.isspace(): - part_padding += left - - # Render the partial - part_out = render( - template=partial, - partials_dict=partials_dict, - def_ldel=def_ldel, - def_rdel=def_rdel, - padding=part_padding, - scopes=scopes, - warn=warn, - keep=keep, - ) - - # If the partial was indented - if left.isspace(): - # then remove the spaces from the end - part_out = part_out.rstrip(" \t") - - # Add the partials output to the output - output += part_out - - return output diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py deleted file mode 100644 index de3c570e5c89..000000000000 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_parsers.py +++ /dev/null @@ -1,156 +0,0 @@ -# ------------------------------------ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# ------------------------------------ -# mypy: disable-error-code="union-attr,return-value" -# pylint: disable=line-too-long,R,consider-using-enumerate,docstring-missing-param,docstring-missing-return,docstring-missing-rtype -import re -import base64 -from pathlib import Path -from typing import Any, Union -from ._core import Prompty -from ._invoker import Invoker, InvokerFactory - - -ROLES = ["assistant", "function", "system", "user"] - - -@InvokerFactory.register_parser("prompty.chat") -class PromptyChatParser(Invoker): - """Prompty Chat Parser""" - - def __init__(self, prompty: Prompty) -> None: - super().__init__(prompty) - self.path = Path(self.prompty.file).parent - - def invoke(self, data: str) -> Any: - return invoke_parser(self.path, data) - - async def invoke_async(self, data: str) -> Any: - """Invoke the Prompty Chat Parser (Async) - - Parameters - ---------- - data : str - The data to parse - - Returns - ------- - str - The parsed data - """ - return self.invoke(data) - - -def _inline_image(path: Union[Path, None], image_item: str) -> str: - """Inline Image - - Parameters - ---------- - image_item : str - The image item to inline - - Returns - ------- - str - The inlined image - """ - # pass through if it's a url or base64 encoded or the path is None - if image_item.startswith("http") or image_item.startswith("data") or path is None: - return image_item - # otherwise, it's a local file - need to base64 encode it - else: - image_path = (path if path is not None else Path(".")) / image_item - with open(image_path, "rb") as f: - base64_image = base64.b64encode(f.read()).decode("utf-8") - - if image_path.suffix == ".png": - return f"data:image/png;base64,{base64_image}" - elif image_path.suffix == ".jpg": - return f"data:image/jpeg;base64,{base64_image}" - elif image_path.suffix == ".jpeg": - return f"data:image/jpeg;base64,{base64_image}" - else: - raise ValueError( - f"Invalid image format {image_path.suffix} - currently only .png and .jpg / .jpeg are supported." - ) - - -def _parse_content(path: Union[Path, None], content: str): - """for parsing inline images - - Parameters - ---------- - content : str - The content to parse - - Returns - ------- - any - The parsed content - """ - # regular expression to parse markdown images - image = r"(?P!\[[^\]]*\])\((?P.*?)(?=\"|\))\)" - matches = re.findall(image, content, flags=re.MULTILINE) - if len(matches) > 0: - content_items = [] - content_chunks = re.split(image, content, flags=re.MULTILINE) - current_chunk = 0 - for i in range(len(content_chunks)): - # image entry - if current_chunk < len(matches) and content_chunks[i] == matches[current_chunk][0]: - content_items.append( - { - "type": "image_url", - "image_url": {"url": _inline_image(path, matches[current_chunk][1].split(" ")[0].strip())}, - } - ) - # second part of image entry - elif current_chunk < len(matches) and content_chunks[i] == matches[current_chunk][1]: - current_chunk += 1 - # text entry - else: - if len(content_chunks[i].strip()) > 0: - content_items.append({"type": "text", "text": content_chunks[i].strip()}) - return content_items - else: - return content - - -def invoke_parser(path: Union[Path, None], data: str) -> Any: - """Invoke the Prompty Chat Parser - - Parameters - ---------- - data : str - The data to parse - - Returns - ------- - str - The parsed data - """ - messages = [] - separator = r"(?i)^\s*#?\s*(" + "|".join(ROLES) + r")\s*:\s*\n" - - # get valid chunks - remove empty items - chunks = [item for item in re.split(separator, data, flags=re.MULTILINE) if len(item.strip()) > 0] - - # if no starter role, then inject system role - if not chunks[0].strip().lower() in ROLES: - chunks.insert(0, "system") - - # if last chunk is role entry, then remove (no content?) - if chunks[-1].strip().lower() in ROLES: - chunks.pop() - - if len(chunks) % 2 != 0: - raise ValueError("Invalid prompt format") - - # create messages - for i in range(0, len(chunks), 2): - role = chunks[i].strip().lower() - content = chunks[i + 1].strip() - messages.append({"role": role, "content": _parse_content(path, content)}) - - return messages diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py deleted file mode 100644 index 14ad4f62b4c1..000000000000 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_patch.py +++ /dev/null @@ -1,124 +0,0 @@ -# ------------------------------------ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# ------------------------------------ -# pylint: disable=line-too-long,R -"""Customize generated code here. - -Follow our quickstart for examples: https://aka.ms/azsdk/python/dpcodegen/python/customize -""" - -import traceback -from pathlib import Path -from typing import Any, Dict, List, Optional -from typing_extensions import Self -from ._core import Prompty -from ._mustache import render -from ._parsers import invoke_parser -from ._prompty_utils import load, prepare -from ._utils import remove_leading_empty_space - - -class PromptTemplate: - """The helper class which takes variant of inputs, e.g. Prompty format or string, and returns the parsed prompt in an array.""" - - @classmethod - def from_prompty(cls, file_path: str) -> Self: - """Initialize a PromptTemplate object from a prompty file. - - :param file_path: The path to the prompty file. - :type file_path: str - :return: The PromptTemplate object. - :rtype: PromptTemplate - """ - if not file_path: - raise ValueError("Please provide file_path") - - # Get the absolute path of the file by `traceback.extract_stack()`, it's "-2" because: - # In the stack, the last function is the current function. - # The second last function is the caller function, which is the root of the file_path. - stack = traceback.extract_stack() - caller = Path(stack[-2].filename) - abs_file_path = Path(caller.parent / Path(file_path)).resolve().absolute() - - prompty = load(str(abs_file_path)) - return cls(prompty=prompty) - - @classmethod - def from_string(cls, prompt_template: str, api: str = "chat", model_name: Optional[str] = None) -> Self: - """Initialize a PromptTemplate object from a message template. - - :param prompt_template: The prompt template string. - :type prompt_template: str - :param api: The API type, e.g. "chat" or "completion". - :type api: str - :param model_name: The model name, e.g. "gpt-4o-mini". - :type model_name: str - :return: The PromptTemplate object. - :rtype: PromptTemplate - """ - return cls( - api=api, - prompt_template=prompt_template, - model_name=model_name, - prompty=None, - ) - - def __init__( - self, - *, - api: str = "chat", - prompty: Optional[Prompty] = None, - prompt_template: Optional[str] = None, - model_name: Optional[str] = None, - ) -> None: - self.prompty = prompty - if self.prompty is not None: - self.model_name = ( - self.prompty.model.configuration["azure_deployment"] - if "azure_deployment" in self.prompty.model.configuration - else None - ) - self.parameters = self.prompty.model.parameters - self._config = {} - elif prompt_template is not None: - self.model_name = model_name - self.parameters = {} - # _config is a dict to hold the internal configuration - self._config = { - "api": api if api is not None else "chat", - "prompt_template": prompt_template, - } - else: - raise ValueError("Please pass valid arguments for PromptTemplate") - - def create_messages(self, data: Optional[Dict[str, Any]] = None, **kwargs) -> List[Dict[str, Any]]: - """Render the prompt template with the given data. - - :param data: The data to render the prompt template with. - :type data: Optional[Dict[str, Any]] - :return: The rendered prompt template. - :rtype: List[Dict[str, Any]] - """ - if data is None: - data = kwargs - - if self.prompty is not None: - parsed = prepare(self.prompty, data) - return parsed - elif "prompt_template" in self._config: - prompt_template = remove_leading_empty_space(self._config["prompt_template"]) - system_prompt_str = render(prompt_template, data) - parsed = invoke_parser(None, system_prompt_str) - return parsed - else: - raise ValueError("Please provide valid prompt template") - - -def patch_sdk(): - """Do not remove from this file. - - `patch_sdk` is a last resort escape hatch that allows you to do customizations - you can't accomplish using the techniques described in - https://aka.ms/azsdk/python/dpcodegen/python/customize - """ diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py deleted file mode 100644 index 5ea38bda6229..000000000000 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_prompty_utils.py +++ /dev/null @@ -1,415 +0,0 @@ -# ------------------------------------ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# ------------------------------------ -# mypy: disable-error-code="assignment" -# pylint: disable=R,docstring-missing-param,docstring-missing-return,docstring-missing-rtype,dangerous-default-value,redefined-outer-name,unused-wildcard-import,wildcard-import,raise-missing-from -import traceback -from pathlib import Path -from typing import Any, Dict, List, Union -from ._tracer import trace -from ._invoker import InvokerFactory -from ._core import ( - ModelSettings, - Prompty, - PropertySettings, - TemplateSettings, - param_hoisting, -) -from ._utils import ( - load_global_config, - load_prompty, -) - -from ._renderers import * -from ._parsers import * - - -@trace(description="Create a headless prompty object for programmatic use.") -def headless( - api: str, - content: Union[str, List[str], dict], - configuration: Dict[str, Any] = {}, - parameters: Dict[str, Any] = {}, - connection: str = "default", -) -> Prompty: - """Create a headless prompty object for programmatic use. - - Parameters - ---------- - api : str - The API to use for the model - content : Union[str, List[str], dict] - The content to process - configuration : Dict[str, Any], optional - The configuration to use, by default {} - parameters : Dict[str, Any], optional - The parameters to use, by default {} - connection : str, optional - The connection to use, by default "default" - - Returns - ------- - Prompty - The headless prompty object - - Example - ------- - >>> import prompty - >>> p = prompty.headless( - api="embedding", - configuration={"type": "azure", "azure_deployment": "text-embedding-ada-002"}, - content="hello world", - ) - >>> emb = prompty.execute(p) - - """ - - # get caller's path (to get relative path for prompty.json) - caller = Path(traceback.extract_stack()[-2].filename) - templateSettings = TemplateSettings(type="NOOP", parser="NOOP") - modelSettings = ModelSettings( - api=api, - configuration=Prompty.normalize( - param_hoisting(configuration, load_global_config(caller.parent, connection)), - caller.parent, - ), - parameters=parameters, - ) - - return Prompty(model=modelSettings, template=templateSettings, content=content) - - -def _load_raw_prompty(attributes: dict, content: str, p: Path, global_config: dict): - if "model" not in attributes: - attributes["model"] = {} - - if "configuration" not in attributes["model"]: - attributes["model"]["configuration"] = global_config - else: - attributes["model"]["configuration"] = param_hoisting( - attributes["model"]["configuration"], - global_config, - ) - - # pull model settings out of attributes - try: - model = ModelSettings(**attributes.pop("model")) - except Exception as e: - raise ValueError(f"Error in model settings: {e}") - - # pull template settings - try: - if "template" in attributes: - t = attributes.pop("template") - if isinstance(t, dict): - template = TemplateSettings(**t) - # has to be a string denoting the type - else: - template = TemplateSettings(type=t, parser="prompty") - else: - template = TemplateSettings(type="mustache", parser="prompty") - except Exception as e: - raise ValueError(f"Error in template loader: {e}") - - # formalize inputs and outputs - if "inputs" in attributes: - try: - inputs = {k: PropertySettings(**v) for (k, v) in attributes.pop("inputs").items()} - except Exception as e: - raise ValueError(f"Error in inputs: {e}") - else: - inputs = {} - if "outputs" in attributes: - try: - outputs = {k: PropertySettings(**v) for (k, v) in attributes.pop("outputs").items()} - except Exception as e: - raise ValueError(f"Error in outputs: {e}") - else: - outputs = {} - - prompty = Prompty( - **attributes, - model=model, - inputs=inputs, - outputs=outputs, - template=template, - content=content, - file=p, - ) - - return prompty - - -@trace(description="Load a prompty file.") -def load(prompty_file: Union[str, Path], configuration: str = "default") -> Prompty: - """Load a prompty file. - - Parameters - ---------- - prompty_file : Union[str, Path] - The path to the prompty file - configuration : str, optional - The configuration to use, by default "default" - - Returns - ------- - Prompty - The loaded prompty object - - Example - ------- - >>> import prompty - >>> p = prompty.load("prompts/basic.prompty") - >>> print(p) - """ - - p = Path(prompty_file) - if not p.is_absolute(): - # get caller's path (take into account trace frame) - caller = Path(traceback.extract_stack()[-3].filename) - p = Path(caller.parent / p).resolve().absolute() - - # load dictionary from prompty file - matter = load_prompty(p) - - attributes = matter["attributes"] - content = matter["body"] - - # normalize attribute dictionary resolve keys and files - attributes = Prompty.normalize(attributes, p.parent) - - # load global configuration - global_config = Prompty.normalize(load_global_config(p.parent, configuration), p.parent) - - prompty = _load_raw_prompty(attributes, content, p, global_config) - - # recursive loading of base prompty - if "base" in attributes: - # load the base prompty from the same directory as the current prompty - base = load(p.parent / attributes["base"]) - prompty = Prompty.hoist_base_prompty(prompty, base) - - return prompty - - -@trace(description="Prepare the inputs for the prompt.") -def prepare( - prompt: Prompty, - inputs: Dict[str, Any] = {}, -): - """Prepare the inputs for the prompt. - - Parameters - ---------- - prompt : Prompty - The prompty object - inputs : Dict[str, Any], optional - The inputs to the prompt, by default {} - - Returns - ------- - dict - The prepared and hidrated template shaped to the LLM model - - Example - ------- - >>> import prompty - >>> p = prompty.load("prompts/basic.prompty") - >>> inputs = {"name": "John Doe"} - >>> content = prompty.prepare(p, inputs) - """ - inputs = param_hoisting(inputs, prompt.sample) - - render = InvokerFactory.run_renderer(prompt, inputs, prompt.content) - result = InvokerFactory.run_parser(prompt, render) - - return result - - -@trace(description="Prepare the inputs for the prompt.") -async def prepare_async( - prompt: Prompty, - inputs: Dict[str, Any] = {}, -): - """Prepare the inputs for the prompt. - - Parameters - ---------- - prompt : Prompty - The prompty object - inputs : Dict[str, Any], optional - The inputs to the prompt, by default {} - - Returns - ------- - dict - The prepared and hidrated template shaped to the LLM model - - Example - ------- - >>> import prompty - >>> p = prompty.load("prompts/basic.prompty") - >>> inputs = {"name": "John Doe"} - >>> content = await prompty.prepare_async(p, inputs) - """ - inputs = param_hoisting(inputs, prompt.sample) - - render = await InvokerFactory.run_renderer_async(prompt, inputs, prompt.content) - result = await InvokerFactory.run_parser_async(prompt, render) - - return result - - -@trace(description="Run the prepared Prompty content against the model.") -def run( - prompt: Prompty, - content: Union[dict, list, str], - configuration: Dict[str, Any] = {}, - parameters: Dict[str, Any] = {}, - raw: bool = False, -): - """Run the prepared Prompty content. - - Parameters - ---------- - prompt : Prompty - The prompty object - content : Union[dict, list, str] - The content to process - configuration : Dict[str, Any], optional - The configuration to use, by default {} - parameters : Dict[str, Any], optional - The parameters to use, by default {} - raw : bool, optional - Whether to skip processing, by default False - - Returns - ------- - Any - The result of the prompt - - Example - ------- - >>> import prompty - >>> p = prompty.load("prompts/basic.prompty") - >>> inputs = {"name": "John Doe"} - >>> content = prompty.prepare(p, inputs) - >>> result = prompty.run(p, content) - """ - - if configuration != {}: - prompt.model.configuration = param_hoisting(configuration, prompt.model.configuration) - - if parameters != {}: - prompt.model.parameters = param_hoisting(parameters, prompt.model.parameters) - - result = InvokerFactory.run_executor(prompt, content) - if not raw: - result = InvokerFactory.run_processor(prompt, result) - - return result - - -@trace(description="Run the prepared Prompty content against the model.") -async def run_async( - prompt: Prompty, - content: Union[dict, list, str], - configuration: Dict[str, Any] = {}, - parameters: Dict[str, Any] = {}, - raw: bool = False, -): - """Run the prepared Prompty content. - - Parameters - ---------- - prompt : Prompty - The prompty object - content : Union[dict, list, str] - The content to process - configuration : Dict[str, Any], optional - The configuration to use, by default {} - parameters : Dict[str, Any], optional - The parameters to use, by default {} - raw : bool, optional - Whether to skip processing, by default False - - Returns - ------- - Any - The result of the prompt - - Example - ------- - >>> import prompty - >>> p = prompty.load("prompts/basic.prompty") - >>> inputs = {"name": "John Doe"} - >>> content = await prompty.prepare_async(p, inputs) - >>> result = await prompty.run_async(p, content) - """ - - if configuration != {}: - prompt.model.configuration = param_hoisting(configuration, prompt.model.configuration) - - if parameters != {}: - prompt.model.parameters = param_hoisting(parameters, prompt.model.parameters) - - result = await InvokerFactory.run_executor_async(prompt, content) - if not raw: - result = await InvokerFactory.run_processor_async(prompt, result) - - return result - - -@trace(description="Execute a prompty") -def execute( - prompt: Union[str, Prompty], - configuration: Dict[str, Any] = {}, - parameters: Dict[str, Any] = {}, - inputs: Dict[str, Any] = {}, - raw: bool = False, - config_name: str = "default", -): - """Execute a prompty. - - Parameters - ---------- - prompt : Union[str, Prompty] - The prompty object or path to the prompty file - configuration : Dict[str, Any], optional - The configuration to use, by default {} - parameters : Dict[str, Any], optional - The parameters to use, by default {} - inputs : Dict[str, Any], optional - The inputs to the prompt, by default {} - raw : bool, optional - Whether to skip processing, by default False - connection : str, optional - The connection to use, by default "default" - - Returns - ------- - Any - The result of the prompt - - Example - ------- - >>> import prompty - >>> inputs = {"name": "John Doe"} - >>> result = prompty.execute("prompts/basic.prompty", inputs=inputs) - """ - if isinstance(prompt, str): - path = Path(prompt) - if not path.is_absolute(): - # get caller's path (take into account trace frame) - caller = Path(traceback.extract_stack()[-3].filename) - path = Path(caller.parent / path).resolve().absolute() - prompt = load(path, config_name) - - # prepare content - content = prepare(prompt, inputs) - - # run LLM model - result = run(prompt, content, configuration, parameters, raw) - - return result diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_renderers.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_renderers.py deleted file mode 100644 index 0d682a7fe151..000000000000 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_renderers.py +++ /dev/null @@ -1,30 +0,0 @@ -# ------------------------------------ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# ------------------------------------ -# mypy: disable-error-code="union-attr,assignment,arg-type" -from pathlib import Path -from ._core import Prompty -from ._invoker import Invoker, InvokerFactory -from ._mustache import render - - -@InvokerFactory.register_renderer("mustache") -class MustacheRenderer(Invoker): - """Render a mustache template.""" - - def __init__(self, prompty: Prompty) -> None: - super().__init__(prompty) - self.templates = {} - cur_prompt = self.prompty - while cur_prompt: - self.templates[Path(cur_prompt.file).name] = cur_prompt.content - cur_prompt = cur_prompt.basePrompty - self.name = Path(self.prompty.file).name - - def invoke(self, data: str) -> str: - generated = render(self.prompty.content, data) # type: ignore - return generated - - async def invoke_async(self, data: str) -> str: - return self.invoke(data) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_tracer.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_tracer.py deleted file mode 100644 index 24f800b465f4..000000000000 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_tracer.py +++ /dev/null @@ -1,316 +0,0 @@ -# ------------------------------------ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# ------------------------------------ -# mypy: disable-error-code="union-attr,arg-type,misc,return-value,assignment,func-returns-value" -# pylint: disable=R,redefined-outer-name,bare-except,unspecified-encoding -import os -import json -import inspect -import traceback -import importlib -import contextlib -from pathlib import Path -from numbers import Number -from datetime import datetime -from functools import wraps, partial -from typing import Any, Callable, Dict, Iterator, List, Union - - -# clean up key value pairs for sensitive values -def sanitize(key: str, value: Any) -> Any: - if isinstance(value, str) and any([s in key.lower() for s in ["key", "token", "secret", "password", "credential"]]): - return len(str(value)) * "*" - - if isinstance(value, dict): - return {k: sanitize(k, v) for k, v in value.items()} - - return value - - -class Tracer: - _tracers: Dict[str, Callable[[str], Iterator[Callable[[str, Any], None]]]] = {} - - @classmethod - def add(cls, name: str, tracer: Callable[[str], Iterator[Callable[[str, Any], None]]]) -> None: - cls._tracers[name] = tracer - - @classmethod - def clear(cls) -> None: - cls._tracers = {} - - @classmethod - @contextlib.contextmanager - def start(cls, name: str) -> Iterator[Callable[[str, Any], None]]: - with contextlib.ExitStack() as stack: - traces: List[Any] = [stack.enter_context(tracer(name)) for tracer in cls._tracers.values()] # type: ignore - yield lambda key, value: [ # type: ignore - # normalize and sanitize any trace values - trace(key, sanitize(key, to_dict(value))) - for trace in traces - ] - - -def to_dict(obj: Any) -> Union[Dict[str, Any], List[Dict[str, Any]], str, Number, bool]: - # simple json types - if isinstance(obj, str) or isinstance(obj, Number) or isinstance(obj, bool): - return obj - - # datetime - if isinstance(obj, datetime): - return obj.isoformat() - - # safe Prompty obj serialization - if type(obj).__name__ == "Prompty": - return obj.to_safe_dict() - - # safe PromptyStream obj serialization - if type(obj).__name__ == "PromptyStream": - return "PromptyStream" - - if type(obj).__name__ == "AsyncPromptyStream": - return "AsyncPromptyStream" - - # recursive list and dict - if isinstance(obj, List): - return [to_dict(item) for item in obj] # type: ignore - - if isinstance(obj, Dict): - return {k: v if isinstance(v, str) else to_dict(v) for k, v in obj.items()} - - if isinstance(obj, Path): - return str(obj) - - # cast to string otherwise... - return str(obj) - - -def _name(func: Callable, args): - if hasattr(func, "__qualname__"): - signature = f"{func.__module__}.{func.__qualname__}" - else: - signature = f"{func.__module__}.{func.__name__}" - - # core invoker gets special treatment prompty.invoker.Invoker - core_invoker = signature.startswith("prompty.invoker.Invoker.run") - if core_invoker: - name = type(args[0]).__name__ - if signature.endswith("async"): - signature = f"{args[0].__module__}.{args[0].__class__.__name__}.invoke_async" - else: - signature = f"{args[0].__module__}.{args[0].__class__.__name__}.invoke" - else: - name = func.__name__ - - return name, signature - - -def _inputs(func: Callable, args, kwargs) -> dict: - ba = inspect.signature(func).bind(*args, **kwargs) - ba.apply_defaults() - - inputs = {k: to_dict(v) for k, v in ba.arguments.items() if k != "self"} - - return inputs - - -def _results(result: Any) -> Union[Dict, List[Dict], str, Number, bool]: - return to_dict(result) if result is not None else "None" - - -def _trace_sync(func: Union[Callable, None] = None, **okwargs: Any) -> Callable: - - @wraps(func) # type: ignore - def wrapper(*args, **kwargs): - name, signature = _name(func, args) # type: ignore - with Tracer.start(name) as trace: - trace("signature", signature) - - # support arbitrary keyword - # arguments for trace decorator - for k, v in okwargs.items(): - trace(k, to_dict(v)) - - inputs = _inputs(func, args, kwargs) # type: ignore - trace("inputs", inputs) - - try: - result = func(*args, **kwargs) # type: ignore - trace("result", _results(result)) - except Exception as e: - trace( - "result", - { - "exception": { - "type": type(e), - "traceback": (traceback.format_tb(tb=e.__traceback__) if e.__traceback__ else None), - "message": str(e), - "args": to_dict(e.args), - } - }, - ) - raise e - - return result - - return wrapper - - -def _trace_async(func: Union[Callable, None] = None, **okwargs: Any) -> Callable: - - @wraps(func) # type: ignore - async def wrapper(*args, **kwargs): - name, signature = _name(func, args) # type: ignore - with Tracer.start(name) as trace: - trace("signature", signature) - - # support arbitrary keyword - # arguments for trace decorator - for k, v in okwargs.items(): - trace(k, to_dict(v)) - - inputs = _inputs(func, args, kwargs) # type: ignore - trace("inputs", inputs) - try: - result = await func(*args, **kwargs) # type: ignore - trace("result", _results(result)) - except Exception as e: - trace( - "result", - { - "exception": { - "type": type(e), - "traceback": (traceback.format_tb(tb=e.__traceback__) if e.__traceback__ else None), - "message": str(e), - "args": to_dict(e.args), - } - }, - ) - raise e - - return result - - return wrapper - - -def trace(func: Union[Callable, None] = None, **kwargs: Any) -> Callable: - if func is None: - return partial(trace, **kwargs) - wrapped_method = _trace_async if inspect.iscoroutinefunction(func) else _trace_sync - return wrapped_method(func, **kwargs) - - -class PromptyTracer: - def __init__(self, output_dir: Union[str, None] = None) -> None: - if output_dir: - self.output = Path(output_dir).resolve().absolute() - else: - self.output = Path(Path(os.getcwd()) / ".runs").resolve().absolute() - - if not self.output.exists(): - self.output.mkdir(parents=True, exist_ok=True) - - self.stack: List[Dict[str, Any]] = [] - - @contextlib.contextmanager - def tracer(self, name: str) -> Iterator[Callable[[str, Any], None]]: - try: - self.stack.append({"name": name}) - frame = self.stack[-1] - frame["__time"] = { - "start": datetime.now(), - } - - def add(key: str, value: Any) -> None: - if key not in frame: - frame[key] = value - # multiple values creates list - else: - if isinstance(frame[key], list): - frame[key].append(value) - else: - frame[key] = [frame[key], value] - - yield add - finally: - frame = self.stack.pop() - start: datetime = frame["__time"]["start"] - end: datetime = datetime.now() - - # add duration to frame - frame["__time"] = { - "start": start.strftime("%Y-%m-%dT%H:%M:%S.%f"), - "end": end.strftime("%Y-%m-%dT%H:%M:%S.%f"), - "duration": int((end - start).total_seconds() * 1000), - } - - # hoist usage to parent frame - if "result" in frame and isinstance(frame["result"], dict): - if "usage" in frame["result"]: - frame["__usage"] = self.hoist_item( - frame["result"]["usage"], - frame["__usage"] if "__usage" in frame else {}, - ) - - # streamed results may have usage as well - if "result" in frame and isinstance(frame["result"], list): - for result in frame["result"]: - if isinstance(result, dict) and "usage" in result and isinstance(result["usage"], dict): - frame["__usage"] = self.hoist_item( - result["usage"], - frame["__usage"] if "__usage" in frame else {}, - ) - - # add any usage frames from below - if "__frames" in frame: - for child in frame["__frames"]: - if "__usage" in child: - frame["__usage"] = self.hoist_item( - child["__usage"], - frame["__usage"] if "__usage" in frame else {}, - ) - - # if stack is empty, dump the frame - if len(self.stack) == 0: - self.write_trace(frame) - # otherwise, append the frame to the parent - else: - if "__frames" not in self.stack[-1]: - self.stack[-1]["__frames"] = [] - self.stack[-1]["__frames"].append(frame) - - def hoist_item(self, src: Dict[str, Any], cur: Dict[str, Any]) -> Dict[str, Any]: - for key, value in src.items(): - if value is None or isinstance(value, list) or isinstance(value, dict): - continue - try: - if key not in cur: - cur[key] = value - else: - cur[key] += value - except: - continue - - return cur - - def write_trace(self, frame: Dict[str, Any]) -> None: - trace_file = self.output / f"{frame['name']}.{datetime.now().strftime('%Y%m%d.%H%M%S')}.tracy" - - v = importlib.metadata.version("prompty") # type: ignore - enriched_frame = { - "runtime": "python", - "version": v, - "trace": frame, - } - - with open(trace_file, "w") as f: - json.dump(enriched_frame, f, indent=4) - - -@contextlib.contextmanager -def console_tracer(name: str) -> Iterator[Callable[[str, Any], None]]: - try: - print(f"Starting {name}") - yield lambda key, value: print(f"{key}:\n{json.dumps(to_dict(value), indent=4)}") - finally: - print(f"Ending {name}") diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py b/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py deleted file mode 100644 index 22f284180ee1..000000000000 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/prompts/_utils.py +++ /dev/null @@ -1,100 +0,0 @@ -# ------------------------------------ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# ------------------------------------ -# mypy: disable-error-code="import-untyped,return-value" -# pylint: disable=line-too-long,R,wrong-import-order,global-variable-not-assigned) -import json -import os -import re -import sys -from typing import Any, Dict -from pathlib import Path - - -_yaml_regex = re.compile( - r"^\s*" + r"(?:---|\+\+\+)" + r"(.*?)" + r"(?:---|\+\+\+)" + r"\s*(.+)$", - re.S | re.M, -) - - -def load_text(file_path, encoding="utf-8"): - with open(file_path, "r", encoding=encoding) as file: - return file.read() - - -def load_json(file_path, encoding="utf-8"): - return json.loads(load_text(file_path, encoding=encoding)) - - -def load_global_config(prompty_path: Path = Path.cwd(), configuration: str = "default") -> Dict[str, Any]: - prompty_config_path = prompty_path.joinpath("prompty.json") - if os.path.exists(prompty_config_path): - c = load_json(prompty_config_path) - if configuration in c: - return c[configuration] - else: - raise ValueError(f'Item "{configuration}" not found in "{prompty_config_path}"') - else: - return {} - - -def load_prompty(file_path, encoding="utf-8") -> Dict[str, Any]: - contents = load_text(file_path, encoding=encoding) - return parse(contents) - - -def parse(contents): - try: - import yaml # type: ignore - except ImportError as exc: - raise ImportError("Please install pyyaml to use this function. Run `pip install pyyaml`.") from exc - - global _yaml_regex - - fmatter = "" - body = "" - result = _yaml_regex.search(contents) - - if result: - fmatter = result.group(1) - body = result.group(2) - return { - "attributes": yaml.load(fmatter, Loader=yaml.SafeLoader), - "body": body, - "frontmatter": fmatter, - } - - -def remove_leading_empty_space(multiline_str: str) -> str: - """ - Processes a multiline string by: - 1. Removing empty lines - 2. Finding the minimum leading spaces - 3. Indenting all lines to the minimum level - - :param multiline_str: The input multiline string. - :type multiline_str: str - :return: The processed multiline string. - :rtype: str - """ - lines = multiline_str.splitlines() - start_index = 0 - while start_index < len(lines) and lines[start_index].strip() == "": - start_index += 1 - - # Find the minimum number of leading spaces - min_spaces = sys.maxsize - for line in lines[start_index:]: - if len(line.strip()) == 0: - continue - spaces = len(line) - len(line.lstrip()) - spaces += line.lstrip().count("\t") * 2 # Count tabs as 2 spaces - min_spaces = min(min_spaces, spaces) - - # Remove leading spaces and indent to the minimum level - processed_lines = [] - for line in lines[start_index:]: - processed_lines.append(line[min_spaces:]) - - return "\n".join(processed_lines) diff --git a/sdk/ai/azure-ai-inference/azure/ai/inference/tracing.py b/sdk/ai/azure-ai-inference/azure/ai/inference/tracing.py deleted file mode 100644 index f7937a99074a..000000000000 --- a/sdk/ai/azure-ai-inference/azure/ai/inference/tracing.py +++ /dev/null @@ -1,850 +0,0 @@ -# ------------------------------------ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# ------------------------------------ -import copy -from enum import Enum -import functools -import json -import importlib -import logging -import os -from time import time_ns -from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple, Union -from urllib.parse import urlparse - -# pylint: disable = no-name-in-module -from azure.core import CaseInsensitiveEnumMeta # type: ignore -from azure.core.settings import settings -from . import models as _models - -try: - # pylint: disable = no-name-in-module - from azure.core.tracing import AbstractSpan, SpanKind # type: ignore - from opentelemetry.trace import StatusCode, Span - - _tracing_library_available = True -except ModuleNotFoundError: - - _tracing_library_available = False - - -__all__ = [ - "AIInferenceInstrumentor", -] - - -_inference_traces_enabled: bool = False -_trace_inference_content: bool = False -_INFERENCE_GEN_AI_SYSTEM_NAME = "az.ai.inference" - - -class TraceType(str, Enum, metaclass=CaseInsensitiveEnumMeta): # pylint: disable=C4747 - """An enumeration class to represent different types of traces.""" - - INFERENCE = "Inference" - - -class AIInferenceInstrumentor: - """ - A class for managing the trace instrumentation of AI Inference. - - This class allows enabling or disabling tracing for AI Inference. - and provides functionality to check whether instrumentation is active. - - """ - - def __init__(self): - if not _tracing_library_available: - raise ModuleNotFoundError( - "Azure Core Tracing Opentelemetry is not installed. " - "Please install it using 'pip install azure-core-tracing-opentelemetry'" - ) - # In the future we could support different versions from the same library - # and have a parameter that specifies the version to use. - self._impl = _AIInferenceInstrumentorPreview() - - def instrument(self, enable_content_recording: Optional[bool] = None) -> None: - """ - Enable trace instrumentation for AI Inference. - - :param enable_content_recording: Whether content recording is enabled as part - of the traces or not. Content in this context refers to chat message content - and function call tool related function names, function parameter names and - values. True will enable content recording, False will disable it. If no value - s provided, then the value read from environment variable - AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED is used. If the environment variable - is not found, then the value will default to False. Please note that successive calls - to instrument will always apply the content recording value provided with the most - recent call to instrument (including applying the environment variable if no value is - provided and defaulting to false if the environment variable is not found), even if - instrument was already previously called without uninstrument being called in between - the instrument calls. - - :type enable_content_recording: bool, optional - """ - self._impl.instrument(enable_content_recording=enable_content_recording) - - def uninstrument(self) -> None: - """ - Disable trace instrumentation for AI Inference. - - Raises: - RuntimeError: If instrumentation is not currently enabled. - - This method removes any active instrumentation, stopping the tracing - of AI Inference. - """ - self._impl.uninstrument() - - def is_instrumented(self) -> bool: - """ - Check if trace instrumentation for AI Inference is currently enabled. - - :return: True if instrumentation is active, False otherwise. - :rtype: bool - """ - return self._impl.is_instrumented() - - def is_content_recording_enabled(self) -> bool: - """ - This function gets the content recording value. - - :return: A bool value indicating whether content recording is enabled. - :rtype: bool - """ - return self._impl.is_content_recording_enabled() - - -class _AIInferenceInstrumentorPreview: - """ - A class for managing the trace instrumentation of AI Inference. - - This class allows enabling or disabling tracing for AI Inference. - and provides functionality to check whether instrumentation is active. - """ - - def _str_to_bool(self, s): - if s is None: - return False - return str(s).lower() == "true" - - def instrument(self, enable_content_recording: Optional[bool] = None): - """ - Enable trace instrumentation for AI Inference. - - :param enable_content_recording: Whether content recording is enabled as part - of the traces or not. Content in this context refers to chat message content - and function call tool related function names, function parameter names and - values. True will enable content recording, False will disable it. If no value - is provided, then the value read from environment variable - AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED is used. If the environment variable - is not found, then the value will default to False. - - :type enable_content_recording: bool, optional - """ - if enable_content_recording is None: - var_value = os.environ.get("AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED") - enable_content_recording = self._str_to_bool(var_value) - if not self.is_instrumented(): - self._instrument_inference(enable_content_recording) - else: - self._set_content_recording_enabled(enable_content_recording=enable_content_recording) - - def uninstrument(self): - """ - Disable trace instrumentation for AI Inference. - - This method removes any active instrumentation, stopping the tracing - of AI Inference. - """ - if self.is_instrumented(): - self._uninstrument_inference() - - def is_instrumented(self): - """ - Check if trace instrumentation for AI Inference is currently enabled. - - :return: True if instrumentation is active, False otherwise. - :rtype: bool - """ - return self._is_instrumented() - - def set_content_recording_enabled(self, enable_content_recording: bool = False) -> None: - """This function sets the content recording value. - - :param enable_content_recording: Indicates whether tracing of message content should be enabled. - This also controls whether function call tool function names, - parameter names and parameter values are traced. - :type enable_content_recording: bool - """ - self._set_content_recording_enabled(enable_content_recording=enable_content_recording) - - def is_content_recording_enabled(self) -> bool: - """This function gets the content recording value. - - :return: A bool value indicating whether content tracing is enabled. - :rtype bool - """ - return self._is_content_recording_enabled() - - def _set_attributes(self, span: "AbstractSpan", *attrs: Tuple[str, Any]) -> None: - for attr in attrs: - key, value = attr - if value is not None: - span.add_attribute(key, value) - - def _add_request_chat_message_events(self, span: "AbstractSpan", **kwargs: Any) -> int: - timestamp = 0 - for message in kwargs.get("messages", []): - try: - message = message.as_dict() - except AttributeError: - pass - - if message.get("role"): - timestamp = self._record_event( - span, - f"gen_ai.{message.get('role')}.message", - { - "gen_ai.system": _INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(message), - }, - timestamp, - ) - - return timestamp - - def _parse_url(self, url): - parsed = urlparse(url) - server_address = parsed.hostname - port = parsed.port - return server_address, port - - def _add_request_chat_attributes(self, span: "AbstractSpan", *args: Any, **kwargs: Any) -> None: - client = args[0] - endpoint = client._config.endpoint # pylint: disable=protected-access - server_address, port = self._parse_url(endpoint) - model = "chat" - if kwargs.get("model") is not None: - model_value = kwargs.get("model") - if model_value is not None: - model = model_value - - self._set_attributes( - span, - ("gen_ai.operation.name", "chat"), - ("gen_ai.system", _INFERENCE_GEN_AI_SYSTEM_NAME), - ("gen_ai.request.model", model), - ("gen_ai.request.max_tokens", kwargs.get("max_tokens")), - ("gen_ai.request.temperature", kwargs.get("temperature")), - ("gen_ai.request.top_p", kwargs.get("top_p")), - ("server.address", server_address), - ) - if port is not None and port != 443: - span.add_attribute("server.port", port) - - def _remove_function_call_names_and_arguments(self, tool_calls: list) -> list: - tool_calls_copy = copy.deepcopy(tool_calls) - for tool_call in tool_calls_copy: - if "function" in tool_call: - if "name" in tool_call["function"]: - del tool_call["function"]["name"] - if "arguments" in tool_call["function"]: - del tool_call["function"]["arguments"] - if not tool_call["function"]: - del tool_call["function"] - return tool_calls_copy - - def _get_finish_reasons(self, result) -> Optional[List[str]]: - if hasattr(result, "choices") and result.choices: - finish_reasons: List[str] = [] - for choice in result.choices: - finish_reason = getattr(choice, "finish_reason", None) - - if finish_reason is None: - # If finish_reason is None, default to "none" - finish_reasons.append("none") - elif hasattr(finish_reason, "value"): - # If finish_reason has a 'value' attribute (i.e., it's an enum), use it - finish_reasons.append(finish_reason.value) - elif isinstance(finish_reason, str): - # If finish_reason is a string, use it directly - finish_reasons.append(finish_reason) - else: - # Default to "none" - finish_reasons.append("none") - - return finish_reasons - return None - - def _get_finish_reason_for_choice(self, choice): - finish_reason = getattr(choice, "finish_reason", None) - if finish_reason is not None: - return finish_reason.value - - return "none" - - def _add_response_chat_message_events( - self, span: "AbstractSpan", result: _models.ChatCompletions, last_event_timestamp_ns: int - ) -> None: - for choice in result.choices: - attributes = {} - if _trace_inference_content: - full_response: Dict[str, Any] = { - "message": {"content": choice.message.content}, - "finish_reason": self._get_finish_reason_for_choice(choice), - "index": choice.index, - } - if choice.message.tool_calls: - full_response["message"]["tool_calls"] = [tool.as_dict() for tool in choice.message.tool_calls] - attributes = { - "gen_ai.system": _INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(full_response), - } - else: - response: Dict[str, Any] = { - "finish_reason": self._get_finish_reason_for_choice(choice), - "index": choice.index, - } - if choice.message.tool_calls: - response["message"] = {} - tool_calls_function_names_and_arguments_removed = self._remove_function_call_names_and_arguments( - choice.message.tool_calls - ) - response["message"]["tool_calls"] = [ - tool.as_dict() for tool in tool_calls_function_names_and_arguments_removed - ] - - attributes = { - "gen_ai.system": _INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(response), - } - last_event_timestamp_ns = self._record_event(span, "gen_ai.choice", attributes, last_event_timestamp_ns) - - def _add_response_chat_attributes( - self, - span: "AbstractSpan", - result: Union[_models.ChatCompletions, _models.StreamingChatCompletionsUpdate], - ) -> None: - self._set_attributes( - span, - ("gen_ai.response.id", result.id), - ("gen_ai.response.model", result.model), - ( - "gen_ai.usage.input_tokens", - (result.usage.prompt_tokens if hasattr(result, "usage") and result.usage else None), - ), - ( - "gen_ai.usage.output_tokens", - (result.usage.completion_tokens if hasattr(result, "usage") and result.usage else None), - ), - ) - finish_reasons = self._get_finish_reasons(result) - if not finish_reasons is None: - span.add_attribute("gen_ai.response.finish_reasons", finish_reasons) # type: ignore - - def _add_request_details(self, span: "AbstractSpan", args: Any, kwargs: Any) -> int: - self._add_request_chat_attributes(span, *args, **kwargs) - if _trace_inference_content: - return self._add_request_chat_message_events(span, **kwargs) - return 0 - - def _add_response_details(self, span: "AbstractSpan", result: object, last_event_timestamp_ns: int) -> None: - if isinstance(result, _models.ChatCompletions): - self._add_response_chat_attributes(span, result) - self._add_response_chat_message_events(span, result, last_event_timestamp_ns) - # TODO add more models here - - def _accumulate_response(self, item, accumulate: Dict[str, Any]) -> None: - if item.finish_reason: - accumulate["finish_reason"] = item.finish_reason - if item.index: - accumulate["index"] = item.index - if item.delta.content: - accumulate.setdefault("message", {}) - accumulate["message"].setdefault("content", "") - accumulate["message"]["content"] += item.delta.content - if item.delta.tool_calls: - accumulate.setdefault("message", {}) - accumulate["message"].setdefault("tool_calls", []) - if item.delta.tool_calls is not None: - for tool_call in item.delta.tool_calls: - if tool_call.id: - accumulate["message"]["tool_calls"].append( - { - "id": tool_call.id, - "type": "", - "function": {"name": "", "arguments": ""}, - } - ) - if tool_call.function: - accumulate["message"]["tool_calls"][-1]["type"] = "function" - if tool_call.function and tool_call.function.name: - accumulate["message"]["tool_calls"][-1]["function"]["name"] = tool_call.function.name - if tool_call.function and tool_call.function.arguments: - accumulate["message"]["tool_calls"][-1]["function"]["arguments"] += tool_call.function.arguments - - def _accumulate_async_streaming_response(self, item, accumulate: Dict[str, Any]) -> None: - if not "choices" in item: - return - if "finish_reason" in item["choices"][0] and item["choices"][0]["finish_reason"]: - accumulate["finish_reason"] = item["choices"][0]["finish_reason"] - if "index" in item["choices"][0] and item["choices"][0]["index"]: - accumulate["index"] = item["choices"][0]["index"] - if not "delta" in item["choices"][0]: - return - if "content" in item["choices"][0]["delta"] and item["choices"][0]["delta"]["content"]: - accumulate.setdefault("message", {}) - accumulate["message"].setdefault("content", "") - accumulate["message"]["content"] += item["choices"][0]["delta"]["content"] - if "tool_calls" in item["choices"][0]["delta"] and item["choices"][0]["delta"]["tool_calls"]: - accumulate.setdefault("message", {}) - accumulate["message"].setdefault("tool_calls", []) - if item["choices"][0]["delta"]["tool_calls"] is not None: - for tool_call in item["choices"][0]["delta"]["tool_calls"]: - if tool_call.id: - accumulate["message"]["tool_calls"].append( - { - "id": tool_call.id, - "type": "", - "function": {"name": "", "arguments": ""}, - } - ) - if tool_call.function: - accumulate["message"]["tool_calls"][-1]["type"] = "function" - if tool_call.function and tool_call.function.name: - accumulate["message"]["tool_calls"][-1]["function"]["name"] = tool_call.function.name - if tool_call.function and tool_call.function.arguments: - accumulate["message"]["tool_calls"][-1]["function"]["arguments"] += tool_call.function.arguments - - def _wrapped_stream( - self, stream_obj: _models.StreamingChatCompletions, span: "AbstractSpan", previous_event_timestamp: int - ) -> _models.StreamingChatCompletions: - class StreamWrapper(_models.StreamingChatCompletions): - def __init__(self, stream_obj, instrumentor): - super().__init__(stream_obj._response) - self._instrumentor = instrumentor - - def __iter__( # pyright: ignore [reportIncompatibleMethodOverride] - self, - ) -> Iterator[_models.StreamingChatCompletionsUpdate]: - accumulate: Dict[str, Any] = {} - try: - chunk = None - for chunk in stream_obj: - for item in chunk.choices: - self._instrumentor._accumulate_response(item, accumulate) - yield chunk - - if chunk is not None: - self._instrumentor._add_response_chat_attributes(span, chunk) - - except Exception as exc: - # Set the span status to error - if isinstance(span.span_instance, Span): # pyright: ignore [reportPossiblyUnboundVariable] - span.span_instance.set_status( - StatusCode.ERROR, # pyright: ignore [reportPossiblyUnboundVariable] - description=str(exc), - ) - module = exc.__module__ if hasattr(exc, "__module__") and exc.__module__ != "builtins" else "" - error_type = f"{module}.{type(exc).__name__}" if module else type(exc).__name__ - self._instrumentor._set_attributes(span, ("error.type", error_type)) - raise - - finally: - if stream_obj._done is False: - if accumulate.get("finish_reason") is None: - accumulate["finish_reason"] = "error" - else: - # Only one choice expected with streaming - accumulate["index"] = 0 - # Delete message if content tracing is not enabled - if not _trace_inference_content: - if "message" in accumulate: - if "content" in accumulate["message"]: - del accumulate["message"]["content"] - if not accumulate["message"]: - del accumulate["message"] - if "message" in accumulate: - if "tool_calls" in accumulate["message"]: - tool_calls_function_names_and_arguments_removed = ( - self._instrumentor._remove_function_call_names_and_arguments( - accumulate["message"]["tool_calls"] - ) - ) - accumulate["message"]["tool_calls"] = list( - tool_calls_function_names_and_arguments_removed - ) - attributes = { - "gen_ai.system": _INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(accumulate), - } - self._instrumentor._record_event(span, "gen_ai.choice", attributes, previous_event_timestamp) - span.finish() - - return StreamWrapper(stream_obj, self) - - def _async_wrapped_stream( - self, stream_obj: _models.AsyncStreamingChatCompletions, span: "AbstractSpan", last_event_timestamp_ns: int - ) -> _models.AsyncStreamingChatCompletions: - class AsyncStreamWrapper(_models.AsyncStreamingChatCompletions): - def __init__(self, stream_obj, instrumentor, span, last_event_timestamp_ns): - super().__init__(stream_obj._response) - self._instrumentor = instrumentor - self._accumulate: Dict[str, Any] = {} - self._stream_obj = stream_obj - self.span = span - self._last_result = None - self._last_event_timestamp_ns = last_event_timestamp_ns - - async def __anext__(self) -> "_models.StreamingChatCompletionsUpdate": - try: - result = await super().__anext__() - self._instrumentor._accumulate_async_streaming_response( # pylint: disable=protected-access, line-too-long # pyright: ignore [reportFunctionMemberAccess] - result, self._accumulate - ) - self._last_result = result - except StopAsyncIteration as exc: - self._trace_stream_content() - raise exc - return result - - def _trace_stream_content(self) -> None: - if self._last_result: - self._instrumentor._add_response_chat_attributes( # pylint: disable=protected-access, line-too-long # pyright: ignore [reportFunctionMemberAccess] - span, self._last_result - ) - # Only one choice expected with streaming - self._accumulate["index"] = 0 - # Delete message if content tracing is not enabled - if not _trace_inference_content: - if "message" in self._accumulate: - if "content" in self._accumulate["message"]: - del self._accumulate["message"]["content"] - if not self._accumulate["message"]: - del self._accumulate["message"] - if "message" in self._accumulate: - if "tool_calls" in self._accumulate["message"]: - tools_no_recording = self._instrumentor._remove_function_call_names_and_arguments( # pylint: disable=protected-access, line-too-long # pyright: ignore [reportFunctionMemberAccess] - self._accumulate["message"]["tool_calls"] - ) - self._accumulate["message"]["tool_calls"] = list(tools_no_recording) - attributes = { - "gen_ai.system": _INFERENCE_GEN_AI_SYSTEM_NAME, - "gen_ai.event.content": json.dumps(self._accumulate), - } - self._last_event_timestamp_ns = self._instrumentor._record_event( # pylint: disable=protected-access, line-too-long # pyright: ignore [reportFunctionMemberAccess] - span, "gen_ai.choice", attributes, self._last_event_timestamp_ns - ) - span.finish() - - async_stream_wrapper = AsyncStreamWrapper(stream_obj, self, span, last_event_timestamp_ns) - return async_stream_wrapper - - def _record_event( - self, span: "AbstractSpan", name: str, attributes: Dict[str, Any], last_event_timestamp_ns: int - ) -> int: - timestamp = time_ns() - - # we're recording multiple events, some of them are emitted within (hundreds of) nanoseconds of each other. - # time.time_ns resolution is not high enough on windows to guarantee unique timestamps for each message. - # Also Azure Monitor truncates resolution to microseconds and some other backends truncate to milliseconds. - # - # But we need to give users a way to restore event order, so we're incrementing the timestamp - # by 1 microsecond for each message. - # - # This is a workaround, we'll find a generic and better solution - see - # https://github.com/open-telemetry/semantic-conventions/issues/1701 - if last_event_timestamp_ns > 0 and timestamp <= (last_event_timestamp_ns + 1000): - timestamp = last_event_timestamp_ns + 1000 - - span.span_instance.add_event(name=name, attributes=attributes, timestamp=timestamp) - - return timestamp - - def _trace_sync_function( - self, - function: Callable, - *, - _args_to_ignore: Optional[List[str]] = None, - _trace_type=TraceType.INFERENCE, - _name: Optional[str] = None, - ) -> Callable: - """ - Decorator that adds tracing to a synchronous function. - - :param function: The function to be traced. - :type function: Callable - :param args_to_ignore: A list of argument names to be ignored in the trace. - Defaults to None. - :type: args_to_ignore: [List[str]], optional - :param trace_type: The type of the trace. Defaults to TraceType.INFERENCE. - :type trace_type: TraceType, optional - :param name: The name of the trace, will set to func name if not provided. - :type name: str, optional - :return: The traced function. - :rtype: Callable - """ - - @functools.wraps(function) - def inner(*args, **kwargs): - - span_impl_type = settings.tracing_implementation() - if span_impl_type is None: - return function(*args, **kwargs) - - class_function_name = function.__qualname__ - - if class_function_name.startswith("ChatCompletionsClient.complete"): - if kwargs.get("model") is None: - span_name = "chat" - else: - model = kwargs.get("model") - span_name = f"chat {model}" - - span = span_impl_type( - name=span_name, - kind=SpanKind.CLIENT, # pyright: ignore [reportPossiblyUnboundVariable] - ) - - try: - # tracing events not supported in azure-core-tracing-opentelemetry - # so need to access the span instance directly - with span_impl_type.change_context(span.span_instance): - last_event_timestamp_ns = self._add_request_details(span, args, kwargs) - result = function(*args, **kwargs) - if kwargs.get("stream") is True: - return self._wrapped_stream(result, span, last_event_timestamp_ns) - self._add_response_details(span, result, last_event_timestamp_ns) - except Exception as exc: - # Set the span status to error - if isinstance(span.span_instance, Span): # pyright: ignore [reportPossiblyUnboundVariable] - span.span_instance.set_status( - StatusCode.ERROR, # pyright: ignore [reportPossiblyUnboundVariable] - description=str(exc), - ) - module = getattr(exc, "__module__", "") - module = module if module != "builtins" else "" - error_type = f"{module}.{type(exc).__name__}" if module else type(exc).__name__ - self._set_attributes(span, ("error.type", error_type)) - span.finish() - raise - - span.finish() - return result - - # Handle the default case (if the function name does not match) - return None # Ensure all paths return - - return inner - - def _trace_async_function( - self, - function: Callable, - *, - _args_to_ignore: Optional[List[str]] = None, - _trace_type=TraceType.INFERENCE, - _name: Optional[str] = None, - ) -> Callable: - """ - Decorator that adds tracing to an asynchronous function. - - :param function: The function to be traced. - :type function: Callable - :param args_to_ignore: A list of argument names to be ignored in the trace. - Defaults to None. - :type: args_to_ignore: [List[str]], optional - :param trace_type: The type of the trace. Defaults to TraceType.INFERENCE. - :type trace_type: TraceType, optional - :param name: The name of the trace, will set to func name if not provided. - :type name: str, optional - :return: The traced function. - :rtype: Callable - """ - - @functools.wraps(function) - async def inner(*args, **kwargs): - span_impl_type = settings.tracing_implementation() - if span_impl_type is None: - return await function(*args, **kwargs) - - class_function_name = function.__qualname__ - - if class_function_name.startswith("ChatCompletionsClient.complete"): - if kwargs.get("model") is None: - span_name = "chat" - else: - model = kwargs.get("model") - span_name = f"chat {model}" - - span = span_impl_type( - name=span_name, - kind=SpanKind.CLIENT, # pyright: ignore [reportPossiblyUnboundVariable] - ) - try: - # tracing events not supported in azure-core-tracing-opentelemetry - # so need to access the span instance directly - with span_impl_type.change_context(span.span_instance): - last_event_timestamp_ns = self._add_request_details(span, args, kwargs) - result = await function(*args, **kwargs) - if kwargs.get("stream") is True: - return self._async_wrapped_stream(result, span, last_event_timestamp_ns) - self._add_response_details(span, result, last_event_timestamp_ns) - - except Exception as exc: - # Set the span status to error - if isinstance(span.span_instance, Span): # pyright: ignore [reportPossiblyUnboundVariable] - span.span_instance.set_status( - StatusCode.ERROR, # pyright: ignore [reportPossiblyUnboundVariable] - description=str(exc), - ) - module = getattr(exc, "__module__", "") - module = module if module != "builtins" else "" - error_type = f"{module}.{type(exc).__name__}" if module else type(exc).__name__ - self._set_attributes(span, ("error.type", error_type)) - span.finish() - raise - - span.finish() - return result - - # Handle the default case (if the function name does not match) - return None # Ensure all paths return - - return inner - - def _inject_async(self, f, _trace_type, _name): - wrapper_fun = self._trace_async_function(f) - wrapper_fun._original = f # pylint: disable=protected-access # pyright: ignore [reportFunctionMemberAccess] - return wrapper_fun - - def _inject_sync(self, f, _trace_type, _name): - wrapper_fun = self._trace_sync_function(f) - wrapper_fun._original = f # pylint: disable=protected-access # pyright: ignore [reportFunctionMemberAccess] - return wrapper_fun - - def _inference_apis(self): - sync_apis = ( - ( - "azure.ai.inference", - "ChatCompletionsClient", - "complete", - TraceType.INFERENCE, - "inference_chat_completions_complete", - ), - ) - async_apis = ( - ( - "azure.ai.inference.aio", - "ChatCompletionsClient", - "complete", - TraceType.INFERENCE, - "inference_chat_completions_complete", - ), - ) - return sync_apis, async_apis - - def _inference_api_list(self): - sync_apis, async_apis = self._inference_apis() - yield sync_apis, self._inject_sync - yield async_apis, self._inject_async - - def _generate_api_and_injector(self, apis): - for api, injector in apis: - for module_name, class_name, method_name, trace_type, name in api: - try: - module = importlib.import_module(module_name) - api = getattr(module, class_name) - if hasattr(api, method_name): - yield api, method_name, trace_type, injector, name - except AttributeError as e: - # Log the attribute exception with the missing class information - logging.warning( - "AttributeError: The module '%s' does not have the class '%s'. %s", - module_name, - class_name, - str(e), - ) - except Exception as e: # pylint: disable=broad-except - # Log other exceptions as a warning, as we're not sure what they might be - logging.warning("An unexpected error occurred: '%s'", str(e)) - - def _available_inference_apis_and_injectors(self): - """ - Generates a sequence of tuples containing Inference API classes, method names, and - corresponding injector functions. - - :return: A generator yielding tuples. - :rtype: tuple - """ - yield from self._generate_api_and_injector(self._inference_api_list()) - - def _instrument_inference(self, enable_content_tracing: bool = False): - """This function modifies the methods of the Inference API classes to - inject logic before calling the original methods. - The original methods are stored as _original attributes of the methods. - - :param enable_content_tracing: Indicates whether tracing of message content should be enabled. - This also controls whether function call tool function names, - parameter names and parameter values are traced. - :type enable_content_tracing: bool - """ - # pylint: disable=W0603 - global _inference_traces_enabled - global _trace_inference_content - if _inference_traces_enabled: - raise RuntimeError("Traces already started for azure.ai.inference") - _inference_traces_enabled = True - _trace_inference_content = enable_content_tracing - for ( - api, - method, - trace_type, - injector, - name, - ) in self._available_inference_apis_and_injectors(): - # Check if the method of the api class has already been modified - if not hasattr(getattr(api, method), "_original"): - setattr(api, method, injector(getattr(api, method), trace_type, name)) - - def _uninstrument_inference(self): - """This function restores the original methods of the Inference API classes - by assigning them back from the _original attributes of the modified methods. - """ - # pylint: disable=W0603 - global _inference_traces_enabled - global _trace_inference_content - _trace_inference_content = False - for api, method, _, _, _ in self._available_inference_apis_and_injectors(): - if hasattr(getattr(api, method), "_original"): - setattr(api, method, getattr(getattr(api, method), "_original")) - _inference_traces_enabled = False - - def _is_instrumented(self): - """This function returns True if Inference libary has already been instrumented - for tracing and False if it has not been instrumented. - - :return: A value indicating whether the Inference library is currently instrumented or not. - :rtype: bool - """ - return _inference_traces_enabled - - def _set_content_recording_enabled(self, enable_content_recording: bool = False) -> None: - """This function sets the content recording value. - - :param enable_content_recording: Indicates whether tracing of message content should be enabled. - This also controls whether function call tool function names, - parameter names and parameter values are traced. - :type enable_content_recording: bool - """ - global _trace_inference_content # pylint: disable=W0603 - _trace_inference_content = enable_content_recording - - def _is_content_recording_enabled(self) -> bool: - """This function gets the content recording value. - - :return: A bool value indicating whether content tracing is enabled. - :rtype bool - """ - return _trace_inference_content diff --git a/sdk/ai/azure-ai-inference/sdk_packaging.toml b/sdk/ai/azure-ai-inference/sdk_packaging.toml new file mode 100644 index 000000000000..e7687fdae93b --- /dev/null +++ b/sdk/ai/azure-ai-inference/sdk_packaging.toml @@ -0,0 +1,2 @@ +[packaging] +auto_update = false \ No newline at end of file diff --git a/sdk/ai/azure-ai-inference/setup.py b/sdk/ai/azure-ai-inference/setup.py index 3de5f549efe0..c7b5395a3f9f 100644 --- a/sdk/ai/azure-ai-inference/setup.py +++ b/sdk/ai/azure-ai-inference/setup.py @@ -13,7 +13,7 @@ PACKAGE_NAME = "azure-ai-inference" -PACKAGE_PPRINT_NAME = "Azure AI Inference" +PACKAGE_PPRINT_NAME = "Azure Ai Inference" # a-b-c => a/b/c package_folder_path = PACKAGE_NAME.replace("-", "/") @@ -35,7 +35,7 @@ license="MIT License", author="Microsoft Corporation", author_email="azpysdkhelp@microsoft.com", - url="https://github.com/Azure/azure-sdk-for-python/tree/main/sdk/ai/azure-ai-inference", + url="https://github.com/Azure/azure-sdk-for-python/tree/main/sdk", keywords="azure, azure sdk", classifiers=[ "Development Status :: 4 - Beta", @@ -68,8 +68,4 @@ "typing-extensions>=4.6.0", ], python_requires=">=3.8", - extras_require={ - "opentelemetry": ["azure-core-tracing-opentelemetry"], - "prompts": ["pyyaml"], - }, ) diff --git a/sdk/ai/azure-ai-inference/tsp-location.yaml b/sdk/ai/azure-ai-inference/tsp-location.yaml index fffb1db5b101..586442152432 100644 --- a/sdk/ai/azure-ai-inference/tsp-location.yaml +++ b/sdk/ai/azure-ai-inference/tsp-location.yaml @@ -1,4 +1,4 @@ directory: specification/ai/ModelClient -commit: 1f152afbf84c6febe1f4447d284750f03c7f188f +commit: 7dca60dfb7fda9a5e4aaeb4494db564b992c5c43 repo: Azure/azure-rest-api-specs -additionalDirectories: +additionalDirectories: