From b6afb07e21804942835a6920bfbe7c633888bd53 Mon Sep 17 00:00:00 2001 From: Huapeng Zhang <244563813@qq.com> Date: Thu, 9 Jan 2025 01:39:21 +0800 Subject: [PATCH] [Internal] Open Telemetry: Fixes client telemetry error type that is compliant with open telemetry standard (#4948) # Pull Request Template ## Description Addresses issue #4945. Previously, error.type contained the detailed error message, which is not compliant with the OpenTelemetry standard for [error.type](https://opentelemetry.io/docs/specs/semconv/attributes-registry/error/). This PR generates the error type from the exception type, status code and sub status code. An example error.type value: ```"CosmosException_NotFound_ResourceNotFound"``` ## Type of change Please delete options that are not relevant. - [X] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] This change requires a documentation update ## Closing issues #4945 To automatically close an issue: closes #IssueNumber --------- Co-authored-by: Sourabh Jain --- .../OpenTelemetryAttributeKeys.cs | 40 ++++- .../OpenTelemetryAttributeKeyTests.cs | 139 ++++++++++++++++++ 2 files changed, 171 insertions(+), 8 deletions(-) create mode 100644 Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Telemetry/OpenTelemetryAttributeKeyTests.cs diff --git a/Microsoft.Azure.Cosmos/src/Telemetry/OpenTelemetry/OpenTelemetryAttributeKeys.cs b/Microsoft.Azure.Cosmos/src/Telemetry/OpenTelemetry/OpenTelemetryAttributeKeys.cs index eb0910d424..a45ce310bf 100644 --- a/Microsoft.Azure.Cosmos/src/Telemetry/OpenTelemetry/OpenTelemetryAttributeKeys.cs +++ b/Microsoft.Azure.Cosmos/src/Telemetry/OpenTelemetry/OpenTelemetryAttributeKeys.cs @@ -7,8 +7,10 @@ namespace Microsoft.Azure.Cosmos.Telemetry using System; using System.Collections.Generic; using System.Linq; + using System.Net; using 
global::Azure.Core; using Microsoft.Azure.Cosmos.Tracing.TraceData; + using Microsoft.Azure.Documents; /// /// Contains constant string values representing OpenTelemetry attribute keys for monitoring and tracing Cosmos DB operations. @@ -291,6 +293,10 @@ public KeyValuePair[] PopulateNetworkMeterDimensions(string oper int? operationLevelStatusCode = CosmosDbMeterUtil.GetStatusCode(attributes, ex); int? operationLevelSubStatusCode = CosmosDbMeterUtil.GetSubStatusCode(attributes, ex); + int? serviceEndpointStatusCode = GetStatusCode(tcpStats, httpStats); + int? serviceEndpointSubStatusCode = GetSubStatusCode(tcpStats, httpStats); + Exception serviceEndpointException = httpStats?.Exception ?? tcpStats?.StoreResult?.Exception; + List> dimensions = new () { new KeyValuePair(OpenTelemetryAttributeKeys.DbSystemName, OpenTelemetryCoreRecorder.CosmosDb), @@ -305,10 +311,10 @@ public KeyValuePair[] PopulateNetworkMeterDimensions(string oper new KeyValuePair(OpenTelemetryAttributeKeys.NetworkProtocolName, replicaEndpoint.Scheme), new KeyValuePair(OpenTelemetryAttributeKeys.ServiceEndpointHost, replicaEndpoint.Host), new KeyValuePair(OpenTelemetryAttributeKeys.ServiceEndPointPort, replicaEndpoint.Port), - new KeyValuePair(OpenTelemetryAttributeKeys.ServiceEndpointStatusCode, GetStatusCode(tcpStats, httpStats)), - new KeyValuePair(OpenTelemetryAttributeKeys.ServiceEndpointSubStatusCode, GetSubStatusCode(tcpStats, httpStats)), + new KeyValuePair(OpenTelemetryAttributeKeys.ServiceEndpointStatusCode, serviceEndpointStatusCode), + new KeyValuePair(OpenTelemetryAttributeKeys.ServiceEndpointSubStatusCode, serviceEndpointSubStatusCode), new KeyValuePair(OpenTelemetryAttributeKeys.ServiceEndpointRegion, GetRegion(tcpStats, httpStats)), - new KeyValuePair(OpenTelemetryAttributeKeys.ErrorType, GetException(tcpStats, httpStats)) + new KeyValuePair(OpenTelemetryAttributeKeys.ErrorType, GetErrorType(serviceEndpointException, serviceEndpointStatusCode, serviceEndpointSubStatusCode)) }; 
this.AddOptionalDimensions(optionFromRequest, tcpStats, httpStats, dimensions); @@ -350,6 +356,9 @@ public KeyValuePair[] PopulateOperationMeterDimensions(string op Exception ex, OperationMetricsOptions optionFromRequest) { + int? statusCode = CosmosDbMeterUtil.GetStatusCode(attributes, ex); + int? subStatusCode = CosmosDbMeterUtil.GetSubStatusCode(attributes, ex); + List> dimensions = new () { new KeyValuePair(OpenTelemetryAttributeKeys.DbSystemName, OpenTelemetryCoreRecorder.CosmosDb), @@ -358,10 +367,10 @@ public KeyValuePair[] PopulateOperationMeterDimensions(string op new KeyValuePair(OpenTelemetryAttributeKeys.ServerAddress, accountName?.Host), new KeyValuePair(OpenTelemetryAttributeKeys.ServerPort, accountName?.Port), new KeyValuePair(OpenTelemetryAttributeKeys.DbOperation, operationName), - new KeyValuePair(OpenTelemetryAttributeKeys.StatusCode, CosmosDbMeterUtil.GetStatusCode(attributes, ex)), - new KeyValuePair(OpenTelemetryAttributeKeys.SubStatusCode, CosmosDbMeterUtil.GetSubStatusCode(attributes, ex)), + new KeyValuePair(OpenTelemetryAttributeKeys.StatusCode, statusCode), + new KeyValuePair(OpenTelemetryAttributeKeys.SubStatusCode, subStatusCode), new KeyValuePair(OpenTelemetryAttributeKeys.ConsistencyLevel, GetConsistencyLevel(attributes, ex)), - new KeyValuePair(OpenTelemetryAttributeKeys.ErrorType, ex?.Message) + new KeyValuePair(OpenTelemetryAttributeKeys.ErrorType, GetErrorType(ex, statusCode, subStatusCode)) }; this.AddOptionalDimensions(attributes, optionFromRequest, dimensions); @@ -466,9 +475,24 @@ private static string GetRegion(ClientSideRequestStatisticsTraceDatum.StoreRespo return httpStats?.Region ?? tcpStats.Region; } - private static string GetException(ClientSideRequestStatisticsTraceDatum.StoreResponseStatistics tcpStats, ClientSideRequestStatisticsTraceDatum.HttpResponseStatistics? httpStats) + /// <summary> + /// Builds the error.type dimension value as "{exception type name}_{status code}_{sub status code}"; returns null when there is no exception.
+ /// </summary> + /// <param name="exception">The exception that was thrown; null means no error and yields a null result.</param> + /// <param name="statusCode">Status code, formatted through a cast to HttpStatusCode; contributes an empty segment when null.</param> + /// <param name="subStatusCode">Sub status code, formatted through a cast to SubStatusCodes; contributes an empty segment when null.</param> + /// <returns>The error.type dimension value, or null when <paramref name="exception"/> is null.</returns> + private static string GetErrorType(Exception exception, int? statusCode, int? subStatusCode) { - return httpStats?.Exception?.Message ?? tcpStats?.StoreResult?.Exception?.Message; + if (exception == null) + { + return null; + } + + HttpStatusCode? code = statusCode.HasValue ? (HttpStatusCode)statusCode.Value : null; + SubStatusCodes? subCode = subStatusCode.HasValue ? (SubStatusCodes)subStatusCode.Value : null; + + return $"{exception.GetType().Name}_{code?.ToString()}_{subCode?.ToString()}"; } private static int GetSubStatusCode(ClientSideRequestStatisticsTraceDatum.StoreResponseStatistics tcpStats, ClientSideRequestStatisticsTraceDatum.HttpResponseStatistics? httpStats) diff --git a/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Telemetry/OpenTelemetryAttributeKeyTests.cs b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Telemetry/OpenTelemetryAttributeKeyTests.cs new file mode 100644 index 0000000000..b0dcbe3a36 --- /dev/null +++ b/Microsoft.Azure.Cosmos/tests/Microsoft.Azure.Cosmos.Tests/Telemetry/OpenTelemetryAttributeKeyTests.cs @@ -0,0 +1,139 @@ +//------------------------------------------------------------ +// Copyright (c) Microsoft Corporation. All rights reserved. +//------------------------------------------------------------ + +namespace Microsoft.Azure.Cosmos.Tests.Telemetry +{ + using System; + using System.Collections.Generic; + using System.Linq; + using System.Net; + using System.Net.Http; + using Microsoft.Azure.Cosmos.Telemetry; + using Microsoft.Azure.Cosmos.Tracing.TraceData; + using Microsoft.Azure.Documents; + using Microsoft.VisualStudio.TestTools.UnitTesting; + + /// + /// Tests for . 
+    ///
+    [TestClass]
+    public class OpenTelemetryAttributeKeysTests
+    {
+        [TestMethod]
+        [DataRow(false, null)]
+        [DataRow(true, "HttpRequestException_NotFound_ResourceNotFound")]
+        public void AppendErrorTypeForPopulateNetworkMeterDimensionsTests(bool throwException, string expectedResult)
+        {
+            OpenTelemetryAttributeKeys attributePopulator = new OpenTelemetryAttributeKeys();
+            HttpStatusCode statusCode = throwException ? HttpStatusCode.NotFound : HttpStatusCode.OK;
+            int subStatusCode = throwException ? (int)SubStatusCodes.ResourceNotFound : (int)SubStatusCodes.Unknown;
+            OpenTelemetryAttributes attributes = new OpenTelemetryAttributes
+            {
+                ConsistencyLevel = ConsistencyLevel.Strong.ToString(),
+                StatusCode = statusCode,
+                SubStatusCode = subStatusCode,
+            };
+
+            CosmosException cosmosException = new CosmosException(
+                message: "test",
+                statusCode: statusCode,
+                subStatusCode: subStatusCode,
+                activityId: Guid.NewGuid().ToString(),
+                requestCharge: 1.1);
+
+            // HTTP response carrying the status code and the x-ms-substatus header for the network-level dimensions.
+            HttpResponseMessage responseMessage = new HttpResponseMessage(statusCode);
+            responseMessage.Headers.Add("x-ms-substatus", subStatusCode.ToString());
+
+            // Transport exception attached to the HTTP statistics only when throwException is true.
+            HttpRequestException httpRequestException = new HttpRequestException("test");
+
+            ClientSideRequestStatisticsTraceDatum.HttpResponseStatistics httpStatistics = new ClientSideRequestStatisticsTraceDatum.HttpResponseStatistics(
+                requestStartTime: DateTime.MinValue,
+                requestEndTime: DateTime.MinValue,
+                requestUri: new Uri("https://test.com"),
+                httpMethod: HttpMethod.Post,
+                resourceType: ResourceType.Database,
+                responseMessage: responseMessage,
+                exception: throwException ? httpRequestException : null,
+                region: "East US");
+
+            KeyValuePair<string, object>[] dimensions = attributePopulator.PopulateNetworkMeterDimensions(
+                operationName: "create_database_if_not_exists",
+                accountName: new Uri("https://test.com"),
+                "test_container",
+                "test_database",
+                attributes,
+                throwException ? cosmosException : null,
+                null,
+                null,
+                httpStatistics);
+
+            // error.type must always be emitted. KeyValuePair is a struct, so a null check can never fail; a missing dimension shows up as a default pair with a null key.
+            KeyValuePair<string, object> errorType = dimensions.FirstOrDefault(d => d.Key == "error.type");
+            Assert.AreEqual("error.type", errorType.Key);
+            Assert.AreEqual(expectedResult, errorType.Value);
+        }
+
+        [TestMethod]
+        [DataRow(false, null)]
+        [DataRow(true, "CosmosException_NotFound_ResourceNotFound")]
+        public void AppendErrorTypeForPopulateOperationMeterDimensionsTests(bool throwException, string expectedResult)
+        {
+            OpenTelemetryAttributeKeys attributePopulator = new OpenTelemetryAttributeKeys();
+            OpenTelemetryAttributes attributes = new OpenTelemetryAttributes
+            {
+                ConsistencyLevel = ConsistencyLevel.Strong.ToString(),
+                StatusCode = throwException ? HttpStatusCode.NotFound : HttpStatusCode.OK,
+                SubStatusCode = throwException ? (int)SubStatusCodes.ResourceNotFound : (int)SubStatusCodes.Unknown,
+            };
+
+            Exception innerException = new NotFoundException("Not found");
+            CosmosException cosmosException = new CosmosException(
+                statusCode: HttpStatusCode.NotFound,
+                message: "Not found",
+                stackTrace: null,
+                headers: new Headers { { "x-ms-substatus", ((int)SubStatusCodes.ResourceNotFound).ToString() } },
+                trace: null,
+                error: null,
+                innerException: innerException);
+
+            KeyValuePair<string, object>[] dimensions = attributePopulator.PopulateOperationMeterDimensions(
+                operationName: "create_database_if_not_exists",
+                containerName: "Items",
+                databaseName: "ToDoList",
+                accountName: new Uri("https://test.com"),
+                attributes: attributes,
+                ex: throwException ? cosmosException : null,
+                optionFromRequest: null);
+
+            // error.type must be present even without an exception (value null); assert the key because a struct null check is vacuous.
+            KeyValuePair<string, object> errorType = dimensions.FirstOrDefault(d => d.Key == "error.type");
+            Assert.AreEqual("error.type", errorType.Key);
+            Assert.AreEqual(expectedResult, errorType.Value);
+        }
+
+        [TestMethod]
+        public void AppendErrorTypeForPopulateOperationMeterDimensionsTests_AbnormalCase()
+        {
+            OpenTelemetryAttributeKeys attributePopulator = new OpenTelemetryAttributeKeys();
+
+            Exception exception = new NotFoundException("Not found");
+
+            KeyValuePair<string, object>[] dimensions = attributePopulator.PopulateOperationMeterDimensions(
+                operationName: "create_database_if_not_exists",
+                containerName: "Items",
+                databaseName: "ToDoList",
+                accountName: new Uri("https://test.com"),
+                attributes: null,
+                ex: exception,
+                optionFromRequest: null);
+
+            // With no attributes, this test expects both code segments to be empty, leaving only the exception type name.
+            KeyValuePair<string, object> errorType = dimensions.FirstOrDefault(d => d.Key == "error.type");
+            Assert.AreEqual("error.type", errorType.Key);
+            Assert.AreEqual("NotFoundException__", errorType.Value);
+        }
+    }
+}