Skip to content

Commit

Permalink
[Internal] Open Telemetry: Fixes client telemetry error type that is …
Browse files Browse the repository at this point in the history
…compliant with open telemetry standard (#4948)

# Pull Request Template

## Description

Address issue #4945 
Previously, `error.type` contained the detailed error message, which is
not compliant with the OpenTelemetry semantic convention for
[error.type](https://opentelemetry.io/docs/specs/semconv/attributes-registry/error/).

This PR generates the error type based on the exception type, status
code and sub status code.
An example `error.type` value:
`"CosmosException_NotFound_ResourceNotFound"`

## Type of change

Please delete options that are not relevant.

- [X] Bug fix (non-breaking change which fixes an issue)
- [ ] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to not work as expected)
- [ ] This change requires a documentation update

## Closing issues
#4945 

To automatically close an issue: closes #IssueNumber

---------

Co-authored-by: Sourabh Jain <sourabhjain@microsoft.com>
  • Loading branch information
huapeng-zhang and sourabh1007 authored Jan 8, 2025
1 parent 9b2fae6 commit b6afb07
Show file tree
Hide file tree
Showing 2 changed files with 171 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@ namespace Microsoft.Azure.Cosmos.Telemetry
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using global::Azure.Core;
using Microsoft.Azure.Cosmos.Tracing.TraceData;
using Microsoft.Azure.Documents;

/// <summary>
/// Contains constant string values representing OpenTelemetry attribute keys for monitoring and tracing Cosmos DB operations.
Expand Down Expand Up @@ -291,6 +293,10 @@ public KeyValuePair<string, object>[] PopulateNetworkMeterDimensions(string oper
int? operationLevelStatusCode = CosmosDbMeterUtil.GetStatusCode(attributes, ex);
int? operationLevelSubStatusCode = CosmosDbMeterUtil.GetSubStatusCode(attributes, ex);

int? serviceEndpointStatusCode = GetStatusCode(tcpStats, httpStats);
int? serviceEndpointSubStatusCode = GetSubStatusCode(tcpStats, httpStats);
Exception serviceEndpointException = httpStats?.Exception ?? tcpStats?.StoreResult?.Exception;

List<KeyValuePair<string, object>> dimensions = new ()
{
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.DbSystemName, OpenTelemetryCoreRecorder.CosmosDb),
Expand All @@ -305,10 +311,10 @@ public KeyValuePair<string, object>[] PopulateNetworkMeterDimensions(string oper
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.NetworkProtocolName, replicaEndpoint.Scheme),
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.ServiceEndpointHost, replicaEndpoint.Host),
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.ServiceEndPointPort, replicaEndpoint.Port),
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.ServiceEndpointStatusCode, GetStatusCode(tcpStats, httpStats)),
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.ServiceEndpointSubStatusCode, GetSubStatusCode(tcpStats, httpStats)),
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.ServiceEndpointStatusCode, serviceEndpointStatusCode),
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.ServiceEndpointSubStatusCode, serviceEndpointSubStatusCode),
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.ServiceEndpointRegion, GetRegion(tcpStats, httpStats)),
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.ErrorType, GetException(tcpStats, httpStats))
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.ErrorType, GetErrorType(serviceEndpointException, serviceEndpointStatusCode, serviceEndpointSubStatusCode))
};

this.AddOptionalDimensions(optionFromRequest, tcpStats, httpStats, dimensions);
Expand Down Expand Up @@ -350,6 +356,9 @@ public KeyValuePair<string, object>[] PopulateOperationMeterDimensions(string op
Exception ex,
OperationMetricsOptions optionFromRequest)
{
int? statusCode = CosmosDbMeterUtil.GetStatusCode(attributes, ex);
int? subStatusCode = CosmosDbMeterUtil.GetSubStatusCode(attributes, ex);

List<KeyValuePair<string, object>> dimensions = new ()
{
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.DbSystemName, OpenTelemetryCoreRecorder.CosmosDb),
Expand All @@ -358,10 +367,10 @@ public KeyValuePair<string, object>[] PopulateOperationMeterDimensions(string op
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.ServerAddress, accountName?.Host),
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.ServerPort, accountName?.Port),
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.DbOperation, operationName),
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.StatusCode, CosmosDbMeterUtil.GetStatusCode(attributes, ex)),
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.SubStatusCode, CosmosDbMeterUtil.GetSubStatusCode(attributes, ex)),
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.StatusCode, statusCode),
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.SubStatusCode, subStatusCode),
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.ConsistencyLevel, GetConsistencyLevel(attributes, ex)),
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.ErrorType, ex?.Message)
new KeyValuePair<string, object>(OpenTelemetryAttributeKeys.ErrorType, GetErrorType(ex, statusCode, subStatusCode))
};

this.AddOptionalDimensions(attributes, optionFromRequest, dimensions);
Expand Down Expand Up @@ -466,9 +475,24 @@ private static string GetRegion(ClientSideRequestStatisticsTraceDatum.StoreRespo
return httpStats?.Region ?? tcpStats.Region;
}

private static string GetException(ClientSideRequestStatisticsTraceDatum.StoreResponseStatistics tcpStats, ClientSideRequestStatisticsTraceDatum.HttpResponseStatistics? httpStats)
/// <summary>
/// Builds the error.type dimension value from the exception type name, the status code and the sub status code.
/// The value has the form "{ExceptionTypeName}_{StatusCode}_{SubStatusCode}", for example
/// "CosmosException_NotFound_ResourceNotFound". A missing status code or sub status code leaves its
/// segment empty, so the separators are still emitted (for example "NotFoundException__").
/// </summary>
/// <param name="exception">The exception that was thrown; when null, no error type is produced.</param>
/// <param name="statusCode">Status code, rendered through <see cref="HttpStatusCode"/>; may be null.</param>
/// <param name="subStatusCode">Sub status code, rendered through <see cref="SubStatusCodes"/>; may be null.</param>
/// <returns>The error.type dimension value, or null when <paramref name="exception"/> is null.</returns>
private static string GetErrorType(Exception exception, int? statusCode, int? subStatusCode)
{
return httpStats?.Exception?.Message ?? tcpStats?.StoreResult?.Exception?.Message;
// Without an exception there is no error to classify.
if (exception == null)
{
return null;
}

// Cast the numeric codes to their enum types so that ToString() is applied to the enum value below.
HttpStatusCode? code = statusCode.HasValue ? (HttpStatusCode)statusCode.Value : null;
SubStatusCodes? subCode = subStatusCode.HasValue ? (SubStatusCodes)subStatusCode.Value : null;

// Uses the short type name of the exception rather than its message.
return $"{exception.GetType().Name}_{code?.ToString()}_{subCode?.ToString()}";
}

private static int GetSubStatusCode(ClientSideRequestStatisticsTraceDatum.StoreResponseStatistics tcpStats, ClientSideRequestStatisticsTraceDatum.HttpResponseStatistics? httpStats)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
//------------------------------------------------------------
// Copyright (c) Microsoft Corporation. All rights reserved.
//------------------------------------------------------------

namespace Microsoft.Azure.Cosmos.Tests.Telemetry
{
using System;
using System.Collections.Generic;
using System.Linq;
using System.Net;
using System.Net.Http;
using Microsoft.Azure.Cosmos.Telemetry;
using Microsoft.Azure.Cosmos.Tracing.TraceData;
using Microsoft.Azure.Documents;
using Microsoft.VisualStudio.TestTools.UnitTesting;

/// <summary>
/// Tests for <see cref="OpenTelemetryAttributeKeys"/>, focused on the value emitted for the
/// "error.type" dimension by the network-level and operation-level dimension populators.
/// </summary>
[TestClass]
public class OpenTelemetryAttributeKeysTests
{
    /// <summary>
    /// Key of the dimension under test.
    /// </summary>
    private const string ErrorTypeKey = "error.type";

    /// <summary>
    /// Verifies the error.type dimension produced by PopulateNetworkMeterDimensions: null when the
    /// HTTP statistics carry no exception, otherwise a value built from the HTTP exception type and
    /// the status / sub status codes of the mocked response.
    /// </summary>
    /// <param name="throwException">Whether the simulated request fails with an exception.</param>
    /// <param name="expectedResult">Expected error.type value.</param>
    [TestMethod]
    [DataRow(false, null)]
    [DataRow(true, "HttpRequestException_NotFound_ResourceNotFound")]
    public void AppendErrorTypeForPopulateNetworkMeterDimensionsTests(bool throwException, string expectedResult)
    {
        OpenTelemetryAttributeKeys attributePopulator = new OpenTelemetryAttributeKeys();
        HttpStatusCode statusCode = throwException ? HttpStatusCode.NotFound : HttpStatusCode.OK;
        int subStatusCode = throwException ? (int)SubStatusCodes.ResourceNotFound : (int)SubStatusCodes.Unknown;
        OpenTelemetryAttributes attributes = new OpenTelemetryAttributes
        {
            ConsistencyLevel = ConsistencyLevel.Strong.ToString(),
            StatusCode = statusCode,
            SubStatusCode = subStatusCode,
        };

        CosmosException cosmosException = new CosmosException(
            message: "test",
            statusCode: statusCode,
            subStatusCode: subStatusCode,
            activityId: Guid.NewGuid().ToString(),
            requestCharge: 1.1);

        // Mock http response message
        HttpResponseMessage responseMessage = new HttpResponseMessage(statusCode);
        responseMessage.Headers.Add("x-ms-substatus", subStatusCode.ToString());

        // Mock http exception
        HttpRequestException httpRequestException = new HttpRequestException("test");

        ClientSideRequestStatisticsTraceDatum.HttpResponseStatistics httpStatistics = new ClientSideRequestStatisticsTraceDatum.HttpResponseStatistics(
            requestStartTime: DateTime.MinValue,
            requestEndTime: DateTime.MinValue,
            requestUri: new Uri("https://test.com"),
            httpMethod: HttpMethod.Post,
            resourceType: ResourceType.Database,
            responseMessage: responseMessage,
            exception: throwException ? httpRequestException : null,
            region: "East US");

        KeyValuePair<string, object>[] dimensions = attributePopulator.PopulateNetworkMeterDimensions(
            operationName: "create_database_if_not_exists",
            accountName: new Uri("https://test.com"),
            "test_container",
            "test_database",
            attributes,
            throwException ? cosmosException : null,
            null,
            null,
            httpStatistics);

        // Check error.type value
        AssertErrorType(dimensions, expectedResult);
    }

    /// <summary>
    /// Verifies the error.type dimension produced by PopulateOperationMeterDimensions: null when no
    /// exception is supplied, otherwise a value built from the exception type and the status /
    /// sub status codes.
    /// </summary>
    /// <param name="throwException">Whether the simulated operation fails with an exception.</param>
    /// <param name="expectedResult">Expected error.type value.</param>
    [TestMethod]
    [DataRow(false, null)]
    [DataRow(true, "CosmosException_NotFound_ResourceNotFound")]
    public void AppendErrorTypeForPopulateOperationMeterDimensionsTests(bool throwException, string expectedResult)
    {
        OpenTelemetryAttributeKeys attributePopulator = new OpenTelemetryAttributeKeys();
        OpenTelemetryAttributes attributes = new OpenTelemetryAttributes
        {
            ConsistencyLevel = ConsistencyLevel.Strong.ToString(),
            StatusCode = throwException ? HttpStatusCode.NotFound : HttpStatusCode.OK,
            SubStatusCode = throwException ? (int)SubStatusCodes.ResourceNotFound : (int)SubStatusCodes.Unknown,
        };

        Exception innerException = new NotFoundException("Not found");
        CosmosException cosmosException = new CosmosException(
            statusCode: HttpStatusCode.NotFound,
            message: "Not found",
            stackTrace: null,
            headers: new Headers { { "x-ms-substatus", ((int)SubStatusCodes.ResourceNotFound).ToString() } },
            trace: null,
            error: null,
            innerException: innerException);

        KeyValuePair<string, object>[] dimensions = attributePopulator.PopulateOperationMeterDimensions(
            operationName: "create_database_if_not_exists",
            containerName: "Items",
            databaseName: "ToDoList",
            accountName: new Uri("https://test.com"),
            attributes: attributes,
            ex: throwException ? cosmosException : null,
            optionFromRequest: null);

        // Check error.type value
        AssertErrorType(dimensions, expectedResult);
    }

    /// <summary>
    /// Verifies that, when no attributes are supplied alongside a non-Cosmos exception, the
    /// error.type value still carries the exception type name followed by empty status and
    /// sub status segments.
    /// </summary>
    [TestMethod]
    public void AppendErrorTypeForPopulateOperationMeterDimensionsTests_AbnormalCase()
    {
        OpenTelemetryAttributeKeys attributePopulator = new OpenTelemetryAttributeKeys();

        Exception exception = new NotFoundException("Not found");

        KeyValuePair<string, object>[] dimensions = attributePopulator.PopulateOperationMeterDimensions(
            operationName: "create_database_if_not_exists",
            containerName: "Items",
            databaseName: "ToDoList",
            accountName: new Uri("https://test.com"),
            attributes: null,
            ex: exception,
            optionFromRequest: null);

        // Check error.type value
        AssertErrorType(dimensions, "NotFoundException__");
    }

    /// <summary>
    /// Asserts that <paramref name="dimensions"/> contains the error.type dimension and that its
    /// value equals <paramref name="expectedResult"/>.
    /// </summary>
    /// <param name="dimensions">Dimensions returned by the populator under test.</param>
    /// <param name="expectedResult">Expected error.type value; may be null.</param>
    private static void AssertErrorType(KeyValuePair<string, object>[] dimensions, string expectedResult)
    {
        // KeyValuePair is a struct, so a FirstOrDefault result can never be null and a missing key
        // would surface as a default pair whose null Value matches an expected null. Check that the
        // key is present explicitly so an absent dimension fails the test.
        int index = Array.FindIndex(dimensions, d => d.Key == ErrorTypeKey);
        Assert.IsTrue(index >= 0, "The error.type dimension was not emitted.");
        Assert.AreEqual(expectedResult, dimensions[index].Value);
    }
}
}

0 comments on commit b6afb07

Please sign in to comment.