diff --git a/helm/examples/tracing/README.md b/helm/examples/tracing/README.md index 00fd4b8f77b..4d00e0a39cf 100644 --- a/helm/examples/tracing/README.md +++ b/helm/examples/tracing/README.md @@ -12,6 +12,9 @@ All configuration options detailed here are part of the HPCC Systems Helm chart, - alwaysCreateGlobalIds - If true, assign newly created global ID to any requests that do not supply one. - optAlwaysCreateTraceIds - If true components generate trace/span ids if none are provided by the remote caller. - enableDefaultLogExporter - If true, creates a trace exporter outputting to the log using the default options +- sampler - Defines head sampling strategy. Decision to sample or drop a span or trace is not made by inspecting the trace as a whole. https://opentelemetry.io/docs/concepts/sampling/ + - type "AlwaysOff" | "AlwaysOn" | "Ratio" + - argument Optional sampler type configuration value. Currently, the only supported value applies to the "Ratio" sampler type. The argument value is a string representing a numeric value between 0.0 and 1.0. This value represents the ratio of trace/spans to sample - resourceAttributes: - Defines OTel specific resource attribute configuration values which are appended to the runtime OTEL_RESOURCE_ATTRIBUTES. 
See OTel doc: https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#general-sdk-configuration - deploymentEnvironment - Defines deployment.environment, which is used to specify diff --git a/helm/examples/tracing/baremetal-otlp-http-localhost-sample.xml b/helm/examples/tracing/baremetal-otlp-http-localhost-sample.xml index e430888efa5..d4c04ab26e3 100644 --- a/helm/examples/tracing/baremetal-otlp-http-localhost-sample.xml +++ b/helm/examples/tracing/baremetal-otlp-http-localhost-sample.xml @@ -5,6 +5,7 @@ + \ No newline at end of file diff --git a/helm/examples/tracing/otlp-http-collector-default-sampled.yaml b/helm/examples/tracing/otlp-http-collector-default-sampled.yaml new file mode 100644 index 00000000000..c6d7c5a3559 --- /dev/null +++ b/helm/examples/tracing/otlp-http-collector-default-sampled.yaml @@ -0,0 +1,9 @@ +global: + tracing: + sampler: + - type: Ratio # Head sampling based on simple ratio + - argument: "0.1" # only sample 10% of traces/spans + exporters: + - type: OTLP-HTTP + endpoint: "localhost:4318/v1/traces" + consoleDebug: true \ No newline at end of file diff --git a/helm/hpcc/values.schema.json b/helm/hpcc/values.schema.json index 86d2966d332..4985305d6cb 100644 --- a/helm/hpcc/values.schema.json +++ b/helm/hpcc/values.schema.json @@ -1164,6 +1164,20 @@ "type": "boolean", "description": "If true, creates a trace exporter outputting to the log using the default options" }, + "sampler": { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["AlwaysOff", "AlwaysOn", "Ratio"], + "description": "Name of the Head Sampling type AlwaysOff|AlwaysOn|Ratio" + }, + "argument" : { + "type": "string", + "description": "Optional sampler type configuration value" + } + } + }, "exporters": { "type": "array", "description": "List of trace exporters", diff --git a/helm/hpcc/values.yaml b/helm/hpcc/values.yaml index 8d6b6bed7ff..32ffe89cd60 100644 --- a/helm/hpcc/values.yaml +++ b/helm/hpcc/values.yaml @@ 
-31,7 +31,9 @@ global: alwaysCreateTraceIds: true resourceAttributes: # used to declare OTEL Resource Attribute config values deploymentEnvironment: development # used to anotate tracing spans' environment identifier (development/production/statiging/etc) - + sampler: + type: Ratio + argument: "0.25" # Only sample 1/4 of traces/spans ## resource settings for stub components #stubInstanceResources: # memory: "200Mi" diff --git a/system/jlib/jtrace.cpp b/system/jlib/jtrace.cpp index 9d17c644908..198b2a311cf 100644 --- a/system/jlib/jtrace.cpp +++ b/system/jlib/jtrace.cpp @@ -23,6 +23,11 @@ #include "opentelemetry/sdk/trace/tracer_context_factory.h" //opentelemetry::sdk::trace::TracerContextFactory::Create(std::move(processors)); #include "opentelemetry/sdk/trace/simple_processor_factory.h" #include "opentelemetry/sdk/trace/batch_span_processor_factory.h" +#include +#include +#include +#include +#include #include "opentelemetry/exporters/ostream/span_exporter_factory.h"// auto exporter = opentelemetry::exporter::trace::OStreamSpanExporterFactory::Create(); #include "opentelemetry/exporters/ostream/common_utils.h" #include "opentelemetry/exporters/memory/in_memory_span_exporter_factory.h" @@ -762,6 +767,9 @@ class CSpan : public CInterfaceOf if (span == nullptr) return false; + if (!span->IsRecording()) //if not sampled, we shouldn't consider this valid? + return false; + auto spanCtx = span->GetContext(); return spanCtx.IsValid(); } @@ -1368,11 +1376,58 @@ void CTraceManager::initTracerProviderAndGlobalInternals(const IPropertyTree * t std::vector> processors; + std::unique_ptr sampler; + //= std::unique_ptr(new opentelemetry::sdk::trace::AlwaysOnSampler); + + bool customSamplerEnabled = false; //By default trace spans to the logs in debug builds - so that developers get used to seeing them. 
//Default off for release builds to avoid flooding the logs, and because they are likely to use OTLP bool enableDefaultLogExporter = isDebugBuild(); if (traceConfig) { + IPropertyTree * samplerTree = traceConfig->queryPropTree("sampler"); + if (samplerTree) + { + const char * samplerType = samplerTree->queryProp("@type"); + if (!isEmptyString(samplerType)) + { + if (strcmp("AlwaysOff", samplerType)==0) + { + sampler = std::unique_ptr + (new opentelemetry::sdk::trace::AlwaysOffSampler); + customSamplerEnabled = true; + } + else if (strcmp("Ratio", samplerType)==0) + { + const char * samplerArgument = samplerTree->queryProp("@argument"); //if any other sampler requires argument, + //this has to move up + + //ratio – a required value, 1.0 >= ratio >= 0.0. + //If the given trace_id falls into a given ratio of all possible trace_id values, + //ShouldSample will return RECORD_AND_SAMPLE. + if (!isEmptyString(samplerArgument)) + { + size_t pos; + double ratio = std::stod(samplerArgument, &pos); + if (ratio < 0 || ratio > 1) + { + OERRLOG("JTrace invalid ratio sampler configuration. Ration must be LE 1.0 or GE 0.0"); + } + else + { + sampler = std::unique_ptr + (new opentelemetry::sdk::trace::TraceIdRatioBasedSampler(ratio)); + customSamplerEnabled = true; + } + } + else + { + OERRLOG("JTrace invalid ratio sampler configuration. Must provide ration samplerArgument"); + } + } + } + } + IPropertyTree * resourceAttributesTree = traceConfig->queryPropTree("resourceAttributes"); if (resourceAttributesTree) { @@ -1398,6 +1453,12 @@ void CTraceManager::initTracerProviderAndGlobalInternals(const IPropertyTree * t enableDefaultLogExporter = traceConfig->getPropBool("enableDefaultLogExporter", enableDefaultLogExporter); } + if (!customSamplerEnabled) + { + sampler = std::unique_ptr + (new opentelemetry::sdk::trace::AlwaysOnSampler); + } + if (enableDefaultLogExporter) { //Simple option to create logging to the log file - primarily to aid developers. 
@@ -1407,9 +1468,9 @@ void CTraceManager::initTracerProviderAndGlobalInternals(const IPropertyTree * t auto jtraceResource = opentelemetry::sdk::resource::Resource::Create(resourceAtts); - // Default is an always-on sampler. std::unique_ptr context = - opentelemetry::sdk::trace::TracerContextFactory::Create(std::move(processors), jtraceResource); + opentelemetry::sdk::trace::TracerContextFactory::Create(std::move(processors), jtraceResource, std::move(sampler)); + std::shared_ptr provider = opentelemetry::sdk::trace::TracerProviderFactory::Create(std::move(context));