diff --git a/config/config.go b/config/config.go index edf0ded..262b971 100644 --- a/config/config.go +++ b/config/config.go @@ -144,6 +144,7 @@ type GlobalOpts struct { ReportHeaders bool `json:"report_headers"` MetricsStaticAttributes Attributes `json:"metrics_static_attributes"` TracesStaticAttributes Attributes `json:"traces_static_attributes"` + SemConv string `json:"semantic_convention"` } // PipeOpts has the options for the KrakenD pipe stage diff --git a/go.mod b/go.mod index 2913ac3..3030f5b 100644 --- a/go.mod +++ b/go.mod @@ -1,23 +1,23 @@ module github.com/krakend/krakend-otel -go 1.21 +go 1.22.0 -toolchain go1.23.0 +toolchain go1.23.3 require ( github.com/gin-gonic/gin v1.9.1 github.com/luraproject/lura/v2 v2.6.2 github.com/prometheus/client_golang v1.18.0 - go.opentelemetry.io/otel v1.28.0 + go.opentelemetry.io/otel v1.33.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.28.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.28.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.28.0 go.opentelemetry.io/otel/exporters/prometheus v0.46.0 - go.opentelemetry.io/otel/metric v1.28.0 + go.opentelemetry.io/otel/metric v1.33.0 go.opentelemetry.io/otel/sdk v1.28.0 go.opentelemetry.io/otel/sdk/metric v1.28.0 - go.opentelemetry.io/otel/trace v1.28.0 + go.opentelemetry.io/otel/trace v1.33.0 ) require ( @@ -51,6 +51,7 @@ require ( github.com/twitchyliquid64/golang-asm v0.15.1 // indirect github.com/ugorji/go/codec v1.2.11 // indirect github.com/valyala/fastrand v1.1.0 // indirect + go.opentelemetry.io/auto/sdk v1.1.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 // indirect go.opentelemetry.io/proto/otlp v1.3.1 // indirect golang.org/x/arch v0.3.0 // indirect diff --git a/go.sum b/go.sum index 359239c..05e02e2 100644 --- a/go.sum +++ b/go.sum @@ -77,8 +77,8 @@ github.com/prometheus/common v0.45.0 h1:2BGz0eBc2hdMDLnO/8n0jeB3oPrt2D08CekT0lne github.com/prometheus/common v0.45.0/go.mod h1:YJmSTw9BoKxJplESWWxlbyttQR4uaEcGyv9MZjVOJsY= github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo= github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII= +github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -89,16 +89,18 @@ github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go/codec v1.2.11 h1:BMaWp1Bb6fHwEtbplGBGJ498wD+LKlNSl25MjdZY4dU= github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= github.com/valyala/fastrand v1.1.0 h1:f+5HkLW4rsgzdNoleUOB69hyT9IlD2ZQh9GyDMfb5G8= github.com/valyala/fastrand v1.1.0/go.mod h1:HWqCzkrkg6QXT8V2EXWvXCoow7vLwOFN002oeRzjapQ= -go.opentelemetry.io/otel v1.28.0 h1:/SqNcYk+idO0CxKEUOtKQClMK/MimZihKYMruSMViUo= -go.opentelemetry.io/otel v1.28.0/go.mod h1:q68ijF8Fc8CnMHKyzqL6akLO46ePnjkgfIMIjUIX9z4= +go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= +go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A= +go.opentelemetry.io/otel v1.33.0 h1:/FerN9bax5LoK51X/sI0SVYrjSE0/yUL7DpxW4K3FWw= +go.opentelemetry.io/otel v1.33.0/go.mod h1:SUUkR6csvUQl+yjReHu5uM3EtVV7MBm5FHKRlNx4I8I= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.28.0 h1:U2guen0GhqH8o/G2un8f/aG/y++OuW6MyCo6hT9prXk= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.28.0/go.mod h1:yeGZANgEcpdx/WK0IvvRFC+2oLiMS2u4L/0Rj2M2Qr0= go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.28.0 h1:aLmmtjRke7LPDQ3lvpFz+kNEH43faFhzW7v8BFIEydg= @@ -107,20 +109,18 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 h1:3Q/xZUyC1BBkualc9RO go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0/go.mod h1:s75jGIWA9OfCMzF0xr+ZgfrB5FEbbV7UuYo32ahUiFI= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0 h1:R3X6ZXmNPRR8ul6i3WgFURCHzaXjHdm0karRG/+dj3s= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.28.0/go.mod h1:QWFXnDavXWwMx2EEcZsf3yxgEKAqsxQ+Syjp+seyInw= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.24.0 h1:Xw8U6u2f8DK2XAkGRFV7BBLENgnTGX9i4rQRxJf+/vs= -go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.24.0/go.mod h1:6KW1Fm6R/s6Z3PGXwSJN2K4eT6wQB3vXX6CVnYX9NmM= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.28.0 h1:j9+03ymgYhPKmeXGk5Zu+cIZOlVzd9Zv7QIiyItjFBU= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.28.0/go.mod h1:Y5+XiUG4Emn1hTfciPzGPJaSI+RpDts6BnCIir0SLqk= go.opentelemetry.io/otel/exporters/prometheus v0.46.0 h1:I8WIFXR351FoLJYuloU4EgXbtNX2URfU/85pUPheIEQ= go.opentelemetry.io/otel/exporters/prometheus v0.46.0/go.mod h1:ztwVUHe5DTR/1v7PeuGRnU5Bbd4QKYwApWmuutKsJSs= -go.opentelemetry.io/otel/metric v1.28.0 h1:f0HGvSl1KRAU1DLgLGFjrwVyismPlnuU6JD6bOeuA5Q= -go.opentelemetry.io/otel/metric v1.28.0/go.mod h1:Fb1eVBFZmLVTMb6PPohq3TO9IIhUisDsbJoL/+uQW4s= +go.opentelemetry.io/otel/metric v1.33.0 h1:r+JOocAyeRVXD8lZpjdQjzMadVZp2M4WmQ+5WtEnklQ= +go.opentelemetry.io/otel/metric v1.33.0/go.mod h1:L9+Fyctbp6HFTddIxClbQkjtubW6O9QS3Ann/M82u6M= go.opentelemetry.io/otel/sdk v1.28.0 h1:b9d7hIry8yZsgtbmM0DKyPWMMUMlK9NEKuIG4aBqWyE= go.opentelemetry.io/otel/sdk v1.28.0/go.mod h1:oYj7ClPUA7Iw3m+r7GeEjz0qckQRJK2B8zjcZEfu7Pg= go.opentelemetry.io/otel/sdk/metric v1.28.0 h1:OkuaKgKrgAbYrrY0t92c+cC+2F6hsFNnCQArXCKlg08= go.opentelemetry.io/otel/sdk/metric v1.28.0/go.mod h1:cWPjykihLAPvXKi4iZc1dpER3Jdq2Z0YLse3moQUCpg= -go.opentelemetry.io/otel/trace v1.28.0 h1:GhQ9cUuQGmNDd5BTCP2dAvv75RdMxEfTmYejp+lkx9g= -go.opentelemetry.io/otel/trace v1.28.0/go.mod h1:jPyXzNPg6da9+38HEwElrQiHlVMTnVfM3/yv2OlIHaI= +go.opentelemetry.io/otel/trace v1.33.0 h1:cCJuF7LRjUFso9LPnEAHJDB2pqzp+hbO8eu1qqW2d/s= +go.opentelemetry.io/otel/trace v1.33.0/go.mod h1:uIcdVUZMpTAmz0tI1z04GoVSezK37CbGV4fr1f2nBck= go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0= go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8= go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= diff --git a/http/client/transport_metrics.go b/http/client/transport_metrics.go index 0e7b627..ff7ef36 100644 --- a/http/client/transport_metrics.go +++ b/http/client/transport_metrics.go @@ -3,10 +3,14 @@ package client import ( "context" "errors" + "net/http" + "strconv" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/metric/noop" "go.opentelemetry.io/otel/semconv/v1.21.0" + v127 "go.opentelemetry.io/otel/semconv/v1.27.0" kotelconfig "github.com/krakend/krakend-otel/config" ) @@ -19,6 +23,7 @@ type TransportMetricsOptions struct { ReadPayload bool // provide metrics for the reading the full body DetailedConnection bool // provide detailed metrics about the connection: dns lookup, tls ... FixedAttributes []attribute.KeyValue // "static" attributes set at config time. + SemConv string // to use the latest metric names conventions } // Enabled tells if metrics should be reported for the transport. @@ -37,7 +42,8 @@ type transportMetrics struct { // the value of the Content-Length header for the request (not the // actual written bytes of the request, that might be cancelled // when it is already on flight. - requestContentLength metric.Int64Counter + requestContentLength metric.Int64Counter + requestContentLengthHist metric.Int64Histogram responseLatency metric.Float64Histogram @@ -56,18 +62,19 @@ type transportMetrics struct { clientName string } -func newTransportMetrics(metricsOpts *TransportMetricsOptions, meter metric.Meter, clientName string) *transportMetrics { - if meter == nil { - return nil - } +type metricFillerFn func(*TransportMetricsOptions, metric.Meter, *transportMetrics) + +func noSemConvMetricsFiller(metricsOpts *TransportMetricsOptions, meter metric.Meter, tm *transportMetrics) { + nopMeter := noop.Meter{} - var tm transportMetrics tm.requestsStarted, _ = meter.Int64Counter("http.client.request.started.count") // number of reqs started tm.requestsFailed, _ = meter.Int64Counter("http.client.request.failed.count") // number of reqs failed tm.requestsCanceled, _ = meter.Int64Counter("http.client.request.canceled.count") // number of canceled requests tm.requestsTimedOut, _ = meter.Int64Counter("http.client.request.timedout.count") // numer of timedout request (inclued in failed) tm.requestContentLength, _ = meter.Int64Counter("http.client.request.size") // the value of the Content-Length header for the request + // For non implemented metrics that can be used with other config, we use a noop meter: + tm.requestContentLengthHist, _ = nopMeter.Int64Histogram(v127.HTTPClientRequestBodySizeName) tm.responseLatency, _ = meter.Float64Histogram("http.client.duration", kotelconfig.TimeBucketsOpt) tm.responseContentLength, _ = meter.Int64Histogram("http.client.response.size", kotelconfig.SizeBucketsOpt) @@ -77,38 +84,105 @@ func newTransportMetrics(metricsOpts *TransportMetricsOptions, meter metric.Mete tm.getConnLatency, _ = meter.Float64Histogram("http.client.request.get-conn.duration", kotelconfig.TimeBucketsOpt) tm.dnsLatency, _ = meter.Float64Histogram("http.client.request.dns.duration", kotelconfig.TimeBucketsOpt) tm.tlsLatency, _ = meter.Float64Histogram("http.client.request.tls.duration", kotelconfig.TimeBucketsOpt) - return &tm } -func (m *transportMetrics) report(rtt *roundTripTracking, attrs []attribute.KeyValue) { - if m == nil || m.requestsStarted == nil { - // if metrics are nil or not initialized, we just return +func semConv1_27MetricsFiller(metricsOpts *TransportMetricsOptions, meter metric.Meter, tm *transportMetrics) { + nopMeter := noop.Meter{} + + tm.detailsEnabled = metricsOpts.DetailedConnection + + // For non implemented metrics that can be used with other config, we use a noop meter: + tm.requestContentLength, _ = nopMeter.Int64Counter("http.client.request.size") + + // WARNING: Stability => Experimental (subject to change in the future) + tm.requestContentLengthHist, _ = meter.Int64Histogram(v127.HTTPClientRequestBodySizeName, + metric.WithUnit(v127.HTTPClientRequestBodySizeUnit), + metric.WithDescription(v127.HTTPClientRequestBodySizeDescription), + kotelconfig.SizeBucketsOpt) // the value of the Content-Length header for the request + + // this is `http.client.request.duration`, and is a required metric: + // https://opentelemetry.io/docs/specs/semconv/http/http-metrics/#metric-httpclientrequestduration + tm.responseLatency, _ = meter.Float64Histogram(v127.HTTPClientRequestDurationName, + metric.WithUnit(v127.HTTPClientRequestDurationUnit), + metric.WithDescription(v127.HTTPClientRequestDurationDescription), + kotelconfig.TimeBucketsOpt) + + // WARNING: Stability => Experimental (subject to change in the future) + tm.responseContentLength, _ = meter.Int64Histogram(v127.HTTPClientResponseBodySizeName, + metric.WithUnit(v127.HTTPClientResponseBodySizeUnit), + metric.WithDescription(v127.HTTPClientResponseBodySizeDescription), + kotelconfig.SizeBucketsOpt) // the value of the Content-Length header for the response + + // TODO: if we want the exact semantic convention, what should we do with our own extra data, that + // is not standarized by OTEL ? for now we only set it if the `metricsOpts.DetailedConnection` is + // set too: + if metricsOpts.DetailedConnection { + tm.requestsStarted, _ = meter.Int64Counter("http.client.request.started.count") // number of reqs started + tm.requestsFailed, _ = meter.Int64Counter("http.client.request.failed.count") // number of reqs failed + tm.requestsCanceled, _ = meter.Int64Counter("http.client.request.canceled.count") // number of canceled requests + tm.requestsTimedOut, _ = meter.Int64Counter("http.client.request.timedout.count") // numer of timedout request (inclued in failed) + + tm.responseNoContentLength, _ = meter.Int64Counter("http.client.response.no-content-length", + metric.WithUnit(v127.HTTPClientResponseBodySizeUnit), + metric.WithDescription("Client received responses that do not have 'Content-Length' value set")) + tm.getConnLatency, _ = meter.Float64Histogram("http.client.request.get-conn.duration", + metric.WithUnit(v127.HTTPClientRequestDurationUnit), + metric.WithDescription("Time spent acquiring a client connection"), + kotelconfig.TimeBucketsOpt) + tm.dnsLatency, _ = meter.Float64Histogram("http.client.request.dns.duration", + metric.WithUnit(v127.HTTPClientRequestDurationUnit), + metric.WithDescription("Time spent resolving the DNS name"), + kotelconfig.TimeBucketsOpt) + tm.tlsLatency, _ = meter.Float64Histogram("http.client.request.tls.duration", + metric.WithUnit(v127.HTTPClientRequestDurationUnit), + metric.WithDescription("Time spent on TLS negotiation and connection"), + kotelconfig.TimeBucketsOpt) return } + tm.requestsStarted, _ = nopMeter.Int64Counter("http.client.request.started.count") // number of reqs started + tm.requestsFailed, _ = nopMeter.Int64Counter("http.client.request.failed.count") // number of reqs failed + tm.requestsCanceled, _ = nopMeter.Int64Counter("http.client.request.canceled.count") // number of canceled requests + tm.requestsTimedOut, _ = nopMeter.Int64Counter("http.client.request.timedout.count") // numer of timedout request (inclued in failed) - attrM := make([]attribute.KeyValue, len(attrs), len(attrs)+4) - copy(attrM, attrs) - if len(m.clientName) > 0 { - attrM = append(attrM, attribute.Key("clientname").String(m.clientName)) + tm.responseNoContentLength, _ = nopMeter.Int64Counter("http.client.response.no-content-length") + tm.getConnLatency, _ = nopMeter.Float64Histogram("http.client.request.get-conn.duration") + tm.dnsLatency, _ = nopMeter.Float64Histogram("http.client.request.dns.duration") + tm.tlsLatency, _ = nopMeter.Float64Histogram("http.client.request.tls.duration") +} + +func newTransportMetrics(metricsOpts *TransportMetricsOptions, meter metric.Meter, clientName string) *transportMetrics { + if meter == nil { + return nil } - attrM = append(attrM, semconv.HTTPRequestMethodKey.String(rtt.req.Method)) - attrM = append(attrM, semconv.ServerAddress(rtt.req.RemoteAddr)) - statusCode := 0 - if rtt.err == nil { - // if we fail on the client side, we do not have a status code, but we - // want it set to 0 to be displayed on the dashboard - statusCode = int(rtt.resp.StatusCode) + supportedSemConv := map[string]metricFillerFn{ + "": noSemConvMetricsFiller, + "1.27": semConv1_27MetricsFiller, } - attrM = append(attrM, semconv.HTTPResponseStatusCode(statusCode)) - attrOpt := metric.WithAttributeSet(attribute.NewSet(attrM...)) + tm := transportMetrics{ + clientName: clientName, + } + filler := noSemConvMetricsFiller + if versionFiller, ok := supportedSemConv[metricsOpts.SemConv]; ok { + filler = versionFiller + } + filler(metricsOpts, meter, &tm) + return &tm +} +func (m *transportMetrics) report(rtt *roundTripTracking, attrs []attribute.KeyValue) { + if m == nil || m.requestsStarted == nil { + return + } + attrOpt := m.attributesOption(rtt, attrs) ctx := rtt.req.Context() m.requestsStarted.Add(ctx, 1, attrOpt) if rtt.req.ContentLength >= 0 { - // TOOD: should we check the http verb / method to report this ? m.requestContentLength.Add(ctx, rtt.req.ContentLength, attrOpt) + // the content-length is optional and experimental for 1.27 and also 1.29 sem conv: + // https://opentelemetry.io/docs/specs/semconv/http/http-metrics/#metric-httpclientrequestbodysize + m.requestContentLengthHist.Record(ctx, rtt.req.ContentLength, attrOpt) } if rtt.err != nil { @@ -128,11 +202,15 @@ func (m *transportMetrics) report(rtt *roundTripTracking, attrs []attribute.KeyV } } + // the `http.client.request.duration` is required for semconv 1.27 and 1.29 m.responseLatency.Record(ctx, rtt.latencyInSecs, attrOpt) + if rtt.req.Method != "HEAD" && rtt.resp != nil { if rtt.resp.ContentLength >= 0 { // it might be the case were we receive a chunked response, and then // we will not record a metric for it. + // the `http.client.response.body.size` is optional and experimental + // for semconv 1.27 and 1.29 m.responseContentLength.Record(ctx, rtt.resp.ContentLength, attrOpt) } else { m.responseNoContentLength.Add(ctx, 1, attrOpt) @@ -145,3 +223,45 @@ func (m *transportMetrics) report(rtt *roundTripTracking, attrs []attribute.KeyV m.tlsLatency.Record(ctx, rtt.tlsLatency, attrOpt) } } + +func (m *transportMetrics) attributesOption(rtt *roundTripTracking, attrs []attribute.KeyValue) metric.MeasurementOption { + numAttrs := len(attrs) + 4 + 1 // static attrs + required attributes + clientname + attrM := make([]attribute.KeyValue, len(attrs), numAttrs) + copy(attrM, attrs) + if len(m.clientName) > 0 { + attrM = append(attrM, attribute.Key("clientname").String(m.clientName)) + } + + serverAddress, serverPort := requestServerAndPort(rtt.req) + + statusCode := 0 + if rtt.err == nil { + // if we fail on the client side, we do not have a status code, but we + // want it set to 0 to be displayed on the dashboard + statusCode = int(rtt.resp.StatusCode) + } + + attrM = append(attrM, + semconv.HTTPRequestMethodKey.String(rtt.req.Method), // required + // this is wrong, as the RemoteAddr is ignored when the request is used + // for a client (is filled by the server side): + // semconv.ServerAddress(rtt.req.RemoteAddr), + semconv.ServerAddress(serverAddress), // required by sem conv 1.29 + semconv.ServerPort(serverPort), // required by sem conv 1.29 + semconv.HTTPResponseStatusCode(statusCode), // required if received + ) + return metric.WithAttributeSet(attribute.NewSet(attrM...)) +} + +func requestServerAndPort(r *http.Request) (string, int) { + serverAddress := r.Host + serverPort := int(80) + if r.URL != nil { + serverAddress = r.URL.Hostname() + strPort := r.URL.Port() + if p, err := strconv.ParseInt(strPort, 10, 32); err != nil { + serverPort = int(p) + } + } + return serverAddress, serverPort +} diff --git a/http/server/metrics.go b/http/server/metrics.go index 350e0db..2c4dcde 100644 --- a/http/server/metrics.go +++ b/http/server/metrics.go @@ -2,10 +2,12 @@ package server import ( "net/http" + "strings" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/metric" semconv "go.opentelemetry.io/otel/semconv/v1.21.0" + v127 "go.opentelemetry.io/otel/semconv/v1.27.0" kotelconfig "github.com/krakend/krakend-otel/config" ) @@ -14,14 +16,23 @@ type metricsHTTP struct { fixedAttrs []attribute.KeyValue fixedAttrsOpts metric.MeasurementOption - latency metric.Float64Histogram - size metric.Int64Histogram + latency metric.Float64Histogram // the time it takes to serve the request + size metric.Int64Histogram // the response size } -func newMetricsHTTP(meter metric.Meter, attrs []attribute.KeyValue) *metricsHTTP { +type metricsFiller func(*metricsHTTP, metric.Meter) + +func newMetricsHTTP(meter metric.Meter, attrs []attribute.KeyValue, semconv string) *metricsHTTP { var m metricsHTTP - m.latency, _ = meter.Float64Histogram("http.server.duration", kotelconfig.TimeBucketsOpt) - m.size, _ = meter.Int64Histogram("http.server.response.size", kotelconfig.SizeBucketsOpt) + + supportedSemConv := map[string]metricsFiller{ + "1.27": semConv1_27MetricsFiller, + } + fill := noSemConvMetricsFiller + if semConvFiller, ok := supportedSemConv[semconv]; ok { + fill = semConvFiller + } + fill(&m, meter) if len(attrs) > 0 { m.fixedAttrs = make([]attribute.KeyValue, len(attrs)) copy(m.fixedAttrs, attrs) @@ -32,15 +43,74 @@ func newMetricsHTTP(meter metric.Meter, attrs []attribute.KeyValue) *metricsHTTP return &m } +// validMethod allows us to limit the different valid methods to avoid +// a client with bad intentions to keep using weird methods and blow up +// the cardinality of the metrics +func validMethod(r *http.Request) string { + valid := map[string]bool{ + "_OTHER": true, + "CONNECT": true, + "DELETE": true, + "GET": true, + "HEAD": true, + "OPTIONS": true, + "PATCH": true, + "POST": true, + "PUT": true, + "TRACE": true, + } + v := strings.ToUpper(r.Method) + if !valid[v] { + v = "_OTHER" + } + return v +} + func (m *metricsHTTP) report(t *tracking, r *http.Request) { if m == nil || m.latency == nil { return } + urlScheme := "http" + if r.URL != nil && r.URL.Scheme != "" { + urlScheme = r.URL.Scheme + } + + // https://opentelemetry.io/docs/specs/semconv/http/http-metrics/#http-server dynAttrs := t.metricsStaticAttrs - dynAttrs = append(dynAttrs, semconv.HTTPRoute(t.EndpointPattern())) - dynAttrs = append(dynAttrs, semconv.HTTPRequestMethodKey.String(r.Method)) - dynAttrs = append(dynAttrs, semconv.HTTPResponseStatusCode(t.responseStatus)) + dynAttrs = append(dynAttrs, + semconv.HTTPRequestMethodKey.String(validMethod(r)), // required attribute + semconv.URLScheme(urlScheme), // required attribute + semconv.HTTPRoute(t.EndpointPattern()), // required if available + semconv.HTTPResponseStatusCode(t.responseStatus)) // required if was sent dynAttrsOpts := metric.WithAttributes(dynAttrs...) + m.latency.Record(t.ctx, t.latencyInSecs, m.fixedAttrsOpts, dynAttrsOpts) m.size.Record(t.ctx, int64(t.responseSize), m.fixedAttrsOpts, dynAttrsOpts) } + +func noSemConvMetricsFiller(m *metricsHTTP, meter metric.Meter) { + m.latency, _ = meter.Float64Histogram("http.server.duration", kotelconfig.TimeBucketsOpt) + m.size, _ = meter.Int64Histogram("http.server.response.size", kotelconfig.SizeBucketsOpt) +} + +func semConv1_27MetricsFiller(m *metricsHTTP, meter metric.Meter) { + // latency -> http.server.request.duration (required and stable) + m.latency, _ = meter.Float64Histogram(v127.HTTPServerRequestDurationName, + metric.WithUnit(v127.HTTPServerRequestDurationUnit), + metric.WithDescription(v127.HTTPServerRequestDurationDescription), + kotelconfig.TimeBucketsOpt) + + // as 1.27 AND still on 1.29, this is an "experimental" metric, tha must be a histogram: + // http.servers.response.body.size + // that must be Content-Length or compressed size. + // https://opentelemetry.io/docs/specs/semconv/http/http-metrics/#metric-httpserverresponsebodysize + m.size, _ = meter.Int64Histogram(v127.HTTPServerResponseBodySizeName, + metric.WithUnit(v127.HTTPServerResponseBodySizeUnit), + metric.WithDescription(v127.HTTPServerResponseBodySizeDescription), + kotelconfig.SizeBucketsOpt) + + // tracking request body size would mean rely on input 'Content-Length' header, or + // wrapping the reader to count the body size, that might involve checking if is a + // hijacked connection, etc ... since is optional and experimental too + // is left as further improvement. +} diff --git a/http/server/server.go b/http/server/server.go index ba56235..8924f89 100644 --- a/http/server/server.go +++ b/http/server/server.go @@ -40,7 +40,7 @@ func (h *trackingHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) { r = r.WithContext(t.ctx) if h.metrics != nil || h.traces != nil { - rw = newTrackingResponseWriter(rw, t, h.reportHeaders, func(c net.Conn, err error) (net.Conn, error) { + rw = newTrackingResponseWriter(rw, t, h.reportHeaders, func(c net.Conn, _ error) (net.Conn, error) { t.Finish() h.traces.end(t) h.metrics.report(t, r) @@ -81,7 +81,7 @@ func NewTrackingHandler(next http.Handler) http.Handler { } } - m = newMetricsHTTP(s.Meter(), metricsAttrs) + m = newMetricsHTTP(s.Meter(), metricsAttrs, gCfg.SemConv) } var t *tracesHTTP diff --git a/lura/backend.go b/lura/backend.go index a995d2c..82c3b1e 100644 --- a/lura/backend.go +++ b/lura/backend.go @@ -52,6 +52,11 @@ func InstrumentedHTTPClientFactory(clientFactory transport.HTTPClientFactory, return clientFactory } + strictSemConv := "" + if g := otelCfg.GlobalOpts(); g != nil { + strictSemConv = g.SemConv + } + opts := otelCfg.BackendOpts(cfg) if !opts.Enabled() { return clientFactory @@ -99,6 +104,7 @@ func InstrumentedHTTPClientFactory(clientFactory transport.HTTPClientFactory, ReadPayload: opts.Metrics.ReadPayload, DetailedConnection: opts.Metrics.DetailedConnection, FixedAttributes: metricAttrs, + SemConv: strictSemConv, }, TracesOpts: clienthttp.TransportTracesOptions{ RoundTrip: opts.Traces.RoundTrip, diff --git a/router/gin/endpoint.go b/router/gin/endpoint.go index 382864d..e8430aa 100644 --- a/router/gin/endpoint.go +++ b/router/gin/endpoint.go @@ -27,15 +27,15 @@ func New(hf krakendgin.HandlerFactory) krakendgin.HandlerFactory { var metricsAttrs []attribute.KeyValue var tracesAttrs []attribute.KeyValue - cfgExtra, err := kotelconfig.LuraExtraCfg(cfg.ExtraConfig) - if err == nil && cfgExtra.Layers != nil && cfgExtra.Layers.Global != nil { - for _, kv := range cfgExtra.Layers.Global.MetricsStaticAttributes { + cfgExtra, err := kotelconfig.LuraLayerExtraCfg(cfg.ExtraConfig) + if err == nil && cfgExtra.Global != nil { + for _, kv := range cfgExtra.Global.MetricsStaticAttributes { if len(kv.Key) > 0 && len(kv.Value) > 0 { metricsAttrs = append(metricsAttrs, attribute.String(kv.Key, kv.Value)) } } - for _, kv := range cfgExtra.Layers.Global.TracesStaticAttributes { + for _, kv := range cfgExtra.Global.TracesStaticAttributes { if len(kv.Key) > 0 && len(kv.Value) > 0 { tracesAttrs = append(tracesAttrs, attribute.String(kv.Key, kv.Value)) }