-
Notifications
You must be signed in to change notification settings - Fork 250
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
database_observability: report health of component and collectors #2392
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ import ( | |
"github.com/grafana/loki/v3/pkg/logproto" | ||
"github.com/hashicorp/golang-lru/v2/expirable" | ||
"github.com/prometheus/common/model" | ||
"go.uber.org/atomic" | ||
|
||
"github.com/grafana/alloy/internal/component/common/loki" | ||
"github.com/grafana/alloy/internal/component/database_observability" | ||
|
@@ -66,10 +67,10 @@ type SchemaTable struct { | |
// TODO(cristian): allow configuring cache size (currently unlimited). | ||
cache *expirable.LRU[string, tableInfo] | ||
|
||
logger log.Logger | ||
|
||
ctx context.Context | ||
cancel context.CancelFunc | ||
logger log.Logger | ||
running *atomic.Bool | ||
ctx context.Context | ||
cancel context.CancelFunc | ||
} | ||
|
||
type tableInfo struct { | ||
|
@@ -86,18 +87,29 @@ func NewSchemaTable(args SchemaTableArguments) (*SchemaTable, error) { | |
collectInterval: args.CollectInterval, | ||
entryHandler: args.EntryHandler, | ||
cache: expirable.NewLRU[string, tableInfo](0, nil, args.CacheTTL), | ||
logger: args.Logger, | ||
logger: log.With(args.Logger, "collector", "SchemaTable"), | ||
running: &atomic.Bool{}, | ||
}, nil | ||
} | ||
|
||
func (c *SchemaTable) Name() string { | ||
return "SchemaTable" | ||
} | ||
|
||
func (c *SchemaTable) Start(ctx context.Context) error { | ||
level.Debug(c.logger).Log("msg", "SchemaTable collector started") | ||
|
||
c.running.Store(true) | ||
ctx, cancel := context.WithCancel(ctx) | ||
c.ctx = ctx | ||
c.cancel = cancel | ||
|
||
go func() { | ||
defer func() { | ||
c.Stop() | ||
c.running.Store(false) | ||
}() | ||
|
||
ticker := time.NewTicker(c.collectInterval) | ||
|
||
for { | ||
|
@@ -119,6 +131,10 @@ func (c *SchemaTable) Start(ctx context.Context) error { | |
return nil | ||
} | ||
|
||
func (c *SchemaTable) Stopped() bool { | ||
return !c.running.Load() | ||
} | ||
|
||
// Stop should be kept idempotent | ||
func (c *SchemaTable) Stop() { | ||
c.cancel() | ||
|
@@ -155,6 +171,11 @@ func (c *SchemaTable) extractSchema(ctx context.Context) error { | |
} | ||
} | ||
|
||
if len(schemas) == 0 { | ||
level.Info(c.logger).Log("msg", "no schema detected from information_schema.schemata") | ||
return nil | ||
} | ||
|
||
Comment on lines +174 to +178
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Drive-by: log if no schema is detected. |
||
tables := []tableInfo{} | ||
|
||
for _, schema := range schemas { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ import ( | |
"github.com/prometheus/client_golang/prometheus" | ||
"github.com/prometheus/client_golang/prometheus/promhttp" | ||
"github.com/prometheus/common/model" | ||
"go.uber.org/atomic" | ||
|
||
"github.com/grafana/alloy/internal/component" | ||
"github.com/grafana/alloy/internal/component/common/loki" | ||
|
@@ -79,12 +80,15 @@ type Exports struct { | |
} | ||
|
||
var ( | ||
_ component.Component = (*Component)(nil) | ||
_ http_service.Component = (*Component)(nil) | ||
_ component.Component = (*Component)(nil) | ||
_ http_service.Component = (*Component)(nil) | ||
_ component.HealthComponent = (*Component)(nil) | ||
) | ||
|
||
type Collector interface { | ||
Name() string | ||
Start(context.Context) error | ||
Stopped() bool | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In the future, you might have collectors that can be considered unhealthy but are still running. A different approach to support this would be to have a CurrentHealth function in the collector interface that returns the health object. Then you would not need the healthErr attribute anymore; you would just call CurrentHealth on all the collectors in the CurrentHealth function of the component. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, that's a great point. I wanted to start simple for now, as collectors are not resilient at all anyway (they'll stop as soon as any error is hit). I agree that in the future we might want to delegate the logic to the collectors themselves. |
||
Stop() | ||
} | ||
|
||
|
@@ -98,6 +102,7 @@ type Component struct { | |
baseTarget discovery.Target | ||
collectors []Collector | ||
dbConnection *sql.DB | ||
healthErr *atomic.String | ||
} | ||
|
||
func New(opts component.Options, args Arguments) (*Component, error) { | ||
|
@@ -107,6 +112,7 @@ func New(opts component.Options, args Arguments) (*Component, error) { | |
receivers: args.ForwardTo, | ||
handler: loki.NewLogsReceiver(), | ||
registry: prometheus.NewRegistry(), | ||
healthErr: atomic.NewString(""), | ||
} | ||
|
||
baseTarget, err := c.getBaseTarget() | ||
|
@@ -184,6 +190,16 @@ func (c *Component) Update(args component.Arguments) error { | |
|
||
c.args = args.(Arguments) | ||
|
||
if err := c.startCollectors(); err != nil { | ||
c.healthErr.Store(err.Error()) | ||
return err | ||
} | ||
|
||
c.healthErr.Store("") | ||
return nil | ||
} | ||
|
||
func (c *Component) startCollectors() error { | ||
dbConnection, err := sql.Open("mysql", formatDSN(string(c.args.DataSourceName), "parseTime=true")) | ||
if err != nil { | ||
return err | ||
|
@@ -254,6 +270,40 @@ func (c *Component) Handler() http.Handler { | |
return promhttp.HandlerFor(c.registry, promhttp.HandlerOpts{}) | ||
} | ||
|
||
func (c *Component) CurrentHealth() component.Health { | ||
if err := c.healthErr.Load(); err != "" { | ||
return component.Health{ | ||
Health: component.HealthTypeUnhealthy, | ||
Message: err, | ||
UpdateTime: time.Now(), | ||
} | ||
} | ||
|
||
var unhealthyCollectors []string | ||
|
||
c.mut.RLock() | ||
for _, collector := range c.collectors { | ||
if collector.Stopped() { | ||
unhealthyCollectors = append(unhealthyCollectors, collector.Name()) | ||
} | ||
} | ||
c.mut.RUnlock() | ||
|
||
if len(unhealthyCollectors) > 0 { | ||
return component.Health{ | ||
Health: component.HealthTypeUnhealthy, | ||
Message: "One or more collectors are unhealthy: [" + strings.Join(unhealthyCollectors, ", ") + "]", | ||
UpdateTime: time.Now(), | ||
} | ||
} | ||
|
||
return component.Health{ | ||
Health: component.HealthTypeHealthy, | ||
Message: "All collectors are healthy", | ||
UpdateTime: time.Now(), | ||
} | ||
} | ||
|
||
// instanceKey returns network(hostname:port)/dbname of the MySQL server. | ||
// This is the same key as used by the mysqld_exporter integration. | ||
func (c *Component) instanceKey() string { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Drive-by: remove noisy info log