Skip to content

Commit

Permalink
Consul registry performance improvements (#928)
Browse files Browse the repository at this point in the history
Two changes:
1. Limit the initial list of Health Checks to those with the configured Tag Prefix
2. Reduce the amount of loops done in passingServices
  • Loading branch information
ddreier authored Sep 2, 2024
1 parent ced0c9a commit 084a3cd
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 61 deletions.
90 changes: 30 additions & 60 deletions registry/consul/passing.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,28 +7,48 @@ import (
"github.com/hashicorp/consul/api"
)

// passingServices takes a list of Consul Health Checks and only returns ones where the overall health of
// the Service Instance is passing. This includes the health of the Node that the Services Instance runs on.
func passingServices(checks []*api.HealthCheck, status []string, strict bool) []*api.HealthCheck {
var p []*api.HealthCheck

CHECKS:
for _, svc := range checks {
if !isServiceCheck(svc) {
continue
}
total, passing := countChecks(svc, checks, status)
var total, passing int

for _, c := range checks {
if svc.Node == c.Node {
if svc.ServiceID == c.ServiceID {
total++
if hasStatus(c, status) {
passing++
}
}
if c.CheckID == "serfHealth" && c.Status == "critical" {
log.Printf("[DEBUG] consul: Skipping service %q since agent on node %q is down: %s", c.ServiceID, c.Node, c.Output)
continue CHECKS
}
if c.CheckID == "_node_maintenance" {
log.Printf("[DEBUG] consul: Skipping service %q since node %q is in maintenance mode: %s", c.ServiceID, c.Node, c.Output)
continue CHECKS
}
if c.CheckID == "_service_maintenance:"+svc.ServiceID && c.Status == "critical" {
log.Printf("[DEBUG] consul: Skipping service %q since it is in maintenance mode: %s", svc.ServiceID, c.Output)
continue CHECKS
}
}
}

if passing == 0 {
continue
}
if strict && total != passing {
continue
}
if isAgentCritical(svc, checks) {
continue
}
if isNodeInMaintenance(svc, checks) {
continue
}
if isServiceInMaintenance(svc, checks) {
continue
}

p = append(p, svc)
}

Expand All @@ -43,56 +63,6 @@ func isServiceCheck(c *api.HealthCheck) bool {
!strings.HasPrefix(c.CheckID, "_service_maintenance:")
}

// isAgentCritical returns true if the agent on the node on which the service
// runs is critical.
func isAgentCritical(svc *api.HealthCheck, checks []*api.HealthCheck) bool {
for _, c := range checks {
if svc.Node == c.Node && c.CheckID == "serfHealth" && c.Status == "critical" {
log.Printf("[DEBUG] consul: Skipping service %q since agent on node %q is down: %s", c.ServiceID, c.Node, c.Output)
return true
}
}
return false
}

// isNodeInMaintenance returns true if the node on which the service runs is in
// maintenance mode.
func isNodeInMaintenance(svc *api.HealthCheck, checks []*api.HealthCheck) bool {
for _, c := range checks {
if svc.Node == c.Node && c.CheckID == "_node_maintenance" {
log.Printf("[DEBUG] consul: Skipping service %q since node %q is in maintenance mode: %s", c.ServiceID, c.Node, c.Output)
return true
}
}
return false
}

// isServiceInMaintenance returns true if the service instance is in
// maintenance mode.
func isServiceInMaintenance(svc *api.HealthCheck, checks []*api.HealthCheck) bool {
for _, c := range checks {
if svc.Node == c.Node && c.CheckID == "_service_maintenance:"+svc.ServiceID && c.Status == "critical" {
log.Printf("[DEBUG] consul: Skipping service %q since it is in maintenance mode: %s", svc.ServiceID, c.Output)
return true
}
}
return false
}

// countChecks counts the number of service checks exist for a given service
// and how many of them are passing.
func countChecks(svc *api.HealthCheck, checks []*api.HealthCheck, status []string) (total int, passing int) {
for _, c := range checks {
if svc.Node == c.Node && svc.ServiceID == c.ServiceID {
total++
if hasStatus(c, status) {
passing++
}
}
}
return
}

// hasStatus returns true if the health check status is one of the given
// values.
func hasStatus(c *api.HealthCheck, status []string) bool {
Expand Down
24 changes: 23 additions & 1 deletion registry/consul/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,11 @@ func (w *ServiceMonitor) Watch(updates chan string) {
}
log.Printf("[DEBUG] consul: Health changed to #%d", meta.LastIndex)

prefixedChecks := checksWithTagPrefix(w.config.TagPrefix, checks)
log.Printf("[DEBUG] consul: only %d of %d checks have the configured tag prefix", len(prefixedChecks), len(checks))

// determine which services have passing health checks
passing := passingServices(checks, w.config.ServiceStatus, w.strict)
passing := passingServices(prefixedChecks, w.config.ServiceStatus, w.strict)

// build the config for the passing services
updates <- w.makeConfig(passing)
Expand Down Expand Up @@ -138,3 +141,22 @@ func (w *ServiceMonitor) serviceConfig(name string, passing map[string]bool) (co
}
return config
}

// checksWithTagPrefix filters a list of Consul Health Checks to only the Checks with a Tag that begins with the prefix
func checksWithTagPrefix(prefix string, checks api.HealthChecks) api.HealthChecks {
checksWithPrefix := make(api.HealthChecks, 0, len(checks))
for _, c := range checks {
if c.CheckID == "serfHealth" || c.CheckID == "_node_maintenance" || strings.HasPrefix(c.CheckID, "_service_maintenance") {
checksWithPrefix = append(checksWithPrefix, c)
continue
}
for _, t := range c.ServiceTags {
if strings.HasPrefix(t, prefix) {
checksWithPrefix = append(checksWithPrefix, c)
break
}
}
}

return checksWithPrefix
}

0 comments on commit 084a3cd

Please sign in to comment.