diff --git a/cmd/api/api.go b/cmd/api/api.go
index 21f0c98df..4923016e3 100644
--- a/cmd/api/api.go
+++ b/cmd/api/api.go
@@ -5,7 +5,9 @@ import (
 	"context"
 	"net/http"
 	"os"
+	"os/signal"
 	"strings"
+	"syscall"
 	"time"
 
 	"github.com/go-chi/chi/v5"
@@ -204,9 +206,34 @@ func (s *Service) Start() {
 		MaxHeaderBytes: 1 << 20,
 	}
 
-	s.logger.Error("shutting down",
-		"error", server.ListenAndServe(),
-	)
+	// Buffered so the serving goroutine can always hand over its result and
+	// exit, even when we have already returned because of a signal.
+	serverDone := make(chan error, 1)
+	go func() {
+		serverDone <- server.ListenAndServe()
+	}()
+
+	// Trap Ctrl+C and SIGTERM; the latter is issued by Kubernetes to request a shutdown.
+	signalChan := make(chan os.Signal, 1)
+	signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM)
+	defer signal.Stop(signalChan) // Stop catching Ctrl+C signals.
+
+	select {
+	case err := <-serverDone:
+		s.logger.Error("api server shutting down", "err", err)
+	case <-signalChan:
+		s.logger.Info("received interrupt, shutting down")
+		// Let the default handler handle ctrl+C so people can kill the process in a hurry.
+		signal.Stop(signalChan)
+		// Stop accepting connections and give in-flight requests a bounded amount
+		// of time to finish; otherwise the server would keep serving while the
+		// caller tears the service down.
+		shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+		defer cancel()
+		if err := server.Shutdown(shutdownCtx); err != nil {
+			s.logger.Error("api server did not shut down cleanly", "err", err)
+		}
+	}
 }
 
 // cleanup gracefully shuts down the service.
diff --git a/cmd/root.go b/cmd/root.go
index 6dbfbdd4e..c415236da 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -9,6 +9,7 @@ import (
 	"runtime/pprof"
 	"sync"
 	"syscall"
+	"time"
 
 	"github.com/spf13/cobra"
 
@@ -55,6 +56,22 @@ func rootMain(cmd *cobra.Command, args []string) {
 	}
 	logger := common.RootLogger()
 
+	// Initialize cpu profiling.
+	if cfg.Metrics.CpuProfile != "" {
+		logger.Warn("CPU profiling is enabled, this will impact performance (negatively)", "dest", cfg.Metrics.CpuProfile)
+		cpuF, err := os.Create(cfg.Metrics.CpuProfile)
+		if err != nil {
+			logger.Error("failed to initialize cpu profile", "err", err)
+			os.Exit(1)
+		}
+		defer cpuF.Close()
+		if err := pprof.StartCPUProfile(cpuF); err != nil {
+			logger.Error("could not start cpu profile", "err", err)
+			os.Exit(1)
+		}
+		defer pprof.StopCPUProfile()
+	}
+
 	// Initialize services.
 	var wg sync.WaitGroup
 	runInWG := func(s Service) {
@@ -83,9 +100,59 @@ func rootMain(cmd *cobra.Command, args []string) {
 	}
 	logger.Info("started all services")
 
+	// Collect memory profile.
+	if cfg.Metrics.MemProfile != "" {
+		go collectMemProfiles(cfg.Metrics.MemProfile, logger)
+	}
 	wg.Wait()
 }
 
+// collectMemProfiles writes a heap profile to dest every 5 minutes until the
+// process receives SIGINT or SIGTERM. Each write replaces the previous contents
+// of dest. It blocks until then, so callers run it in its own goroutine.
+func collectMemProfiles(dest string, logger *log.Logger) {
+	signalChan := make(chan os.Signal, 1)
+	signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM)
+	// Hand Ctrl+C back to the default handler on return so people can still kill
+	// the process in a hurry.
+	defer signal.Stop(signalChan)
+
+	// A single ticker instead of a fresh time.After timer per loop iteration.
+	ticker := time.NewTicker(5 * time.Minute)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ticker.C:
+			logMemProfile(dest, logger)
+		case <-signalChan:
+			logger.Info("received interrupt, shutting down")
+			return
+		}
+	}
+}
+
+// logMemProfile writes the current heap profile to dest, truncating any
+// previous profile. Failures are logged but do not stop the process: calling
+// os.Exit from this background goroutine would skip rootMain's deferred
+// pprof.StopCPUProfile and kill the services over a diagnostics problem.
+func logMemProfile(dest string, logger *log.Logger) {
+	logger.Info("Writing memory profile", "dest", dest)
+	memF, err := os.Create(dest)
+	if err != nil {
+		logger.Error("failed to create memory profile", "err", err)
+		return
+	}
+	// runtime.GC() // would give up-to-date statistics, at the cost of a forced collection
+	if err := pprof.WriteHeapProfile(memF); err != nil {
+		logger.Error("failed to write memory profile", "err", err)
+	}
+	// Closed explicitly rather than deferred so that a failing Close is reported.
+	if err := memF.Close(); err != nil {
+		logger.Error("failed to close memory profile", "err", err)
+	}
+}
+
 // Execute spawns the main entry point after handing the config file.
 func Execute() {
 	// Debug hook. If we receive SIGUSR1, dump all goroutines.
diff --git a/config/config.go b/config/config.go
index dab814cb4..01d0da674 100644
--- a/config/config.go
+++ b/config/config.go
@@ -704,6 +704,11 @@ func (cfg *LogConfig) Validate() error {
 // MetricsConfig contains the metrics configuration.
 type MetricsConfig struct {
 	PullEndpoint string `koanf:"pull_endpoint"`
+
+	// CpuProfile and MemProfile name the files that CPU and heap profiles are
+	// written to. Leaving one empty disables the respective profile.
+	CpuProfile string `koanf:"cpu_prof"`
+	MemProfile string `koanf:"mem_prof"`
 }
 
 // Validate validates the metrics configuration.
diff --git a/config/local-dev.yml b/config/local-dev.yml index 97d1e17d8..9c34ab248 100644 --- a/config/local-dev.yml +++ b/config/local-dev.yml @@ -40,3 +40,5 @@ log: metrics: pull_endpoint: localhost:8009 + cpu_prof: cpu.prof + mem_prof: mem.prof