Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add cpu/memory profiling draft #796

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 23 additions & 3 deletions cmd/api/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ import (
"context"
"net/http"
"os"
"os/signal"
"strings"
"syscall"
"time"

"github.com/go-chi/chi/v5"
Expand Down Expand Up @@ -204,9 +206,27 @@ func (s *Service) Start() {
MaxHeaderBytes: 1 << 20,
}

s.logger.Error("shutting down",
"error", server.ListenAndServe(),
)
serverDone := make(chan error)
go func() {
serverDone <- server.ListenAndServe()
}()

// Trap Ctrl+C and SIGTERM; the latter is issued by Kubernetes to request a shutdown.
signalChan := make(chan os.Signal, 1)
signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM)
defer signal.Stop(signalChan) // Stop catching Ctrl+C signals.

select {
case err := <-serverDone:
s.logger.Error("api server shutting down", "err", err)
return
case <-signalChan:
s.logger.Info("received interrupt, shutting down")
// Let the default handler handle ctrl+C so people can kill the process in a hurry.
signal.Stop(signalChan)
// We'll call a.cleanup() via a defer.
return
}
}

// cleanup gracefully shuts down the service.
Expand Down
54 changes: 54 additions & 0 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"runtime/pprof"
"sync"
"syscall"
"time"

"github.com/spf13/cobra"

Expand Down Expand Up @@ -55,6 +56,22 @@ func rootMain(cmd *cobra.Command, args []string) {
}
logger := common.RootLogger()

// Initialize cpu profiling.
if cfg.Metrics.CpuProfile != "" {
logger.Warn("CPU profiling is enabled, this will impact performance (negatively)", "dest", cfg.Metrics.CpuProfile)
cpuF, err := os.Create(cfg.Metrics.CpuProfile)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we instead add a `pprof.address` (or similar) config parameter? Then, if it is set, we start the pprof server on the provided address.

That is more generic and covers more profiling cases, since the user has more control over the profiling setup. That's the way we have it done in other projects, for example web3-gateway and oasis-core, and it tends to work pretty well in practice.

if err != nil {
logger.Error("failed to initialize cpu profile", "err", err)
os.Exit(1)
}
defer cpuF.Close()
if err := pprof.StartCPUProfile(cpuF); err != nil {
logger.Error("could not start cpu profile", "err", err)
os.Exit(1)
}
defer pprof.StopCPUProfile()
}

// Initialize services.
var wg sync.WaitGroup
runInWG := func(s Service) {
Expand Down Expand Up @@ -83,9 +100,46 @@ func rootMain(cmd *cobra.Command, args []string) {
}

logger.Info("started all services")
// Collect memory profile.
if cfg.Metrics.MemProfile != "" {
go collectMemProfiles(cfg.Metrics.MemProfile, logger)
}
wg.Wait()
}

// collectMemProfiles periodically writes a heap profile to dest until the
// process receives SIGINT or SIGTERM, at which point it logs and returns.
//
// Each snapshot is produced by logMemProfile and overwrites the previous one,
// so dest always holds the most recent profile. The function blocks; callers
// are expected to run it in its own goroutine.
func collectMemProfiles(dest string, logger *log.Logger) {
	// interval is the period between two consecutive heap snapshots.
	const interval = 5 * time.Minute

	signalChan := make(chan os.Signal, 1)
	signal.Notify(signalChan, syscall.SIGINT, syscall.SIGTERM)
	// Stop relaying signals to this channel once we return, so that a second
	// Ctrl+C is no longer intercepted here and people can kill the process in
	// a hurry.
	defer signal.Stop(signalChan)

	// A single ticker is reused for the whole lifetime of the loop instead of
	// allocating a fresh timer on every iteration.
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			logMemProfile(dest, logger)
		case <-signalChan:
			logger.Info("received interrupt, shutting down")
			return
		}
	}
}

// logMemProfile writes a snapshot of the heap profile to dest, creating the
// file or truncating it if it already exists.
//
// Profiling is a diagnostic aid, so failures are logged and the function
// returns without terminating the process. Calling os.Exit from here would
// kill every running service and skip all deferred cleanup in the program
// (deferred functions are not run by os.Exit), including the flushing of any
// CPU profile that is in progress.
func logMemProfile(dest string, logger *log.Logger) {
	logger.Info("Writing memory profile", "dest", dest)
	memF, err := os.Create(dest)
	if err != nil {
		logger.Error("failed to create memory profile", "err", err)
		return
	}
	defer func() {
		// A failed Close can mean the profile was not fully persisted.
		if err := memF.Close(); err != nil {
			logger.Error("failed to close memory profile", "err", err)
		}
	}()
	// NOTE: the heap profile reflects the most recently completed garbage
	// collection; call runtime.GC() here first if up-to-date numbers are
	// required.
	if err := pprof.WriteHeapProfile(memF); err != nil {
		logger.Error("failed to write memory profile", "err", err)
	}
}

// Execute spawns the main entry point after handling the config file.
func Execute() {
// Debug hook. If we receive SIGUSR1, dump all goroutines.
Expand Down
2 changes: 2 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,8 @@ func (cfg *LogConfig) Validate() error {
// MetricsConfig contains the metrics configuration.
type MetricsConfig struct {
	// PullEndpoint is loaded from the "pull_endpoint" config key.
	// NOTE(review): presumably the address metrics are served on for
	// scraping (e.g. "localhost:8009") — confirm against its consumer.
	PullEndpoint string `koanf:"pull_endpoint"`
	// CpuProfile is loaded from the "cpu_prof" config key. When non-empty it
	// is used as the path of the file the CPU profile is written to; an empty
	// value leaves CPU profiling disabled.
	CpuProfile   string `koanf:"cpu_prof"`
	// MemProfile is loaded from the "mem_prof" config key. When non-empty it
	// is used as the path of the file heap profiles are periodically written
	// to; an empty value leaves memory profiling disabled.
	MemProfile   string `koanf:"mem_prof"`
}

// Validate validates the metrics configuration.
Expand Down
2 changes: 2 additions & 0 deletions config/local-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,5 @@ log:

metrics:
pull_endpoint: localhost:8009
cpu_prof: cpu.prof
mem_prof: mem.prof
Loading