Adding podMem (#155)
* Adding podMem

Adding podMem, similar to the existing podCPU, to our CSV/result data

Signed-off-by: Joe Talerico aka rook <joe.talerico@gmail.com>
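
For context, a minimal sketch (not part of this commit) of how the new pod-memory data can be consumed once TopPodMem has filled PodValues.MemResults; nodeInfo, promConn, start, and end are placeholders for values the caller already holds, as in cmd/k8s-netperf/k8s-netperf.go below:

// Illustrative sketch only: fetch the top memory consumers for one node and
// print them. nodeInfo, promConn, start and end stand in for values the
// caller already has (see the k8s-netperf.go hunk in this diff).
podMem, ok := metrics.TopPodMem(nodeInfo, promConn, start, end)
if ok {
	for _, p := range podMem.MemResults {
		fmt.Printf("pod %s averaged %f bytes RSS\n", p.Name, p.Value)
	}
}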

* Fixing mem query, adding vswitchd

Signed-off-by: Joe Talerico aka rook <joe.talerico@gmail.com>

* Adding vswitch data

Signed-off-by: Joe Talerico aka rook <joe.talerico@gmail.com>
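
The vswitchd data is collected with two per-node Prometheus queries; the sketch below mirrors the PromQL that pkg/metrics/system.go builds in this diff, with node.NodeName as the only input. Both series are averaged over the sample window by the existing avg helper before being stored on NodeCPU.

// Sketch of the PromQL built in VSwitchCPU and VSwitchMem (see
// pkg/metrics/system.go below); node.NodeName selects a single node.
cpuQuery := fmt.Sprintf("irate(container_cpu_usage_seconds_total{id=~\"/system.slice/ovs-vswitchd.service\", node=~\"%s\"}[2m])*100", node.NodeName)
memQuery := fmt.Sprintf("container_memory_rss{id=~\"/system.slice/ovs-vswitchd.service\", node=~\"%s\"}", node.NodeName)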

* Update pkg/metrics/system.go

Co-authored-by: Raúl Sevilla <rsevilla@redhat.com>

* Update pkg/metrics/system.go

Co-authored-by: Raúl Sevilla <rsevilla@redhat.com>

---------

Signed-off-by: Joe Talerico aka rook <joe.talerico@gmail.com>
Co-authored-by: Raúl Sevilla <rsevilla@redhat.com>
jtaleric and rsevilla87 authored Nov 18, 2024
1 parent 3771765 commit bb7c578
Showing 4 changed files with 208 additions and 69 deletions.
7 changes: 7 additions & 0 deletions cmd/k8s-netperf/k8s-netperf.go
@@ -257,8 +257,14 @@ var rootCmd = &cobra.Command{
if len(npr.ClientNodeInfo.NodeName) > 0 && len(npr.ServerNodeInfo.NodeName) > 0 {
sr.Results[i].ClientMetrics, _ = metrics.QueryNodeCPU(npr.ClientNodeInfo, pcon, npr.StartTime, npr.EndTime)
sr.Results[i].ServerMetrics, _ = metrics.QueryNodeCPU(npr.ServerNodeInfo, pcon, npr.StartTime, npr.EndTime)
metrics.VSwitchCPU(npr.ClientNodeInfo, pcon, npr.StartTime, npr.EndTime, &sr.Results[i].ClientMetrics)
metrics.VSwitchMem(npr.ClientNodeInfo, pcon, npr.StartTime, npr.EndTime, &sr.Results[i].ClientMetrics)
metrics.VSwitchCPU(npr.ServerNodeInfo, pcon, npr.StartTime, npr.EndTime, &sr.Results[i].ServerMetrics)
metrics.VSwitchMem(npr.ServerNodeInfo, pcon, npr.StartTime, npr.EndTime, &sr.Results[i].ServerMetrics)
sr.Results[i].ClientPodCPU, _ = metrics.TopPodCPU(npr.ClientNodeInfo, pcon, npr.StartTime, npr.EndTime)
sr.Results[i].ServerPodCPU, _ = metrics.TopPodCPU(npr.ServerNodeInfo, pcon, npr.StartTime, npr.EndTime)
sr.Results[i].ClientPodMem, _ = metrics.TopPodMem(npr.ClientNodeInfo, pcon, npr.StartTime, npr.EndTime)
sr.Results[i].ServerPodMem, _ = metrics.TopPodMem(npr.ServerNodeInfo, pcon, npr.StartTime, npr.EndTime)
}
}
}
@@ -319,6 +325,7 @@ var rootCmd = &cobra.Command{
if showMetrics {
result.ShowNodeCPU(sr)
result.ShowPodCPU(sr)
result.ShowPodMem(sr)
}
} else {
err = archive.WriteJSONResult(sr)
171 changes: 113 additions & 58 deletions pkg/archive/archive.go
@@ -19,36 +19,42 @@ const ltcyMetric = "usec"

// Doc struct of the JSON document to be indexed
type Doc struct {
UUID string `json:"uuid"`
Timestamp time.Time `json:"timestamp"`
HostNetwork bool `json:"hostNetwork"`
Driver string `json:"driver"`
Parallelism int `json:"parallelism"`
Profile string `json:"profile"`
Duration int `json:"duration"`
Service bool `json:"service"`
Local bool `json:"local"`
Virt bool `json:"virt"`
AcrossAZ bool `json:"acrossAZ"`
Samples int `json:"samples"`
Messagesize int `json:"messageSize"`
Burst int `json:"burst"`
Throughput float64 `json:"throughput"`
Latency float64 `json:"latency"`
TputMetric string `json:"tputMetric"`
LtcyMetric string `json:"ltcyMetric"`
TCPRetransmit float64 `json:"tcpRetransmits"`
UDPLossPercent float64 `json:"udpLossPercent"`
ToolVersion string `json:"toolVersion"`
ToolGitCommit string `json:"toolGitCommit"`
Metadata result.Metadata `json:"metadata"`
ServerNodeCPU metrics.NodeCPU `json:"serverCPU"`
ServerPodCPU []metrics.PodCPU `json:"serverPods"`
ClientNodeCPU metrics.NodeCPU `json:"clientCPU"`
ClientPodCPU []metrics.PodCPU `json:"clientPods"`
Confidence []float64 `json:"confidence"`
ServerNodeInfo metrics.NodeInfo `json:"serverNodeInfo"`
ClientNodeInfo metrics.NodeInfo `json:"clientNodeInfo"`
UUID string `json:"uuid"`
Timestamp time.Time `json:"timestamp"`
HostNetwork bool `json:"hostNetwork"`
Driver string `json:"driver"`
Parallelism int `json:"parallelism"`
Profile string `json:"profile"`
Duration int `json:"duration"`
Service bool `json:"service"`
Local bool `json:"local"`
Virt bool `json:"virt"`
AcrossAZ bool `json:"acrossAZ"`
Samples int `json:"samples"`
Messagesize int `json:"messageSize"`
Burst int `json:"burst"`
Throughput float64 `json:"throughput"`
Latency float64 `json:"latency"`
TputMetric string `json:"tputMetric"`
LtcyMetric string `json:"ltcyMetric"`
TCPRetransmit float64 `json:"tcpRetransmits"`
UDPLossPercent float64 `json:"udpLossPercent"`
ToolVersion string `json:"toolVersion"`
ToolGitCommit string `json:"toolGitCommit"`
Metadata result.Metadata `json:"metadata"`
ServerNodeCPU metrics.NodeCPU `json:"serverCPU"`
ServerPodCPU []metrics.PodCPU `json:"serverPods"`
ServerPodMem []metrics.PodMem `json:"serverPodsMem"`
ClientNodeCPU metrics.NodeCPU `json:"clientCPU"`
ClientPodCPU []metrics.PodCPU `json:"clientPods"`
ClientPodMem []metrics.PodMem `json:"clientPodsMem"`
Confidence []float64 `json:"confidence"`
ServerNodeInfo metrics.NodeInfo `json:"serverNodeInfo"`
ClientNodeInfo metrics.NodeInfo `json:"clientNodeInfo"`
ServerVSwitchCpu float64 `json:"serverVswitchCpu"`
ServerVSwitchMem float64 `json:"serverVswitchMem"`
ClientVSwitchCpu float64 `json:"clientVswitchCpu"`
ClientVSwitchMem float64 `json:"clientVswitchMem"`
}

// Connect returns a client connected to the desired cluster.
@@ -89,31 +95,37 @@ func BuildDocs(sr result.ScenarioResults, uuid string) ([]interface{}, error) {
}
c := []float64{lo, hi}
d := Doc{
UUID: uuid,
Timestamp: time,
ToolVersion: sr.Version,
ToolGitCommit: sr.GitCommit,
Driver: r.Driver,
HostNetwork: r.HostNetwork,
Parallelism: r.Parallelism,
Profile: r.Profile,
Duration: r.Duration,
Virt: sr.Virt,
Samples: r.Samples,
Service: r.Service,
Messagesize: r.MessageSize,
Burst: r.Burst,
TputMetric: r.Metric,
LtcyMetric: ltcyMetric,
ServerNodeCPU: r.ServerMetrics,
ClientNodeCPU: r.ClientMetrics,
ServerPodCPU: r.ServerPodCPU.Results,
ClientPodCPU: r.ClientPodCPU.Results,
Metadata: sr.Metadata,
AcrossAZ: r.AcrossAZ,
Confidence: c,
ClientNodeInfo: r.ClientNodeInfo,
ServerNodeInfo: r.ServerNodeInfo,
UUID: uuid,
Timestamp: time,
ToolVersion: sr.Version,
ToolGitCommit: sr.GitCommit,
Driver: r.Driver,
HostNetwork: r.HostNetwork,
Parallelism: r.Parallelism,
Profile: r.Profile,
Duration: r.Duration,
Virt: sr.Virt,
Samples: r.Samples,
Service: r.Service,
Messagesize: r.MessageSize,
Burst: r.Burst,
TputMetric: r.Metric,
LtcyMetric: ltcyMetric,
ServerNodeCPU: r.ServerMetrics,
ClientNodeCPU: r.ClientMetrics,
ServerPodCPU: r.ServerPodCPU.Results,
ServerPodMem: r.ServerPodMem.MemResults,
ClientPodMem: r.ClientPodMem.MemResults,
ClientPodCPU: r.ClientPodCPU.Results,
ClientVSwitchCpu: r.ClientMetrics.VSwitchCPU,
ClientVSwitchMem: r.ClientMetrics.VSwitchMem,
ServerVSwitchCpu: r.ServerMetrics.VSwitchCPU,
ServerVSwitchMem: r.ServerMetrics.VSwitchMem,
Metadata: sr.Metadata,
AcrossAZ: r.AcrossAZ,
Confidence: c,
ClientNodeInfo: r.ClientNodeInfo,
ServerNodeInfo: r.ServerNodeInfo,
}
UDPLossPercent, e := result.Average(r.LossSummary)
if e != nil {
@@ -189,7 +201,7 @@ func commonCsvDataFields(row result.Data) []string {
}

// Writes all the metrics to the archive.
func writeArchive(cpuarchive, podarchive *csv.Writer, role string, row result.Data, podResults []metrics.PodCPU) error {
func writeArchive(vswitch, cpuarchive, podarchive, podmemarchive *csv.Writer, role string, row result.Data, podResults []metrics.PodCPU, podMem []metrics.PodMem) error {
roleFieldData := []string{role}
for _, pod := range podResults {
if err := podarchive.Write(append(append(roleFieldData,
@@ -200,11 +212,26 @@ func writeArchive(cpuarchive, podarchive *csv.Writer, role string, row result.Da
return fmt.Errorf("failed to write archive to file")
}
}
for _, pod := range podMem {
if err := podmemarchive.Write(append(append(roleFieldData,
commonCsvDataFields(row)...),
pod.Name,
fmt.Sprintf("%f", pod.Value),
)); err != nil {
return fmt.Errorf("failed to write archive to file")
}
}

cpu := row.ClientMetrics
if role == "Server" {
cpu = row.ServerMetrics
}
if err := vswitch.Write(append(append(roleFieldData,
commonCsvDataFields(row)...),
fmt.Sprintf("%f", cpu.VSwitchCPU),
fmt.Sprintf("%f", cpu.VSwitchMem))); err != nil {
return fmt.Errorf("failed to write archive to file")
}
if err := cpuarchive.Write(append(append(roleFieldData,
commonCsvDataFields(row)...),
fmt.Sprintf("%f", cpu.Idle),
@@ -223,6 +250,17 @@ func writeArchive(cpuarchive, podarchive *csv.Writer, role string, row result.Da
// WritePromCSVResult writes the prom data in CSV format
func WritePromCSVResult(r result.ScenarioResults) error {
d := time.Now().Unix()

vswitchfp, err := os.Create(fmt.Sprintf("vswitch-result-%d.csv", d))
if err != nil {
return fmt.Errorf("failed to open vswitch archive file")
}
defer vswitchfp.Close()
podmemfp, err := os.Create(fmt.Sprintf("podmem-result-%d.csv", d))
if err != nil {
return fmt.Errorf("failed to open pod mem archive file")
}
defer podmemfp.Close()
podfp, err := os.Create(fmt.Sprintf("podcpu-result-%d.csv", d))
if err != nil {
return fmt.Errorf("failed to open pod cpu archive file")
@@ -233,10 +271,15 @@ func WritePromCSVResult(r result.ScenarioResults) error {
return fmt.Errorf("failed to open cpu archive file")
}
defer cpufp.Close()
vswitch := csv.NewWriter(vswitchfp)
defer vswitch.Flush()
cpuarchive := csv.NewWriter(cpufp)
defer cpuarchive.Flush()
podarchive := csv.NewWriter(podfp)
defer podarchive.Flush()
podmemarchive := csv.NewWriter(podmemfp)
defer podmemarchive.Flush()

roleField := []string{"Role"}
cpudata := append(append(roleField,
commonCsvHeaderFields()...),
@@ -253,20 +296,32 @@ func WritePromCSVResult(r result.ScenarioResults) error {
"Pod Name",
"Utilization",
)
vswitchdata := append(append(roleField,
commonCsvHeaderFields()...),
"CPU Utilization",
"Memory Utilization",
)
if err := cpuarchive.Write(cpudata); err != nil {
return fmt.Errorf("failed to write cpu archive to file")
}
if err := podarchive.Write(poddata); err != nil {
return fmt.Errorf("failed to write pod archive to file")
}
if err := podmemarchive.Write(poddata); err != nil {
return fmt.Errorf("failed to write pod archive to file")
}
if err := vswitch.Write(vswitchdata); err != nil {
return fmt.Errorf("failed to write vswitch archive to file")
}
for _, row := range r.Results {
if err := writeArchive(cpuarchive, podarchive, "Client", row, row.ClientPodCPU.Results); err != nil {
if err := writeArchive(vswitch, cpuarchive, podarchive, podmemarchive, "Client", row, row.ClientPodCPU.Results, row.ClientPodMem.MemResults); err != nil {
return err
}
if err := writeArchive(cpuarchive, podarchive, "Server", row, row.ServerPodCPU.Results); err != nil {
if err := writeArchive(vswitch, cpuarchive, podarchive, podmemarchive, "Server", row, row.ServerPodCPU.Results, row.ServerPodMem.MemResults); err != nil {
return err
}
}

return nil
}
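
After this change a run writes two extra archives, vswitch-result-<unix>.csv and podmem-result-<unix>.csv, next to the existing podcpu-result-<unix>.csv. A small, hedged example of reading the pod-memory archive back with the standard library (the timestamp in the file name is illustrative):

package main

import (
	"encoding/csv"
	"fmt"
	"log"
	"os"
)

func main() {
	// Illustrative sketch only: read back the pod memory archive produced by
	// WritePromCSVResult. The timestamp in the file name is a placeholder.
	f, err := os.Open("podmem-result-1700000000.csv")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()
	rows, err := csv.NewReader(f).ReadAll()
	if err != nil {
		log.Fatal(err)
	}
	for i, row := range rows {
		if i == 0 {
			continue // header row written by WritePromCSVResult
		}
		fmt.Println(row)
	}
}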

83 changes: 72 additions & 11 deletions pkg/metrics/system.go
@@ -24,14 +24,16 @@ type NodeInfo struct {

// NodeCPU stores CPU information for a specific Node
type NodeCPU struct {
Idle float64 `json:"idleCPU"`
User float64 `json:"userCPU"`
Steal float64 `json:"stealCPU"`
System float64 `json:"systemCPU"`
Nice float64 `json:"niceCPU"`
Irq float64 `json:"irqCPU"`
Softirq float64 `json:"softCPU"`
Iowait float64 `json:"ioCPU"`
Idle float64 `json:"idleCPU"`
User float64 `json:"userCPU"`
Steal float64 `json:"stealCPU"`
System float64 `json:"systemCPU"`
Nice float64 `json:"niceCPU"`
Irq float64 `json:"irqCPU"`
Softirq float64 `json:"softCPU"`
Iowait float64 `json:"ioCPU"`
VSwitchCPU float64 `json:"vSwitchCPU"`
VSwitchMem float64 `json:"vSwitchMem"`
}

// PodCPU stores pod CPU
@@ -40,9 +42,15 @@ type PodCPU struct {
Value float64 `json:"cpuUsage"`
}

// PodMem stores pod memory usage
type PodMem struct {
Name string `json:"podName"`
Value float64 `json:"memUsage"`
}

// PodValues is a collection of PodCPU and PodMem results
type PodValues struct {
Results []PodCPU
Results []PodCPU
MemResults []PodMem
}

// PromConnect stores the prom information
@@ -166,10 +174,10 @@ func QueryNodeCPU(node NodeInfo, conn PromConnect, start time.Time, end time.Tim
return cpu, true
}

// TopPodCPU will return the top 5 CPU consumers for a specific node
// TopPodCPU will return the top 10 CPU consumers for a specific node
func TopPodCPU(node NodeInfo, conn PromConnect, start time.Time, end time.Time) (PodValues, bool) {
var pods PodValues
query := fmt.Sprintf("topk(5,sum(irate(container_cpu_usage_seconds_total{name!=\"\",instance=~\"%s:.*\"}[2m]) * 100) by (pod, namespace, instance))", node.IP)
query := fmt.Sprintf("topk(10,sum(irate(container_cpu_usage_seconds_total{name!=\"\",instance=~\"%s:.*\"}[2m]) * 100) by (pod, namespace, instance))", node.IP)
logging.Debugf("Prom Query : %s", query)
val, err := conn.Client.QueryRange(query, start, end, time.Minute)
if err != nil {
@@ -187,6 +195,59 @@ func TopPodCPU(node NodeInfo, conn PromConnect, start time.Time, end time.Time)
return pods, true
}

// VSwitchCPU will return the vswitchd CPU usage for a specific node
func VSwitchCPU(node NodeInfo, conn PromConnect, start time.Time, end time.Time, ndata *NodeCPU) bool {
query := fmt.Sprintf("irate(container_cpu_usage_seconds_total{id=~\"/system.slice/ovs-vswitchd.service\", node=~\"%s\"}[2m])*100", node.NodeName)
logging.Debugf("Prom Query : %s", query)
val, err := conn.Client.QueryRange(query, start, end, time.Minute)
if err != nil {
logging.Error("Issue querying Prometheus")
return false
}
v := val.(model.Matrix)
for _, s := range v {
ndata.VSwitchCPU = avg(s.Values)
}
return true
}

// VSwitchMem will return the vswitchd memory usage for a specific node
func VSwitchMem(node NodeInfo, conn PromConnect, start time.Time, end time.Time, ndata *NodeCPU) bool {
query := fmt.Sprintf("container_memory_rss{id=~\"/system.slice/ovs-vswitchd.service\", node=~\"%s\"}", node.NodeName)
logging.Debugf("Prom Query : %s", query)
val, err := conn.Client.QueryRange(query, start, end, time.Minute)
if err != nil {
logging.Error("Issue querying Prometheus")
return false
}
v := val.(model.Matrix)
for _, s := range v {
ndata.VSwitchMem = avg(s.Values)
}
return true
}

// TopPodMem will return the top 10 memory consumers for a specific node
func TopPodMem(node NodeInfo, conn PromConnect, start time.Time, end time.Time) (PodValues, bool) {
var pods PodValues
query := fmt.Sprintf("topk(10,sum(container_memory_rss{container!=\"POD\",name!=\"\",node=~\"%s\"}) by (pod, namespace, node))", node.NodeName)
logging.Debugf("Prom Query : %s", query)
val, err := conn.Client.QueryRange(query, start, end, time.Minute)
if err != nil {
logging.Error("Issue querying Prometheus")
return pods, false
}
v := val.(model.Matrix)
for _, s := range v {
p := PodMem{
Name: string(s.Metric["pod"]),
Value: avg(s.Values),
}
pods.MemResults = append(pods.MemResults, p)
}
return pods, true
}

// Calculates average for the given data
func avg(data []model.SamplePair) float64 {
sum := 0.0