Skip to content

Commit

Permalink
OSD-23466 increase DT gather logs limit (#589)
Browse files Browse the repository at this point in the history
* OSD-23466 - Increase DT Gather logs max log count limit

* OSD-23466 - Add context for follow up

* OSD-23466 - Move logic for acquiring vault token to avoid unnecessary calls

* Update cmd/cluster/dynatrace/hcpGatherLogsCmd.go

Log the failed events query execution results to stdout for further debugging

Co-authored-by: Michael Shen <mishen@umich.edu>

* Update cmd/cluster/dynatrace/hcpGatherLogsCmd.go

Log the logs query when it fails so that the user can use it for debugging later.

Co-authored-by: Michael Shen <mishen@umich.edu>

* OSD-23466 - Fix import errors and remove unnecessary function

* OSD-23466 - Remove unused type

---------

Co-authored-by: Michael Shen <mishen@umich.edu>
  • Loading branch information
Tafhim and mjlshen authored Jul 3, 2024
1 parent b30d722 commit 4e39e1f
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 33 deletions.
20 changes: 7 additions & 13 deletions cmd/cluster/dynatrace/hcpGatherLogsCmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package dynatrace
import (
"context"
"fmt"
"log"
"os"
"path/filepath"

Expand Down Expand Up @@ -115,12 +116,6 @@ func dumpEvents(deploys *appsv1.DeploymentList, parentDir string, targetNS strin
}
eventQuery.Build()

eventsRequestToken, err := getRequestToken(eventQuery.finalQuery, DTURL, accessToken)
if err != nil {
fmt.Println(fmt.Errorf("failed to acquire request token %v", err))
continue
}

deploymentYamlFileName := "deployment.yaml"
eventsFileName := "events.log"
eventsDirPath, err := addDir([]string{parentDir, "events", d.Name}, []string{deploymentYamlFileName, eventsFileName})
Expand All @@ -143,10 +138,12 @@ func dumpEvents(deploys *appsv1.DeploymentList, parentDir string, targetNS strin
return err
}

eventsRequestToken, err := getDTQueryExecution(DTURL, accessToken, eventQuery.finalQuery)
err = getEvents(DTURL, accessToken, eventsRequestToken, f)
f.Close()
if err != nil {
return fmt.Errorf("failed to get logs %v", err)
log.Printf("failed to get logs, continuing: %v. Query: %v", err, eventQuery.finalQuery)
continue
}

}
Expand All @@ -164,11 +161,6 @@ func dumpPodLogs(pods *corev1.PodList, parentDir string, targetNS string, manage
}
podLogsQuery.Build()

podLogsRequestToken, err := getRequestToken(podLogsQuery.finalQuery, DTURL, accessToken)
if err != nil {
return fmt.Errorf("failed to acquire request token %v", err)
}

podYamlFileName := "pod.yaml"
podLogFileName := "pod.log"
podDirPath, err := addDir([]string{parentDir, "pods", p.Name}, []string{podLogFileName, podYamlFileName})
Expand All @@ -191,10 +183,12 @@ func dumpPodLogs(pods *corev1.PodList, parentDir string, targetNS string, manage
return err
}

podLogsRequestToken, err := getDTQueryExecution(DTURL, accessToken, podLogsQuery.finalQuery)
err = getLogs(DTURL, accessToken, podLogsRequestToken, f)
f.Close()
if err != nil {
return fmt.Errorf("failed to get logs %v", err)
log.Printf("failed to get logs, continuing: %v. Query: %v", err, podLogsQuery.finalQuery)
continue
}
}

Expand Down
10 changes: 3 additions & 7 deletions cmd/cluster/dynatrace/logsCmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,6 @@ func main(clusterID string) error {
return fmt.Errorf("failed to acquire cluster details %v", err)
}

accessToken, err := getAccessToken()
if err != nil {
return fmt.Errorf("failed to acquire access token %v", err)
}

query, err := getQuery(clusterInternalID, mgmtClusterName)
if err != nil {
return fmt.Errorf("failed to build query for Dynatrace %v", err)
Expand All @@ -84,11 +79,12 @@ func main(clusterID string) error {
return nil
}

requestToken, err := getRequestToken(query.finalQuery, DTURL, accessToken)
accessToken, err := getAccessToken()
if err != nil {
return fmt.Errorf("failed to acquire request token %v", err)
return fmt.Errorf("failed to acquire access token %v", err)
}

requestToken, err := getDTQueryExecution(DTURL, accessToken, query.finalQuery)
err = getLogs(DTURL, accessToken, requestToken, nil)
if err != nil {
return fmt.Errorf("failed to get logs %v", err)
Expand Down
76 changes: 63 additions & 13 deletions cmd/cluster/dynatrace/requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ const (
VaultAddr string = "vault_address"
)

// DTRequestError captures the error portion of a failed response body.
// Requester.send decodes the body into this type when the response status
// differs from the expected success code, and includes the decoded value in
// the error it returns.
type DTRequestError struct {
// Records holds the raw, undecoded JSON found under the "error" key.
Records json.RawMessage `json:"error"`
}

type Requester struct {
method string
url string
Expand All @@ -28,7 +32,7 @@ type Requester struct {

func (rh *Requester) send() (string, error) {
client := http.Client{
Timeout: time.Second * 10,
Timeout: time.Second * 600,
}

var req *http.Request
Expand All @@ -54,15 +58,21 @@ func (rh *Requester) send() (string, error) {

defer resp.Body.Close()

if resp.StatusCode != rh.successCode {
return "", fmt.Errorf("request failed: %v", resp.Status)
}

body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}

if resp.StatusCode != rh.successCode {
var dtError DTRequestError
err = json.Unmarshal([]byte(body), &dtError)
if err != nil {
return "", err
}

return "", fmt.Errorf("request failed: %v %s", resp.Status, dtError)
}

return string(body), nil
}

Expand Down Expand Up @@ -135,7 +145,8 @@ func getAccessToken() (string, error) {
}

type DTQueryPayload struct {
Query string `json:"query"`
Query string `json:"query"`
MaxResultRecords int `json:"maxResultRecords"`
}

type DTPollResult struct {
Expand Down Expand Up @@ -166,13 +177,27 @@ type DTEventResult struct {
Records []json.RawMessage `json:"records"`
}

type ExecuteResponse struct {
// DTExecuteState is the part of a query-execution response that describes the
// state of the submitted query. getDTQueryExecution treats any state other
// than "RUNNING" or "SUCCEEDED" as a failed query.
type DTExecuteState struct {
// State is decoded from the "state" key of the response.
State string `json:"state"`
// TTLSeconds is decoded from the "ttlSeconds" key.
// NOTE(review): presumably how long the execution result stays available — confirm against the Dynatrace API.
TTLSeconds int `json:"ttlSeconds"`
}

// DTExecuteToken is the part of a query-execution response that carries the
// request token. getDTQueryExecution decodes the response into this type and
// returns RequestToken to its caller.
type DTExecuteToken struct {
// RequestToken is decoded from the "requestToken" key of the response.
RequestToken string `json:"requestToken"`
}

func getRequestToken(query string, dtURL string, accessToken string) (requestToken string, error error) {
// DTExecuteResults decodes the "records" array of a response, keeping every
// record as raw JSON so that decoding of individual records can be deferred.
type DTExecuteResults struct {
// Result holds one raw JSON message per entry of the "records" array.
Result []json.RawMessage `json:"records"`
}

func getDTQueryExecution(dtURL string, accessToken string, query string) (reqToken string, error error) {
// Note: we currently cap each query at 20,000 result records because Dynatrace
// cannot yet return all logs for a query. This cap can be lifted once
// https://community.dynatrace.com/t5/Product-ideas/Pagination-in-DQL-results/idi-p/248282#M45818
// is addressed; https://issues.redhat.com/browse/OSD-24349 tracks removing it.
payload := DTQueryPayload{
Query: query,
Query: query,
MaxResultRecords: 20000,
}

payloadJSON, err := json.Marshal(payload)
Expand All @@ -191,18 +216,43 @@ func getRequestToken(query string, dtURL string, accessToken string) (requestTok
successCode: http.StatusAccepted,
}

resp, err := requester.send()
var resp string
for {
resp, err = requester.send()
if err != nil {
return "", err
}
var execState DTExecuteState
err = json.Unmarshal([]byte(resp), &execState)
if err != nil {
return "", err
}

if execState.State != "RUNNING" && execState.State != "SUCCEEDED" {
return "", fmt.Errorf("query failed")
}

break
}

var state DTExecuteState
err = json.Unmarshal([]byte(resp), &state)
if err != nil {
return "", err
}

var execResp ExecuteResponse
err = json.Unmarshal([]byte(resp), &execResp)
if state.State != "RUNNING" && state.State != "SUCCEEDED" {
return "", fmt.Errorf("query failed")
}

// acquire the request token for the execution
var token DTExecuteToken
err = json.Unmarshal([]byte(resp), &token)
if err != nil {
return "", err
}

return execResp.RequestToken, nil
return token.RequestToken, err
}

func getDTPollResults(dtURL string, requestToken string, accessToken string) (respBody string, error error) {
Expand Down

0 comments on commit 4e39e1f

Please sign in to comment.