From 2bb529bbc2c138842e873b837a4a20a4853a7047 Mon Sep 17 00:00:00 2001 From: Arvind Subramanian Date: Tue, 10 Oct 2023 13:20:12 -0400 Subject: [PATCH 1/2] Always update checkrun status for terraform workflows Why? * Terraform workflows cannot cancel due to WaitForCancellation, so we might as well try to update the status of the checkrun. * We also think this will help with the non determinism issues we are seeing where cancellations can occur at different than expected times on replay, causing the checkrun cache to be inaccurate, thus causing wrong calls to either create or update. --- .../internal/pr/revision/processor.go | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/server/neptune/workflows/internal/pr/revision/processor.go b/server/neptune/workflows/internal/pr/revision/processor.go index d4b4f9e62..1e2d45759 100644 --- a/server/neptune/workflows/internal/pr/revision/processor.go +++ b/server/neptune/workflows/internal/pr/revision/processor.go @@ -17,8 +17,9 @@ import ( ) const ( - ReviewSignalID = "pr-review" - CheckRunCancelled = "checkrun was cancelled" + ReviewSignalID = "pr-review" + CheckRunCancelled = "checkrun was cancelled" + NotifyAllCheckRunsChangeID = "notify-all-checkruns-always" ) type TFWorkflow func(ctx workflow.Context, request terraform.Request) (terraform.Response, error) @@ -72,9 +73,11 @@ func (p *Processor) Process(ctx workflow.Context, prRevision Revision) { // Mark checkruns as aborted if the context was cancelled, this typically happens if revisions arrive in quick succession defer func() { if temporal.IsCanceledError(ctx.Err()) { - ctx, _ := workflow.NewDisconnectedContext(ctx) - p.markCheckRunsAborted(ctx, prRevision, roots) - return + version := workflow.GetVersion(ctx, NotifyAllCheckRunsChangeID, workflow.DefaultVersion, 1) + if version == workflow.DefaultVersion { + p.markCheckRunsAborted(ctx, prRevision, roots) + return + } } // At this point, all workflows should be successful, and we can mark combined plan check run as success @@ -131,6 +134,13 @@ func (p *Processor) awaitChildTerraformWorkflows(ctx workflow.Context, futures [ selector := workflow.NewNamedSelector(ctx, "TerraformChildWorkflow") ch := workflow.GetSignalChannel(ctx, state.WorkflowStateChangeSignal) selector.AddReceive(ch, func(c workflow.ReceiveChannel, _ bool) { + // The parent workflow can be cancelled when a new revision is received, but we still would like to notify + // state of any of the terraform workflows which will finish regardless of parent cancellation. + ctx := ctx + version := workflow.GetVersion(ctx, NotifyAllCheckRunsChangeID, workflow.DefaultVersion, 1) + if version == 1 { + ctx, _ = workflow.NewDisconnectedContext(ctx) + } p.TFStateReceiver.Receive(ctx, c, roots) }) @@ -182,7 +192,6 @@ func (p *Processor) markCombinedCheckRun(ctx workflow.Context, revision Revision func (p *Processor) markCheckRunsAborted(ctx workflow.Context, revision Revision, roots map[string]RootInfo) { p.markCombinedCheckRun(ctx, revision, github.CheckRunCancelled, CheckRunCancelled) - for _, rootInfo := range roots { ctx = workflow.WithRetryPolicy(ctx, temporal.RetryPolicy{ MaximumAttempts: 3, From 8af0ee774642851b14013f80d34b7253b32c8050 Mon Sep 17 00:00:00 2001 From: Arvind Subramanian Date: Tue, 10 Oct 2023 14:22:50 -0400 Subject: [PATCH 2/2] add artificial sleep to "simple" root --- server/neptune/workflows/internal/terraform/workflow.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server/neptune/workflows/internal/terraform/workflow.go b/server/neptune/workflows/internal/terraform/workflow.go index e1b0ad558..ec8dc6536 100644 --- a/server/neptune/workflows/internal/terraform/workflow.go +++ b/server/neptune/workflows/internal/terraform/workflow.go @@ -5,6 +5,7 @@ import ( "fmt" "github.com/runatlantis/atlantis/server/neptune/workflows/activities/conftest" "net/url" + "strings" "time" key "github.com/runatlantis/atlantis/server/neptune/context" @@ -146,6 +147,10 @@ func (r *Runner) Plan(ctx workflow.Context, root *terraform.LocalRoot, serverURL return response, newUpdateJobError(err, "unable to update job with in-progress status") } + if strings.HasSuffix(root.Path, "simple") { + _ = workflow.Sleep(ctx, 20*time.Second) + } + response, err = r.JobRunner.Plan(ctx, root, jobID.String(), r.Request.WorkflowMode) if err != nil {