Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Error Handling - update tcp dial timeout regex to catch broader range of error conditions #1108

Merged
merged 6 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 7 additions & 9 deletions sdk/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -777,10 +777,10 @@ func extendedRetryPolicy(resp *http.Response, err error) (bool, error) {

// A regular expression to catch dial timeouts in the underlying TCP session
// connection
tcpDialTimeoutRe := regexp.MustCompile(`dial tcp .*: i/o timeout`)
tcpDialTCPRe := regexp.MustCompile(`dial tcp`)

// A regular expression to match complete packet loss - see comment below on packet-loss scenarios
// completePacketLossRe := regexp.MustCompile(`EOF$`)
// A regular expression to match complete packet loss
completePacketLossRe := regexp.MustCompile(`EOF`)

if err != nil {
var v *url.Error
Expand All @@ -805,15 +805,13 @@ func extendedRetryPolicy(resp *http.Response, err error) (bool, error) {
return false, v
}

if tcpDialTimeoutRe.MatchString(v.Error()) {
if tcpDialTCPRe.MatchString(v.Error()) {
return false, v
}

// TODO - Need to investigate how to deal with total packet-loss situations that doesn't break LRO retries.
// Such as Temporary Proxy outage, or recoverable disruption to network traffic (e.g. bgp events etc)
// if completePacketLossRe.MatchString(v.Error()) {
// return false, v
// }
if completePacketLossRe.MatchString(v.Error()) {
return false, v
}

var certificateVerificationError *tls.CertificateVerificationError
if ok := errors.As(v.Err, &certificateVerificationError); ok {
Expand Down
20 changes: 18 additions & 2 deletions sdk/client/resourcemanager/poller_lro.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
Expand All @@ -27,6 +28,9 @@ type longRunningOperationPoller struct {
initialRetryDuration time.Duration
originalUrl *url.URL
pollingUrl *url.URL

droppedConnectionCount int
maxDroppedConnections int
}

func pollingUriForLongRunningOperation(resp *client.Response) string {
Expand All @@ -39,8 +43,9 @@ func pollingUriForLongRunningOperation(resp *client.Response) string {

func longRunningOperationPollerFromResponse(resp *client.Response, client *client.Client) (*longRunningOperationPoller, error) {
poller := longRunningOperationPoller{
client: client,
initialRetryDuration: 10 * time.Second,
client: client,
initialRetryDuration: 10 * time.Second,
maxDroppedConnections: 3,
}

pollingUrl := pollingUriForLongRunningOperation(resp)
Expand Down Expand Up @@ -107,9 +112,20 @@ func (p *longRunningOperationPoller) Poll(ctx context.Context) (result *pollers.
}
result.HttpResponse, err = req.Execute(ctx)
if err != nil {
var e *url.Error
if errors.As(err, &e) {
p.droppedConnectionCount++
if p.droppedConnectionCount < p.maxDroppedConnections {
result.Status = pollers.PollingStatusUnknown
return result, nil
}
}

return nil, err
}

p.droppedConnectionCount = 0

if result.HttpResponse != nil {
var respBody []byte
respBody, err = io.ReadAll(result.HttpResponse.Body)
Expand Down
3 changes: 2 additions & 1 deletion sdk/client/resourcemanager/poller_lro_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,7 @@ func TestPollerLRO_InProvisioningState_AcceptedThenDroppedThenInProgressThenSucc

expectedStatuses := []pollers.PollingStatus{
pollers.PollingStatusInProgress, // the 202 Accepted
pollers.PollingStatusUnknown,
// NOTE: the Dropped Connection will be ignored/silently retried
pollers.PollingStatusInProgress, // working on it
pollers.PollingStatusSucceeded, // good
Expand Down Expand Up @@ -366,7 +367,7 @@ func TestPollerLRO_InStatus_AcceptedThenDroppedThenInProgressThenSuccess(t *test

expectedStatuses := []pollers.PollingStatus{
pollers.PollingStatusInProgress, // the 202 Accepted
// NOTE: the Dropped Connection will be ignored/silently retried
pollers.PollingStatusUnknown,
pollers.PollingStatusInProgress, // working on it
pollers.PollingStatusSucceeded, // good
}
Expand Down
26 changes: 21 additions & 5 deletions sdk/client/resourcemanager/poller_provisioning_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package resourcemanager

import (
"context"
"errors"
"fmt"
"net/http"
"net/url"
Expand All @@ -27,6 +28,9 @@ type provisioningStatePoller struct {
initialRetryDuration time.Duration
originalUri string
resourcePath string

droppedConnectionCount int
maxDroppedConnections int
}

func provisioningStatePollerFromResponse(response *client.Response, lroIsSelfReference bool, client *Client, pollingInterval time.Duration) (*provisioningStatePoller, error) {
Expand Down Expand Up @@ -58,11 +62,12 @@ func provisioningStatePollerFromResponse(response *client.Response, lroIsSelfRef
}

return &provisioningStatePoller{
apiVersion: apiVersion,
client: client,
initialRetryDuration: pollingInterval,
originalUri: originalUri,
resourcePath: resourcePath,
apiVersion: apiVersion,
client: client,
initialRetryDuration: pollingInterval,
originalUri: originalUri,
resourcePath: resourcePath,
maxDroppedConnections: 3,
}, nil
}

Expand All @@ -84,6 +89,17 @@ func (p *provisioningStatePoller) Poll(ctx context.Context) (*pollers.PollResult
}
resp, err := p.client.Execute(ctx, req)
if err != nil {
var e *url.Error
if errors.As(err, &e) {
p.droppedConnectionCount++
if p.droppedConnectionCount < p.maxDroppedConnections {
return &pollers.PollResult{
PollInterval: p.initialRetryDuration,
Status: pollers.PollingStatusUnknown,
}, nil
}
}

jackofallops marked this conversation as resolved.
Show resolved Hide resolved
return nil, fmt.Errorf("executing request: %+v", err)
}
if resp == nil {
Expand Down
28 changes: 15 additions & 13 deletions sdk/client/resourcemanager/poller_provisioning_state_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ func TestPollerProvisioningState_OkWithNoBody_AfterPolling(t *testing.T) {
}

func TestPollerProvisioningState_InProvisioningState_DroppedThenInProgressThenSuccess(t *testing.T) {
ctx, cancel := context.WithTimeout(context.TODO(), 30*time.Second)
ctx, cancel := context.WithTimeout(context.TODO(), 90*time.Second)
defer cancel()

// changing where we're setting this for the heck of it
Expand All @@ -177,15 +177,17 @@ func TestPollerProvisioningState_InProvisioningState_DroppedThenInProgressThenSu
apiVersion: "2020-01-01",
}
poller := provisioningStatePoller{
apiVersion: helper.expectedApiVersion,
client: resourceManagerClient,
initialRetryDuration: 10,
originalUri: "/provisioning-state/poll",
resourcePath: "/provisioning-state/poll",
apiVersion: helper.expectedApiVersion,
client: resourceManagerClient,
initialRetryDuration: 10,
originalUri: "/provisioning-state/poll",
resourcePath: "/provisioning-state/poll",
maxDroppedConnections: 3,
}

expectedStatuses := []pollers.PollingStatus{
pollers.PollingStatusInProgress, // working on it
pollers.PollingStatusUnknown,
// NOTE: the Dropped Connection will be ignored/silently retried
pollers.PollingStatusInProgress, // working on it
pollers.PollingStatusSucceeded, // good
Expand All @@ -200,7 +202,6 @@ func TestPollerProvisioningState_InProvisioningState_DroppedThenInProgressThenSu
t.Fatalf("expected status to be %q but got %q", expected, result.Status)
}
}
// sanity-checking - expect 4 calls but 3 statuses (since the dropped connection is silently retried)
helper.assertCalled(t, 4)
}

Expand All @@ -225,16 +226,17 @@ func TestPollerProvisioningState_InStatus_DroppedThenInProgressThenSuccess(t *te
apiVersion: "2020-01-01",
}
poller := provisioningStatePoller{
apiVersion: helper.expectedApiVersion,
client: resourceManagerClient,
initialRetryDuration: 10,
originalUri: "/provisioning-state/poll",
resourcePath: "/provisioning-state/poll",
apiVersion: helper.expectedApiVersion,
client: resourceManagerClient,
initialRetryDuration: 10,
originalUri: "/provisioning-state/poll",
resourcePath: "/provisioning-state/poll",
maxDroppedConnections: 3,
}

expectedStatuses := []pollers.PollingStatus{
pollers.PollingStatusInProgress, // working on it
// NOTE: the Dropped Connection will be ignored/silently retried
pollers.PollingStatusUnknown,
pollers.PollingStatusInProgress, // working on it
pollers.PollingStatusSucceeded, // good
}
Expand Down
Loading