From 0a4a5beba4aa609a9783674974f00535955a7994 Mon Sep 17 00:00:00 2001 From: Alex Ott Date: Wed, 24 Jul 2024 14:35:38 +0200 Subject: [PATCH] [Exporter] Adding more retries for SCIM API calls (#3807) ## Changes There were issue when SCIM API failed to return list of users (because of the incorrect status code or something like that, so Go SDK didn't retry it) and this lead to marking all workspace objects (notebooks/files/...) as ignored because user wasn't found. Also, when the list of users wasn't retrieved, the process didn't stop, and we generated incomplete Terraform code. This PR improves the situation by following: - Adding retries around Users/SPs/Groups SCIM API calls - Exit with the error code when we aren't able to fetch the list of the users or service principals ## Tests - [x] `make test` run locally - [ ] relevant change in `docs/` folder - [ ] covered with integration tests in `internal/acceptance` - [ ] relevant acceptance tests are passing - [ ] using Go SDK Co-authored-by: Tanmay Rustagi <88379306+tanmay-db@users.noreply.github.com> --- exporter/importables.go | 4 +- exporter/util.go | 170 ++++++++++++++++++++++++---------------- 2 files changed, 106 insertions(+), 68 deletions(-) diff --git a/exporter/importables.go b/exporter/importables.go index 7c08a41eca..231619a15f 100644 --- a/exporter/importables.go +++ b/exporter/importables.go @@ -1023,7 +1023,7 @@ var resourcesMap map[string]importable = map[string]importable{ if err != nil { return err } - ic.emitGroups(u) + ic.emitGroups(*u) ic.emitRoles("user", u.ID, u.Roles) return nil }, @@ -1081,7 +1081,7 @@ var resourcesMap map[string]importable = map[string]importable{ if err != nil { return err } - ic.emitGroups(u) + ic.emitGroups(*u) ic.emitRoles("service_principal", u.ID, u.Roles) if ic.accountLevel { ic.Emit(&resource{ diff --git a/exporter/util.go b/exporter/util.go index ab06f99647..741c370d10 100644 --- a/exporter/util.go +++ b/exporter/util.go @@ -452,32 +452,48 @@ func (ic *importContext) cacheGroups() error { defer ic.groupsMutex.Unlock() if ic.allGroups == nil { log.Printf("[INFO] Caching groups in memory ...") - var groups []iam.Group + var groups *[]iam.Group var err error - if ic.accountLevel { - groups, err = ic.accountClient.Groups.ListAll(ic.Context, iam.ListAccountGroupsRequest{ - Attributes: "id", - }) - } else { - groups, err = ic.workspaceClient.Groups.ListAll(ic.Context, iam.ListGroupsRequest{ - Attributes: "id", - }) - } + err = runWithRetries(func() error { + var grps []iam.Group + var err error + if ic.accountLevel { + grps, err = ic.accountClient.Groups.ListAll(ic.Context, iam.ListAccountGroupsRequest{ + Attributes: "id", + }) + } else { + grps, err = ic.workspaceClient.Groups.ListAll(ic.Context, iam.ListGroupsRequest{ + Attributes: "id", + }) + } + if err != nil { + return err + } + groups = &grps + return nil + }, "error fetching full list of groups") if err != nil { - log.Printf("[ERROR] can't fetch list of groups") + log.Printf("[ERROR] can't fetch list of groups. Error: %v", err) return err } api := scim.NewGroupsAPI(ic.Context, ic.Client) - ic.allGroups = make([]scim.Group, 0, len(groups)) - for i, g := range groups { - group, err := api.Read(g.Id, "id,displayName,active,externalId,entitlements,groups,roles,members") + groupsCount := len(*groups) + ic.allGroups = make([]scim.Group, 0, groupsCount) + for i, g := range *groups { + err = runWithRetries(func() error { + group, err := api.Read(g.Id, "id,displayName,active,externalId,entitlements,groups,roles,members") + if err != nil { + return err + } + ic.allGroups = append(ic.allGroups, group) + return nil + }, "error reading group with ID "+g.Id) if err != nil { - log.Printf("[ERROR] Error reading group with ID %s", g.Id) + log.Printf("[ERROR] Error reading group with ID %s: %v", g.Id, err) continue } - ic.allGroups = append(ic.allGroups, group) if (i+1)%10 == 0 { - log.Printf("[DEBUG] Read %d out of %d groups", i+1, len(groups)) + log.Printf("[DEBUG] Read %d out of %d groups", i+1, groupsCount) } } log.Printf("[INFO] Cached %d groups", len(ic.allGroups)) @@ -506,30 +522,34 @@ func (ic *importContext) getUsersMapping() { return } ic.allUsersMapping = make(map[string]string) - var users []iam.User - var err error - if ic.accountLevel { - users, err = ic.accountClient.Users.ListAll(ic.Context, iam.ListAccountUsersRequest{ - Attributes: "id,userName", - }) - } else { - users, err = ic.workspaceClient.Users.ListAll(ic.Context, iam.ListUsersRequest{ - Attributes: "id,userName", - }) - } + err := runWithRetries(func() error { + var users []iam.User + var err error + if ic.accountLevel { + users, err = ic.accountClient.Users.ListAll(ic.Context, iam.ListAccountUsersRequest{ + Attributes: "id,userName", + }) + } else { + users, err = ic.workspaceClient.Users.ListAll(ic.Context, iam.ListUsersRequest{ + Attributes: "id,userName", + }) + } + if err != nil { + return err + } + for _, user := range users { + ic.allUsersMapping[user.UserName] = user.Id + } + log.Printf("[DEBUG] users are copied") + return nil + }, "error fetching full list of users") if err != nil { - log.Printf("[ERROR] can't fetch list of users") - return - } - for _, user := range users { - // log.Printf("[DEBUG] adding user %v into the map. %d out of %d", user, i+1, len(users)) - ic.allUsersMapping[user.UserName] = user.Id + log.Fatalf("[ERROR] can't fetch list of users after few retries: error=%v", err) } - log.Printf("[DEBUG] users are copied") } } -func (ic *importContext) findUserByName(name string, fastCheck bool) (u scim.User, err error) { +func (ic *importContext) findUserByName(name string, fastCheck bool) (u *scim.User, err error) { log.Printf("[DEBUG] Looking for user %s", name) ic.usersMutex.RLocker().Lock() user, exists := ic.allUsers[name] @@ -540,7 +560,7 @@ func (ic *importContext) findUserByName(name string, fastCheck bool) (u scim.Use err = fmt.Errorf("user %s is not found", name) } else { log.Printf("[DEBUG] existing user %s is found in the cache", name) - u = user + u = &user } return } @@ -550,21 +570,28 @@ func (ic *importContext) findUserByName(name string, fastCheck bool) (u scim.Use ic.allUsersMutex.RLocker().Unlock() if !exists { err = fmt.Errorf("there is no user '%s'", name) - u = scim.User{UserName: nonExistingUserOrSp} + u = &scim.User{UserName: nonExistingUserOrSp} } else { if fastCheck { - return scim.User{UserName: name}, nil + return &scim.User{UserName: name}, nil } a := scim.NewUsersAPI(ic.Context, ic.Client) - u, err = a.Read(userId, "id,userName,displayName,active,externalId,entitlements,groups,roles") + err = runWithRetries(func() error { + usr, err := a.Read(userId, "id,userName,displayName,active,externalId,entitlements,groups,roles") + if err != nil { + return err + } + u = &usr + return nil + }, fmt.Sprintf("error reading user with name '%s', user ID: %s", name, userId)) if err != nil { log.Printf("[WARN] error reading user with name '%s', user ID: %s", name, userId) - u = scim.User{UserName: nonExistingUserOrSp} + u = &scim.User{UserName: nonExistingUserOrSp} } } ic.usersMutex.Lock() defer ic.usersMutex.Unlock() - ic.allUsers[name] = u + ic.allUsers[name] = *u return } @@ -573,24 +600,28 @@ func (ic *importContext) getSpsMapping() { defer ic.spsMutex.Unlock() if ic.allSpsMapping == nil { ic.allSpsMapping = make(map[string]string) - var sps []iam.ServicePrincipal - var err error - // Reimplement it myself - if ic.accountLevel { - sps, err = ic.accountClient.ServicePrincipals.ListAll(ic.Context, iam.ListAccountServicePrincipalsRequest{ - Attributes: "id,userName", - }) - } else { - sps, err = ic.workspaceClient.ServicePrincipals.ListAll(ic.Context, iam.ListServicePrincipalsRequest{ - Attributes: "id,userName", - }) - } + err := runWithRetries(func() error { + var sps []iam.ServicePrincipal + var err error + if ic.accountLevel { + sps, err = ic.accountClient.ServicePrincipals.ListAll(ic.Context, iam.ListAccountServicePrincipalsRequest{ + Attributes: "id,userName", + }) + } else { + sps, err = ic.workspaceClient.ServicePrincipals.ListAll(ic.Context, iam.ListServicePrincipalsRequest{ + Attributes: "id,userName", + }) + } + if err != nil { + return err + } + for _, sp := range sps { + ic.allSpsMapping[sp.ApplicationId] = sp.Id + } + return nil + }, "error fetching full list of service principals") if err != nil { - log.Printf("[ERROR] can't fetch list of service principals") - return - } - for _, sp := range sps { - ic.allSpsMapping[sp.ApplicationId] = sp.Id + log.Fatalf("[ERROR] can't fetch list of service principals after few retries: error=%v", err) } } } @@ -621,7 +652,7 @@ func (ic *importContext) getBuiltinPolicyFamilies() map[string]compute.PolicyFam return ic.builtInPolicies } -func (ic *importContext) findSpnByAppID(applicationID string, fastCheck bool) (u scim.User, err error) { +func (ic *importContext) findSpnByAppID(applicationID string, fastCheck bool) (u *scim.User, err error) { log.Printf("[DEBUG] Looking for SP %s", applicationID) ic.spsMutex.RLocker().Lock() sp, exists := ic.allSps[applicationID] @@ -632,7 +663,7 @@ func (ic *importContext) findSpnByAppID(applicationID string, fastCheck bool) (u err = fmt.Errorf("service principal %s is not found", applicationID) } else { log.Printf("[DEBUG] existing SP %s is found in the cache", applicationID) - u = sp + u = &sp } return } @@ -642,21 +673,28 @@ func (ic *importContext) findSpnByAppID(applicationID string, fastCheck bool) (u ic.spsMutex.RLocker().Unlock() if !exists { err = fmt.Errorf("there is no service principal '%s'", applicationID) - u = scim.User{ApplicationID: nonExistingUserOrSp} + u = &scim.User{ApplicationID: nonExistingUserOrSp} } else { if fastCheck { - return scim.User{ApplicationID: applicationID}, nil + return &scim.User{ApplicationID: applicationID}, nil } a := scim.NewServicePrincipalsAPI(ic.Context, ic.Client) - u, err = a.Read(spId, "userName,displayName,active,externalId,entitlements,groups,roles") + err = runWithRetries(func() error { + usr, err := a.Read(spId, "userName,displayName,active,externalId,entitlements,groups,roles") + if err != nil { + return err + } + u = &usr + return nil + }, fmt.Sprintf("error reading service principal with AppID '%s', SP ID: %s", applicationID, spId)) if err != nil { log.Printf("[WARN] error reading service principal with AppID '%s', SP ID: %s", applicationID, spId) - u = scim.User{ApplicationID: nonExistingUserOrSp} + u = &scim.User{ApplicationID: nonExistingUserOrSp} } } ic.spsMutex.Lock() defer ic.spsMutex.Unlock() - ic.allSps[applicationID] = u + ic.allSps[applicationID] = *u return }