[Exporter] Allow matching resource names by regular expression

In addition to the existing `-match` option, this PR allows matching resource names by regex during the listing operation. There are two new options:

- `-matchRegex` - checks whether the name matches a regex. This could be useful, for example, for exporting notebooks of only specific users.
- `-excludeRegex` - checks whether the name matches a regex and, if it does, skips processing of that object. For example, it could be used to exclude `databricks_automl` directories. This parameter has higher priority than `-match` and `-matchRegex`.
databricks · Oct 31, 2024 · 1b42338 · 1b42338
1 parent 613ed1a
commit 1b42338
Show file tree

Hide file tree

Showing 5 changed files with 77 additions and 3 deletions.
diff --git a/docs/guides/experimental-exporter.md b/docs/guides/experimental-exporter.md
@@ -61,6 +61,8 @@ All arguments are optional, and they tune what code is being generated.
 * `-listing` - Comma-separated list of services to be listed and further passed on for importing. For each service specified, the exporter performs a listing of available resources using the `List` function and emits them for importing together with their dependencies. The `-services` parameter could be used to control which transitive dependencies will be also imported.
 * `-services` - Comma-separated list of services to import. By default, all services are imported.
 * `-match` - Match resource names during listing operation. This filter applies to all resources that are getting listed, so if you want to import all dependencies of just one cluster, specify `-match=autoscaling -listing=compute`. By default, it is empty, which matches everything.
+* `-matchRegex` - Match resource names against a given regex during the listing operation. Applicable to all resources selected for listing. When specified, it is used instead of `-match` (the `-match` value is not consulted).
+* `-excludeRegex` - Exclude resource names matching a given regex. Applied during the listing operation and has higher priority than `-match` and `-matchRegex`. Applicable to all resources selected for listing. Could be used to exclude things like `databricks_automl` notebooks, etc.
 * `-mounts` - List DBFS mount points, an extremely slow operation that would not trigger unless explicitly specified.
 * `-generateProviderDeclaration` - the flag that toggles the generation of `databricks.tf` file with the declaration of the Databricks Terraform provider that is necessary for Terraform versions since Terraform 0.13 (disabled by default).
 * `-prefix` - optional prefix that will be added to the name of all exported resources - that's useful for exporting resources from multiple workspaces for merging into a single one.

diff --git a/exporter/command.go b/exporter/command.go
@@ -145,6 +145,12 @@ func Run(args ...string) error {
 	flags.StringVar(&ic.match, "match", "", "Match resource names during listing operation. "+
 		"This filter applies to all resources that are getting listed, so if you want to import "+
 		"all dependencies of just one cluster, specify -listing=compute")
+	flags.StringVar(&ic.matchRegexStr, "matchRegex", "", "Match resource names during listing operation against a regex. "+
+		"This filter applies to all resources that are getting listed, so if you want to import "+
+		"all dependencies of just one cluster, specify -listing=compute")
+	flags.StringVar(&ic.excludeRegexStr, "excludeRegex", "", "Exclude resource names matching regex during listing operation. "+
+		"This filter applies to all resources that are getting listed, so if you want to import "+
+		"all dependencies of just one cluster, specify -listing=compute")
 	prefix := ""
 	flags.StringVar(&prefix, "prefix", "", "Prefix that will be added to the name of all exported resources")
 	newArgs := args

diff --git a/exporter/context.go b/exporter/context.go
@@ -88,6 +88,10 @@ type importContext struct {
 	services                 map[string]struct{}
 	listing                  map[string]struct{}
 	match                    string
+	matchRegexStr            string
+	matchRegex               *regexp.Regexp
+	excludeRegexStr          string
+	excludeRegex             *regexp.Regexp
 	lastActiveDays           int64
 	lastActiveMs             int64
 	generateDeclaration      bool
@@ -297,6 +301,24 @@ func (ic *importContext) Run() error {
 		return fmt.Errorf("no services to import")
 	}
 
+	if ic.matchRegexStr != "" {
+		log.Printf("[DEBUG] Using regex '%s' to filter resources", ic.matchRegexStr)
+		re, err := regexp.Compile(ic.matchRegexStr)
+		if err != nil {
+			log.Printf("[ERROR] can't compile regex '%s': %v", ic.matchRegexStr, err)
+			return err
+		}
+		ic.matchRegex = re
+	}
+	if ic.excludeRegexStr != "" {
+		log.Printf("[DEBUG] Using regex '%s' to filter resources", ic.excludeRegexStr)
+		re, err := regexp.Compile(ic.excludeRegexStr)
+		if err != nil {
+			log.Printf("[ERROR] can't compile regex '%s': %v", ic.excludeRegexStr, err)
+			return err
+		}
+		ic.excludeRegex = re
+	}
 	if ic.incremental {
 		if ic.updatedSinceStr == "" {
 			ic.updatedSinceStr = getLastRunString(statsFileName)

diff --git a/exporter/exporter_test.go b/exporter/exporter_test.go
@@ -2371,10 +2371,32 @@ func TestImportingNotebooksWorkspaceFiles(t *testing.T) {
 				Method:   "GET",
 				Resource: "/api/2.0/workspace/list?path=%2F",
 				Response: workspace.ObjectList{
-					Objects: []workspace.ObjectStatus{notebookStatus, fileStatus},
+					Objects: []workspace.ObjectStatus{notebookStatus, fileStatus,
+						workspace.ObjectStatus{
+							ObjectID:   4567,
+							ObjectType: workspace.Notebook,
+							Path:       "/UnmatchedNotebook",
+							Language:   "PYTHON",
+						},
+						workspace.ObjectStatus{
+							ObjectID:   1234,
+							ObjectType: workspace.File,
+							Path:       "/UnmatchedFile",
+						},
+						workspace.ObjectStatus{
+							ObjectID:   456,
+							ObjectType: workspace.Directory,
+							Path:       "/databricks_automl",
+						},
+					},
 				},
 				ReuseRequest: true,
 			},
+			{
+				Method:   "GET",
+				Resource: "/api/2.0/workspace/list?path=%2Fdatabricks_automl",
+				Response: workspace.ObjectList{},
+			},
 			{
 				Method:       "GET",
 				Resource:     "/api/2.0/workspace/get-status?path=%2FNotebook",
@@ -2410,10 +2432,26 @@ func TestImportingNotebooksWorkspaceFiles(t *testing.T) {
 
 			ic := newImportContext(client)
 			ic.Directory = tmpDir
-			ic.enableListing("notebooks")
+			ic.enableListing("notebooks,wsfiles")
+			ic.excludeRegexStr = "databricks_automl"
+			ic.matchRegexStr = "^/[FN].*$"
 
 			err := ic.Run()
 			assert.NoError(t, err)
+			// check generated code for notebooks
+			content, err := os.ReadFile(tmpDir + "/notebooks.tf")
+			assert.NoError(t, err)
+			contentStr := string(content)
+			assert.True(t, strings.Contains(contentStr, `resource "databricks_notebook" "notebook_456"`))
+			assert.True(t, strings.Contains(contentStr, `path   = "/Notebook"`))
+			assert.False(t, strings.Contains(contentStr, `/UnmatchedNotebook`))
+			// check generated code for workspace files
+			content, err = os.ReadFile(tmpDir + "/wsfiles.tf")
+			assert.NoError(t, err)
+			contentStr = string(content)
+			assert.True(t, strings.Contains(contentStr, `resource "databricks_workspace_file" "file_123"`))
+			assert.True(t, strings.Contains(contentStr, `path   = "/File"`))
+			assert.False(t, strings.Contains(contentStr, `/UnmatchedFile`))
 		})
 }
 

diff --git a/exporter/util.go b/exporter/util.go
@@ -35,9 +35,15 @@ func (ic *importContext) isServiceInListing(service string) bool {
 }
 
 func (ic *importContext) MatchesName(n string) bool {
-	if ic.match == "" {
+	if ic.match == "" && ic.matchRegex == nil && ic.excludeRegex == nil {
 		return true
 	}
+	if ic.excludeRegex != nil && ic.excludeRegex.MatchString(n) {
+		return false
+	}
+	if ic.matchRegex != nil {
+		return ic.matchRegex.MatchString(n)
+	}
 	return strings.Contains(strings.ToLower(n), strings.ToLower(ic.match))
 }