merged with upstream

databricks · Nov 1, 2024 · 6f6d19e · 6f6d19e
2 parents 32e79f6 + 28b8f49
commit 6f6d19e
Show file tree

Hide file tree

Showing 25 changed files with 888 additions and 385 deletions.
diff --git a/.github/workflows/external-message.yml b/.github/workflows/external-message.yml
@@ -11,81 +11,22 @@ on:
     branches:
       - main
 
-
 jobs:
   comment-on-pr:
     runs-on: ubuntu-latest
     permissions:
       pull-requests: write
 
     steps:
-      # NOTE: The following checks may not be accurate depending on Org or Repo settings. 
-      - name: Check user and potential secret access
-        id: check-secrets-access
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        run: |
-          USER_LOGIN="${{ github.event.pull_request.user.login }}"
-          REPO_OWNER="${{ github.repository_owner }}"
-          REPO_NAME="${{ github.event.repository.name }}"
-          
-          echo "Pull request opened by: $USER_LOGIN"
-          
-          # Check if PR is from a fork
-          IS_FORK=$([[ "${{ github.event.pull_request.head.repo.full_name }}" != "${{ github.repository }}" ]] && echo "true" || echo "false")
-          
-          HAS_ACCESS="false"
-          
-          # Check user's permission level on the repository
-          USER_PERMISSION=$(gh api repos/$REPO_OWNER/$REPO_NAME/collaborators/$USER_LOGIN/permission --jq '.permission')
-          
-          if [[ "$USER_PERMISSION" == "admin" || "$USER_PERMISSION" == "write" ]]; then
-            HAS_ACCESS="true"
-          elif [[ "$USER_PERMISSION" == "read" ]]; then
-            # For read access, we need to check if the user has been explicitly granted secret access
-            # This information is not directly available via API, so we'll make an assumption
-            # that read access does not imply secret access
-            HAS_ACCESS="false"
-          fi
-          
-          # Check if repo owner is an organization
-          IS_ORG=$(gh api users/$REPO_OWNER --jq '.type == "Organization"')
-          
-          if [[ "$IS_ORG" == "true" && "$HAS_ACCESS" == "false" ]]; then
-            # Check if user is a member of any team with write or admin access to the repo
-            TEAMS_WITH_ACCESS=$(gh api repos/$REPO_OWNER/$REPO_NAME/teams --jq '.[] | select(.permission == "push" or .permission == "admin") | .slug')
-            for team in $TEAMS_WITH_ACCESS; do
-              IS_TEAM_MEMBER=$(gh api orgs/$REPO_OWNER/teams/$team/memberships/$USER_LOGIN --silent && echo "true" || echo "false")
-              if [[ "$IS_TEAM_MEMBER" == "true" ]]; then
-                HAS_ACCESS="true"
-                break
-              fi
-            done
-          fi
-          
-          # If it's a fork, set HAS_ACCESS to false regardless of other checks
-          if [[ "$IS_FORK" == "true" ]]; then
-            HAS_ACCESS="false"
-          fi
-          
-          echo "has_secrets_access=$HAS_ACCESS" >> $GITHUB_OUTPUT
-          if [[ "$HAS_ACCESS" == "true" ]]; then
-            echo "User $USER_LOGIN likely has access to secrets"
-          else
-            echo "User $USER_LOGIN likely does not have access to secrets"
-          fi
-
-
       - uses: actions/checkout@v4
 
       - name: Delete old comments
-        if: steps.check-secrets-access.outputs.has_secrets_access != 'true'
         env:
            GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         run: |
             # Delete previous comment if it exists
             previous_comment_ids=$(gh api "repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/comments" \
-              --jq '.[] | select(.body | startswith("<!-- INTEGRATION_TESTS -->")) | .id')
+              --jq '.[] | select(.body | startswith("<!-- INTEGRATION_TESTS_MANUAL -->")) | .id')
             echo "Previous comment IDs: $previous_comment_ids"
             # Iterate over each comment ID and delete the comment
             if [ ! -z "$previous_comment_ids" ]; then
@@ -96,14 +37,15 @@ jobs:
             fi
 
       - name: Comment on PR
-        if: steps.check-secrets-access.outputs.has_secrets_access != 'true'
         env:
           GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           COMMIT_SHA: ${{ github.event.pull_request.head.sha }}
         run: |
           gh pr comment ${{ github.event.pull_request.number }} --body \
-          "<!-- INTEGRATION_TESTS -->
-          Run integration tests manually:
+          "<!-- INTEGRATION_TESTS_MANUAL -->
+          If integration tests don't run automatically, an authorized user can run them manually by following the instructions below:
+          
+          Trigger:
           [go/deco-tests-run/terraform](https://go/deco-tests-run/terraform)
 
           Inputs:

diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
@@ -12,17 +12,18 @@ jobs:
   check-token:
     name: Check secrets access
     runs-on: ubuntu-latest
+    environment: "test-trigger-is"
     outputs:
       has_token: ${{ steps.set-token-status.outputs.has_token }}
     steps:
-      - name: Check if GITHUB_TOKEN is set
+      - name: Check if DECO_WORKFLOW_TRIGGER_APP_ID is set
         id: set-token-status
         run: |
-            if [ -z "${{ secrets.GITHUB_TOKEN }}" ]; then
-              echo "GITHUB_TOKEN is empty. User has no access to tokens."
+            if [ -z "${{ secrets.DECO_WORKFLOW_TRIGGER_APP_ID }}" ]; then
+              echo "DECO_WORKFLOW_TRIGGER_APP_ID is empty. User has no access to secrets."
               echo "::set-output name=has_token::false"
             else
-              echo "GITHUB_TOKEN is set. User has no access to tokens."
+              echo "DECO_WORKFLOW_TRIGGER_APP_ID is set. User has access to secrets."
               echo "::set-output name=has_token::true"
             fi
     

diff --git a/docs/data-sources/volumes.md b/docs/data-sources/volumes.md
@@ -7,6 +7,9 @@ subcategory: "Unity Catalog"
 
 Retrieves a list of [databricks_volume](../resources/volume.md) ids (full names), that were created by Terraform or manually.
 
+## Plugin Framework Migration
+The volumes data source has been migrated from sdkv2 to plugin framework in version 1.57. If you encounter any problem with this data source and suspect it is due to the migration, you can fall back to sdkv2 by setting the environment variable in the following way `export USE_SDK_V2_DATA_SOURCES="databricks_volumes"`.
+
 ## Example Usage
 
 Listing all volumes in a _things_ [databricks_schema](../resources/schema.md) of a  _sandbox_ [databricks_catalog](../resources/catalog.md):

diff --git a/docs/guides/experimental-exporter.md b/docs/guides/experimental-exporter.md
@@ -61,6 +61,9 @@ All arguments are optional, and they tune what code is being generated.
 * `-listing` - Comma-separated list of services to be listed and further passed on for importing. For each service specified, the exporter performs a listing of available resources using the `List` function and emits them for importing together with their dependencies. The `-services` parameter could be used to control which transitive dependencies will be also imported.
 * `-services` - Comma-separated list of services to import. By default, all services are imported.
 * `-match` - Match resource names during listing operation. This filter applies to all resources that are getting listed, so if you want to import all dependencies of just one cluster, specify `-match=autoscaling -listing=compute`. By default, it is empty, which matches everything.
+* `-matchRegex` - Match resource names against a given regex during listing operation. Applicable to all resources selected for listing.
+* `-excludeRegex` - Exclude resource names matching a given regex. Applied during the listing operation and has higher priority than `-match` and `-matchRegex`.  Applicable to all resources selected for listing.  Could be used to exclude things like `databricks_automl` notebooks, etc.  
+* `-filterDirectoriesDuringWorkspaceWalking` - if we should apply match logic to directory names when we're performing workspace tree walking.  *Note: be careful with it as it will be applied to all entries, so if you want to filter only specific users, then you will need to specify condition for `/Users` as well, so regex will be `^(/Users|/Users/[a-c].*)$`*.
 * `-mounts` - List DBFS mount points, an extremely slow operation that would not trigger unless explicitly specified.
 * `-generateProviderDeclaration` - the flag that toggles the generation of `databricks.tf` file with the declaration of the Databricks Terraform provider that is necessary for Terraform versions since Terraform 0.13 (disabled by default).
 * `-prefix` - optional prefix that will be added to the name of all exported resources - that's useful for exporting resources from multiple workspaces for merging into a single one.

diff --git a/docs/guides/troubleshooting.md b/docs/guides/troubleshooting.md
@@ -17,6 +17,18 @@ TF_LOG=DEBUG DATABRICKS_DEBUG_TRUNCATE_BYTES=250000 terraform apply -no-color 2>
 
 * Open a [new GitHub issue](https://github.com/databricks/terraform-provider-databricks/issues/new/choose) providing all information described in the issue template - debug logs, your Terraform code, Terraform & plugin versions, etc.
 
+## Plugin Framework Migration Problems
+The following resources and data sources have been migrated from sdkv2 to plugin framework. If you encounter any problem with those, you can fall back to sdkv2 by setting the `USE_SDK_V2_RESOURCES` and `USE_SDK_V2_DATA_SOURCES` environment variables.
+
+Example: `export USE_SDK_V2_RESOURCES="databricks_library,databricks_quality_monitor"`
+
+### Resources migrated
+  - databricks_quality_monitor
+  - databricks_library
+### Data sources migrated
+  - databricks_volumes
+
+
 ## Typical problems
 
 ### Data resources and Authentication is not configured errors

diff --git a/docs/resources/library.md b/docs/resources/library.md
@@ -7,6 +7,9 @@ Installs a [library](https://docs.databricks.com/libraries/index.html) on [datab
 
 -> `databricks_library` resource would always start the associated cluster if it's not running, so make sure to have auto-termination configured. It's not possible to atomically change the version of the same library without cluster restart. Libraries are fully removed from the cluster only after restart.
 
+## Plugin Framework Migration
+The library resource has been migrated from sdkv2 to plugin framework. If you encounter any problem with this resource and suspect it is due to the migration, you can fall back to sdkv2 by setting the environment variable in the following way `export USE_SDK_V2_RESOURCES="databricks_library"`.
+
 ## Installing library on all clusters
 
 You can install libraries on all clusters with the help of [databricks_clusters](../data-sources/clusters.md) data resource:

diff --git a/docs/resources/quality_monitor.md b/docs/resources/quality_monitor.md
@@ -7,6 +7,9 @@ This resource allows you to manage [Lakehouse Monitors](https://docs.databricks.
 
 A `databricks_quality_monitor` is attached to a [databricks_sql_table](sql_table.md) and can be of type timeseries, snapshot or inference. 
 
+## Plugin Framework Migration
+The quality monitor resource has been migrated from sdkv2 to plugin framework. If you encounter any problem with this resource and suspect it is due to the migration, you can fall back to sdkv2 by setting the environment variable in the following way `export USE_SDK_V2_RESOURCES="databricks_quality_monitor"`.
+
 ## Example Usage
 
 ```hcl

diff --git a/exporter/command.go b/exporter/command.go
@@ -131,6 +131,8 @@ func Run(args ...string) error {
 	flags.BoolVar(&ic.mounts, "mounts", false, "List DBFS mount points.")
 	flags.BoolVar(&ic.generateDeclaration, "generateProviderDeclaration", true,
 		"Generate Databricks provider declaration.")
+	flags.BoolVar(&ic.filterDirectoriesDuringWorkspaceWalking, "filterDirectoriesDuringWorkspaceWalking", false,
+		"Apply filtering to directory names during workspace walking")
 	flags.StringVar(&ic.notebooksFormat, "notebooksFormat", "SOURCE",
 		"Format to export notebooks: SOURCE, DBC, JUPYTER. Default: SOURCE")
 	services, listing := ic.allServicesAndListing()
@@ -145,6 +147,12 @@ func Run(args ...string) error {
 	flags.StringVar(&ic.match, "match", "", "Match resource names during listing operation. "+
 		"This filter applies to all resources that are getting listed, so if you want to import "+
 		"all dependencies of just one cluster, specify -listing=compute")
+	flags.StringVar(&ic.matchRegexStr, "matchRegex", "", "Match resource names during listing operation against a regex. "+
+		"This filter applies to all resources that are getting listed, so if you want to import "+
+		"all dependencies of just one cluster, specify -listing=compute")
+	flags.StringVar(&ic.excludeRegexStr, "excludeRegex", "", "Exclude resource names matching regex during listing operation. "+
+		"This filter applies to all resources that are getting listed, so if you want to import "+
+		"all dependencies of just one cluster, specify -listing=compute")
 	prefix := ""
 	flags.StringVar(&prefix, "prefix", "", "Prefix that will be added to the name of all exported resources")
 	newArgs := args

diff --git a/exporter/context.go b/exporter/context.go
@@ -78,28 +78,33 @@ type importContext struct {
 	Scope importedResources
 
 	// command-line resources (immutable, or set by the single thread)
-	includeUserDomains       bool
-	importAllUsers           bool
-	exportDeletedUsersAssets bool
-	incremental              bool
-	mounts                   bool
-	noFormat                 bool
-	nativeImportSupported    bool
-	services                 map[string]struct{}
-	listing                  map[string]struct{}
-	match                    string
-	lastActiveDays           int64
-	lastActiveMs             int64
-	generateDeclaration      bool
-	exportSecrets            bool
-	meAdmin                  bool
-	meUserName               string
-	prefix                   string
-	accountLevel             bool
-	shImports                map[string]bool
-	notebooksFormat          string
-	updatedSinceStr          string
-	updatedSinceMs           int64
+	includeUserDomains                      bool
+	importAllUsers                          bool
+	exportDeletedUsersAssets                bool
+	incremental                             bool
+	mounts                                  bool
+	noFormat                                bool
+	nativeImportSupported                   bool
+	services                                map[string]struct{}
+	listing                                 map[string]struct{}
+	match                                   string
+	matchRegexStr                           string
+	matchRegex                              *regexp.Regexp
+	excludeRegexStr                         string
+	excludeRegex                            *regexp.Regexp
+	filterDirectoriesDuringWorkspaceWalking bool
+	lastActiveDays                          int64
+	lastActiveMs                            int64
+	generateDeclaration                     bool
+	exportSecrets                           bool
+	meAdmin                                 bool
+	meUserName                              string
+	prefix                                  string
+	accountLevel                            bool
+	shImports                               map[string]bool
+	notebooksFormat                         string
+	updatedSinceStr                         string
+	updatedSinceMs                          int64
 
 	waitGroup *sync.WaitGroup
 
@@ -297,6 +302,24 @@ func (ic *importContext) Run() error {
 		return fmt.Errorf("no services to import")
 	}
 
+	if ic.matchRegexStr != "" {
+		log.Printf("[DEBUG] Using regex '%s' to filter resources", ic.matchRegexStr)
+		re, err := regexp.Compile(ic.matchRegexStr)
+		if err != nil {
+			log.Printf("[ERROR] can't compile regex '%s': %v", ic.matchRegexStr, err)
+			return err
+		}
+		ic.matchRegex = re
+	}
+	if ic.excludeRegexStr != "" {
+		log.Printf("[DEBUG] Using regex '%s' to filter resources", ic.excludeRegexStr)
+		re, err := regexp.Compile(ic.excludeRegexStr)
+		if err != nil {
+			log.Printf("[ERROR] can't compile regex '%s': %v", ic.excludeRegexStr, err)
+			return err
+		}
+		ic.excludeRegex = re
+	}
 	if ic.incremental {
 		if ic.updatedSinceStr == "" {
 			ic.updatedSinceStr = getLastRunString(statsFileName)