Commit 92578dd

update

edwardfeng-db committed May 24, 2024
1 parent 2717d46 commit 92578dd
Showing 7 changed files with 626 additions and 230 deletions.
83 changes: 71 additions & 12 deletions clusters/resource_cluster.go
@@ -24,7 +24,7 @@ var clusterSchemaVersion = 4

const (
numWorkerErr = "NumWorkers could be 0 only for SingleNode clusters. See https://docs.databricks.com/clusters/single-node.html for more details"
- unsupportedExceptCreateOrEditErr = "unsupported type %T, must be either %scompute.CreateCluster or %scompute.EditCluster. Please report this issue to the GitHub repo"
+ unsupportedExceptCreateOrEditErr = "unsupported type %T, must be one of %scompute.CreateCluster, %scompute.ClusterSpec or %scompute.EditCluster. Please report this issue to the GitHub repo"
)

func ResourceCluster() common.Resource {
@@ -127,8 +127,15 @@ func Validate(cluster any) error {
profile = c.SparkConf["spark.databricks.cluster.profile"]
master = c.SparkConf["spark.master"]
resourceClass = c.CustomTags["ResourceClass"]
+ case compute.ClusterSpec:
+ if c.NumWorkers > 0 || c.Autoscale != nil {
+ return nil
+ }
+ profile = c.SparkConf["spark.databricks.cluster.profile"]
+ master = c.SparkConf["spark.master"]
+ resourceClass = c.CustomTags["ResourceClass"]
default:
- return fmt.Errorf(unsupportedExceptCreateOrEditErr, cluster, "", "")
+ return fmt.Errorf(unsupportedExceptCreateOrEditErr, cluster, "", "", "")
}
if profile == "singleNode" && strings.HasPrefix(master, "local") && resourceClass == "SingleNode" {
return nil
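
For orientation, a minimal sketch, not part of the commit, of what the new compute.ClusterSpec case accepts; it assumes the provider's clusters package and the Go SDK's compute package are importable under the paths shown:

```go
package main

import (
	"fmt"

	"github.com/databricks/databricks-sdk-go/service/compute"
	"github.com/databricks/terraform-provider-databricks/clusters"
)

func main() {
	// Any positive worker count (or an autoscale block) short-circuits the check.
	fmt.Println(clusters.Validate(compute.ClusterSpec{NumWorkers: 2})) // <nil>

	// A zero-worker spec passes only with all three single-node markers set.
	fmt.Println(clusters.Validate(compute.ClusterSpec{
		SparkConf: map[string]string{
			"spark.databricks.cluster.profile": "singleNode",
			"spark.master":                     "local[*]",
		},
		CustomTags: map[string]string{"ResourceClass": "SingleNode"},
	})) // <nil>

	// Everything else is rejected with numWorkerErr.
	fmt.Println(clusters.Validate(compute.ClusterSpec{})) // error
}
```
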
@@ -140,6 +147,34 @@ func Validate(cluster any) error {
// Long term, ModifyRequestOnInstancePool() in clusters_api.go will be removed once all the resources using clusters are migrated to Go SDK.
func ModifyRequestOnInstancePool(cluster any) error {
switch c := cluster.(type) {
+ case *compute.ClusterSpec:
+ // Instance profile id does not exist or not set
+ if c.InstancePoolId == "" {
+ // Worker must use an instance pool if driver uses an instance pool,
+ // therefore empty the computed value for driver instance pool.
+ c.DriverInstancePoolId = ""
+ return nil
+ }
+ if c.AwsAttributes != nil {
+ // Reset AwsAttributes
+ awsAttributes := compute.AwsAttributes{
+ InstanceProfileArn: c.AwsAttributes.InstanceProfileArn,
+ }
+ c.AwsAttributes = &awsAttributes
+ }
+ if c.AzureAttributes != nil {
+ c.AzureAttributes = &compute.AzureAttributes{}
+ }
+ if c.GcpAttributes != nil {
+ gcpAttributes := compute.GcpAttributes{
+ GoogleServiceAccount: c.GcpAttributes.GoogleServiceAccount,
+ }
+ c.GcpAttributes = &gcpAttributes
+ }
+ c.EnableElasticDisk = false
+ c.NodeTypeId = ""
+ c.DriverNodeTypeId = ""
+ return nil
case *compute.CreateCluster:
// Instance profile id does not exist or not set
if c.InstancePoolId == "" {
@@ -197,20 +232,35 @@ func ModifyRequestOnInstancePool(cluster any) error {
c.DriverNodeTypeId = ""
return nil
default:
- return fmt.Errorf(unsupportedExceptCreateOrEditErr, cluster, "*", "*")
+ return fmt.Errorf(unsupportedExceptCreateOrEditErr, cluster, "*", "*", "*")
}
}
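
Reusing the imports from the sketch above, a rough illustration of the pool branch with hypothetical values (again, not part of the commit):

```go
spec := &compute.ClusterSpec{
	InstancePoolId:    "pool-0123456789abcdef",
	NodeTypeId:        "i3.xlarge",
	EnableElasticDisk: true,
	AwsAttributes: &compute.AwsAttributes{
		InstanceProfileArn: "arn:aws:iam::123456789012:instance-profile/demo",
		ZoneId:             "us-east-1a",
	},
}
if err := clusters.ModifyRequestOnInstancePool(spec); err != nil {
	panic(err) // only reachable for unsupported request types
}
// The pool now dictates node types and disk settings:
// spec.NodeTypeId == "", spec.EnableElasticDisk == false, and
// spec.AwsAttributes retains only its InstanceProfileArn.
```
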

// This method is a duplicate of FixInstancePoolChangeIfAny(d *schema.ResourceData) in clusters/clusters_api.go that uses Go SDK.
// Long term, FixInstancePoolChangeIfAny(d *schema.ResourceData) in clusters_api.go will be removed once all the resources using clusters are migrated to Go SDK.
// https://github.com/databricks/terraform-provider-databricks/issues/824
- func FixInstancePoolChangeIfAny(d *schema.ResourceData, cluster *compute.EditCluster) {
- oldInstancePool, newInstancePool := d.GetChange("instance_pool_id")
- oldDriverPool, newDriverPool := d.GetChange("driver_instance_pool_id")
- if oldInstancePool != newInstancePool &&
- oldDriverPool == oldInstancePool &&
- oldDriverPool == newDriverPool {
- cluster.DriverInstancePoolId = cluster.InstancePoolId
+ func FixInstancePoolChangeIfAny(d *schema.ResourceData, cluster any) error {
+ switch c := cluster.(type) {
+ case *compute.ClusterSpec:
+ oldInstancePool, newInstancePool := d.GetChange("instance_pool_id")
+ oldDriverPool, newDriverPool := d.GetChange("driver_instance_pool_id")
+ if oldInstancePool != newInstancePool &&
+ oldDriverPool == oldInstancePool &&
+ oldDriverPool == newDriverPool {
+ c.DriverInstancePoolId = c.InstancePoolId
+ }
+ return nil
+ case *compute.EditCluster:
+ oldInstancePool, newInstancePool := d.GetChange("instance_pool_id")
+ oldDriverPool, newDriverPool := d.GetChange("driver_instance_pool_id")
+ if oldInstancePool != newInstancePool &&
+ oldDriverPool == oldInstancePool &&
+ oldDriverPool == newDriverPool {
+ c.DriverInstancePoolId = c.InstancePoolId
+ }
+ return nil
+ default:
+ return fmt.Errorf(unsupportedExceptCreateOrEditErr, cluster, "*", "*", "*")
+ }
}
}

@@ -220,7 +270,6 @@ type ClusterSpec struct {
}

func (ClusterSpec) CustomizeSchemaResourceSpecific(s *common.CustomizableSchema) *common.CustomizableSchema {
- s.SchemaPath("autotermination_minutes").SetDefault(60)
s.AddNewField("default_tags", &schema.Schema{
Type: schema.TypeMap,
Computed: true,
@@ -244,6 +293,11 @@ func (ClusterSpec) CustomizeSchemaResourceSpecific(s *common.CustomizableSchema)
Type: schema.TypeString,
Computed: true,
})
+ s.AddNewField("autotermination_minutes", &schema.Schema{
+ Type: schema.TypeInt,
+ Optional: true,
+ Default: 60,
+ })
return s
}

@@ -300,6 +354,8 @@ func (ClusterSpec) CustomizeSchema(s *common.CustomizableSchema) *common.Customi
Schema: common.StructToSchema(MountInfo{}, nil),
},
})
+ // Adding it back in the resource specific customization function because it is not relevant for other resources.
+ s.RemoveField("autotermination_minutes")

return s
}
@@ -453,7 +509,10 @@ func resourceClusterUpdate(ctx context.Context, d *schema.ResourceData, c *commo
if err = ModifyRequestOnInstancePool(&cluster); err != nil {
return err
}
- FixInstancePoolChangeIfAny(d, &cluster)
+ err = FixInstancePoolChangeIfAny(d, &cluster)
+ if err != nil {
+ return err
+ }

// We can only call the resize api if the cluster is in the running state
// and only the cluster size (ie num_workers OR autoscale) is being changed
2 changes: 1 addition & 1 deletion common/reflect_resource.go
@@ -617,7 +617,7 @@ func iterFields(rv reflect.Value, path []string, s map[string]*schema.Schema, al
return fmt.Errorf("inconsistency: %s has omitempty, but is not optional", fieldName)
}
defaultEmpty := reflect.ValueOf(fieldSchema.Default).Kind() == reflect.Invalid
- if fieldSchema.Optional && defaultEmpty && !omitEmpty {
+ if !isGoSDK && fieldSchema.Optional && defaultEmpty && !omitEmpty {
return fmt.Errorf("inconsistency: %s is optional, default is empty, but has no omitempty", fieldName)
}
err := cb(fieldSchema, append(path, fieldName), field)
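
Illustrative only, with invented types: the consistency rule that still applies to hand-maintained structs, but is now skipped when isGoSDK is true, presumably because the provider does not control the Go SDK's struct tags:

```go
// The provider derives schemas from struct tags; an omitempty tag makes the
// field Optional, so empty values are dropped rather than serialized.
type HandWrittenSpec struct {
	Comment string `json:"comment,omitempty"` // Optional with empty default: consistent
}

// If this field's schema were marked Optional (with an empty default) without
// omitempty, iterFields would return the "has no omitempty" error above for a
// hand-maintained struct; for Go SDK structs the check is now bypassed.
type SdkLikeSpec struct {
	Comment string `json:"comment"`
}
```
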
12 changes: 6 additions & 6 deletions docs/resources/job.md
@@ -127,7 +127,7 @@ This block describes individual tasks:
* `sql_task`
* `library` - (Optional) (Set) An optional list of libraries to be installed on the cluster that will execute the job.
* `depends_on` - (Optional) block specifying dependency(-ies) for a given task.
- * `job_cluster_key` - (Optional) Identifier of the Job cluster specified in the `job_cluster` block.
+ * `job_cluster_key` - (Required) Identifier of the Job cluster specified in the `job_cluster` block.
* `existing_cluster_id` - (Optional) Identifier of the [interactive cluster](cluster.md) to run job on. *Note: running tasks on interactive clusters may lead to increased costs!*
* `new_cluster` - (Optional) Task will run on a dedicated cluster. See [databricks_cluster](cluster.md) documentation for specification.
* `run_if` - (Optional) An optional value indicating the condition that determines whether the task should be run once its dependencies have been completed. One of `ALL_SUCCESS`, `AT_LEAST_ONE_SUCCESS`, `NONE_FAILED`, `ALL_DONE`, `AT_LEAST_ONE_FAILED` or `ALL_FAILED`. When omitted, defaults to `ALL_SUCCESS`.
@@ -426,9 +426,9 @@ The following parameter is only available on task level.
This block describes health conditions for a given job or an individual task. It consists of the following attributes:

* `rules` - (List) list of rules that are represented as objects with the following attributes:
- * `metric` - (Optional) string specifying the metric to check. The only supported metric is `RUN_DURATION_SECONDS` (check [Jobs REST API documentation](https://docs.databricks.com/api/workspace/jobs/create) for the latest information).
- * `op` - (Optional) string specifying the operation used to evaluate the given metric. The only supported operation is `GREATER_THAN`.
- * `value` - (Optional) integer value used to compare to the given metric.
+ * `metric` - (Required) string specifying the metric to check. The only supported metric is `RUN_DURATION_SECONDS` (check [Jobs REST API documentation](https://docs.databricks.com/api/workspace/jobs/create) for the latest information).
+ * `op` - (Required) string specifying the operation used to evaluate the given metric. The only supported operation is `GREATER_THAN`.
+ * `value` - (Required) integer value used to compare to the given metric.

### tags Configuration Map

@@ -468,8 +468,8 @@ This syntax uses Jobs API 2.0 to create a job with a single task. Only a subset

The job cluster is specified using either of the below argument:

- * `new_cluster` - (Optional) Same set of parameters as for [databricks_cluster](cluster.md) resource.
- * `existing_cluster_id` - (Optional) If existing_cluster_id, the ID of an existing [cluster](cluster.md) that will be used for all runs of this job. When running jobs on an existing cluster, you may need to manually restart the cluster if it stops responding. We strongly suggest to use `new_cluster` for greater reliability.
+ * `new_cluster` - (Required) Same set of parameters as for [databricks_cluster](cluster.md) resource.
+ * `job_cluster_key` - (Required) A unique name for the job cluster. This field must be unique within the job. JobTaskSettings may refer to this field to determine which cluster to launch for the task execution.

```hcl
data "databricks_current_user" "me" {}
  # ...
```