Commit 92578dd

update

edwardfeng-db committed May 24, 2024
1 parent 2717d46 commit 92578dd
Showing 7 changed files with 626 additions and 230 deletions.
83 changes: 71 additions & 12 deletions clusters/resource_cluster.go
@@ -24,7 +24,7 @@ var clusterSchemaVersion = 4

const (
numWorkerErr = "NumWorkers could be 0 only for SingleNode clusters. See https://docs.databricks.com/clusters/single-node.html for more details"
- unsupportedExceptCreateOrEditErr = "unsupported type %T, must be either %scompute.CreateCluster or %scompute.EditCluster. Please report this issue to the GitHub repo"
+ unsupportedExceptCreateOrEditErr = "unsupported type %T, must be one of %scompute.CreateCluster, %scompute.ClusterSpec or %scompute.EditCluster. Please report this issue to the GitHub repo"
)

func ResourceCluster() common.Resource {
@@ -127,8 +127,15 @@ func Validate(cluster any) error {
profile = c.SparkConf["spark.databricks.cluster.profile"]
master = c.SparkConf["spark.master"]
resourceClass = c.CustomTags["ResourceClass"]
+ case compute.ClusterSpec:
+ if c.NumWorkers > 0 || c.Autoscale != nil {
+ return nil
+ }
+ profile = c.SparkConf["spark.databricks.cluster.profile"]
+ master = c.SparkConf["spark.master"]
+ resourceClass = c.CustomTags["ResourceClass"]
default:
- return fmt.Errorf(unsupportedExceptCreateOrEditErr, cluster, "", "")
+ return fmt.Errorf(unsupportedExceptCreateOrEditErr, cluster, "", "", "")
}
if profile == "singleNode" && strings.HasPrefix(master, "local") && resourceClass == "SingleNode" {
return nil
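
For orientation, a minimal sketch, not part of the commit, of what the new compute.ClusterSpec case accepts; it assumes the provider's clusters package and the Go SDK's compute package are importable under the paths shown:

```go
package main

import (
	"fmt"

	"github.com/databricks/databricks-sdk-go/service/compute"
	"github.com/databricks/terraform-provider-databricks/clusters"
)

func main() {
	// Any positive worker count (or an autoscale block) short-circuits the check.
	fmt.Println(clusters.Validate(compute.ClusterSpec{NumWorkers: 2})) // <nil>

	// A zero-worker spec passes only with all three single-node markers set.
	fmt.Println(clusters.Validate(compute.ClusterSpec{
		SparkConf: map[string]string{
			"spark.databricks.cluster.profile": "singleNode",
			"spark.master":                     "local[*]",
		},
		CustomTags: map[string]string{"ResourceClass": "SingleNode"},
	})) // <nil>

	// Everything else is rejected with numWorkerErr.
	fmt.Println(clusters.Validate(compute.ClusterSpec{})) // error
}
```
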
@@ -140,6 +147,34 @@ func Validate(cluster any) error {
// Long term, ModifyRequestOnInstancePool() in clusters_api.go will be removed once all the resources using clusters are migrated to Go SDK.
func ModifyRequestOnInstancePool(cluster any) error {
switch c := cluster.(type) {
+ case *compute.ClusterSpec:
+ // Instance profile id does not exist or not set
+ if c.InstancePoolId == "" {
+ // Worker must use an instance pool if driver uses an instance pool,
+ // therefore empty the computed value for driver instance pool.
+ c.DriverInstancePoolId = ""
+ return nil
+ }
+ if c.AwsAttributes != nil {
+ // Reset AwsAttributes
+ awsAttributes := compute.AwsAttributes{
+ InstanceProfileArn: c.AwsAttributes.InstanceProfileArn,
+ }
+ c.AwsAttributes = &awsAttributes
+ }
+ if c.AzureAttributes != nil {
+ c.AzureAttributes = &compute.AzureAttributes{}
+ }
+ if c.GcpAttributes != nil {
+ gcpAttributes := compute.GcpAttributes{
+ GoogleServiceAccount: c.GcpAttributes.GoogleServiceAccount,
+ }
+ c.GcpAttributes = &gcpAttributes
+ }
+ c.EnableElasticDisk = false
+ c.NodeTypeId = ""
+ c.DriverNodeTypeId = ""
+ return nil
case *compute.CreateCluster:
// Instance profile id does not exist or not set
if c.InstancePoolId == "" {
@@ -197,20 +232,35 @@ func ModifyRequestOnInstancePool(cluster any) error {
c.DriverNodeTypeId = ""
return nil
default:
- return fmt.Errorf(unsupportedExceptCreateOrEditErr, cluster, "*", "*")
+ return fmt.Errorf(unsupportedExceptCreateOrEditErr, cluster, "*", "*", "*")
}
}
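
Reusing the imports from the sketch above, a rough illustration of the pool branch with hypothetical values (again, not part of the commit):

```go
spec := &compute.ClusterSpec{
	InstancePoolId:    "pool-0123456789abcdef",
	NodeTypeId:        "i3.xlarge",
	EnableElasticDisk: true,
	AwsAttributes: &compute.AwsAttributes{
		InstanceProfileArn: "arn:aws:iam::123456789012:instance-profile/demo",
		ZoneId:             "us-east-1a",
	},
}
if err := clusters.ModifyRequestOnInstancePool(spec); err != nil {
	panic(err) // only reachable for unsupported request types
}
// The pool now dictates node types and disk settings:
// spec.NodeTypeId == "", spec.EnableElasticDisk == false, and
// spec.AwsAttributes retains only its InstanceProfileArn.
```
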

// This method is a duplicate of FixInstancePoolChangeIfAny(d *schema.ResourceData) in clusters/clusters_api.go that uses Go SDK.
// Long term, FixInstancePoolChangeIfAny(d *schema.ResourceData) in clusters_api.go will be removed once all the resources using clusters are migrated to Go SDK.
// https://github.com/databricks/terraform-provider-databricks/issues/824
- func FixInstancePoolChangeIfAny(d *schema.ResourceData, cluster *compute.EditCluster) {
- oldInstancePool, newInstancePool := d.GetChange("instance_pool_id")
- oldDriverPool, newDriverPool := d.GetChange("driver_instance_pool_id")
- if oldInstancePool != newInstancePool &&
- oldDriverPool == oldInstancePool &&
- oldDriverPool == newDriverPool {
- cluster.DriverInstancePoolId = cluster.InstancePoolId
+ func FixInstancePoolChangeIfAny(d *schema.ResourceData, cluster any) error {
+ switch c := cluster.(type) {
+ case *compute.ClusterSpec:
+ oldInstancePool, newInstancePool := d.GetChange("instance_pool_id")
+ oldDriverPool, newDriverPool := d.GetChange("driver_instance_pool_id")
+ if oldInstancePool != newInstancePool &&
+ oldDriverPool == oldInstancePool &&
+ oldDriverPool == newDriverPool {
+ c.DriverInstancePoolId = c.InstancePoolId
+ }
+ return nil
+ case *compute.EditCluster:
+ oldInstancePool, newInstancePool := d.GetChange("instance_pool_id")
+ oldDriverPool, newDriverPool := d.GetChange("driver_instance_pool_id")
+ if oldInstancePool != newInstancePool &&
+ oldDriverPool == oldInstancePool &&
+ oldDriverPool == newDriverPool {
+ c.DriverInstancePoolId = c.InstancePoolId
+ }
+ return nil
+ default:
+ return fmt.Errorf(unsupportedExceptCreateOrEditErr, cluster, "*", "*", "*")
+ }
}
}

@@ -220,7 +270,6 @@ type ClusterSpec struct {
}

func (ClusterSpec) CustomizeSchemaResourceSpecific(s *common.CustomizableSchema) *common.CustomizableSchema {
- s.SchemaPath("autotermination_minutes").SetDefault(60)
s.AddNewField("default_tags", &schema.Schema{
Type: schema.TypeMap,
Computed: true,
@@ -244,6 +293,11 @@ func (ClusterSpec) CustomizeSchemaResourceSpecific(s *common.CustomizableSchema)
Type: schema.TypeString,
Computed: true,
})
+ s.AddNewField("autotermination_minutes", &schema.Schema{
+ Type: schema.TypeInt,
+ Optional: true,
+ Default: 60,
+ })
return s
}

@@ -300,6 +354,8 @@ func (ClusterSpec) CustomizeSchema(s *common.CustomizableSchema) *common.Customi
Schema: common.StructToSchema(MountInfo{}, nil),
},
})
+ // Adding it back in the resource specific customization function because it is not relevant for other resources.
+ s.RemoveField("autotermination_minutes")

return s
}
@@ -453,7 +509,10 @@ func resourceClusterUpdate(ctx context.Context, d *schema.ResourceData, c *commo
if err = ModifyRequestOnInstancePool(&cluster); err != nil {
return err
}
- FixInstancePoolChangeIfAny(d, &cluster)
+ err = FixInstancePoolChangeIfAny(d, &cluster)
+ if err != nil {
+ return err
+ }

// We can only call the resize api if the cluster is in the running state
// and only the cluster size (ie num_workers OR autoscale) is being changed
2 changes: 1 addition & 1 deletion common/reflect_resource.go
@@ -617,7 +617,7 @@ func iterFields(rv reflect.Value, path []string, s map[string]*schema.Schema, al
return fmt.Errorf("inconsistency: %s has omitempty, but is not optional", fieldName)
}
defaultEmpty := reflect.ValueOf(fieldSchema.Default).Kind() == reflect.Invalid
- if fieldSchema.Optional && defaultEmpty && !omitEmpty {
+ if !isGoSDK && fieldSchema.Optional && defaultEmpty && !omitEmpty {
return fmt.Errorf("inconsistency: %s is optional, default is empty, but has no omitempty", fieldName)
}
err := cb(fieldSchema, append(path, fieldName), field)
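
Illustrative only, with invented types: the consistency rule that still applies to hand-maintained structs, but is now skipped when isGoSDK is true, presumably because the provider does not control the Go SDK's struct tags:

```go
// The provider derives schemas from struct tags; an omitempty tag makes the
// field Optional, so empty values are dropped rather than serialized.
type HandWrittenSpec struct {
	Comment string `json:"comment,omitempty"` // Optional with empty default: consistent
}

// If this field's schema were marked Optional (with an empty default) without
// omitempty, iterFields would return the "has no omitempty" error above for a
// hand-maintained struct; for Go SDK structs the check is now bypassed.
type SdkLikeSpec struct {
	Comment string `json:"comment"`
}
```
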
12 changes: 6 additions & 6 deletions docs/resources/job.md
@@ -127,7 +127,7 @@ This block describes individual tasks:
* `sql_task`
* `library` - (Optional) (Set) An optional list of libraries to be installed on the cluster that will execute the job.
* `depends_on` - (Optional) block specifying dependency(-ies) for a given task.
- * `job_cluster_key` - (Optional) Identifier of the Job cluster specified in the `job_cluster` block.
+ * `job_cluster_key` - (Required) Identifier of the Job cluster specified in the `job_cluster` block.
* `existing_cluster_id` - (Optional) Identifier of the [interactive cluster](cluster.md) to run job on. *Note: running tasks on interactive clusters may lead to increased costs!*
* `new_cluster` - (Optional) Task will run on a dedicated cluster. See [databricks_cluster](cluster.md) documentation for specification.
* `run_if` - (Optional) An optional value indicating the condition that determines whether the task should be run once its dependencies have been completed. One of `ALL_SUCCESS`, `AT_LEAST_ONE_SUCCESS`, `NONE_FAILED`, `ALL_DONE`, `AT_LEAST_ONE_FAILED` or `ALL_FAILED`. When omitted, defaults to `ALL_SUCCESS`.
@@ -426,9 +426,9 @@ The following parameter is only available on task level.
This block describes health conditions for a given job or an individual task. It consists of the following attributes:

* `rules` - (List) list of rules that are represented as objects with the following attributes:
- * `metric` - (Optional) string specifying the metric to check. The only supported metric is `RUN_DURATION_SECONDS` (check [Jobs REST API documentation](https://docs.databricks.com/api/workspace/jobs/create) for the latest information).
- * `op` - (Optional) string specifying the operation used to evaluate the given metric. The only supported operation is `GREATER_THAN`.
- * `value` - (Optional) integer value used to compare to the given metric.
+ * `metric` - (Required) string specifying the metric to check. The only supported metric is `RUN_DURATION_SECONDS` (check [Jobs REST API documentation](https://docs.databricks.com/api/workspace/jobs/create) for the latest information).
+ * `op` - (Required) string specifying the operation used to evaluate the given metric. The only supported operation is `GREATER_THAN`.
+ * `value` - (Required) integer value used to compare to the given metric.

### tags Configuration Map

@@ -468,8 +468,8 @@ This syntax uses Jobs API 2.0 to create a job with a single task. Only a subset

The job cluster is specified using either of the below argument:

- * `new_cluster` - (Optional) Same set of parameters as for [databricks_cluster](cluster.md) resource.
- * `existing_cluster_id` - (Optional) If existing_cluster_id, the ID of an existing [cluster](cluster.md) that will be used for all runs of this job. When running jobs on an existing cluster, you may need to manually restart the cluster if it stops responding. We strongly suggest to use `new_cluster` for greater reliability.
+ * `new_cluster` - (Required) Same set of parameters as for [databricks_cluster](cluster.md) resource.
+ * `job_cluster_key` - (Required) A unique name for the job cluster. This field must be unique within the job. JobTaskSettings may refer to this field to determine which cluster to launch for the task execution.

```hcl
data "databricks_current_user" "me" {}
  # ...
```