Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for Vector Search Indexes #3266

Merged
merged 9 commits into from
Mar 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 40 additions & 40 deletions docs/resources/lakehouse_monitor.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,62 +19,62 @@ resource "databricks_catalog" "sandbox" {
}

resource "databricks_schema" "things" {
catalog_name = databricks_catalog.sandbox.id
name = "things"
comment = "this database is managed by terraform"
properties = {
kind = "various"
}
catalog_name = databricks_catalog.sandbox.id
name = "things"
comment = "this database is managed by terraform"
properties = {
kind = "various"
}

}

resource "databricks_sql_table" "myTestTable" {
catalog_name = "main"
schema_name = databricks_schema.things.name
name = "bar"
table_type = "MANAGED"
data_source_format = "DELTA"

column {
name = "timestamp"
position = 1
type = "int"
}
catalog_name = "main"
schema_name = databricks_schema.things.name
name = "bar"
table_type = "MANAGED"
data_source_format = "DELTA"

column {
name = "timestamp"
position = 1
type = "int"
}
}

resource "databricks_lakehouse_monitor" "testTimeseriesMonitor" {
table_name = "${databricks_catalog.sandbox.name}.${databricks_schema.things.name}.${databricks_sql_table.myTestTable.name}"
assets_dir = "/Shared/provider-test/databricks_lakehouse_monitoring/${databricks_sql_table.myTestTable.name}"
output_schema_name = "${databricks_catalog.sandbox.name}.${databricks_schema.things.name}"
time_series {
granularities = ["1 hour"]
timestamp_col = "timestamp"
}
table_name = "${databricks_catalog.sandbox.name}.${databricks_schema.things.name}.${databricks_sql_table.myTestTable.name}"
assets_dir = "/Shared/provider-test/databricks_lakehouse_monitoring/${databricks_sql_table.myTestTable.name}"
output_schema_name = "${databricks_catalog.sandbox.name}.${databricks_schema.things.name}"
time_series {
granularities = ["1 hour"]
timestamp_col = "timestamp"
}
}
```

### Inference Monitor

```hcl
resource "databricks_lakehouse_monitor" "testMonitorInference" {
table_name = "${databricks_catalog.sandbox.name}.${databricks_schema.things.name}.${databricks_table.myTestTable.name}"
assets_dir = "/Shared/provider-test/databricks_lakehouse_monitoring/${databricks_table.myTestTable.name}"
output_schema_name = "${databricks_catalog.sandbox.name}.${databricks_schema.things.name}"
inference_log {
granularities = ["1 hour"]
timestamp_col = "timestamp"
prediction_col = "prediction"
model_id_col = "model_id"
problem_type = "PROBLEM_TYPE_REGRESSION"
}
table_name = "${databricks_catalog.sandbox.name}.${databricks_schema.things.name}.${databricks_table.myTestTable.name}"
assets_dir = "/Shared/provider-test/databricks_lakehouse_monitoring/${databricks_table.myTestTable.name}"
output_schema_name = "${databricks_catalog.sandbox.name}.${databricks_schema.things.name}"
inference_log {
granularities = ["1 hour"]
timestamp_col = "timestamp"
prediction_col = "prediction"
model_id_col = "model_id"
problem_type = "PROBLEM_TYPE_REGRESSION"
}
}
```
### Snapshot Monitor
```hcl
resource "databricks_lakehouse_monitor" "testMonitorInference" {
table_name = "${databricks_catalog.sandbox.name}.${databricks_schema.things.name}.${databricks_table.myTestTable.name}"
assets_dir = "/Shared/provider-test/databricks_lakehouse_monitoring/${databricks_table.myTestTable.name}"
output_schema_name = "${databricks_catalog.sandbox.name}.${databricks_schema.things.name}"
snapshot {}
table_name = "${databricks_catalog.sandbox.name}.${databricks_schema.things.name}.${databricks_table.myTestTable.name}"
assets_dir = "/Shared/provider-test/databricks_lakehouse_monitoring/${databricks_table.myTestTable.name}"
output_schema_name = "${databricks_catalog.sandbox.name}.${databricks_schema.things.name}"
snapshot {}
}
```

Expand Down Expand Up @@ -129,4 +129,4 @@ The following resources are often used in the same context:

* [databricks_catalog](catalog.md)
* [databricks_schema](schema.md)
* [databricks_sql_table](sql_table.md)
* [databricks_sql_table](sql_table.md)
8 changes: 4 additions & 4 deletions docs/resources/model_serving.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,15 @@ resource "databricks_model_serving" "this" {
config {
served_entities {
name = "prod_model"
entity_name = "ads-model"
entity_version = "2"
entity_name = "ads-model"
entity_version = "2"
workload_size = "Small"
scale_to_zero_enabled = true
}
served_entities {
name = "candidate_model"
entity_name = "ads-model"
entity_version = "4"
entity_name = "ads-model"
entity_version = "4"
workload_size = "Small"
scale_to_zero_enabled = false
}
Expand Down
14 changes: 7 additions & 7 deletions docs/resources/vector_search_endpoint.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,24 +18,24 @@ resource "databricks_vector_search_endpoint" "this" {

## Argument Reference

The following arguments are supported:
The following arguments are supported (change of any parameter leads to recreation of the resource):

* `name` - (Required) Name of the Vector Search Endpoint to create. If name is changed, Vector Search Endpoint is recreated.
* `endpoint_type` (Required) type of Vector Search Endpoint. Currently only accepting single value: `STANDARD` (See [documentation](https://docs.databricks.com/api/workspace/vectorsearchendpoints/createendpoint) for the list of currently supported values). If it's changed, Vector Search Endpoint is recreated.
* `name` - (Required) Name of the Vector Search Endpoint to create.
* `endpoint_type` (Required) Type of Vector Search Endpoint. Currently only accepting single value: `STANDARD` (See [documentation](https://docs.databricks.com/api/workspace/vectorsearchendpoints/createendpoint) for the list of currently supported values).

## Attribute Reference

In addition to all arguments above, the following attributes are exported:
In addition to all the arguments above, the following attributes are exported:

* `id` - The same as the name of the endpoint.
* `creator` - Creator of the endpoint.
* `creation_timestamp` - Timestamp of endpoint creation (milliseconds).
* `last_updated_user` - User who last updated the endpoint.
* `last_updated_timestamp` - Timestamp of last update to the endpoint (milliseconds).
* `last_updated_timestamp` - Timestamp of the last update to the endpoint (milliseconds).
* `endpoint_id` - Unique internal identifier of the endpoint (UUID).
* `num_indexes` - Number of indexes on the endpoint.
* `endpoint_status` - Object describing the current status of the endpoint consisting of following fields:
* `state` - Current state of the endpoint. Currently following values are supported: `PROVISIONING`, `ONLINE`, `OFFLINE`.
* `endpoint_status` - Object describing the current status of the endpoint consisting of the following fields:
* `state` - Current state of the endpoint. Currently following values are supported: `PROVISIONING`, `ONLINE`, and `OFFLINE`.
* `message` - Additional status message.

## Import
Expand Down
79 changes: 79 additions & 0 deletions docs/resources/vector_search_index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
---
subcategory: "Vector Search"
---
# databricks_vector_search_index Resource

-> **Note** This resource can only be used on a Unity Catalog-enabled workspace!

This resource allows you to create [Vector Search Index](https://docs.databricks.com/en/generative-ai/create-query-vector-search.html) in Databricks. Vector Search is a serverless similarity search engine that allows you to store a vector representation of your data, including metadata, in a vector database. The Vector Search Index provides the ability to search data in the linked Delta Table.

## Example Usage

```hcl
resource "databricks_vector_search_index" "sync" {
name = "main.default.vector_search_index"
endpoint_name = databricks_vector_search_endpoint.this.name
primary_key = "id"
index_type = "DELTA_SYNC"
delta_sync_index_spec {
source_table = "main.default.source_table"
pipeline_type = "TRIGGERED"
embedding_source_columns {
name = "text"
embedding_model_endpoint_name = databricks_model_serving.this.name
}
}
}
```

## Argument Reference

The following arguments are supported (change of any parameter leads to recreation of the resource):

* `name` - (required) Three-level name of the Vector Search Index to create (`catalog.schema.index_name`).
* `endpoint_name` - (required) The name of the Vector Search Endpoint that will be used for indexing the data.
* `primary_key` - (required) The column name that will be used as a primary key.
* `index_type` - (required) Vector Search index type. Currently supported values are:
* `DELTA_SYNC`: An index that automatically syncs with a source Delta Table, automatically and incrementally updating the index as the underlying data in the Delta Table changes.
* `DIRECT_ACCESS`: An index that supports the direct read and write of vectors and metadata through our REST and SDK APIs. With this model, the user manages index updates.
* `delta_sync_index_spec` - (object) Specification for Delta Sync Index. Required if `index_type` is `DELTA_SYNC`.
* `source_table` (required) The name of the source table.
* `embedding_source_columns` - (required if `embedding_vector_columns` isn't provided) array of objects representing columns that contain the embedding source. Each entry consists of:
* `name` - The name of the column
* `embedding_model_endpoint_name` - The name of the embedding model endpoint
* `embedding_vector_columns` - (required if `embedding_source_columns` isn't provided) array of objects representing columns that contain the embedding vectors. Each entry consists of:
* `name` - The name of the column.
* `embedding_dimension` - Dimension of the embedding vector.
* `pipeline_type` - Pipeline execution mode. Possible values are:
* `TRIGGERED`: If the pipeline uses the triggered execution mode, the system stops processing after successfully refreshing the source table in the pipeline once, ensuring the table is updated based on the data available when the update started.
* `CONTINUOUS`: If the pipeline uses continuous execution, the pipeline processes new data as it arrives in the source table to keep the vector index fresh.
* `direct_access_index_spec` - (object) Specification for Direct Vector Access Index. Required if `index_type` is `DIRECT_ACCESS`.
* `schema_json` - The schema of the index in JSON format. Check the [API documentation](https://docs.databricks.com/api/workspace/vectorsearchindexes/createindex#direct_access_index_spec-schema_json) for a list of supported data types.
* `embedding_source_columns` - (required if `embedding_vector_columns` isn't provided) array of objects representing columns that contain the embedding source. Each entry consists of:
* `name` - The name of the column
* `embedding_model_endpoint_name` - The name of the embedding model endpoint
* `embedding_vector_columns` - (required if `embedding_source_columns` isn't provided) array of objects representing columns that contain the embedding vectors. Each entry consists of:
* `name` - The name of the column.
* `embedding_dimension` - Dimension of the embedding vector.

## Attribute Reference

In addition to all arguments above, the following attributes are exported:

* `id` - The same as the name of the index.
* `creator` - Creator of the index.
* `delta_sync_index_spec`:
* `pipeline_id` - ID of the associated Delta Live Table pipeline.
* `status` - Object describing the current status of the index consisting of the following fields:
* `message` - Message associated with the index status
* `indexed_row_count` - Number of rows indexed
* `ready` - Whether the index is ready for search
* `index_url` - Index API Url to be used to perform operations on the index

## Import

The resource can be imported using the name of the Vector Search Index

```bash
terraform import databricks_vector_search_index.this <index-name>
```
1 change: 1 addition & 0 deletions provider/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ func DatabricksProvider() *schema.Provider {
"databricks_user_instance_profile": aws.ResourceUserInstanceProfile().ToResource(),
"databricks_user_role": aws.ResourceUserRole().ToResource(),
"databricks_vector_search_endpoint": vectorsearch.ResourceVectorSearchEndpoint().ToResource(),
"databricks_vector_search_index": vectorsearch.ResourceVectorSearchIndex().ToResource(),
"databricks_volume": catalog.ResourceVolume().ToResource(),
"databricks_workspace_conf": workspace.ResourceWorkspaceConf().ToResource(),
"databricks_workspace_file": workspace.ResourceWorkspaceFile().ToResource(),
Expand Down
4 changes: 2 additions & 2 deletions vectorsearch/resource_vector_search_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
"github.com/databricks/databricks-sdk-go/service/vectorsearch"
)

const defaultProvisionTimeout = 75 * time.Minute
const defaultEndpointProvisionTimeout = 75 * time.Minute
const deleteCallTimeout = 10 * time.Second

func ResourceVectorSearchEndpoint() common.Resource {
Expand Down Expand Up @@ -86,7 +86,7 @@ func ResourceVectorSearchEndpoint() common.Resource {
Schema: s,
SchemaVersion: 0,
Timeouts: &schema.ResourceTimeout{
Create: schema.DefaultTimeout(defaultProvisionTimeout),
Create: schema.DefaultTimeout(defaultEndpointProvisionTimeout),
},
}
}
6 changes: 3 additions & 3 deletions vectorsearch/resource_vector_search_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,10 @@ func TestVectorSearchEndpointRead(t *testing.T) {
})
}

func TestResourcePASDelete(t *testing.T) {
func TestVectorSearchEndpointDelete(t *testing.T) {
qa.ResourceFixture{
MockWorkspaceClientFunc: func(a *mocks.MockWorkspaceClient) {
a.GetMockVectorSearchEndpointsAPI().EXPECT().DeleteEndpointByEndpointName(mock.Anything, "abc").Return(nil)
MockWorkspaceClientFunc: func(w *mocks.MockWorkspaceClient) {
w.GetMockVectorSearchEndpointsAPI().EXPECT().DeleteEndpointByEndpointName(mock.Anything, "abc").Return(nil)
},
Resource: ResourceVectorSearchEndpoint(),
Delete: true,
Expand Down
118 changes: 118 additions & 0 deletions vectorsearch/resource_vector_search_index.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
package vectorsearch

import (
"context"
"errors"
"fmt"
"log"
"time"

"github.com/databricks/databricks-sdk-go"
"github.com/databricks/terraform-provider-databricks/common"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/retry"
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"

"github.com/databricks/databricks-sdk-go/apierr"
"github.com/databricks/databricks-sdk-go/service/vectorsearch"
)

const defaultIndexProvisionTimeout = 15 * time.Minute

// waitForVectorSearchIndexDeletion polls the Vector Search Indexes API until
// the index with the given name is no longer found, confirming that deletion
// has completed. It retries for up to defaultIndexProvisionTimeout and returns
// nil once the index is gone, or a wrapped error on any unexpected API failure.
func waitForVectorSearchIndexDeletion(w *databricks.WorkspaceClient, ctx context.Context, searchIndexName string) error {
	return retry.RetryContext(ctx, defaultIndexProvisionTimeout, func() *retry.RetryError {
		_, err := w.VectorSearchIndexes.GetIndexByIndexName(ctx, searchIndexName)
		if err == nil {
			// The index is still visible; keep polling until it disappears.
			return retry.RetryableError(fmt.Errorf("vector search index %s is still not deleted", searchIndexName))
		}
		if errors.Is(err, apierr.ErrResourceDoesNotExist) || errors.Is(err, apierr.ErrNotFound) {
			// Not-found means the deletion has finished.
			return nil
		}
		// Any other error is unexpected; stop retrying and surface it with context.
		return retry.NonRetryableError(fmt.Errorf("checking deletion of vector search index %s: %w", searchIndexName, err))
	})
}

// waitForSearchIndexCreation polls the Vector Search Indexes API until the
// index reports itself as ready. It waits slightly less than the full
// provision timeout so that a cleanup delete call still fits inside the
// resource's overall Create timeout.
func waitForSearchIndexCreation(w *databricks.WorkspaceClient, ctx context.Context, searchIndexName string) error {
	waitTime := defaultIndexProvisionTimeout - deleteCallTimeout
	return retry.RetryContext(ctx, waitTime, func() *retry.RetryError {
		idx, getErr := w.VectorSearchIndexes.GetIndexByIndexName(ctx, searchIndexName)
		if getErr != nil {
			return retry.NonRetryableError(getErr)
		}
		// NOTE: a more detailed provisioning status isn't exposed by the API
		// yet, so the boolean readiness flag is the only signal we can poll on.
		if !idx.Status.Ready {
			return retry.RetryableError(fmt.Errorf("vector search index %s is still pending", searchIndexName))
		}
		return nil
	})
}

func ResourceVectorSearchIndex() common.Resource {
s := common.StructToSchema(
vectorsearch.VectorIndex{},
func(s map[string]*schema.Schema) map[string]*schema.Schema {
common.MustSchemaPath(s, "delta_sync_index_spec", "embedding_vector_columns").MinItems = 1
exof := []string{"delta_sync_index_spec", "direct_access_index_spec"}
s["delta_sync_index_spec"].ExactlyOneOf = exof
s["direct_access_index_spec"].ExactlyOneOf = exof

common.CustomizeSchemaPath(s, "endpoint_name").SetRequired()
common.CustomizeSchemaPath(s, "primary_key").SetRequired()
common.CustomizeSchemaPath(s, "status").SetReadOnly()
common.CustomizeSchemaPath(s, "creator").SetReadOnly()
common.CustomizeSchemaPath(s, "name").SetRequired()
common.CustomizeSchemaPath(s, "index_type").SetRequired()
common.CustomizeSchemaPath(s, "delta_sync_index_spec", "pipeline_id").SetReadOnly()
return s
})

return common.Resource{
alexott marked this conversation as resolved.
Show resolved Hide resolved
Create: func(ctx context.Context, d *schema.ResourceData, c *common.DatabricksClient) error {
w, err := c.WorkspaceClient()
if err != nil {
return err
}
var req vectorsearch.CreateVectorIndexRequest
common.DataToStructPointer(d, s, &req)
_, err = w.VectorSearchIndexes.CreateIndex(ctx, req)
if err != nil {
return err
}
err = waitForSearchIndexCreation(w, ctx, req.Name)
if err != nil {
nestedErr := w.VectorSearchIndexes.DeleteIndexByIndexName(ctx, req.Name)
if nestedErr != nil {
log.Printf("[ERROR] Error cleaning up search index: %s", nestedErr.Error())
}
return err
}
d.SetId(req.Name)
return nil
},
Read: func(ctx context.Context, d *schema.ResourceData, c *common.DatabricksClient) error {
w, err := c.WorkspaceClient()
if err != nil {
return err
}
index, err := w.VectorSearchIndexes.GetIndexByIndexName(ctx, d.Id())
if err != nil {
return err
}
return common.StructToData(*index, s, d)
},
Delete: func(ctx context.Context, d *schema.ResourceData, c *common.DatabricksClient) error {
w, err := c.WorkspaceClient()
if err != nil {
return err
}
err = w.VectorSearchIndexes.DeleteIndexByIndexName(ctx, d.Id())
if err != nil {
return err
}
return waitForVectorSearchIndexDeletion(w, ctx, d.Id())
},
StateUpgraders: []schema.StateUpgrader{},
Schema: s,
SchemaVersion: 0,
Timeouts: &schema.ResourceTimeout{
Create: schema.DefaultTimeout(defaultIndexProvisionTimeout),
},
}
}
Loading
Loading