From 47fdfa09294c993bc0e3e539ed46f5e5c0298447 Mon Sep 17 00:00:00 2001 From: Evgenii Kniazev Date: Mon, 12 Jan 2026 14:28:06 +0000 Subject: [PATCH 1/2] aitools: Add SDK documentation query tool for MCP server Add a new MCP tool `databricks_query_sdk_docs` that allows LLM agents to search Databricks SDK documentation for methods, types, and examples. This addresses the problem where LLMs struggle with the Databricks SDK because they lack indexed documentation. Instead of guessing API calls, agents can now query for proper method signatures, parameters, and usage. Features: - Fuzzy/keyword search across services, methods, types, and enums - Category and service filtering - Score-based result ranking - LLM-friendly markdown output with signatures and examples Implementation: - New sdkdocs provider with embedded JSON documentation index - Index generator tool that parses annotations_openapi.yml - Generated index includes 7 core services, 277 types, and 3 enums - Full unit test coverage for search and index loading Co-Authored-By: Claude Opus 4.5 --- experimental/aitools/lib/prompts/flow.tmpl | 1 + .../aitools/lib/providers/sdkdocs/index.go | 127 + .../lib/providers/sdkdocs/index_test.go | 94 + .../aitools/lib/providers/sdkdocs/provider.go | 82 + .../lib/providers/sdkdocs/query_sdk_docs.go | 210 + .../lib/providers/sdkdocs/sdk_docs_index.json | 8163 +++++++++++++++++ .../aitools/lib/providers/sdkdocs/search.go | 265 + .../lib/providers/sdkdocs/search_test.go | 274 + experimental/aitools/lib/server/server.go | 22 + tools/gen_sdk_docs_index.go | 664 ++ 10 files changed, 9902 insertions(+) create mode 100644 experimental/aitools/lib/providers/sdkdocs/index.go create mode 100644 experimental/aitools/lib/providers/sdkdocs/index_test.go create mode 100644 experimental/aitools/lib/providers/sdkdocs/provider.go create mode 100644 experimental/aitools/lib/providers/sdkdocs/query_sdk_docs.go create mode 100644 experimental/aitools/lib/providers/sdkdocs/sdk_docs_index.json create 
mode 100644 experimental/aitools/lib/providers/sdkdocs/search.go create mode 100644 experimental/aitools/lib/providers/sdkdocs/search_test.go create mode 100644 tools/gen_sdk_docs_index.go diff --git a/experimental/aitools/lib/prompts/flow.tmpl b/experimental/aitools/lib/prompts/flow.tmpl index 3a71c91e02..1bd21bdcd4 100644 --- a/experimental/aitools/lib/prompts/flow.tmpl +++ b/experimental/aitools/lib/prompts/flow.tmpl @@ -9,6 +9,7 @@ - **databricks_discover**: MUST call first - returns scaffolding commands - **invoke_databricks_cli**: Execute CLI commands including init-template for scaffolding - **databricks_configure_auth**: Switch workspace profile/host +- **databricks_query_sdk_docs**: Search SDK documentation for methods, types, and examples ## Critical Workflow Rules 1. ALWAYS call databricks_discover FIRST to get scaffolding guidance diff --git a/experimental/aitools/lib/providers/sdkdocs/index.go b/experimental/aitools/lib/providers/sdkdocs/index.go new file mode 100644 index 0000000000..808f37f155 --- /dev/null +++ b/experimental/aitools/lib/providers/sdkdocs/index.go @@ -0,0 +1,127 @@
+package sdkdocs
+
+import (
+	"embed"
+	"encoding/json"
+	"fmt"
+)
+
+// indexFS holds sdk_docs_index.json, embedded into the binary at build time.
+//
+//go:embed sdk_docs_index.json
+var indexFS embed.FS
+
+// SDKDocsIndex represents the complete SDK documentation index.
+//
+// Services is keyed by service name (e.g. "jobs"). Types and Enums are keyed
+// by a package-qualified name (e.g. "jobs.CreateJob"). Version and GeneratedAt
+// are copied as-is from the index file.
+type SDKDocsIndex struct {
+	Version     string                 `json:"version"`
+	GeneratedAt string                 `json:"generated_at"`
+	Services    map[string]*ServiceDoc `json:"services"`
+	Types       map[string]*TypeDoc    `json:"types"`
+	Enums       map[string]*EnumDoc    `json:"enums"`
+}
+
+// ServiceDoc represents documentation for an API service.
+//
+// Methods is keyed by method name. In the embedded index Package carries the
+// Go import path of the service package.
+type ServiceDoc struct {
+	Name        string                `json:"name"`
+	Description string                `json:"description"`
+	Package     string                `json:"package"`
+	Methods     map[string]*MethodDoc `json:"methods"`
+}
+
+// MethodDoc represents documentation for an API method.
+//
+// Signature holds the Go call signature as plain text. Parameters may be
+// empty (the embedded index stores null for some methods) and Returns is nil
+// when the index records no return value. Example, HTTPMethod and HTTPPath are
+// optional and are omitted from the JSON when empty.
+type MethodDoc struct {
+	Name        string     `json:"name"`
+	Description string     `json:"description"`
+	Signature   string     `json:"signature"`
+	Parameters  []ParamDoc `json:"parameters"`
+	Returns     *ReturnDoc `json:"returns,omitempty"`
+	Example     string     `json:"example,omitempty"`
+	HTTPMethod  string     `json:"http_method,omitempty"`
+	HTTPPath    string     `json:"http_path,omitempty"`
+}
+
+// ParamDoc represents documentation for a method parameter.
+// Required reports whether the index marks the parameter as mandatory.
+type ParamDoc struct {
+	Name        string `json:"name"`
+	Type        string `json:"type"`
+	Description string `json:"description"`
+	Required    bool   `json:"required"`
+}
+
+// ReturnDoc represents documentation for a method return type.
+// Type is the return type as text (e.g. "*App").
+type ReturnDoc struct {
+	Type        string `json:"type"`
+	Description string `json:"description"`
+}
+
+// TypeDoc represents documentation for a data type.
+//
+// Fields maps a field key to its documentation; in the embedded index the key
+// equals the field's own name.
+type TypeDoc struct {
+	Name        string               `json:"name"`
+	Package     string               `json:"package"`
+	Description string               `json:"description"`
+	Fields      map[string]*FieldDoc `json:"fields"`
+}
+
+// FieldDoc represents documentation for a struct field.
+//
+// OutputOnly and Deprecated are optional flags that are omitted from the JSON
+// when false.
+type FieldDoc struct {
+	Name        string `json:"name"`
+	Type        string `json:"type"`
+	Description string `json:"description"`
+	Required    bool   `json:"required"`
+	OutputOnly  bool   `json:"output_only,omitempty"`
+	Deprecated  bool   `json:"deprecated,omitempty"`
+}
+
+// EnumDoc represents documentation for an enum type.
+// Values lists the enum's values as strings.
+type EnumDoc struct {
+	Name        string   `json:"name"`
+	Package     string   `json:"package"`
+	Description string   `json:"description"`
+	Values      []string `json:"values"`
+}
+
+// LoadIndex loads the embedded SDK documentation index.
+//
+// It reads sdk_docs_index.json from the embedded file system and decodes it.
+// An error is returned when the file cannot be read or is not valid JSON for
+// SDKDocsIndex; in that case the returned index is nil.
+func LoadIndex() (*SDKDocsIndex, error) {
+	data, err := indexFS.ReadFile("sdk_docs_index.json")
+	if err != nil {
+		return nil, fmt.Errorf("failed to read embedded SDK docs index: %w", err)
+	}
+
+	var index SDKDocsIndex
+	if err := json.Unmarshal(data, &index); err != nil {
+		return nil, fmt.Errorf("failed to parse SDK docs index: %w", err)
+	}
+
+	return &index, nil
+}
+
+// GetMethod returns the documentation of methodName within the service
+// registered under serviceName, e.g. GetMethod("jobs", "Create").
+//
+// It returns nil when the service or the method is not in the index, and also
+// when the service entry itself is nil.
+func (idx *SDKDocsIndex) GetMethod(serviceName, methodName string) *MethodDoc {
+	service := idx.Services[serviceName]
+	if service == nil {
+		// A missing key and an explicit JSON null both end up here; without
+		// this check a null entry would panic on the field access below.
+		return nil
+	}
+	return service.Methods[methodName]
+}
+
+// GetType retrieves a type by its full path (e.g., "jobs.CreateJob").
+// It returns nil when the path is not in the index.
+func (idx *SDKDocsIndex) GetType(typePath string) *TypeDoc {
+	return idx.Types[typePath]
+}
+
+// GetEnum retrieves an enum by its full path (e.g., "jobs.RunLifeCycleState").
+// It returns nil when the path is not in the index.
+func (idx *SDKDocsIndex) GetEnum(enumPath string) *EnumDoc {
+	return idx.Enums[enumPath]
+}
+
+// GetService retrieves a service by name (e.g., "jobs").
+// It returns nil when the name is not in the index.
+func (idx *SDKDocsIndex) GetService(serviceName string) *ServiceDoc {
+	return idx.Services[serviceName]
+}
+
+// ListServices returns all service names.
+//
+// The names are collected by ranging over the Services map, so their order is
+// unspecified and may differ between calls.
+func (idx *SDKDocsIndex) ListServices() []string {
+	names := make([]string, 0, len(idx.Services))
+	for name := range idx.Services {
+		names = append(names, name)
+	}
+	return names
+}
diff --git a/experimental/aitools/lib/providers/sdkdocs/index_test.go b/experimental/aitools/lib/providers/sdkdocs/index_test.go new file mode 100644 index 0000000000..54b5f5912c --- /dev/null +++ b/experimental/aitools/lib/providers/sdkdocs/index_test.go @@ -0,0 +1,94 @@
+package sdkdocs
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestLoadIndex checks that the embedded index loads and contains the jobs
+// service with its Create method.
+func TestLoadIndex(t *testing.T) {
+	index, err := LoadIndex()
+	require.NoError(t, err)
+	require.NotNil(t, index)
+
+	// Verify the index has expected structure
+	assert.NotEmpty(t, index.Version)
+	assert.NotEmpty(t, index.GeneratedAt)
+	assert.NotEmpty(t, index.Services)
+
+	// Check that jobs service exists and has methods
+	jobsService := index.GetService("jobs")
+	require.NotNil(t, jobsService, "jobs service should exist")
+	assert.Equal(t, "Jobs", jobsService.Name)
+	assert.NotEmpty(t, jobsService.Methods)
+
+	// Check that Create method exists
+	createMethod := index.GetMethod("jobs", "Create")
+	require.NotNil(t, createMethod, "jobs.Create method should exist")
+	assert.Equal(t, "Create", createMethod.Name)
+	assert.NotEmpty(t, createMethod.Description)
+}
+
+// TestGetMethod covers a hit, an unknown method and an unknown service.
+func TestGetMethod(t *testing.T) {
+	index, err := LoadIndex()
+	require.NoError(t, err)
+
+	t.Run("existing method", func(t *testing.T) {
+		method := index.GetMethod("jobs", "Create")
+		require.NotNil(t, method)
+		assert.Equal(t, "Create", method.Name)
+	})
+
+	t.Run("non-existing method", func(t *testing.T) {
+		method := index.GetMethod("jobs", "NonExistent")
+		assert.Nil(t, method)
+	})
+
+	t.Run("non-existing service", func(t *testing.T) {
+		method := index.GetMethod("nonexistent", "Create")
+		assert.Nil(t, method)
+	})
+}
+
+// TestGetService covers a known and an unknown service name.
+func TestGetService(t *testing.T) {
+	index, err := LoadIndex()
+	require.NoError(t, err)
+
+	t.Run("existing service", func(t *testing.T) {
+		service := index.GetService("jobs")
+		require.NotNil(t, service)
+		assert.Equal(t, "Jobs", service.Name)
+	})
+
+	t.Run("non-existing service", func(t *testing.T) {
+		service := index.GetService("nonexistent")
+		assert.Nil(t, service)
+	})
+}
+
+// TestListServices checks that the list is non-empty and includes "jobs".
+// It asserts membership only, not order.
+func TestListServices(t *testing.T) {
+	index, err := LoadIndex()
+	require.NoError(t, err)
+
+	services := index.ListServices()
+	assert.NotEmpty(t, services)
+	assert.Contains(t, services, "jobs")
+}
+
+// TestGetEnum covers a known and an unknown enum path.
+func TestGetEnum(t *testing.T) {
+	index, err := LoadIndex()
+	require.NoError(t, err)
+
+	t.Run("existing enum", func(t *testing.T) {
+		enum := index.GetEnum("jobs.RunLifeCycleState")
+		require.NotNil(t, enum)
+		assert.Equal(t, "RunLifeCycleState", enum.Name)
+		assert.NotEmpty(t, enum.Values)
+	})
+
+	t.Run("non-existing enum", func(t *testing.T) {
+		enum := index.GetEnum("nonexistent.Enum")
+		assert.Nil(t, enum)
+	})
+}
diff --git a/experimental/aitools/lib/providers/sdkdocs/provider.go b/experimental/aitools/lib/providers/sdkdocs/provider.go new file mode 100644 index 0000000000..a6e4a21c77 --- /dev/null +++ b/experimental/aitools/lib/providers/sdkdocs/provider.go @@ -0,0 +1,82 @@
+package sdkdocs
+
+import (
+	"context"
+
+	mcp "github.com/databricks/cli/experimental/aitools/lib"
+	mcpsdk "github.com/databricks/cli/experimental/aitools/lib/mcp"
+	"github.com/databricks/cli/experimental/aitools/lib/providers"
+	"github.com/databricks/cli/experimental/aitools/lib/session"
+	"github.com/databricks/cli/libs/log"
+)
+
+// init registers the provider factory under the name "sdkdocs" with the
+// Always option set.
+func init() {
+	providers.Register("sdkdocs", func(ctx context.Context, cfg *mcp.Config, sess *session.Session) (providers.Provider, error) {
+		return NewProvider(ctx, cfg, sess)
+	}, providers.ProviderConfig{
+		Always: true,
+	})
+}
+
+// Provider provides SDK documentation search capabilities.
+//
+// It keeps the loaded documentation index together with the config, session
+// and context it was constructed with. The stored context is used for logging
+// in RegisterTools.
+type Provider struct {
+	config  *mcp.Config
+	session *session.Session
+	ctx     context.Context
+	index   *SDKDocsIndex
+}
+
+// NewProvider creates a new SDK docs provider.
+//
+// A failure to load the embedded index is logged as a warning and replaced by
+// an empty index, so the returned error is always nil.
+func NewProvider(ctx context.Context, cfg *mcp.Config, sess *session.Session) (*Provider, error) {
+	index, err := LoadIndex()
+	if err != nil {
+		log.Warnf(ctx, "Failed to load SDK docs index: %v", err)
+		// Return a provider with an empty index rather than failing
+		index = &SDKDocsIndex{
+			Services: make(map[string]*ServiceDoc),
+			Types:    make(map[string]*TypeDoc),
+			Enums:    make(map[string]*EnumDoc),
+		}
+	}
+
+	log.Infof(ctx, "SDK docs provider initialized: %d services, %d types, %d enums",
+		len(index.Services), len(index.Types), len(index.Enums))
+
+	return &Provider{
+		config:  cfg,
+		session: sess,
+		ctx:     ctx,
+		index:   index,
+	}, nil
+}
+
+// Name returns the provider name, "sdkdocs".
+func (p *Provider) Name() string {
+	return "sdkdocs"
+}
+
+// RegisterTools registers the SDK documentation tools with the MCP server.
+// It adds a single tool, databricks_query_sdk_docs.
+func (p *Provider) RegisterTools(server *mcpsdk.Server) error {
+	log.Info(p.ctx, "Registering SDK docs tools")
+
+	mcpsdk.AddTool(server,
+		&mcpsdk.Tool{
+			Name: "databricks_query_sdk_docs",
+			Description: `Search Databricks SDK documentation for methods, types, and examples.
+
+Use this tool to find:
+- API methods: "how to create a job", "list clusters", "run pipeline"
+- Type definitions: "JobSettings fields", "ClusterSpec parameters"
+- Enums: "run lifecycle states", "cluster state values"
+
+Returns method signatures, parameter descriptions, return types, and usage examples.
+This is useful when you need to understand the correct way to call Databricks APIs.`, + }, + func(ctx context.Context, req *mcpsdk.CallToolRequest, args QuerySDKDocsInput) (*mcpsdk.CallToolResult, any, error) { + return p.querySDKDocs(ctx, args) + }, + ) + + log.Infof(p.ctx, "Registered SDK docs tools: count=%d", 1) + return nil +} diff --git a/experimental/aitools/lib/providers/sdkdocs/query_sdk_docs.go b/experimental/aitools/lib/providers/sdkdocs/query_sdk_docs.go new file mode 100644 index 0000000000..0fcdcdb89e --- /dev/null +++ b/experimental/aitools/lib/providers/sdkdocs/query_sdk_docs.go @@ -0,0 +1,210 @@ +package sdkdocs + +import ( + "context" + "fmt" + "sort" + "strings" + + mcpsdk "github.com/databricks/cli/experimental/aitools/lib/mcp" + "github.com/databricks/cli/libs/log" +) + +// QuerySDKDocsInput represents the input for the databricks_query_sdk_docs tool. +type QuerySDKDocsInput struct { + Query string `json:"query" jsonschema:"required" jsonschema_description:"Search query for SDK documentation (e.g., 'how to create a job', 'cluster configuration', 'JobSettings fields')"` + Category string `json:"category,omitempty" jsonschema_description:"Optional category filter: 'services', 'methods', 'types', or 'enums'"` + Service string `json:"service,omitempty" jsonschema_description:"Optional service filter (e.g., 'jobs', 'clusters', 'pipelines', 'catalog')"` + Limit int `json:"limit,omitempty" jsonschema_description:"Maximum number of results to return (default: 10, max: 50)"` +} + +// querySDKDocs handles the databricks_query_sdk_docs tool invocation. 
+//
+// The tool arguments are passed to the index search unchanged. Matches are
+// rendered as markdown; when there are none, the reply suggests next steps and
+// lists the available services. The returned error is always nil.
+func (p *Provider) querySDKDocs(ctx context.Context, args QuerySDKDocsInput) (*mcpsdk.CallToolResult, any, error) {
+	log.Debugf(ctx, "databricks_query_sdk_docs called: query=%q, category=%q, service=%q, limit=%d",
+		args.Query, args.Category, args.Service, args.Limit)
+
+	opts := SearchOptions{
+		Query:    args.Query,
+		Category: args.Category,
+		Service:  args.Service,
+		Limit:    args.Limit,
+	}
+
+	matches := p.index.Search(opts)
+	if len(matches) > 0 {
+		return mcpsdk.CreateNewTextContentResult(p.formatResponse(matches)), nil, nil
+	}
+
+	// Nothing matched: offer hints instead of an empty reply.
+	services := strings.Join(p.index.ListServices(), ", ")
+	hint := fmt.Sprintf("No SDK documentation found for query: %q\n\nTry:\n- Using different keywords\n- Removing filters\n- Checking available services: %s",
+		args.Query, services)
+	return mcpsdk.CreateNewTextContentResult(hint), nil, nil
+}
+
+// formatResponse renders search results as one markdown document.
+//
+// Each result is written by the formatter for its type and followed by a
+// horizontal rule. A result of an unrecognised type contributes only the rule.
+func (p *Provider) formatResponse(results []SearchResult) string {
+	var out strings.Builder
+	out.WriteString("## SDK Documentation Results\n\n")
+
+	for i := range results {
+		hit := results[i]
+		switch hit.Type {
+		case "service":
+			p.formatServiceResult(&out, hit)
+		case "method":
+			p.formatMethodResult(&out, hit)
+		case "type":
+			p.formatTypeResult(&out, hit)
+		case "enum":
+			p.formatEnumResult(&out, hit)
+		}
+		out.WriteString("\n---\n\n")
+	}
+
+	return out.String()
+}
+
+// formatMethodResult formats a method search result.
+//
+// The method is looked up by result.Service and result.Name. When it is not in
+// the index, only the result's own name and description are written. Otherwise
+// the signature, description, parameters, return value and example are written
+// in that order, each section only when it has content.
+func (p *Provider) formatMethodResult(sb *strings.Builder, result SearchResult) {
+	method := p.index.GetMethod(result.Service, result.Name)
+	if method == nil {
+		fmt.Fprintf(sb, "### Method: %s\n\n%s\n", result.Name, result.Description)
+		return
+	}
+
+	// Fprintf writes straight into the builder and avoids the temporary
+	// string that WriteString(Sprintf(...)) would allocate.
+	fmt.Fprintf(sb, "### Method: %s.%s\n\n", result.Service, method.Name)
+
+	if method.Signature != "" {
+		sb.WriteString("**Signature:**\n```go\n")
+		sb.WriteString(method.Signature)
+		sb.WriteString("\n```\n\n")
+	}
+
+	if method.Description != "" {
+		sb.WriteString("**Description:**\n")
+		sb.WriteString(method.Description)
+		sb.WriteString("\n\n")
+	}
+
+	if len(method.Parameters) > 0 {
+		sb.WriteString("**Parameters:**\n")
+		for _, param := range method.Parameters {
+			required := ""
+			if param.Required {
+				required = " (required)"
+			}
+			fmt.Fprintf(sb, "- `%s` (%s)%s: %s\n", param.Name, param.Type, required, param.Description)
+		}
+		sb.WriteString("\n")
+	}
+
+	if method.Returns != nil {
+		sb.WriteString("**Returns:**\n")
+		fmt.Fprintf(sb, "- `%s`: %s\n\n", method.Returns.Type, method.Returns.Description)
+	}
+
+	if method.Example != "" {
+		sb.WriteString("**Example:**\n```go\n")
+		sb.WriteString(method.Example)
+		sb.WriteString("\n```\n")
+	}
+}
+
+// formatTypeResult formats a type search result.
+//
+// The type is looked up by result.Path. When it is not in the index, only the
+// result's own name and description are written. Otherwise the package, the
+// description and a markdown table of the fields are written.
+//
+// Field rows are emitted in sorted key order so the output is the same on
+// every call, and each description is collapsed onto a single line so that an
+// embedded newline cannot break the table row.
+func (p *Provider) formatTypeResult(sb *strings.Builder, result SearchResult) {
+	typeDoc := p.index.GetType(result.Path)
+	if typeDoc == nil {
+		fmt.Fprintf(sb, "### Type: %s\n\n%s\n", result.Name, result.Description)
+		return
+	}
+
+	fmt.Fprintf(sb, "### Type: %s\n\n", typeDoc.Name)
+
+	if typeDoc.Package != "" {
+		fmt.Fprintf(sb, "**Package:** `%s`\n\n", typeDoc.Package)
+	}
+
+	if typeDoc.Description != "" {
+		sb.WriteString("**Description:**\n")
+		sb.WriteString(typeDoc.Description)
+		sb.WriteString("\n\n")
+	}
+
+	if len(typeDoc.Fields) == 0 {
+		return
+	}
+
+	sb.WriteString("**Fields:**\n\n")
+	sb.WriteString("| Field | Type | Required | Description |\n")
+	sb.WriteString("|-------|------|----------|-------------|\n")
+
+	// Map iteration order is unspecified; sort the keys for stable output.
+	keys := make([]string, 0, len(typeDoc.Fields))
+	for key := range typeDoc.Fields {
+		keys = append(keys, key)
+	}
+	sort.Strings(keys)
+
+	for _, key := range keys {
+		field := typeDoc.Fields[key]
+		if field == nil {
+			// A null entry in the index carries nothing to render.
+			continue
+		}
+
+		required := "No"
+		if field.Required {
+			required = "Yes"
+		}
+
+		// A table row must stay on one line: fold newlines and runs of
+		// whitespace into single spaces.
+		desc := strings.Join(strings.Fields(field.Description), " ")
+		if field.OutputOnly {
+			desc = "(output-only) " + desc
+		}
+		if field.Deprecated {
+			desc = "(deprecated) " + desc
+		}
+		// Escape pipe characters in descriptions
+		desc = strings.ReplaceAll(desc, "|", "\\|")
+		fmt.Fprintf(sb, "| %s | %s | %s | %s |\n", field.Name, field.Type, required, truncate(desc, 100))
+	}
+	sb.WriteString("\n")
+}
+
+// formatServiceResult formats a service search result.
+//
+// The service is looked up by result.Path. When it is not in the index, only
+// the result's own name and description are written. Otherwise the service
+// description and a bullet list of its methods are written.
+//
+// Methods are listed in sorted name order so the output is the same on every
+// call.
+func (p *Provider) formatServiceResult(sb *strings.Builder, result SearchResult) {
+	service := p.index.GetService(result.Path)
+	if service == nil {
+		fmt.Fprintf(sb, "### Service: %s\n\n%s\n", result.Name, result.Description)
+		return
+	}
+
+	fmt.Fprintf(sb, "### Service: %s\n\n", service.Name)
+
+	if service.Description != "" {
+		sb.WriteString("**Description:**\n")
+		sb.WriteString(service.Description)
+		sb.WriteString("\n\n")
+	}
+
+	if len(service.Methods) == 0 {
+		return
+	}
+
+	// Map iteration order is unspecified; sort the names for stable output.
+	methodNames := make([]string, 0, len(service.Methods))
+	for methodName := range service.Methods {
+		methodNames = append(methodNames, methodName)
+	}
+	sort.Strings(methodNames)
+
+	sb.WriteString("**Available Methods:**\n")
+	for _, methodName := range methodNames {
+		method := service.Methods[methodName]
+		if method == nil {
+			// A null entry in the index carries nothing to render.
+			continue
+		}
+		fmt.Fprintf(sb, "- `%s`: %s\n", methodName, truncate(method.Description, 80))
+	}
+	sb.WriteString("\n")
+}
+
+// formatEnumResult formats an enum search result.
+//
+// The enum is looked up by result.Path. When it is not in the index, only the
+// result's own name and description are written. Otherwise the package, the
+// description and the list of values are written, each only when non-empty.
+func (p *Provider) formatEnumResult(sb *strings.Builder, result SearchResult) {
+	enumDoc := p.index.GetEnum(result.Path)
+	if enumDoc == nil {
+		fmt.Fprintf(sb, "### Enum: %s\n\n%s\n", result.Name, result.Description)
+		return
+	}
+
+	fmt.Fprintf(sb, "### Enum: %s\n\n", enumDoc.Name)
+
+	if enumDoc.Package != "" {
+		fmt.Fprintf(sb, "**Package:** `%s`\n\n", enumDoc.Package)
+	}
+
+	if enumDoc.Description != "" {
+		sb.WriteString("**Description:**\n")
+		sb.WriteString(enumDoc.Description)
+		sb.WriteString("\n\n")
+	}
+
+	if len(enumDoc.Values) > 0 {
+		sb.WriteString("**Values:**\n")
+		for _, value := range enumDoc.Values {
+			fmt.Fprintf(sb, "- `%s`\n", value)
+		}
+		sb.WriteString("\n")
+	}
+}
diff --git a/experimental/aitools/lib/providers/sdkdocs/sdk_docs_index.json b/experimental/aitools/lib/providers/sdkdocs/sdk_docs_index.json new file mode 100644 index 0000000000..140559415c --- /dev/null +++ b/experimental/aitools/lib/providers/sdkdocs/sdk_docs_index.json @@ -0,0 +1,8163 @@ +{ + "version": "1.0", + "generated_at": "2026-01-12T14:13:07Z", + "services": { + "apps": { + "name": "Apps", + "description": "Databricks Apps API for deploying and 
managing web applications on Databricks.", + "package": "github.com/databricks/databricks-sdk-go/service/apps", + "methods": { + "Create": { + "name": "Create", + "description": "Creates a new app.", + "signature": "Create(ctx context.Context, request CreateAppRequest) (*App, error)", + "parameters": [ + { + "name": "request", + "type": "CreateAppRequest", + "description": "App configuration including name and description", + "required": true + } + ], + "returns": { + "type": "*App", + "description": "The created app details" + } + }, + "Deploy": { + "name": "Deploy", + "description": "Deploys an app to Databricks Apps.", + "signature": "Deploy(ctx context.Context, request CreateAppDeploymentRequest) (*AppDeployment, error)", + "parameters": [ + { + "name": "request", + "type": "CreateAppDeploymentRequest", + "description": "Deployment configuration", + "required": true + } + ], + "returns": { + "type": "*AppDeployment", + "description": "Deployment status and details" + } + }, + "List": { + "name": "List", + "description": "Lists all apps in the workspace.", + "signature": "List(ctx context.Context, request ListAppsRequest) listing.Iterator[App]", + "parameters": null, + "returns": { + "type": "listing.Iterator[App]", + "description": "Iterator over apps" + } + } + } + }, + "catalog": { + "name": "Catalog", + "description": "Unity Catalog APIs for managing catalogs, schemas, tables, and other data assets.", + "package": "github.com/databricks/databricks-sdk-go/service/catalog", + "methods": { + "ListCatalogs": { + "name": "ListCatalogs", + "description": "Lists all catalogs in the metastore.", + "signature": "List(ctx context.Context, request ListCatalogsRequest) listing.Iterator[CatalogInfo]", + "parameters": null, + "returns": { + "type": "listing.Iterator[CatalogInfo]", + "description": "Iterator over catalog information" + } + }, + "ListSchemas": { + "name": "ListSchemas", + "description": "Lists all schemas in a catalog.", + "signature": "List(ctx 
context.Context, request ListSchemasRequest) listing.Iterator[SchemaInfo]", + "parameters": [ + { + "name": "request", + "type": "ListSchemasRequest", + "description": "Contains catalog_name to list schemas from", + "required": true + } + ], + "returns": { + "type": "listing.Iterator[SchemaInfo]", + "description": "Iterator over schema information" + } + }, + "ListTables": { + "name": "ListTables", + "description": "Lists all tables in a schema.", + "signature": "List(ctx context.Context, request ListTablesRequest) listing.Iterator[TableInfo]", + "parameters": [ + { + "name": "request", + "type": "ListTablesRequest", + "description": "Contains catalog_name and schema_name", + "required": true + } + ], + "returns": { + "type": "listing.Iterator[TableInfo]", + "description": "Iterator over table information" + } + } + } + }, + "compute": { + "name": "Clusters", + "description": "The Clusters API allows you to create, start, edit, and terminate clusters. Clusters are managed cloud resources for running Spark workloads.", + "package": "github.com/databricks/databricks-sdk-go/service/compute", + "methods": { + "Create": { + "name": "Create", + "description": "Create a new Spark cluster.", + "signature": "Create(ctx context.Context, request CreateCluster) (*CreateClusterResponse, error)", + "parameters": [ + { + "name": "request", + "type": "CreateCluster", + "description": "Cluster configuration including node types, autoscaling, and Spark version", + "required": true + } + ], + "returns": { + "type": "*CreateClusterResponse", + "description": "Contains cluster_id of the created cluster" + } + }, + "Delete": { + "name": "Delete", + "description": "Permanently deletes a Spark cluster.", + "signature": "Delete(ctx context.Context, request DeleteCluster) error", + "parameters": [ + { + "name": "request", + "type": "DeleteCluster", + "description": "Contains cluster_id to delete", + "required": true + } + ] + }, + "Get": { + "name": "Get", + "description": "Retrieves the 
information for a cluster given its identifier.", + "signature": "Get(ctx context.Context, request GetClusterRequest) (*ClusterDetails, error)", + "parameters": [ + { + "name": "request", + "type": "GetClusterRequest", + "description": "Contains cluster_id", + "required": true + } + ], + "returns": { + "type": "*ClusterDetails", + "description": "Full cluster configuration and state" + } + }, + "List": { + "name": "List", + "description": "Returns information about all clusters.", + "signature": "List(ctx context.Context, request ListClustersRequest) listing.Iterator[ClusterDetails]", + "parameters": null, + "returns": { + "type": "listing.Iterator[ClusterDetails]", + "description": "Iterator over cluster details" + } + }, + "Start": { + "name": "Start", + "description": "Starts a terminated cluster.", + "signature": "Start(ctx context.Context, request StartCluster) error", + "parameters": [ + { + "name": "request", + "type": "StartCluster", + "description": "Contains cluster_id to start", + "required": true + } + ] + } + } + }, + "jobs": { + "name": "Jobs", + "description": "The Jobs API allows you to create, edit, and delete jobs. 
Jobs are the primary unit of scheduled execution in Databricks.", + "package": "github.com/databricks/databricks-sdk-go/service/jobs", + "methods": { + "Create": { + "name": "Create", + "description": "Create a new job.", + "signature": "Create(ctx context.Context, request CreateJob) (*CreateResponse, error)", + "parameters": [ + { + "name": "request", + "type": "CreateJob", + "description": "Job creation parameters including name, tasks, and schedule", + "required": true + } + ], + "returns": { + "type": "*CreateResponse", + "description": "Contains the job_id of the created job" + }, + "example": "resp, err := w.Jobs.Create(ctx, jobs.CreateJob{\n Name: \"my-job\",\n Tasks: []jobs.Task{{TaskKey: \"main\", ...}},\n})" + }, + "Delete": { + "name": "Delete", + "description": "Deletes a job.", + "signature": "Delete(ctx context.Context, request DeleteJob) error", + "parameters": [ + { + "name": "request", + "type": "DeleteJob", + "description": "Contains job_id to delete", + "required": true + } + ] + }, + "Get": { + "name": "Get", + "description": "Retrieves the details for a single job.", + "signature": "Get(ctx context.Context, request GetJobRequest) (*Job, error)", + "parameters": [ + { + "name": "request", + "type": "GetJobRequest", + "description": "Contains job_id to retrieve", + "required": true + } + ], + "returns": { + "type": "*Job", + "description": "Full job details including settings and run history" + } + }, + "List": { + "name": "List", + "description": "Retrieves a list of jobs.", + "signature": "List(ctx context.Context, request ListJobsRequest) listing.Iterator[BaseJob]", + "parameters": [ + { + "name": "request", + "type": "ListJobsRequest", + "description": "Filter and pagination parameters", + "required": false + } + ], + "returns": { + "type": "listing.Iterator[BaseJob]", + "description": "Iterator over jobs matching the filter" + } + }, + "RunNow": { + "name": "RunNow", + "description": "Triggers an immediate run of a job.", + "signature": 
"RunNow(ctx context.Context, request RunNow) (*RunNowResponse, error)", + "parameters": [ + { + "name": "request", + "type": "RunNow", + "description": "Job ID and optional parameters for the run", + "required": true + } + ], + "returns": { + "type": "*RunNowResponse", + "description": "Contains run_id of the triggered run" + } + } + } + }, + "pipelines": { + "name": "Pipelines", + "description": "The Delta Live Tables API allows you to create, edit, and run pipelines for data transformation and ingestion.", + "package": "github.com/databricks/databricks-sdk-go/service/pipelines", + "methods": { + "Create": { + "name": "Create", + "description": "Creates a new data processing pipeline.", + "signature": "Create(ctx context.Context, request CreatePipeline) (*CreatePipelineResponse, error)", + "parameters": [ + { + "name": "request", + "type": "CreatePipeline", + "description": "Pipeline configuration including clusters, libraries, and target", + "required": true + } + ], + "returns": { + "type": "*CreatePipelineResponse", + "description": "Contains pipeline_id of the created pipeline" + } + }, + "List": { + "name": "List", + "description": "Lists pipelines defined in the workspace.", + "signature": "List(ctx context.Context, request ListPipelinesRequest) listing.Iterator[PipelineStateInfo]", + "parameters": null, + "returns": { + "type": "listing.Iterator[PipelineStateInfo]", + "description": "Iterator over pipeline info" + } + }, + "StartUpdate": { + "name": "StartUpdate", + "description": "Starts a new update for the pipeline.", + "signature": "StartUpdate(ctx context.Context, request StartUpdate) (*StartUpdateResponse, error)", + "parameters": [ + { + "name": "request", + "type": "StartUpdate", + "description": "Pipeline ID and update options", + "required": true + } + ], + "returns": { + "type": "*StartUpdateResponse", + "description": "Contains update_id of the started update" + } + } + } + }, + "sql": { + "name": "SQL", + "description": "Databricks SQL APIs for 
managing warehouses, queries, and dashboards.", + "package": "github.com/databricks/databricks-sdk-go/service/sql", + "methods": { + "ExecuteStatement": { + "name": "ExecuteStatement", + "description": "Execute a SQL statement and return results.", + "signature": "ExecuteStatement(ctx context.Context, request ExecuteStatementRequest) (*ExecuteStatementResponse, error)", + "parameters": [ + { + "name": "request", + "type": "ExecuteStatementRequest", + "description": "SQL statement, warehouse ID, and execution options", + "required": true + } + ], + "returns": { + "type": "*ExecuteStatementResponse", + "description": "Query results or statement ID for async execution" + } + }, + "ListWarehouses": { + "name": "ListWarehouses", + "description": "Lists all SQL warehouses.", + "signature": "List(ctx context.Context, request ListWarehousesRequest) listing.Iterator[EndpointInfo]", + "parameters": null, + "returns": { + "type": "listing.Iterator[EndpointInfo]", + "description": "Iterator over warehouse information" + } + } + } + }, + "workspace": { + "name": "Workspace", + "description": "Workspace API for managing notebooks, folders, and other workspace objects.", + "package": "github.com/databricks/databricks-sdk-go/service/workspace", + "methods": { + "GetStatus": { + "name": "GetStatus", + "description": "Gets the status of a workspace object.", + "signature": "GetStatus(ctx context.Context, request GetStatusRequest) (*ObjectInfo, error)", + "parameters": [ + { + "name": "request", + "type": "GetStatusRequest", + "description": "Contains path to get status for", + "required": true + } + ], + "returns": { + "type": "*ObjectInfo", + "description": "Object information including type and path" + } + }, + "Import": { + "name": "Import", + "description": "Imports a notebook or file into the workspace.", + "signature": "Import(ctx context.Context, request Import) error", + "parameters": [ + { + "name": "request", + "type": "Import", + "description": "Path, content, and format 
of the object to import", + "required": true + } + ] + }, + "List": { + "name": "List", + "description": "Lists the contents of a directory.", + "signature": "List(ctx context.Context, request ListWorkspaceRequest) listing.Iterator[ObjectInfo]", + "parameters": [ + { + "name": "request", + "type": "ListWorkspaceRequest", + "description": "Contains path to list", + "required": true + } + ], + "returns": { + "type": "listing.Iterator[ObjectInfo]", + "description": "Iterator over workspace objects" + } + } + } + } + }, + "types": { + "apps.AppDeployment": { + "name": "AppDeployment", + "package": "apps", + "description": "app deployment configuration.", + "fields": { + "command": { + "name": "command", + "type": "any", + "description": "The command with which to run the app. This will override the command specified in the app.yaml file.", + "required": false + }, + "create_time": { + "name": "create_time", + "type": "string (timestamp)", + "description": "The creation time of the deployment. Formatted timestamp in ISO 8601.", + "required": false, + "output_only": true + }, + "creator": { + "name": "creator", + "type": "any", + "description": "The email of the user who creates the deployment.", + "required": false, + "output_only": true + }, + "deployment_artifacts": { + "name": "deployment_artifacts", + "type": "any", + "description": "The deployment artifacts for an app.", + "required": false, + "output_only": true + }, + "deployment_id": { + "name": "deployment_id", + "type": "string", + "description": "The unique id of the deployment.", + "required": false + }, + "env_vars": { + "name": "env_vars", + "type": "any", + "description": "The environment variables to set in the app runtime environment. 
This will override the environment variables specified in the app.yaml file.", + "required": false + }, + "git_source": { + "name": "git_source", + "type": "any", + "description": "Git repository to use as the source for the app deployment.", + "required": false + }, + "mode": { + "name": "mode", + "type": "any", + "description": "The mode of which the deployment will manage the source code.", + "required": false + }, + "source_code_path": { + "name": "source_code_path", + "type": "string", + "description": "The workspace file system path of the source code used to create the app deployment. This is different from\n`deployment_artifacts.source_code_path`, which is the path used by the deployed app. The former refers\nto the original source code location of the app in the workspace during deployment creation, whereas\nthe latter provides a system generated stable snapshotted source code path used by the deployment.", + "required": false + }, + "status": { + "name": "status", + "type": "any", + "description": "Status and status message of the deployment", + "required": false, + "output_only": true + }, + "update_time": { + "name": "update_time", + "type": "string (timestamp)", + "description": "The update time of the deployment. 
Formatted timestamp in ISO 8601.", + "required": false, + "output_only": true + } + } + }, + "apps.AppDeploymentArtifacts": { + "name": "AppDeploymentArtifacts", + "package": "apps", + "description": "app deployment artifacts configuration.", + "fields": { + "source_code_path": { + "name": "source_code_path", + "type": "string", + "description": "The snapshotted workspace file system path of the source code loaded by the deployed app.", + "required": false + } + } + }, + "apps.AppDeploymentMode": { + "name": "AppDeploymentMode", + "package": "apps", + "description": "app deployment mode configuration.", + "fields": {} + }, + "apps.AppDeploymentState": { + "name": "AppDeploymentState", + "package": "apps", + "description": "app deployment state configuration.", + "fields": {} + }, + "apps.AppDeploymentStatus": { + "name": "AppDeploymentStatus", + "package": "apps", + "description": "app deployment status configuration.", + "fields": { + "message": { + "name": "message", + "type": "any", + "description": "Message corresponding with the deployment state.", + "required": false, + "output_only": true + }, + "state": { + "name": "state", + "type": "any", + "description": "State of the deployment.", + "required": false, + "output_only": true + } + } + }, + "apps.AppResource": { + "name": "AppResource", + "package": "apps", + "description": "app resource configuration.", + "fields": { + "database": { + "name": "database", + "type": "any", + "description": "", + "required": false + }, + "description": { + "name": "description", + "type": "string", + "description": "Description of the App Resource.", + "required": false + }, + "experiment": { + "name": "experiment", + "type": "any", + "description": "", + "required": false + }, + "genie_space": { + "name": "genie_space", + "type": "any", + "description": "", + "required": false + }, + "job": { + "name": "job", + "type": "any", + "description": "", + "required": false + }, + "name": { + "name": "name", + "type": "any", + 
"description": "Name of the App Resource.", + "required": false + }, + "secret": { + "name": "secret", + "type": "any", + "description": "", + "required": false + }, + "serving_endpoint": { + "name": "serving_endpoint", + "type": "any", + "description": "", + "required": false + }, + "sql_warehouse": { + "name": "sql_warehouse", + "type": "any", + "description": "", + "required": false + }, + "uc_securable": { + "name": "uc_securable", + "type": "any", + "description": "", + "required": false + } + } + }, + "apps.AppResourceDatabase": { + "name": "AppResourceDatabase", + "package": "apps", + "description": "app resource database configuration.", + "fields": { + "database_name": { + "name": "database_name", + "type": "string", + "description": "", + "required": false + }, + "instance_name": { + "name": "instance_name", + "type": "string", + "description": "", + "required": false + }, + "permission": { + "name": "permission", + "type": "any", + "description": "", + "required": false + } + } + }, + "apps.AppResourceDatabaseDatabasePermission": { + "name": "AppResourceDatabaseDatabasePermission", + "package": "apps", + "description": "app resource database database permission configuration.", + "fields": {} + }, + "apps.AppResourceExperiment": { + "name": "AppResourceExperiment", + "package": "apps", + "description": "app resource experiment configuration.", + "fields": { + "experiment_id": { + "name": "experiment_id", + "type": "string", + "description": "", + "required": false + }, + "permission": { + "name": "permission", + "type": "any", + "description": "", + "required": false + } + } + }, + "apps.AppResourceExperimentExperimentPermission": { + "name": "AppResourceExperimentExperimentPermission", + "package": "apps", + "description": "app resource experiment experiment permission configuration.", + "fields": {} + }, + "apps.AppResourceGenieSpace": { + "name": "AppResourceGenieSpace", + "package": "apps", + "description": "app resource genie space configuration.", 
+ "fields": { + "name": { + "name": "name", + "type": "any", + "description": "", + "required": false + }, + "permission": { + "name": "permission", + "type": "any", + "description": "", + "required": false + }, + "space_id": { + "name": "space_id", + "type": "string", + "description": "", + "required": false + } + } + }, + "apps.AppResourceGenieSpaceGenieSpacePermission": { + "name": "AppResourceGenieSpaceGenieSpacePermission", + "package": "apps", + "description": "app resource genie space genie space permission configuration.", + "fields": {} + }, + "apps.AppResourceJob": { + "name": "AppResourceJob", + "package": "apps", + "description": "app resource job configuration.", + "fields": { + "id": { + "name": "id", + "type": "any", + "description": "Id of the job to grant permission on.", + "required": false + }, + "permission": { + "name": "permission", + "type": "any", + "description": "Permissions to grant on the Job. Supported permissions are: \"CAN_MANAGE\", \"IS_OWNER\", \"CAN_MANAGE_RUN\", \"CAN_VIEW\".", + "required": false + } + } + }, + "apps.AppResourceJobJobPermission": { + "name": "AppResourceJobJobPermission", + "package": "apps", + "description": "app resource job job permission configuration.", + "fields": {} + }, + "apps.AppResourceSecret": { + "name": "AppResourceSecret", + "package": "apps", + "description": "app resource secret configuration.", + "fields": { + "key": { + "name": "key", + "type": "any", + "description": "Key of the secret to grant permission on.", + "required": false + }, + "permission": { + "name": "permission", + "type": "any", + "description": "Permission to grant on the secret scope. For secrets, only one permission is allowed. 
Permission must be one of: \"READ\", \"WRITE\", \"MANAGE\".", + "required": false + }, + "scope": { + "name": "scope", + "type": "any", + "description": "Scope of the secret to grant permission on.", + "required": false + } + } + }, + "apps.AppResourceSecretSecretPermission": { + "name": "AppResourceSecretSecretPermission", + "package": "apps", + "description": "Permission to grant on the secret scope. Supported permissions are: \"READ\", \"WRITE\", \"MANAGE\".", + "fields": {} + }, + "apps.AppResourceServingEndpoint": { + "name": "AppResourceServingEndpoint", + "package": "apps", + "description": "app resource serving endpoint configuration.", + "fields": { + "name": { + "name": "name", + "type": "any", + "description": "Name of the serving endpoint to grant permission on.", + "required": false + }, + "permission": { + "name": "permission", + "type": "any", + "description": "Permission to grant on the serving endpoint. Supported permissions are: \"CAN_MANAGE\", \"CAN_QUERY\", \"CAN_VIEW\".", + "required": false + } + } + }, + "apps.AppResourceServingEndpointServingEndpointPermission": { + "name": "AppResourceServingEndpointServingEndpointPermission", + "package": "apps", + "description": "app resource serving endpoint serving endpoint permission configuration.", + "fields": {} + }, + "apps.AppResourceSqlWarehouse": { + "name": "AppResourceSqlWarehouse", + "package": "apps", + "description": "app resource sql warehouse configuration.", + "fields": { + "id": { + "name": "id", + "type": "any", + "description": "Id of the SQL warehouse to grant permission on.", + "required": false + }, + "permission": { + "name": "permission", + "type": "any", + "description": "Permission to grant on the SQL warehouse. 
Supported permissions are: \"CAN_MANAGE\", \"CAN_USE\", \"IS_OWNER\".", + "required": false + } + } + }, + "apps.AppResourceSqlWarehouseSqlWarehousePermission": { + "name": "AppResourceSqlWarehouseSqlWarehousePermission", + "package": "apps", + "description": "app resource sql warehouse sql warehouse permission configuration.", + "fields": {} + }, + "apps.AppResourceUcSecurable": { + "name": "AppResourceUcSecurable", + "package": "apps", + "description": "app resource uc securable configuration.", + "fields": { + "permission": { + "name": "permission", + "type": "any", + "description": "", + "required": false + }, + "securable_full_name": { + "name": "securable_full_name", + "type": "string", + "description": "", + "required": false + }, + "securable_type": { + "name": "securable_type", + "type": "any", + "description": "", + "required": false + } + } + }, + "apps.AppResourceUcSecurableUcSecurablePermission": { + "name": "AppResourceUcSecurableUcSecurablePermission", + "package": "apps", + "description": "app resource uc securable uc securable permission configuration.", + "fields": {} + }, + "apps.AppResourceUcSecurableUcSecurableType": { + "name": "AppResourceUcSecurableUcSecurableType", + "package": "apps", + "description": "app resource uc securable uc securable type configuration.", + "fields": {} + }, + "apps.ApplicationState": { + "name": "ApplicationState", + "package": "apps", + "description": "application state configuration.", + "fields": {} + }, + "apps.ApplicationStatus": { + "name": "ApplicationStatus", + "package": "apps", + "description": "application status configuration.", + "fields": { + "message": { + "name": "message", + "type": "any", + "description": "Application status message", + "required": false, + "output_only": true + }, + "state": { + "name": "state", + "type": "any", + "description": "State of the application.", + "required": false, + "output_only": true + } + } + }, + "apps.ComputeSize": { + "name": "ComputeSize", + "package": 
"apps", + "description": "compute size configuration.", + "fields": {} + }, + "apps.ComputeState": { + "name": "ComputeState", + "package": "apps", + "description": "compute state configuration.", + "fields": {} + }, + "apps.ComputeStatus": { + "name": "ComputeStatus", + "package": "apps", + "description": "compute status configuration.", + "fields": { + "message": { + "name": "message", + "type": "any", + "description": "Compute status message", + "required": false, + "output_only": true + }, + "state": { + "name": "state", + "type": "any", + "description": "State of the app compute.", + "required": false, + "output_only": true + } + } + }, + "apps.EnvVar": { + "name": "EnvVar", + "package": "apps", + "description": "env var configuration.", + "fields": { + "name": { + "name": "name", + "type": "any", + "description": "The name of the environment variable.", + "required": false + }, + "value": { + "name": "value", + "type": "any", + "description": "The value for the environment variable.", + "required": false + }, + "value_from": { + "name": "value_from", + "type": "any", + "description": "The name of an external Databricks resource that contains the value, such as a secret or a database table.", + "required": false + } + } + }, + "apps.GitRepository": { + "name": "GitRepository", + "package": "apps", + "description": "Git repository configuration specifying the location of the repository.", + "fields": { + "provider": { + "name": "provider", + "type": "any", + "description": "Git provider. Case insensitive. 
Supported values: gitHub, gitHubEnterprise, bitbucketCloud,\nbitbucketServer, azureDevOpsServices, gitLab, gitLabEnterpriseEdition, awsCodeCommit.", + "required": false + }, + "url": { + "name": "url", + "type": "any", + "description": "URL of the Git repository.", + "required": false + } + } + }, + "apps.GitSource": { + "name": "GitSource", + "package": "apps", + "description": "Complete git source specification including repository location and reference.", + "fields": { + "branch": { + "name": "branch", + "type": "any", + "description": "Git branch to checkout.", + "required": false + }, + "commit": { + "name": "commit", + "type": "any", + "description": "Git commit SHA to checkout.", + "required": false + }, + "git_repository": { + "name": "git_repository", + "type": "any", + "description": "Git repository configuration. Populated from the app's git_repository configuration.", + "required": false, + "output_only": true + }, + "resolved_commit": { + "name": "resolved_commit", + "type": "any", + "description": "The resolved commit SHA that was actually used for the deployment. This is populated by the\nsystem after resolving the reference (branch, tag, or commit). If commit is specified\ndirectly, this will match commit. If a branch or tag is specified, this contains the\ncommit SHA that the branch or tag pointed to at deployment time.", + "required": false, + "output_only": true + }, + "source_code_path": { + "name": "source_code_path", + "type": "string", + "description": "Relative path to the app source code within the Git repository. 
If not specified, the root\nof the repository is used.", + "required": false + }, + "tag": { + "name": "tag", + "type": "any", + "description": "Git tag to checkout.", + "required": false + } + } + }, + "bundle.Alert": { + "name": "Alert", + "package": "resources", + "description": "alert configuration.", + "fields": { + "create_time": { + "name": "create_time", + "type": "string (timestamp)", + "description": "The timestamp indicating when the alert was created.", + "required": false, + "output_only": true + }, + "custom_description": { + "name": "custom_description", + "type": "string", + "description": "Custom description for the alert. support mustache template.", + "required": false + }, + "custom_summary": { + "name": "custom_summary", + "type": "any", + "description": "Custom summary for the alert. support mustache template.", + "required": false + }, + "display_name": { + "name": "display_name", + "type": "string", + "description": "The display name of the alert.", + "required": false + }, + "effective_run_as": { + "name": "effective_run_as", + "type": "any", + "description": "The actual identity that will be used to execute the alert.\nThis is an output-only field that shows the resolved run-as identity after applying\npermissions and defaults.", + "required": false, + "output_only": true + }, + "evaluation": { + "name": "evaluation", + "type": "any", + "description": "", + "required": false + }, + "id": { + "name": "id", + "type": "any", + "description": "UUID identifying the alert.", + "required": false, + "output_only": true + }, + "lifecycle_state": { + "name": "lifecycle_state", + "type": "any", + "description": "Indicates whether the query is trashed.", + "required": false, + "output_only": true + }, + "owner_user_name": { + "name": "owner_user_name", + "type": "string", + "description": "The owner's username. 
This field is set to \"Unavailable\" if the user has been deleted.", + "required": false, + "output_only": true + }, + "parent_path": { + "name": "parent_path", + "type": "string", + "description": "The workspace path of the folder containing the alert. Can only be set on create, and cannot be updated.", + "required": false + }, + "query_text": { + "name": "query_text", + "type": "any", + "description": "Text of the query to be run.", + "required": false + }, + "run_as": { + "name": "run_as", + "type": "any", + "description": "Specifies the identity that will be used to run the alert.\nThis field allows you to configure alerts to run as a specific user or service principal.\n- For user identity: Set `user_name` to the email of an active workspace user. Users can only set this to their own email.\n- For service principal: Set `service_principal_name` to the application ID. Requires the `servicePrincipal/user` role.\nIf not specified, the alert will run as the request user.", + "required": false + }, + "run_as_user_name": { + "name": "run_as_user_name", + "type": "string", + "description": "The run as username or application ID of service principal.\nOn Create and Update, this field can be set to application ID of an active service principal. Setting this field requires the servicePrincipal/user role.\nDeprecated: Use `run_as` field instead. 
This field will be removed in a future release.", + "required": false, + "deprecated": true + }, + "schedule": { + "name": "schedule", + "type": "any", + "description": "", + "required": false + }, + "update_time": { + "name": "update_time", + "type": "string (timestamp)", + "description": "The timestamp indicating when the alert was updated.", + "required": false, + "output_only": true + }, + "warehouse_id": { + "name": "warehouse_id", + "type": "string", + "description": "ID of the SQL warehouse attached to the alert.", + "required": false + } + } + }, + "bundle.App": { + "name": "App", + "package": "resources", + "description": "app configuration.", + "fields": { + "active_deployment": { + "name": "active_deployment", + "type": "any", + "description": "The active deployment of the app. A deployment is considered active when it has been deployed\nto the app compute.", + "required": false, + "output_only": true + }, + "app_status": { + "name": "app_status", + "type": "any", + "description": "", + "required": false, + "output_only": true + }, + "budget_policy_id": { + "name": "budget_policy_id", + "type": "string", + "description": "", + "required": false + }, + "compute_size": { + "name": "compute_size", + "type": "int", + "description": "", + "required": false + }, + "compute_status": { + "name": "compute_status", + "type": "any", + "description": "", + "required": false, + "output_only": true + }, + "create_time": { + "name": "create_time", + "type": "string (timestamp)", + "description": "The creation time of the app. 
Formatted timestamp in ISO 8601.", + "required": false, + "output_only": true + }, + "creator": { + "name": "creator", + "type": "any", + "description": "The email of the user that created the app.", + "required": false, + "output_only": true + }, + "default_source_code_path": { + "name": "default_source_code_path", + "type": "string", + "description": "The default workspace file system path of the source code from which app deployment are\ncreated. This field tracks the workspace source code path of the last active deployment.", + "required": false, + "output_only": true + }, + "description": { + "name": "description", + "type": "string", + "description": "The description of the app.", + "required": false + }, + "effective_budget_policy_id": { + "name": "effective_budget_policy_id", + "type": "string", + "description": "", + "required": false, + "output_only": true + }, + "effective_usage_policy_id": { + "name": "effective_usage_policy_id", + "type": "string", + "description": "", + "required": false, + "output_only": true + }, + "effective_user_api_scopes": { + "name": "effective_user_api_scopes", + "type": "any", + "description": "The effective api scopes granted to the user access token.", + "required": false, + "output_only": true + }, + "git_repository": { + "name": "git_repository", + "type": "any", + "description": "Git repository configuration for app deployments. When specified, deployments can\nreference code from this repository by providing only the git reference (branch, tag, or commit).", + "required": false + }, + "id": { + "name": "id", + "type": "any", + "description": "The unique identifier of the app.", + "required": false, + "output_only": true + }, + "name": { + "name": "name", + "type": "any", + "description": "The name of the app. 
The name must contain only lowercase alphanumeric characters and hyphens.\nIt must be unique within the workspace.", + "required": false + }, + "oauth2_app_client_id": { + "name": "oauth2_app_client_id", + "type": "string", + "description": "", + "required": false, + "output_only": true + }, + "oauth2_app_integration_id": { + "name": "oauth2_app_integration_id", + "type": "string", + "description": "", + "required": false, + "output_only": true + }, + "pending_deployment": { + "name": "pending_deployment", + "type": "any", + "description": "The pending deployment of the app. A deployment is considered pending when it is being prepared\nfor deployment to the app compute.", + "required": false, + "output_only": true + }, + "resources": { + "name": "resources", + "type": "any", + "description": "Resources for the app.", + "required": false + }, + "service_principal_client_id": { + "name": "service_principal_client_id", + "type": "string", + "description": "", + "required": false, + "output_only": true + }, + "service_principal_id": { + "name": "service_principal_id", + "type": "string", + "description": "", + "required": false, + "output_only": true + }, + "service_principal_name": { + "name": "service_principal_name", + "type": "string", + "description": "", + "required": false, + "output_only": true + }, + "update_time": { + "name": "update_time", + "type": "string (timestamp)", + "description": "The update time of the app. 
Formatted timestamp in ISO 8601.", + "required": false, + "output_only": true + }, + "updater": { + "name": "updater", + "type": "any", + "description": "The email of the user that last updated the app.", + "required": false, + "output_only": true + }, + "url": { + "name": "url", + "type": "any", + "description": "The URL of the app once it is deployed.", + "required": false, + "output_only": true + }, + "usage_policy_id": { + "name": "usage_policy_id", + "type": "string", + "description": "", + "required": false + }, + "user_api_scopes": { + "name": "user_api_scopes", + "type": "any", + "description": "", + "required": false + } + } + }, + "bundle.Cluster": { + "name": "Cluster", + "package": "resources", + "description": "Contains a snapshot of the latest user specified settings that were used to create/edit the cluster.", + "fields": { + "apply_policy_default_values": { + "name": "apply_policy_default_values", + "type": "any", + "description": "When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied.", + "required": false + }, + "autoscale": { + "name": "autoscale", + "type": "any", + "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", + "required": false + }, + "autotermination_minutes": { + "name": "autotermination_minutes", + "type": "int", + "description": "Automatically terminates the cluster after it is inactive for this time in minutes. If not set,\nthis cluster will not be automatically terminated. 
If specified, the threshold must be between\n10 and 10000 minutes.\nUsers can also set this value to 0 to explicitly disable automatic termination.", + "required": false + }, + "aws_attributes": { + "name": "aws_attributes", + "type": "any", + "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", + "required": false + }, + "azure_attributes": { + "name": "azure_attributes", + "type": "any", + "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", + "required": false + }, + "cluster_log_conf": { + "name": "cluster_log_conf", + "type": "any", + "description": "The configuration for delivering spark logs to a long-term storage destination.\nThree kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", + "required": false + }, + "cluster_name": { + "name": "cluster_name", + "type": "string", + "description": "Cluster name requested by the user. This doesn't have to be unique.\nIf not specified at creation, the cluster name will be an empty string.\nFor job clusters, the cluster name is automatically set based on the job and job run IDs.", + "required": false + }, + "custom_tags": { + "name": "custom_tags", + "type": "map[string]string", + "description": "Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. 
Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", + "required": false + }, + "data_security_mode": { + "name": "data_security_mode", + "type": "any", + "description": "Data security mode decides what data governance model to use when accessing data\nfrom a cluster.\n\nThe following modes can only be used when `kind = CLASSIC_PREVIEW`.\n* `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration.\n* `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`.\n* `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`.\n\nThe following modes can be used regardless of `kind`.\n* `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode.\n* `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode.\n* `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. 
But programming languages and cluster features might be limited.\n\nThe following modes are deprecated starting with Databricks Runtime 15.0 and\nwill be removed for future Databricks Runtime versions:\n\n* `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters.\n* `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters.\n* `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.\n* `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled.", + "required": false + }, + "docker_image": { + "name": "docker_image", + "type": "any", + "description": "Custom docker image BYOC", + "required": false + }, + "driver_instance_pool_id": { + "name": "driver_instance_pool_id", + "type": "string", + "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned.", + "required": false + }, + "driver_node_type_id": { + "name": "driver_node_type_id", + "type": "string", + "description": "The node type of the Spark driver.\nNote that this field is optional; if unset, the driver node type will be set as the same value\nas `node_type_id` defined above.\n\nThis field, along with node_type_id, should not be set if virtual_cluster_size is set.\nIf both driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id and node_type_id take precedence.", + "required": false + }, + "enable_elastic_disk": { + "name": "enable_elastic_disk", + "type": "bool", + "description": "Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk\nspace when its Spark workers are running low on disk space.", + "required": false + }, + "enable_local_disk_encryption": { + "name": "enable_local_disk_encryption", + "type": "bool", + "description": 
"Whether to enable LUKS on cluster VMs' local disks", + "required": false + }, + "gcp_attributes": { + "name": "gcp_attributes", + "type": "any", + "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", + "required": false + }, + "init_scripts": { + "name": "init_scripts", + "type": "any", + "description": "The configuration for storing init scripts. Any number of destinations can be specified.\nThe scripts are executed sequentially in the order provided.\nIf `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", + "required": false + }, + "instance_pool_id": { + "name": "instance_pool_id", + "type": "string", + "description": "The optional ID of the instance pool to which the cluster belongs.", + "required": false + }, + "is_single_node": { + "name": "is_single_node", + "type": "bool", + "description": "This field can only be used when `kind = CLASSIC_PREVIEW`.\n\nWhen set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers`", + "required": false + }, + "kind": { + "name": "kind", + "type": "any", + "description": "The kind of compute described by this compute specification.\n\nDepending on `kind`, different validations and default values will be applied.\n\nClusters with `kind = CLASSIC_PREVIEW` support the following fields, whereas clusters with no specified `kind` do not.\n* [is_single_node](/api/workspace/clusters/create#is_single_node)\n* [use_ml_runtime](/api/workspace/clusters/create#use_ml_runtime)\n* [data_security_mode](/api/workspace/clusters/create#data_security_mode) set to `DATA_SECURITY_MODE_AUTO`, `DATA_SECURITY_MODE_DEDICATED`, or `DATA_SECURITY_MODE_STANDARD`\n\nBy using the [simple form](https://docs.databricks.com/compute/simple-form.html), your clusters are automatically using `kind = CLASSIC_PREVIEW`.", + 
"required": false + }, + "node_type_id": { + "name": "node_type_id", + "type": "string", + "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.", + "required": false + }, + "num_workers": { + "name": "num_workers", + "type": "any", + "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned.", + "required": false + }, + "policy_id": { + "name": "policy_id", + "type": "string", + "description": "The ID of the cluster policy used to create the cluster if applicable.", + "required": false + }, + "remote_disk_throughput": { + "name": "remote_disk_throughput", + "type": "any", + "description": "If set, what the configurable throughput (in Mb/s) for the remote disk is. 
Currently only supported for GCP HYPERDISK_BALANCED disks.", + "required": false + }, + "runtime_engine": { + "name": "runtime_engine", + "type": "any", + "description": "Determines the cluster's runtime engine, either standard or Photon.\n\nThis field is not compatible with legacy `spark_version` values that contain `-photon-`.\nRemove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`.\n\nIf left unspecified, the runtime engine defaults to standard unless the spark_version\ncontains -photon-, in which case Photon will be used.", + "required": false + }, + "single_user_name": { + "name": "single_user_name", + "type": "string", + "description": "Single user name if data_security_mode is `SINGLE_USER`", + "required": false + }, + "spark_conf": { + "name": "spark_conf", + "type": "any", + "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.", + "required": false + }, + "spark_env_vars": { + "name": "spark_env_vars", + "type": "any", + "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. 
This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", + "required": false + }, + "spark_version": { + "name": "spark_version", + "type": "any", + "description": "The Spark version of the cluster, e.g. `3.3.x-scala2.11`.\nA list of available Spark versions can be retrieved by using\nthe :method:clusters/sparkVersions API call.", + "required": false + }, + "ssh_public_keys": { + "name": "ssh_public_keys", + "type": "any", + "description": "SSH public key contents that will be added to each Spark node in this cluster. The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", + "required": false + }, + "total_initial_remote_disk_size": { + "name": "total_initial_remote_disk_size", + "type": "int", + "description": "If set, what the total initial volume size (in GB) of the remote disks should be. 
Currently only supported for GCP HYPERDISK_BALANCED disks.", + "required": false + }, + "use_ml_runtime": { + "name": "use_ml_runtime", + "type": "any", + "description": "This field can only be used when `kind = CLASSIC_PREVIEW`.\n\n`effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not.", + "required": false + }, + "workload_type": { + "name": "workload_type", + "type": "any", + "description": "Cluster Attributes showing for clusters workload types.", + "required": false + } + } + }, + "bundle.DatabaseCatalog": { + "name": "DatabaseCatalog", + "package": "resources", + "description": "database catalog configuration.", + "fields": { + "create_database_if_not_exists": { + "name": "create_database_if_not_exists", + "type": "any", + "description": "", + "required": false + }, + "database_instance_name": { + "name": "database_instance_name", + "type": "string", + "description": "The name of the DatabaseInstance housing the database.", + "required": false + }, + "database_name": { + "name": "database_name", + "type": "string", + "description": "The name of the database (in a instance) associated with the catalog.", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "The name of the catalog in UC.", + "required": false + }, + "uid": { + "name": "uid", + "type": "any", + "description": "", + "required": false, + "output_only": true + } + } + }, + "bundle.DatabaseInstance": { + "name": "DatabaseInstance", + "package": "resources", + "description": "A DatabaseInstance represents a logical Postgres instance, comprised of both compute and storage.", + "fields": { + "capacity": { + "name": "capacity", + "type": "any", + "description": "The sku of the instance. 
Valid values are \"CU_1\", \"CU_2\", \"CU_4\", \"CU_8\".", + "required": false + }, + "child_instance_refs": { + "name": "child_instance_refs", + "type": "any", + "description": "The refs of the child instances. This is only available if the instance is\nparent instance.", + "required": false, + "output_only": true + }, + "creation_time": { + "name": "creation_time", + "type": "string (timestamp)", + "description": "The timestamp when the instance was created.", + "required": false, + "output_only": true + }, + "creator": { + "name": "creator", + "type": "any", + "description": "The email of the creator of the instance.", + "required": false, + "output_only": true + }, + "custom_tags": { + "name": "custom_tags", + "type": "map[string]string", + "description": "Custom tags associated with the instance. This field is only included on create and update responses.", + "required": false + }, + "effective_capacity": { + "name": "effective_capacity", + "type": "any", + "description": "Deprecated. The sku of the instance; this field will always match the value of capacity.", + "required": false, + "output_only": true, + "deprecated": true + }, + "effective_custom_tags": { + "name": "effective_custom_tags", + "type": "map[string]string", + "description": "The recorded custom tags associated with the instance.", + "required": false, + "output_only": true + }, + "effective_enable_pg_native_login": { + "name": "effective_enable_pg_native_login", + "type": "any", + "description": "Whether the instance has PG native password login enabled.", + "required": false, + "output_only": true + }, + "effective_enable_readable_secondaries": { + "name": "effective_enable_readable_secondaries", + "type": "any", + "description": "Whether secondaries serving read-only traffic are enabled. 
Defaults to false.", + "required": false, + "output_only": true + }, + "effective_node_count": { + "name": "effective_node_count", + "type": "int", + "description": "The number of nodes in the instance, composed of 1 primary and 0 or more secondaries. Defaults to\n1 primary and 0 secondaries.", + "required": false, + "output_only": true + }, + "effective_retention_window_in_days": { + "name": "effective_retention_window_in_days", + "type": "any", + "description": "The retention window for the instance. This is the time window in days\nfor which the historical data is retained.", + "required": false, + "output_only": true + }, + "effective_stopped": { + "name": "effective_stopped", + "type": "any", + "description": "Whether the instance is stopped.", + "required": false, + "output_only": true + }, + "effective_usage_policy_id": { + "name": "effective_usage_policy_id", + "type": "string", + "description": "The policy that is applied to the instance.", + "required": false, + "output_only": true + }, + "enable_pg_native_login": { + "name": "enable_pg_native_login", + "type": "bool", + "description": "Whether to enable PG native password login on the instance. Defaults to false.", + "required": false + }, + "enable_readable_secondaries": { + "name": "enable_readable_secondaries", + "type": "bool", + "description": "Whether to enable secondaries to serve read-only traffic. Defaults to false.", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "The name of the instance. This is the unique identifier for the instance.", + "required": false + }, + "node_count": { + "name": "node_count", + "type": "int", + "description": "The number of nodes in the instance, composed of 1 primary and 0 or more secondaries. Defaults to\n1 primary and 0 secondaries. 
This field is input only, see effective_node_count for the output.", + "required": false + }, + "parent_instance_ref": { + "name": "parent_instance_ref", + "type": "any", + "description": "The ref of the parent instance. This is only available if the instance is\nchild instance.\nInput: For specifying the parent instance to create a child instance. Optional.\nOutput: Only populated if provided as input to create a child instance.", + "required": false + }, + "pg_version": { + "name": "pg_version", + "type": "any", + "description": "The version of Postgres running on the instance.", + "required": false, + "output_only": true + }, + "read_only_dns": { + "name": "read_only_dns", + "type": "any", + "description": "The DNS endpoint to connect to the instance for read only access. This is only available if\nenable_readable_secondaries is true.", + "required": false, + "output_only": true + }, + "read_write_dns": { + "name": "read_write_dns", + "type": "any", + "description": "The DNS endpoint to connect to the instance for read+write access.", + "required": false, + "output_only": true + }, + "retention_window_in_days": { + "name": "retention_window_in_days", + "type": "any", + "description": "The retention window for the instance. This is the time window in days\nfor which the historical data is retained. The default value is 7 days.\nValid values are 2 to 35 days.", + "required": false + }, + "state": { + "name": "state", + "type": "any", + "description": "The current state of the instance.", + "required": false, + "output_only": true + }, + "stopped": { + "name": "stopped", + "type": "any", + "description": "Whether to stop the instance. 
An input only param, see effective_stopped for the output.", + "required": false + }, + "uid": { + "name": "uid", + "type": "any", + "description": "An immutable UUID identifier for the instance.", + "required": false, + "output_only": true + }, + "usage_policy_id": { + "name": "usage_policy_id", + "type": "string", + "description": "The desired usage policy to associate with the instance.", + "required": false + } + } + }, + "bundle.Job": { + "name": "Job", + "package": "resources", + "description": "job configuration.", + "fields": { + "budget_policy_id": { + "name": "budget_policy_id", + "type": "string", + "description": "The id of the user specified budget policy to use for this job.\nIf not specified, a default budget policy may be applied when creating or modifying the job.\nSee `effective_budget_policy_id` for the budget policy used by this workload.", + "required": false + }, + "continuous": { + "name": "continuous", + "type": "any", + "description": "An optional continuous property for this job. The continuous property will ensure that there is always one run executing. Only one of `schedule` and `continuous` can be used.", + "required": false + }, + "deployment": { + "name": "deployment", + "type": "any", + "description": "Deployment information for jobs managed by external sources.", + "required": false + }, + "description": { + "name": "description", + "type": "string", + "description": "An optional description for the job. 
The maximum length is 27700 characters in UTF-8 encoding.", + "required": false + }, + "edit_mode": { + "name": "edit_mode", + "type": "any", + "description": "Edit mode of the job.\n\n* `UI_LOCKED`: The job is in a locked UI state and cannot be modified.\n* `EDITABLE`: The job is in an editable state and can be modified.", + "required": false + }, + "email_notifications": { + "name": "email_notifications", + "type": "any", + "description": "An optional set of email addresses that is notified when runs of this job begin or complete as well as when this job is deleted.", + "required": false + }, + "environments": { + "name": "environments", + "type": "any", + "description": "A list of task execution environment specifications that can be referenced by serverless tasks of this job.\nFor serverless notebook tasks, if the environment_key is not specified, the notebook environment will be used if present. If a jobs environment is specified, it will override the notebook environment.\nFor other serverless tasks, the task environment is required to be specified using environment_key in the task settings.", + "required": false + }, + "format": { + "name": "format", + "type": "any", + "description": "Used to tell what is the format of the job. This field is ignored in Create/Update/Reset calls. When using the Jobs API 2.1 this value is always set to `\"MULTI_TASK\"`.", + "required": false, + "deprecated": true + }, + "git_source": { + "name": "git_source", + "type": "any", + "description": "An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks.\n\nIf `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task.\n\nNote: dbt and SQL File tasks support only version-controlled sources. 
If dbt or SQL File tasks are used, `git_source` must be defined on the job.", + "required": false + }, + "health": { + "name": "health", + "type": "any", + "description": "An optional set of health rules that can be defined for this job.", + "required": false + }, + "job_clusters": { + "name": "job_clusters", + "type": "any", + "description": "A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings.", + "required": false + }, + "max_concurrent_runs": { + "name": "max_concurrent_runs", + "type": "any", + "description": "An optional maximum allowed number of concurrent runs of the job.\nSet this value if you want to be able to execute multiple runs of the same job concurrently.\nThis is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters.\nThis setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs.\nHowever, from then on, new runs are skipped unless there are fewer than 3 active runs.\nThis value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped.", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "An optional name for the job. 
The maximum length is 4096 bytes in UTF-8 encoding.", + "required": false + }, + "notification_settings": { + "name": "notification_settings", + "type": "any", + "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this job.", + "required": false + }, + "parameters": { + "name": "parameters", + "type": "any", + "description": "Job-level parameter definitions", + "required": false + }, + "performance_target": { + "name": "performance_target", + "type": "any", + "description": "The performance mode on a serverless job. This field determines the level of compute performance or cost-efficiency for the run.\nThe performance target does not apply to tasks that run on Serverless GPU compute.\n\n* `STANDARD`: Enables cost-efficient execution of serverless workloads.\n* `PERFORMANCE_OPTIMIZED`: Prioritizes fast startup and execution times through rapid scaling and optimized cluster performance.", + "required": false + }, + "queue": { + "name": "queue", + "type": "any", + "description": "The queue settings of the job.", + "required": false + }, + "run_as": { + "name": "run_as", + "type": "any", + "description": "The user or service principal that the job runs as, if specified in the request.\nThis field indicates the explicit configuration of `run_as` for the job.\nTo find the value in all cases, explicit or implicit, use `run_as_user_name`.", + "required": false + }, + "schedule": { + "name": "schedule", + "type": "any", + "description": "An optional periodic schedule for this job. The default behavior is that the job only runs when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", + "required": false + }, + "tags": { + "name": "tags", + "type": "map[string]string", + "description": "A map of tags associated with the job. 
These are forwarded to the cluster as cluster tags for jobs clusters, and are subject to the same limitations as cluster tags. A maximum of 25 tags can be added to the job.", + "required": false + }, + "tasks": { + "name": "tasks", + "type": "any", + "description": "A list of task specifications to be executed by this job.\nIt supports up to 1000 elements in write endpoints (:method:jobs/create, :method:jobs/reset, :method:jobs/update, :method:jobs/submit).\nRead endpoints return only 100 tasks. If more than 100 tasks are available, you can paginate through them using :method:jobs/get. Use the `next_page_token` field at the object root to determine if more results are available.", + "required": false + }, + "timeout_seconds": { + "name": "timeout_seconds", + "type": "int", + "description": "An optional timeout applied to each run of this job. A value of `0` means no timeout.", + "required": false + }, + "trigger": { + "name": "trigger", + "type": "any", + "description": "A configuration to trigger a run when certain conditions are met. 
The default behavior is that the job runs only when triggered by clicking “Run Now” in the Jobs UI or sending an API request to `runNow`.", + "required": false + }, + "usage_policy_id": { + "name": "usage_policy_id", + "type": "string", + "description": "The id of the user specified usage policy to use for this job.\nIf not specified, a default usage policy may be applied when creating or modifying the job.\nSee `effective_usage_policy_id` for the usage policy used by this workload.", + "required": false + }, + "webhook_notifications": { + "name": "webhook_notifications", + "type": "any", + "description": "A collection of system notification IDs to notify when runs of this job begin or complete.", + "required": false + } + } + }, + "bundle.MlflowExperiment": { + "name": "MlflowExperiment", + "package": "resources", + "description": "mlflow experiment configuration.", + "fields": { + "artifact_location": { + "name": "artifact_location", + "type": "any", + "description": "Location where all artifacts for the experiment are stored.\nIf not provided, the remote server will select an appropriate default.", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "Experiment name.", + "required": false + }, + "tags": { + "name": "tags", + "type": "map[string]string", + "description": "A collection of tags to set on the experiment. Maximum tag size and number of tags per request\ndepends on the storage backend. All storage backends are guaranteed to support tag keys up\nto 250 bytes in size and tag values up to 5000 bytes in size. 
All storage backends are also\nguaranteed to support up to 20 tags per request.", + "required": false + } + } + }, + "bundle.MlflowModel": { + "name": "MlflowModel", + "package": "resources", + "description": "mlflow model configuration.", + "fields": { + "description": { + "name": "description", + "type": "string", + "description": "Optional description for registered model.", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "Register models under this name", + "required": false + }, + "tags": { + "name": "tags", + "type": "map[string]string", + "description": "Additional metadata for registered model.", + "required": false + } + } + }, + "bundle.ModelServingEndpoint": { + "name": "ModelServingEndpoint", + "package": "resources", + "description": "model serving endpoint configuration.", + "fields": { + "ai_gateway": { + "name": "ai_gateway", + "type": "any", + "description": "The AI Gateway configuration for the serving endpoint. NOTE: External model, provisioned throughput, and pay-per-token endpoints are fully supported; agent endpoints currently only support inference tables.", + "required": false + }, + "budget_policy_id": { + "name": "budget_policy_id", + "type": "string", + "description": "The budget policy to be applied to the serving endpoint.", + "required": false + }, + "config": { + "name": "config", + "type": "any", + "description": "The core config of the serving endpoint.", + "required": false + }, + "description": { + "name": "description", + "type": "string", + "description": "", + "required": false + }, + "email_notifications": { + "name": "email_notifications", + "type": "any", + "description": "Email notification settings.", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "The name of the serving endpoint. 
This field is required and must be unique across a Databricks workspace.\nAn endpoint name can consist of alphanumeric characters, dashes, and underscores.", + "required": false + }, + "rate_limits": { + "name": "rate_limits", + "type": "any", + "description": "Rate limits to be applied to the serving endpoint. NOTE: this field is deprecated, please use AI Gateway to manage rate limits.", + "required": false, + "deprecated": true + }, + "route_optimized": { + "name": "route_optimized", + "type": "any", + "description": "Enable route optimization for the serving endpoint.", + "required": false + }, + "tags": { + "name": "tags", + "type": "map[string]string", + "description": "Tags to be attached to the serving endpoint and automatically propagated to billing logs.", + "required": false + } + } + }, + "bundle.Pipeline": { + "name": "Pipeline", + "package": "resources", + "description": "pipeline configuration.", + "fields": { + "allow_duplicate_names": { + "name": "allow_duplicate_names", + "type": "any", + "description": "If false, deployment will fail if name conflicts with that of another pipeline.", + "required": false + }, + "budget_policy_id": { + "name": "budget_policy_id", + "type": "string", + "description": "Budget policy of this pipeline.", + "required": false + }, + "catalog": { + "name": "catalog", + "type": "any", + "description": "A catalog in Unity Catalog to publish data from this pipeline to. If `target` is specified, tables in this pipeline are published to a `target` schema inside `catalog` (for example, `catalog`.`target`.`table`). 
If `target` is not specified, no data is published to Unity Catalog.", + "required": false + }, + "channel": { + "name": "channel", + "type": "any", + "description": "DLT Release Channel that specifies which version to use.", + "required": false + }, + "clusters": { + "name": "clusters", + "type": "any", + "description": "Cluster settings for this pipeline deployment.", + "required": false + }, + "configuration": { + "name": "configuration", + "type": "any", + "description": "String-String configuration for this pipeline execution.", + "required": false + }, + "continuous": { + "name": "continuous", + "type": "any", + "description": "Whether the pipeline is continuous or triggered. This replaces `trigger`.", + "required": false + }, + "deployment": { + "name": "deployment", + "type": "any", + "description": "Deployment type of this pipeline.", + "required": false + }, + "development": { + "name": "development", + "type": "any", + "description": "Whether the pipeline is in Development mode. 
Defaults to false.", + "required": false + }, + "dry_run": { + "name": "dry_run", + "type": "any", + "description": "", + "required": false + }, + "edition": { + "name": "edition", + "type": "any", + "description": "Pipeline product edition.", + "required": false + }, + "environment": { + "name": "environment", + "type": "any", + "description": "Environment specification for this pipeline used to install dependencies.", + "required": false + }, + "event_log": { + "name": "event_log", + "type": "any", + "description": "Event log configuration for this pipeline", + "required": false + }, + "filters": { + "name": "filters", + "type": "any", + "description": "Filters on which Pipeline packages to include in the deployed graph.", + "required": false + }, + "gateway_definition": { + "name": "gateway_definition", + "type": "any", + "description": "The definition of a gateway pipeline to support change data capture.", + "required": false + }, + "id": { + "name": "id", + "type": "any", + "description": "Unique identifier for this pipeline.", + "required": false + }, + "ingestion_definition": { + "name": "ingestion_definition", + "type": "any", + "description": "The configuration for a managed ingestion pipeline. 
These settings cannot be used with the 'libraries', 'schema', 'target', or 'catalog' settings.", + "required": false + }, + "libraries": { + "name": "libraries", + "type": "any", + "description": "Libraries or code needed by this deployment.", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "Friendly identifier for this pipeline.", + "required": false + }, + "notifications": { + "name": "notifications", + "type": "any", + "description": "List of notification settings for this pipeline.", + "required": false + }, + "photon": { + "name": "photon", + "type": "any", + "description": "Whether Photon is enabled for this pipeline.", + "required": false + }, + "restart_window": { + "name": "restart_window", + "type": "any", + "description": "Restart window of this pipeline.", + "required": false + }, + "root_path": { + "name": "root_path", + "type": "string", + "description": "Root path for this pipeline.\nThis is used as the root directory when editing the pipeline in the Databricks user interface and it is\nadded to sys.path when executing Python sources during pipeline execution.", + "required": false + }, + "run_as": { + "name": "run_as", + "type": "any", + "description": "Write-only setting, available only in Create/Update calls. Specifies the user or service principal that the pipeline runs as. If not specified, the pipeline runs as the user who created the pipeline.\n\nOnly `user_name` or `service_principal_name` can be specified. 
If both are specified, an error is thrown.", + "required": false + }, + "schema": { + "name": "schema", + "type": "any", + "description": "The default schema (database) where tables are read from or published to.", + "required": false + }, + "serverless": { + "name": "serverless", + "type": "any", + "description": "Whether serverless compute is enabled for this pipeline.", + "required": false + }, + "storage": { + "name": "storage", + "type": "any", + "description": "DBFS root directory for storing checkpoints and tables.", + "required": false + }, + "tags": { + "name": "tags", + "type": "map[string]string", + "description": "A map of tags associated with the pipeline.\nThese are forwarded to the cluster as cluster tags, and are therefore subject to the same limitations.\nA maximum of 25 tags can be added to the pipeline.", + "required": false + }, + "target": { + "name": "target", + "type": "any", + "description": "Target schema (database) to add tables in this pipeline to. Exactly one of `schema` or `target` must be specified. To publish to Unity Catalog, also specify `catalog`. This legacy field is deprecated for pipeline creation in favor of the `schema` field.", + "required": false, + "deprecated": true + }, + "trigger": { + "name": "trigger", + "type": "any", + "description": "Which pipeline trigger to use. Deprecated: Use `continuous` instead.", + "required": false, + "deprecated": true + }, + "usage_policy_id": { + "name": "usage_policy_id", + "type": "string", + "description": "Usage policy of this pipeline.", + "required": false + } + } + }, + "bundle.QualityMonitor": { + "name": "QualityMonitor", + "package": "resources", + "description": "quality monitor configuration.", + "fields": { + "assets_dir": { + "name": "assets_dir", + "type": "any", + "description": "[Create:REQ Update:IGN] Field for specifying the absolute path to a custom directory to store data-monitoring\nassets. 
Normally prepopulated to a default user location via UI and Python APIs.", + "required": false + }, + "baseline_table_name": { + "name": "baseline_table_name", + "type": "string", + "description": "[Create:OPT Update:OPT] Baseline table name.\nBaseline data is used to compute drift from the data in the monitored `table_name`.\nThe baseline table and the monitored table shall have the same schema.", + "required": false + }, + "custom_metrics": { + "name": "custom_metrics", + "type": "any", + "description": "[Create:OPT Update:OPT] Custom metrics.", + "required": false + }, + "data_classification_config": { + "name": "data_classification_config", + "type": "any", + "description": "[Create:OPT Update:OPT] Data classification related config.", + "required": false + }, + "inference_log": { + "name": "inference_log", + "type": "any", + "description": "", + "required": false + }, + "latest_monitor_failure_msg": { + "name": "latest_monitor_failure_msg", + "type": "any", + "description": "[Create:ERR Update:IGN] The latest error message for a monitor failure.", + "required": false + }, + "notifications": { + "name": "notifications", + "type": "any", + "description": "[Create:OPT Update:OPT] Field for specifying notification settings.", + "required": false + }, + "output_schema_name": { + "name": "output_schema_name", + "type": "string", + "description": "[Create:REQ Update:REQ] Schema where output tables are created. 
Needs to be in 2-level format {catalog}.{schema}", + "required": false + }, + "schedule": { + "name": "schedule", + "type": "any", + "description": "[Create:OPT Update:OPT] The monitor schedule.", + "required": false + }, + "skip_builtin_dashboard": { + "name": "skip_builtin_dashboard", + "type": "any", + "description": "Whether to skip creating a default dashboard summarizing data quality metrics.", + "required": false + }, + "slicing_exprs": { + "name": "slicing_exprs", + "type": "any", + "description": "[Create:OPT Update:OPT] List of column expressions to slice data with for targeted analysis. The data is grouped by\neach expression independently, resulting in a separate slice for each predicate and its\ncomplements. For example `slicing_exprs=[“col_1”, “col_2 \u003e 10”]` will generate the following\nslices: two slices for `col_2 \u003e 10` (True and False), and one slice per unique value in\n`col1`. For high-cardinality columns, only the top 100 unique values by frequency will\ngenerate slices.", + "required": false + }, + "snapshot": { + "name": "snapshot", + "type": "any", + "description": "Configuration for monitoring snapshot tables.", + "required": false + }, + "time_series": { + "name": "time_series", + "type": "any", + "description": "Configuration for monitoring time series tables.", + "required": false + }, + "warehouse_id": { + "name": "warehouse_id", + "type": "string", + "description": "Optional argument to specify the warehouse for dashboard creation. 
If not specified, the first running\nwarehouse will be used.", + "required": false + } + } + }, + "bundle.RegisteredModel": { + "name": "RegisteredModel", + "package": "resources", + "description": "registered model configuration.", + "fields": { + "aliases": { + "name": "aliases", + "type": "any", + "description": "List of aliases associated with the registered model", + "required": false + }, + "browse_only": { + "name": "browse_only", + "type": "any", + "description": "Indicates whether the principal is limited to retrieving metadata for the associated object through the BROWSE privilege when include_browse is enabled in the request.", + "required": false + }, + "catalog_name": { + "name": "catalog_name", + "type": "string", + "description": "The name of the catalog where the schema and the registered model reside", + "required": false + }, + "comment": { + "name": "comment", + "type": "any", + "description": "The comment attached to the registered model", + "required": false + }, + "created_at": { + "name": "created_at", + "type": "string (timestamp)", + "description": "Creation timestamp of the registered model in milliseconds since the Unix epoch", + "required": false + }, + "created_by": { + "name": "created_by", + "type": "any", + "description": "The identifier of the user who created the registered model", + "required": false + }, + "full_name": { + "name": "full_name", + "type": "string", + "description": "The three-level (fully qualified) name of the registered model", + "required": false + }, + "metastore_id": { + "name": "metastore_id", + "type": "string", + "description": "The unique identifier of the metastore", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "The name of the registered model", + "required": false + }, + "owner": { + "name": "owner", + "type": "any", + "description": "The identifier of the user who owns the registered model", + "required": false + }, + "schema_name": { + "name": "schema_name", 
+ "type": "string", + "description": "The name of the schema where the registered model resides", + "required": false + }, + "storage_location": { + "name": "storage_location", + "type": "any", + "description": "The storage location on the cloud under which model version data files are stored", + "required": false + }, + "updated_at": { + "name": "updated_at", + "type": "string (timestamp)", + "description": "Last-update timestamp of the registered model in milliseconds since the Unix epoch", + "required": false + }, + "updated_by": { + "name": "updated_by", + "type": "any", + "description": "The identifier of the user who updated the registered model last time", + "required": false + } + } + }, + "bundle.Schema": { + "name": "Schema", + "package": "resources", + "description": "schema configuration.", + "fields": { + "catalog_name": { + "name": "catalog_name", + "type": "string", + "description": "Name of parent catalog.", + "required": false + }, + "comment": { + "name": "comment", + "type": "any", + "description": "User-provided free-form text description.", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "Name of schema, relative to parent catalog.", + "required": false + }, + "properties": { + "name": "properties", + "type": "any", + "description": "A map of key-value properties attached to the securable.", + "required": false + }, + "storage_root": { + "name": "storage_root", + "type": "any", + "description": "Storage root URL for managed tables within schema.", + "required": false + } + } + }, + "bundle.SqlWarehouse": { + "name": "SqlWarehouse", + "package": "resources", + "description": "Creates a new SQL warehouse.", + "fields": { + "auto_stop_mins": { + "name": "auto_stop_mins", + "type": "any", + "description": "The amount of time in minutes that a SQL warehouse must be idle (i.e., no\nRUNNING queries) before it is automatically stopped.\n\nSupported values:\n- Must be == 0 or \u003e= 10 mins\n- 0 indicates no 
autostop.\n\nDefaults to 120 mins", + "required": false + }, + "channel": { + "name": "channel", + "type": "any", + "description": "Channel Details", + "required": false + }, + "cluster_size": { + "name": "cluster_size", + "type": "int", + "description": "Size of the clusters allocated for this warehouse.\nIncreasing the size of a spark cluster allows you to run larger queries on\nit. If you want to increase the number of concurrent queries, please tune\nmax_num_clusters.\n\nSupported values:\n- 2X-Small\n- X-Small\n- Small\n- Medium\n- Large\n- X-Large\n- 2X-Large\n- 3X-Large\n- 4X-Large", + "required": false + }, + "creator_name": { + "name": "creator_name", + "type": "string", + "description": "warehouse creator name", + "required": false + }, + "enable_photon": { + "name": "enable_photon", + "type": "bool", + "description": "Configures whether the warehouse should use Photon optimized clusters.\n\nDefaults to false.", + "required": false + }, + "enable_serverless_compute": { + "name": "enable_serverless_compute", + "type": "bool", + "description": "Configures whether the warehouse should use serverless compute", + "required": false + }, + "instance_profile_arn": { + "name": "instance_profile_arn", + "type": "any", + "description": "Deprecated. Instance profile used to pass IAM role to the cluster", + "required": false, + "deprecated": true + }, + "max_num_clusters": { + "name": "max_num_clusters", + "type": "any", + "description": "Maximum number of clusters that the autoscaler will create to handle\nconcurrent queries.\n\nSupported values:\n- Must be \u003e= min_num_clusters\n- Must be \u003c= 40.\n\nDefaults to min_clusters if unset.", + "required": false + }, + "min_num_clusters": { + "name": "min_num_clusters", + "type": "any", + "description": "Minimum number of available clusters that will be maintained for this SQL\nwarehouse. 
Increasing this will ensure that a larger number of clusters are\nalways running and therefore may reduce the cold start time for new\nqueries. This is similar to reserved vs. revocable cores in a resource\nmanager.\n\nSupported values:\n- Must be \u003e 0\n- Must be \u003c= min(max_num_clusters, 30)\n\nDefaults to 1", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "Logical name for the cluster.\n\nSupported values:\n- Must be unique within an org.\n- Must be less than 100 characters.", + "required": false + }, + "spot_instance_policy": { + "name": "spot_instance_policy", + "type": "any", + "description": "Configures whether the endpoint should use spot instances.", + "required": false + }, + "tags": { + "name": "tags", + "type": "map[string]string", + "description": "A set of key-value pairs that will be tagged on all resources (e.g., AWS instances and EBS volumes) associated\nwith this SQL warehouse.\n\nSupported values:\n- Number of tags \u003c 45.", + "required": false + }, + "warehouse_type": { + "name": "warehouse_type", + "type": "any", + "description": "Warehouse type: `PRO` or `CLASSIC`. If you want to use serverless compute,\nyou must set it to `PRO` and also set the field `enable_serverless_compute` to `true`.", + "required": false + } + } + }, + "bundle.SyncedDatabaseTable": { + "name": "SyncedDatabaseTable", + "package": "resources", + "description": "synced database table configuration.", + "fields": { + "data_synchronization_status": { + "name": "data_synchronization_status", + "type": "any", + "description": "Synced Table data synchronization status", + "required": false, + "output_only": true + }, + "database_instance_name": { + "name": "database_instance_name", + "type": "string", + "description": "Name of the target database instance. This is required when creating synced database tables in standard catalogs.\nThis is optional when creating synced database tables in registered catalogs. 
If this field is specified\nwhen creating synced database tables in registered catalogs, the database instance name MUST\nmatch that of the registered catalog (or the request will be rejected).", + "required": false + }, + "effective_database_instance_name": { + "name": "effective_database_instance_name", + "type": "string", + "description": "The name of the database instance that this table is registered to. This field is always returned, and for\ntables inside database catalogs is inferred from the database instance associated with the catalog.", + "required": false, + "output_only": true + }, + "effective_logical_database_name": { + "name": "effective_logical_database_name", + "type": "string", + "description": "The name of the logical database that this table is registered to.", + "required": false, + "output_only": true + }, + "logical_database_name": { + "name": "logical_database_name", + "type": "string", + "description": "Target Postgres database object (logical database) name for this table.\n\nWhen creating a synced table in a registered Postgres catalog, the\ntarget Postgres database name is inferred to be that of the registered catalog.\nIf this field is specified in this scenario, the Postgres database name MUST\nmatch that of the registered catalog (or the request will be rejected).\n\nWhen creating a synced table in a standard catalog, this field is required.\nIn this scenario, specifying this field will allow targeting an arbitrary postgres database.\nNote that this has implications for the `create_database_objects_if_missing` field in `spec`.", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "Full three-part (catalog, schema, table) name of the table.", + "required": false + }, + "spec": { + "name": "spec", + "type": "any", + "description": "Specification of a synced database table.", + "required": false + }, + "unity_catalog_provisioning_state": { + "name": "unity_catalog_provisioning_state", + "type": "any", + 
"description": "The provisioning state of the synced table entity in Unity Catalog. This is distinct from the\nstate of the data synchronization pipeline (i.e. the table may be in \"ACTIVE\" but the pipeline\nmay be in \"PROVISIONING\" as it runs asynchronously).", + "required": false, + "output_only": true + } + } + }, + "bundle.Volume": { + "name": "Volume", + "package": "resources", + "description": "volume configuration.", + "fields": { + "catalog_name": { + "name": "catalog_name", + "type": "string", + "description": "The name of the catalog where the schema and the volume are", + "required": false + }, + "comment": { + "name": "comment", + "type": "any", + "description": "The comment attached to the volume", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "The name of the volume", + "required": false + }, + "schema_name": { + "name": "schema_name", + "type": "string", + "description": "The name of the schema where the volume is", + "required": false + }, + "storage_location": { + "name": "storage_location", + "type": "any", + "description": "The storage location on the cloud", + "required": false + }, + "volume_type": { + "name": "volume_type", + "type": "any", + "description": "The type of the volume. 
An external volume is located in the specified external location.\nA managed volume is located in the default location which is specified by the parent schema, or the parent catalog, or the Metastore.\n[Learn more](https://docs.databricks.com/aws/en/volumes/managed-vs-external)", + "required": false + } + } + }, + "catalog.MonitorCronSchedule": { + "name": "MonitorCronSchedule", + "package": "catalog", + "description": "monitor cron schedule configuration.", + "fields": { + "pause_status": { + "name": "pause_status", + "type": "any", + "description": "Read only field that indicates whether a schedule is paused or not.", + "required": false + }, + "quartz_cron_expression": { + "name": "quartz_cron_expression", + "type": "any", + "description": "The expression that determines when to run the monitor. See [examples](https://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html).", + "required": false + }, + "timezone_id": { + "name": "timezone_id", + "type": "string", + "description": "The timezone id (e.g., ``PST``) in which to evaluate the quartz expression.", + "required": false + } + } + }, + "catalog.MonitorCronSchedulePauseStatus": { + "name": "MonitorCronSchedulePauseStatus", + "package": "catalog", + "description": "Source link: https://src.dev.databricks.com/databricks/universe/-/blob/elastic-spark-common/api/messages/schedule.proto\nMonitoring workflow schedule pause status.", + "fields": {} + }, + "catalog.MonitorDataClassificationConfig": { + "name": "MonitorDataClassificationConfig", + "package": "catalog", + "description": "Data classification related configuration.", + "fields": { + "enabled": { + "name": "enabled", + "type": "bool", + "description": "Whether to enable data classification.", + "required": false + } + } + }, + "catalog.MonitorDestination": { + "name": "MonitorDestination", + "package": "catalog", + "description": "monitor destination configuration.", + "fields": { + "email_addresses": { + "name": 
"email_addresses", + "type": "any", + "description": "The list of email addresses to send the notification to. A maximum of 5 email addresses is supported.", + "required": false + } + } + }, + "catalog.MonitorInferenceLog": { + "name": "MonitorInferenceLog", + "package": "catalog", + "description": "monitor inference log configuration.", + "fields": { + "granularities": { + "name": "granularities", + "type": "any", + "description": "List of granularities to use when aggregating data into time windows based on their timestamp.", + "required": false + }, + "label_col": { + "name": "label_col", + "type": "any", + "description": "Column for the label.", + "required": false + }, + "model_id_col": { + "name": "model_id_col", + "type": "any", + "description": "Column for the model identifier.", + "required": false + }, + "prediction_col": { + "name": "prediction_col", + "type": "any", + "description": "Column for the prediction.", + "required": false + }, + "prediction_proba_col": { + "name": "prediction_proba_col", + "type": "any", + "description": "Column for prediction probabilities", + "required": false + }, + "problem_type": { + "name": "problem_type", + "type": "any", + "description": "Problem type the model aims to solve.", + "required": false + }, + "timestamp_col": { + "name": "timestamp_col", + "type": "any", + "description": "Column for the timestamp.", + "required": false + } + } + }, + "catalog.MonitorInferenceLogProblemType": { + "name": "MonitorInferenceLogProblemType", + "package": "catalog", + "description": "monitor inference log problem type configuration.", + "fields": {} + }, + "catalog.MonitorMetric": { + "name": "MonitorMetric", + "package": "catalog", + "description": "Custom metric definition.", + "fields": { + "definition": { + "name": "definition", + "type": "any", + "description": "Jinja template for a SQL expression that specifies how to compute the metric. 
See [create metric definition](https://docs.databricks.com/en/lakehouse-monitoring/custom-metrics.html#create-definition).", + "required": false + }, + "input_columns": { + "name": "input_columns", + "type": "any", + "description": "A list of column names in the input table the metric should be computed for.\nCan use ``\":table\"`` to indicate that the metric needs information from multiple columns.", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "Name of the metric in the output tables.", + "required": false + }, + "output_data_type": { + "name": "output_data_type", + "type": "any", + "description": "The output type of the custom metric.", + "required": false + }, + "type": { + "name": "type", + "type": "any", + "description": "Can only be one of ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"``, ``\"CUSTOM_METRIC_TYPE_DERIVED\"``, or ``\"CUSTOM_METRIC_TYPE_DRIFT\"``.\nThe ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"`` and ``\"CUSTOM_METRIC_TYPE_DERIVED\"`` metrics\nare computed on a single table, whereas the ``\"CUSTOM_METRIC_TYPE_DRIFT\"`` compare metrics across\nbaseline and input table, or across the two consecutive time windows.\n- CUSTOM_METRIC_TYPE_AGGREGATE: only depend on the existing columns in your table\n- CUSTOM_METRIC_TYPE_DERIVED: depend on previously computed aggregate metrics\n- CUSTOM_METRIC_TYPE_DRIFT: depend on previously computed aggregate or derived metrics", + "required": false + } + } + }, + "catalog.MonitorMetricType": { + "name": "MonitorMetricType", + "package": "catalog", + "description": "Can only be one of ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"``, ``\"CUSTOM_METRIC_TYPE_DERIVED\"``, or ``\"CUSTOM_METRIC_TYPE_DRIFT\"``.\nThe ``\"CUSTOM_METRIC_TYPE_AGGREGATE\"`` and ``\"CUSTOM_METRIC_TYPE_DERIVED\"`` metrics\nare computed on a single table, whereas the ``\"CUSTOM_METRIC_TYPE_DRIFT\"`` compare metrics across\nbaseline and input table, or across the two consecutive time windows.\n- 
CUSTOM_METRIC_TYPE_AGGREGATE: only depend on the existing columns in your table\n- CUSTOM_METRIC_TYPE_DERIVED: depend on previously computed aggregate metrics\n- CUSTOM_METRIC_TYPE_DRIFT: depend on previously computed aggregate or derived metrics", + "fields": {} + }, + "catalog.MonitorNotifications": { + "name": "MonitorNotifications", + "package": "catalog", + "description": "monitor notifications configuration.", + "fields": { + "on_failure": { + "name": "on_failure", + "type": "any", + "description": "Destinations to send notifications on failure/timeout.", + "required": false + }, + "on_new_classification_tag_detected": { + "name": "on_new_classification_tag_detected", + "type": "any", + "description": "Destinations to send notifications on new classification tag detected.", + "required": false + } + } + }, + "catalog.MonitorSnapshot": { + "name": "MonitorSnapshot", + "package": "catalog", + "description": "Snapshot analysis configuration", + "fields": {} + }, + "catalog.MonitorTimeSeries": { + "name": "MonitorTimeSeries", + "package": "catalog", + "description": "Time series analysis configuration.", + "fields": { + "granularities": { + "name": "granularities", + "type": "any", + "description": "Granularities for aggregating data into time windows based on their timestamp. Currently the following static\ngranularities are supported:\n{``\"5 minutes\"``, ``\"30 minutes\"``, ``\"1 hour\"``, ``\"1 day\"``, ``\"\u003cn\u003e week(s)\"``, ``\"1 month\"``, ``\"1 year\"``}.", + "required": false + }, + "timestamp_col": { + "name": "timestamp_col", + "type": "any", + "description": "Column for the timestamp.", + "required": false + } + } + }, + "catalog.RegisteredModelAlias": { + "name": "RegisteredModelAlias", + "package": "catalog", + "description": "registered model alias configuration.", + "fields": { + "alias_name": { + "name": "alias_name", + "type": "string", + "description": "Name of the alias, e.g. 
'champion' or 'latest_stable'", + "required": false + }, + "catalog_name": { + "name": "catalog_name", + "type": "string", + "description": "The name of the catalog containing the model version", + "required": false + }, + "id": { + "name": "id", + "type": "any", + "description": "The unique identifier of the alias", + "required": false + }, + "model_name": { + "name": "model_name", + "type": "string", + "description": "The name of the parent registered model of the model version, relative to parent schema", + "required": false + }, + "schema_name": { + "name": "schema_name", + "type": "string", + "description": "The name of the schema containing the model version, relative to parent catalog", + "required": false + }, + "version_num": { + "name": "version_num", + "type": "any", + "description": "Integer version number of the model version to which this alias points.", + "required": false + } + } + }, + "catalog.VolumeType": { + "name": "VolumeType", + "package": "catalog", + "description": "volume type configuration.", + "fields": {} + }, + "compute.Adlsgen2Info": { + "name": "Adlsgen2Info", + "package": "compute", + "description": "A storage location in Adls Gen2", + "fields": { + "destination": { + "name": "destination", + "type": "any", + "description": "abfss destination, e.g. 
`abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e`.", + "required": false + } + } + }, + "compute.AutoScale": { + "name": "AutoScale", + "package": "compute", + "description": "auto scale configuration.", + "fields": { + "max_workers": { + "name": "max_workers", + "type": "any", + "description": "The maximum number of workers to which the cluster can scale up when overloaded.\nNote that `max_workers` must be strictly greater than `min_workers`.", + "required": false + }, + "min_workers": { + "name": "min_workers", + "type": "any", + "description": "The minimum number of workers to which the cluster can scale down when underutilized.\nIt is also the initial number of workers the cluster will have after creation.", + "required": false + } + } + }, + "compute.AwsAttributes": { + "name": "AwsAttributes", + "package": "compute", + "description": "Attributes set during cluster creation which are related to Amazon Web Services.", + "fields": { + "availability": { + "name": "availability", + "type": "any", + "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\n\nNote: If `first_on_demand` is zero, this availability type will be used for the entire cluster.", + "required": false + }, + "ebs_volume_count": { + "name": "ebs_volume_count", + "type": "int", + "description": "The number of volumes launched for each instance. Users can choose up to 10 volumes.\nThis feature is only enabled for supported node types. 
Legacy node types cannot specify\ncustom EBS volumes.\nFor node types with no instance store, at least one EBS volume needs to be specified;\notherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at `/ebs0`, `/ebs1`, etc.\nInstance store volumes will be mounted at `/local_disk0`, `/local_disk1`, etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for\nscratch storage because heterogeneously sized scratch devices can lead to inefficient disk\nutilization. If no EBS volumes are attached, Databricks will configure Spark to use instance\nstore volumes.\n\nPlease note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`\nwill be overridden.", + "required": false + }, + "ebs_volume_iops": { + "name": "ebs_volume_iops", + "type": "any", + "description": "If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.", + "required": false + }, + "ebs_volume_size": { + "name": "ebs_volume_size", + "type": "int", + "description": "The size of each EBS volume (in GiB) launched for each instance. For general purpose\nSSD, this value must be within the range 100 - 4096. For throughput optimized HDD,\nthis value must be within the range 500 - 4096.", + "required": false + }, + "ebs_volume_throughput": { + "name": "ebs_volume_throughput", + "type": "any", + "description": "If using gp3 volumes, what throughput to use for the disk. 
If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.", + "required": false + }, + "ebs_volume_type": { + "name": "ebs_volume_type", + "type": "any", + "description": "The type of EBS volumes that will be launched with this cluster.", + "required": false + }, + "first_on_demand": { + "name": "first_on_demand", + "type": "any", + "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nIf this value is greater than 0, the cluster driver node in particular will be placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster.", + "required": false + }, + "instance_profile_arn": { + "name": "instance_profile_arn", + "type": "any", + "description": "Nodes for this cluster will only be placed on AWS instances with this instance profile. If\nomitted, nodes will be placed on instances without an IAM instance profile. The instance\nprofile must have previously been added to the Databricks environment by an account\nadministrator.\n\nThis feature may only be available to certain customer plans.", + "required": false + }, + "spot_bid_price_percent": { + "name": "spot_bid_price_percent", + "type": "any", + "description": "The bid price for AWS spot instances, as a percentage of the corresponding instance type's\non-demand price.\nFor example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot\ninstance, then the bid price is half of the price of\non-demand `r3.xlarge` instances. 
Similarly, if this field is set to 200, the bid price is twice\nthe price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.\nWhen spot instances are requested for this cluster, only spot instances whose bid price\npercentage matches this field will be considered.\nNote that, for safety, we enforce this field to be no more than 10000.", + "required": false + }, + "zone_id": { + "name": "zone_id", + "type": "string", + "description": "Identifier for the availability zone/datacenter in which the cluster resides.\nThis string will be of a form like \"us-west-2a\". The provided availability\nzone must be in the same region as the Databricks deployment. For example, \"us-west-2a\"\nis not a valid zone id if the Databricks deployment resides in the \"us-east-1\" region.\nThis is an optional field at cluster creation, and if not specified, the zone \"auto\" will be used.\nIf the zone specified is \"auto\", will try to place cluster in a zone with high availability,\nand will retry placement in a different AZ if there is not enough capacity.\n\nThe list of available zones as well as the default value can be found by using the\n`List Zones` method.", + "required": false + } + } + }, + "compute.AwsAvailability": { + "name": "AwsAvailability", + "package": "compute", + "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\n\nNote: If `first_on_demand` is zero, this availability type will be used for the entire cluster.", + "fields": {} + }, + "compute.AzureAttributes": { + "name": "AzureAttributes", + "package": "compute", + "description": "Attributes set during cluster creation which are related to Microsoft Azure.", + "fields": { + "availability": { + "name": "availability", + "type": "any", + "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\nNote: If `first_on_demand` is zero, this availability\ntype will be used for the entire cluster.", + "required": 
false + }, + "first_on_demand": { + "name": "first_on_demand", + "type": "any", + "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster.", + "required": false + }, + "log_analytics_info": { + "name": "log_analytics_info", + "type": "any", + "description": "Defines values necessary to configure and run Azure Log Analytics agent", + "required": false + }, + "spot_bid_max_price": { + "name": "spot_bid_max_price", + "type": "any", + "description": "The max bid price to be used for Azure spot instances.\nThe Max price for the bid cannot be higher than the on-demand price of the instance.\nIf not specified, the default value is -1, which specifies that the instance cannot be evicted\non the basis of price, and only on the basis of availability. 
Further, the value should be \u003e 0 or -1.", + "required": false + } + } + }, + "compute.AzureAvailability": { + "name": "AzureAvailability", + "package": "compute", + "description": "Availability type used for all subsequent nodes past the `first_on_demand` ones.\nNote: If `first_on_demand` is zero, this availability type will be used for the entire cluster.", + "fields": {} + }, + "compute.ClientsTypes": { + "name": "ClientsTypes", + "package": "compute", + "description": "clients types configuration.", + "fields": { + "jobs": { + "name": "jobs", + "type": "any", + "description": "With jobs set, the cluster can be used for jobs", + "required": false + }, + "notebooks": { + "name": "notebooks", + "type": "any", + "description": "With notebooks set, this cluster can be used for notebooks", + "required": false + } + } + }, + "compute.ClusterLogConf": { + "name": "ClusterLogConf", + "package": "compute", + "description": "Cluster log delivery config", + "fields": { + "dbfs": { + "name": "dbfs", + "type": "any", + "description": "destination needs to be provided, e.g.\n`{ \"dbfs\" : { \"destination\" : \"dbfs:/home/cluster_log\" } }`", + "required": false + }, + "s3": { + "name": "s3", + "type": "any", + "description": "destination and either the region or endpoint need to be provided. 
e.g.\n`{ \"s3\": { \"destination\" : \"s3://cluster_log_bucket/prefix\", \"region\" : \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "required": false + }, + "volumes": { + "name": "volumes", + "type": "any", + "description": "destination needs to be provided, e.g.\n`{ \"volumes\": { \"destination\": \"/Volumes/catalog/schema/volume/cluster_log\" } }`", + "required": false + } + } + }, + "compute.ClusterSpec": { + "name": "ClusterSpec", + "package": "compute", + "description": "Contains a snapshot of the latest user specified settings that were used to create/edit the cluster.", + "fields": { + "apply_policy_default_values": { + "name": "apply_policy_default_values", + "type": "any", + "description": "When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied.", + "required": false + }, + "autoscale": { + "name": "autoscale", + "type": "any", + "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", + "required": false + }, + "autotermination_minutes": { + "name": "autotermination_minutes", + "type": "int", + "description": "Automatically terminates the cluster after it is inactive for this time in minutes. If not set,\nthis cluster will not be automatically terminated. 
If specified, the threshold must be between\n10 and 10000 minutes.\nUsers can also set this value to 0 to explicitly disable automatic termination.", + "required": false + }, + "aws_attributes": { + "name": "aws_attributes", + "type": "any", + "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", + "required": false + }, + "azure_attributes": { + "name": "azure_attributes", + "type": "any", + "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", + "required": false + }, + "cluster_log_conf": { + "name": "cluster_log_conf", + "type": "any", + "description": "The configuration for delivering spark logs to a long-term storage destination.\nThree kinds of destinations (DBFS, S3 and Unity Catalog volumes) are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", + "required": false + }, + "cluster_name": { + "name": "cluster_name", + "type": "string", + "description": "Cluster name requested by the user. This doesn't have to be unique.\nIf not specified at creation, the cluster name will be an empty string.\nFor job clusters, the cluster name is automatically set based on the job and job run IDs.", + "required": false + }, + "custom_tags": { + "name": "custom_tags", + "type": "map[string]string", + "description": "Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. 
Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", + "required": false + }, + "data_security_mode": { + "name": "data_security_mode", + "type": "any", + "description": "Data security mode decides what data governance model to use when accessing data\nfrom a cluster.\n\nThe following modes can only be used when `kind = CLASSIC_PREVIEW`.\n* `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration.\n* `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`.\n* `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`.\n\nThe following modes can be used regardless of `kind`.\n* `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode.\n* `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode.\n* `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. 
But programming languages and cluster features might be limited.\n\nThe following modes are deprecated starting with Databricks Runtime 15.0 and\nwill be removed for future Databricks Runtime versions:\n\n* `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters.\n* `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters.\n* `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.\n* `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled.", + "required": false + }, + "docker_image": { + "name": "docker_image", + "type": "any", + "description": "Custom docker image BYOC", + "required": false + }, + "driver_instance_pool_id": { + "name": "driver_instance_pool_id", + "type": "string", + "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned.", + "required": false + }, + "driver_node_type_id": { + "name": "driver_node_type_id", + "type": "string", + "description": "The node type of the Spark driver.\nNote that this field is optional; if unset, the driver node type will be set as the same value\nas `node_type_id` defined above.\n\nThis field, along with node_type_id, should not be set if virtual_cluster_size is set.\nIf both driver_node_type_id, node_type_id, and virtual_cluster_size are specified, driver_node_type_id and node_type_id take precedence.", + "required": false + }, + "enable_elastic_disk": { + "name": "enable_elastic_disk", + "type": "bool", + "description": "Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk\nspace when its Spark workers are running low on disk space.", + "required": false + }, + "enable_local_disk_encryption": { + "name": "enable_local_disk_encryption", + "type": "bool", + "description": 
"Whether to enable LUKS on cluster VMs' local disks", + "required": false + }, + "gcp_attributes": { + "name": "gcp_attributes", + "type": "any", + "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", + "required": false + }, + "init_scripts": { + "name": "init_scripts", + "type": "any", + "description": "The configuration for storing init scripts. Any number of destinations can be specified.\nThe scripts are executed sequentially in the order provided.\nIf `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", + "required": false + }, + "instance_pool_id": { + "name": "instance_pool_id", + "type": "string", + "description": "The optional ID of the instance pool to which the cluster belongs.", + "required": false + }, + "is_single_node": { + "name": "is_single_node", + "type": "bool", + "description": "This field can only be used when `kind = CLASSIC_PREVIEW`.\n\nWhen set to true, Databricks will automatically set single node related `custom_tags`, `spark_conf`, and `num_workers`", + "required": false + }, + "kind": { + "name": "kind", + "type": "any", + "description": "The kind of compute described by this compute specification.\n\nDepending on `kind`, different validations and default values will be applied.\n\nClusters with `kind = CLASSIC_PREVIEW` support the following fields, whereas clusters with no specified `kind` do not.\n* [is_single_node](/api/workspace/clusters/create#is_single_node)\n* [use_ml_runtime](/api/workspace/clusters/create#use_ml_runtime)\n* [data_security_mode](/api/workspace/clusters/create#data_security_mode) set to `DATA_SECURITY_MODE_AUTO`, `DATA_SECURITY_MODE_DEDICATED`, or `DATA_SECURITY_MODE_STANDARD`\n\nBy using the [simple form](https://docs.databricks.com/compute/simple-form.html), your clusters are automatically using `kind = CLASSIC_PREVIEW`.", + 
"required": false + }, + "node_type_id": { + "name": "node_type_id", + "type": "string", + "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.", + "required": false + }, + "num_workers": { + "name": "num_workers", + "type": "any", + "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned.", + "required": false + }, + "policy_id": { + "name": "policy_id", + "type": "string", + "description": "The ID of the cluster policy used to create the cluster if applicable.", + "required": false + }, + "remote_disk_throughput": { + "name": "remote_disk_throughput", + "type": "any", + "description": "If set, what the configurable throughput (in Mb/s) for the remote disk is. 
Currently only supported for GCP HYPERDISK_BALANCED disks.", + "required": false + }, + "runtime_engine": { + "name": "runtime_engine", + "type": "any", + "description": "Determines the cluster's runtime engine, either standard or Photon.\n\nThis field is not compatible with legacy `spark_version` values that contain `-photon-`.\nRemove `-photon-` from the `spark_version` and set `runtime_engine` to `PHOTON`.\n\nIf left unspecified, the runtime engine defaults to standard unless the spark_version\ncontains -photon-, in which case Photon will be used.", + "required": false + }, + "single_user_name": { + "name": "single_user_name", + "type": "string", + "description": "Single user name if data_security_mode is `SINGLE_USER`", + "required": false + }, + "spark_conf": { + "name": "spark_conf", + "type": "any", + "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nUsers can also pass in a string of extra JVM options to the driver and the executors via\n`spark.driver.extraJavaOptions` and `spark.executor.extraJavaOptions` respectively.", + "required": false + }, + "spark_env_vars": { + "name": "spark_env_vars", + "type": "any", + "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. 
This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", + "required": false + }, + "spark_version": { + "name": "spark_version", + "type": "any", + "description": "The Spark version of the cluster, e.g. `3.3.x-scala2.11`.\nA list of available Spark versions can be retrieved by using\nthe :method:clusters/sparkVersions API call.", + "required": false + }, + "ssh_public_keys": { + "name": "ssh_public_keys", + "type": "any", + "description": "SSH public key contents that will be added to each Spark node in this cluster. The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", + "required": false + }, + "total_initial_remote_disk_size": { + "name": "total_initial_remote_disk_size", + "type": "int", + "description": "If set, what the total initial volume size (in GB) of the remote disks should be. 
Currently only supported for GCP HYPERDISK_BALANCED disks.", + "required": false + }, + "use_ml_runtime": { + "name": "use_ml_runtime", + "type": "any", + "description": "This field can only be used when `kind = CLASSIC_PREVIEW`.\n\n`effective_spark_version` is determined by `spark_version` (DBR release), this field `use_ml_runtime`, and whether `node_type_id` is gpu node or not.", + "required": false + }, + "workload_type": { + "name": "workload_type", + "type": "any", + "description": "Cluster Attributes showing for clusters workload types.", + "required": false + } + } + }, + "compute.DataSecurityMode": { + "name": "DataSecurityMode", + "package": "compute", + "description": "Data security mode decides what data governance model to use when accessing data\nfrom a cluster.\n\nThe following modes can only be used when `kind = CLASSIC_PREVIEW`.\n* `DATA_SECURITY_MODE_AUTO`: Databricks will choose the most appropriate access mode depending on your compute configuration.\n* `DATA_SECURITY_MODE_STANDARD`: Alias for `USER_ISOLATION`.\n* `DATA_SECURITY_MODE_DEDICATED`: Alias for `SINGLE_USER`.\n\nThe following modes can be used regardless of `kind`.\n* `NONE`: No security isolation for multiple users sharing the cluster. Data governance features are not available in this mode.\n* `SINGLE_USER`: A secure cluster that can only be exclusively used by a single user specified in `single_user_name`. Most programming languages, cluster features and data governance features are available in this mode.\n* `USER_ISOLATION`: A secure cluster that can be shared by multiple users. Cluster users are fully isolated so that they cannot see each other's data and credentials. Most data governance features are supported in this mode. 
But programming languages and cluster features might be limited.\n\nThe following modes are deprecated starting with Databricks Runtime 15.0 and\nwill be removed for future Databricks Runtime versions:\n\n* `LEGACY_TABLE_ACL`: This mode is for users migrating from legacy Table ACL clusters.\n* `LEGACY_PASSTHROUGH`: This mode is for users migrating from legacy Passthrough on high concurrency clusters.\n* `LEGACY_SINGLE_USER`: This mode is for users migrating from legacy Passthrough on standard clusters.\n* `LEGACY_SINGLE_USER_STANDARD`: This mode provides a way that doesn’t have UC nor passthrough enabled.", + "fields": {} + }, + "compute.DbfsStorageInfo": { + "name": "DbfsStorageInfo", + "package": "compute", + "description": "A storage location in DBFS", + "fields": { + "destination": { + "name": "destination", + "type": "any", + "description": "dbfs destination, e.g. `dbfs:/my/path`", + "required": false + } + } + }, + "compute.DockerBasicAuth": { + "name": "DockerBasicAuth", + "package": "compute", + "description": "docker basic auth configuration.", + "fields": { + "password": { + "name": "password", + "type": "any", + "description": "Password of the user", + "required": false + }, + "username": { + "name": "username", + "type": "any", + "description": "Name of the user", + "required": false + } + } + }, + "compute.DockerImage": { + "name": "DockerImage", + "package": "compute", + "description": "docker image configuration.", + "fields": { + "basic_auth": { + "name": "basic_auth", + "type": "any", + "description": "Basic auth with username and password", + "required": false + }, + "url": { + "name": "url", + "type": "any", + "description": "URL of the docker image.", + "required": false + } + } + }, + "compute.EbsVolumeType": { + "name": "EbsVolumeType", + "package": "compute", + "description": "All EBS volume types that Databricks supports.\nSee https://aws.amazon.com/ebs/details/ for details.", + "fields": {} + }, + "compute.Environment": { + "name": 
"Environment", + "package": "compute", + "description": "The environment entity used to preserve serverless environment side panel, jobs' environment for non-notebook task, and DLT's environment for classic and serverless pipelines.\nIn this minimal environment spec, only pip dependencies are supported.", + "fields": { + "client": { + "name": "client", + "type": "any", + "description": "Use `environment_version` instead.", + "required": false, + "deprecated": true + }, + "dependencies": { + "name": "dependencies", + "type": "any", + "description": "List of pip dependencies, as supported by the version of pip in this environment.\nEach dependency is a valid pip requirements file line per https://pip.pypa.io/en/stable/reference/requirements-file-format/.\nAllowed dependencies include a requirement specifier, an archive URL, a local project path (such as WSFS or UC Volumes in Databricks), or a VCS project URL.", + "required": false + }, + "environment_version": { + "name": "environment_version", + "type": "any", + "description": "Required. Environment version used by the environment.\nEach version comes with a specific Python version and a set of Python packages.\nThe version is a string, consisting of an integer.", + "required": false + }, + "java_dependencies": { + "name": "java_dependencies", + "type": "any", + "description": "List of java dependencies. Each dependency is a string representing a java library path. 
For example: `/Volumes/path/to/test.jar`.", + "required": false + } + } + }, + "compute.GcpAttributes": { + "name": "GcpAttributes", + "package": "compute", + "description": "Attributes set during cluster creation which are related to GCP.", + "fields": { + "availability": { + "name": "availability", + "type": "any", + "description": "This field determines whether the spark executors will be scheduled to run on preemptible\nVMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable.", + "required": false + }, + "boot_disk_size": { + "name": "boot_disk_size", + "type": "int", + "description": "Boot disk size in GB", + "required": false + }, + "first_on_demand": { + "name": "first_on_demand", + "type": "any", + "description": "The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster.", + "required": false + }, + "google_service_account": { + "name": "google_service_account", + "type": "any", + "description": "If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). 
The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator.", + "required": false + }, + "local_ssd_count": { + "name": "local_ssd_count", + "type": "int", + "description": "If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached.\nEach local SSD is 375GB in size.\nRefer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds)\nfor the supported number of local SSDs for each instance type.", + "required": false + }, + "use_preemptible_executors": { + "name": "use_preemptible_executors", + "type": "any", + "description": "This field determines whether the spark executors will be scheduled to run on preemptible\nVMs (when set to true) versus standard compute engine VMs (when set to false; default).\nNote: Soon to be deprecated, use the 'availability' field instead.", + "required": false, + "deprecated": true + }, + "zone_id": { + "name": "zone_id", + "type": "string", + "description": "Identifier for the availability zone in which the cluster resides.\nThis can be one of the following:\n- \"HA\" =\u003e High availability, spread nodes across availability zones for a Databricks deployment region [default].\n- \"AUTO\" =\u003e Databricks picks an availability zone to schedule the cluster on.\n- A GCP availability zone =\u003e Pick One of the available zones for (machine type + region) from\nhttps://cloud.google.com/compute/docs/regions-zones.", + "required": false + } + } + }, + "compute.GcpAvailability": { + "name": "GcpAvailability", + "package": "compute", + "description": "This field determines whether the instance pool will contain preemptible\nVMs, on-demand VMs, or preemptible VMs with a fallback to on-demand VMs if the former is unavailable.", + "fields": {} + }, + "compute.GcsStorageInfo": { + "name": "GcsStorageInfo", + "package": "compute", + "description": "A storage location in Google Cloud Platform's 
GCS", + "fields": { + "destination": { + "name": "destination", + "type": "any", + "description": "GCS destination/URI, e.g. `gs://my-bucket/some-prefix`", + "required": false + } + } + }, + "compute.InitScriptInfo": { + "name": "InitScriptInfo", + "package": "compute", + "description": "Config for an individual init script\nNext ID: 11", + "fields": { + "abfss": { + "name": "abfss", + "type": "any", + "description": "destination needs to be provided, e.g.\n`abfss://\u003ccontainer-name\u003e@\u003cstorage-account-name\u003e.dfs.core.windows.net/\u003cdirectory-name\u003e`", + "required": false + }, + "dbfs": { + "name": "dbfs", + "type": "any", + "description": "destination needs to be provided. e.g.\n`{ \"dbfs\": { \"destination\" : \"dbfs:/home/cluster_log\" } }`", + "required": false, + "deprecated": true + }, + "file": { + "name": "file", + "type": "any", + "description": "destination needs to be provided, e.g.\n`{ \"file\": { \"destination\": \"file:/my/local/file.sh\" } }`", + "required": false + }, + "gcs": { + "name": "gcs", + "type": "any", + "description": "destination needs to be provided, e.g.\n`{ \"gcs\": { \"destination\": \"gs://my-bucket/file.sh\" } }`", + "required": false + }, + "s3": { + "name": "s3", + "type": "any", + "description": "destination and either the region or endpoint need to be provided. e.g.\n`{ \"s3\": { \"destination\": \"s3://cluster_log_bucket/prefix\", \"region\": \"us-west-2\" } }`\nCluster iam role is used to access s3, please make sure the cluster iam role in\n`instance_profile_arn` has permission to write data to the s3 destination.", + "required": false + }, + "volumes": { + "name": "volumes", + "type": "any", + "description": "destination needs to be provided. 
e.g.\n`{ \"volumes\" : { \"destination\" : \"/Volumes/my-init.sh\" } }`", + "required": false + }, + "workspace": { + "name": "workspace", + "type": "any", + "description": "destination needs to be provided, e.g.\n`{ \"workspace\": { \"destination\": \"/cluster-init-scripts/setup-datadog.sh\" } }`", + "required": false + } + } + }, + "compute.Library": { + "name": "Library", + "package": "compute", + "description": "library configuration.", + "fields": { + "cran": { + "name": "cran", + "type": "any", + "description": "Specification of a CRAN library to be installed as part of the library", + "required": false + }, + "egg": { + "name": "egg", + "type": "any", + "description": "Deprecated. URI of the egg library to install. Installing Python egg files is deprecated and is not supported in Databricks Runtime 14.0 and above.", + "required": false, + "deprecated": true + }, + "jar": { + "name": "jar", + "type": "any", + "description": "URI of the JAR library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.\nFor example: `{ \"jar\": \"/Workspace/path/to/library.jar\" }`, `{ \"jar\" : \"/Volumes/path/to/library.jar\" }` or\n`{ \"jar\": \"s3://my-bucket/library.jar\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI.", + "required": false + }, + "maven": { + "name": "maven", + "type": "any", + "description": "Specification of a maven library to be installed. For example:\n`{ \"coordinates\": \"org.jsoup:jsoup:1.7.2\" }`", + "required": false + }, + "pypi": { + "name": "pypi", + "type": "any", + "description": "Specification of a PyPi library to be installed. For example:\n`{ \"package\": \"simplejson\" }`", + "required": false + }, + "requirements": { + "name": "requirements", + "type": "any", + "description": "URI of the requirements.txt file to install. 
Only Workspace paths and Unity Catalog Volumes paths are supported.\nFor example: `{ \"requirements\": \"/Workspace/path/to/requirements.txt\" }` or `{ \"requirements\" : \"/Volumes/path/to/requirements.txt\" }`", + "required": false + }, + "whl": { + "name": "whl", + "type": "any", + "description": "URI of the wheel library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.\nFor example: `{ \"whl\": \"/Workspace/path/to/library.whl\" }`, `{ \"whl\" : \"/Volumes/path/to/library.whl\" }` or\n`{ \"whl\": \"s3://my-bucket/library.whl\" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI.", + "required": false + } + } + }, + "compute.LocalFileInfo": { + "name": "LocalFileInfo", + "package": "compute", + "description": "local file info configuration.", + "fields": { + "destination": { + "name": "destination", + "type": "any", + "description": "local file destination, e.g. `file:/my/local/file.sh`", + "required": false + } + } + }, + "compute.LogAnalyticsInfo": { + "name": "LogAnalyticsInfo", + "package": "compute", + "description": "log analytics info configuration.", + "fields": { + "log_analytics_primary_key": { + "name": "log_analytics_primary_key", + "type": "any", + "description": "", + "required": false + }, + "log_analytics_workspace_id": { + "name": "log_analytics_workspace_id", + "type": "string", + "description": "", + "required": false + } + } + }, + "compute.MavenLibrary": { + "name": "MavenLibrary", + "package": "compute", + "description": "maven library configuration.", + "fields": { + "coordinates": { + "name": "coordinates", + "type": "any", + "description": "Gradle-style maven coordinates. For example: \"org.jsoup:jsoup:1.7.2\".", + "required": false + }, + "exclusions": { + "name": "exclusions", + "type": "any", + "description": "List of dependences to exclude. 
For example: `[\"slf4j:slf4j\", \"*:hadoop-client\"]`.\n\nMaven dependency exclusions:\nhttps://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html.", + "required": false + }, + "repo": { + "name": "repo", + "type": "any", + "description": "Maven repo to install the Maven package from. If omitted, both Maven Central Repository\nand Spark Packages are searched.", + "required": false + } + } + }, + "compute.PythonPyPiLibrary": { + "name": "PythonPyPiLibrary", + "package": "compute", + "description": "python py pi library configuration.", + "fields": { + "package": { + "name": "package", + "type": "any", + "description": "The name of the pypi package to install. An optional exact version specification is also\nsupported. Examples: \"simplejson\" and \"simplejson==3.8.0\".", + "required": false + }, + "repo": { + "name": "repo", + "type": "any", + "description": "The repository where the package can be found. If not specified, the default pip index is\nused.", + "required": false + } + } + }, + "compute.RCranLibrary": { + "name": "RCranLibrary", + "package": "compute", + "description": "r cran library configuration.", + "fields": { + "package": { + "name": "package", + "type": "any", + "description": "The name of the CRAN package to install.", + "required": false + }, + "repo": { + "name": "repo", + "type": "any", + "description": "The repository where the package can be found. If not specified, the default CRAN repo is used.", + "required": false + } + } + }, + "compute.RuntimeEngine": { + "name": "RuntimeEngine", + "package": "compute", + "description": "runtime engine configuration.", + "fields": {} + }, + "compute.S3StorageInfo": { + "name": "S3StorageInfo", + "package": "compute", + "description": "A storage location in Amazon S3", + "fields": { + "canned_acl": { + "name": "canned_acl", + "type": "any", + "description": "(Optional) Set canned access control list for the logs, e.g. 
`bucket-owner-full-control`.\nIf `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs.", + "required": false + }, + "destination": { + "name": "destination", + "type": "any", + "description": "S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs.", + "required": false + }, + "enable_encryption": { + "name": "enable_encryption", + "type": "bool", + "description": "(Optional) Flag to enable server side encryption, `false` by default.", + "required": false + }, + "encryption_type": { + "name": "encryption_type", + "type": "any", + "description": "(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. It will be used only when\nencryption is enabled and the default type is `sse-s3`.", + "required": false + }, + "endpoint": { + "name": "endpoint", + "type": "any", + "description": "S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used.", + "required": false + }, + "kms_key": { + "name": "kms_key", + "type": "any", + "description": "(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`.", + "required": false + }, + "region": { + "name": "region", + "type": "any", + "description": "S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. 
If both are set,\nendpoint will be used.", + "required": false + } + } + }, + "compute.VolumesStorageInfo": { + "name": "VolumesStorageInfo", + "package": "compute", + "description": "A storage location backed by UC Volumes.", + "fields": { + "destination": { + "name": "destination", + "type": "any", + "description": "UC Volumes destination, e.g. `/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh`\nor `dbfs:/Volumes/catalog/schema/vol1/init-scripts/setup-datadog.sh`", + "required": false + } + } + }, + "compute.WorkloadType": { + "name": "WorkloadType", + "package": "compute", + "description": "Cluster Attributes showing for clusters workload types.", + "fields": { + "clients": { + "name": "clients", + "type": "any", + "description": "Defines what type of clients can use the cluster. E.g. Notebooks, Jobs", + "required": false + } + } + }, + "compute.WorkspaceStorageInfo": { + "name": "WorkspaceStorageInfo", + "package": "compute", + "description": "A storage location in Workspace Filesystem (WSFS)", + "fields": { + "destination": { + "name": "destination", + "type": "any", + "description": "wsfs destination, e.g. `workspace:/cluster-init-scripts/setup-datadog.sh`", + "required": false + } + } + }, + "dashboards.LifecycleState": { + "name": "LifecycleState", + "package": "dashboards", + "description": "lifecycle state configuration.", + "fields": {} + }, + "database.CustomTag": { + "name": "CustomTag", + "package": "database", + "description": "custom tag configuration.", + "fields": { + "key": { + "name": "key", + "type": "any", + "description": "The key of the custom tag.", + "required": false + }, + "value": { + "name": "value", + "type": "any", + "description": "The value of the custom tag.", + "required": false + } + } + }, + "database.DatabaseInstanceRef": { + "name": "DatabaseInstanceRef", + "package": "database", + "description": "DatabaseInstanceRef is a reference to a database instance. 
It is used in the\nDatabaseInstance object to refer to the parent instance of an instance and\nto refer the child instances of an instance.\nTo specify as a parent instance during creation of an instance,\nthe lsn and branch_time fields are optional. If not specified, the child\ninstance will be created from the latest lsn of the parent.\nIf both lsn and branch_time are specified, the lsn will be used to create\nthe child instance.", + "fields": { + "branch_time": { + "name": "branch_time", + "type": "string (timestamp)", + "description": "Branch time of the ref database instance.\nFor a parent ref instance, this is the point in time on the parent instance from which the\ninstance was created.\nFor a child ref instance, this is the point in time on the instance from which the child\ninstance was created.\nInput: For specifying the point in time to create a child instance. Optional.\nOutput: Only populated if provided as input to create a child instance.", + "required": false + }, + "effective_lsn": { + "name": "effective_lsn", + "type": "any", + "description": "For a parent ref instance, this is the LSN on the parent instance from which the\ninstance was created.\nFor a child ref instance, this is the LSN on the instance from which the child instance\nwas created.", + "required": false, + "output_only": true + }, + "lsn": { + "name": "lsn", + "type": "any", + "description": "User-specified WAL LSN of the ref database instance.\n\nInput: For specifying the WAL LSN to create a child instance. 
Optional.\nOutput: Only populated if provided as input to create a child instance.", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "Name of the ref database instance.", + "required": false + }, + "uid": { + "name": "uid", + "type": "any", + "description": "Id of the ref database instance.", + "required": false, + "output_only": true + } + } + }, + "database.DatabaseInstanceState": { + "name": "DatabaseInstanceState", + "package": "database", + "description": "database instance state configuration.", + "fields": {} + }, + "database.DeltaTableSyncInfo": { + "name": "DeltaTableSyncInfo", + "package": "database", + "description": "delta table sync info configuration.", + "fields": { + "delta_commit_timestamp": { + "name": "delta_commit_timestamp", + "type": "any", + "description": "The timestamp when the above Delta version was committed in the source Delta table.\nNote: This is the Delta commit time, not the time the data was written to the synced table.", + "required": false, + "output_only": true + }, + "delta_commit_version": { + "name": "delta_commit_version", + "type": "any", + "description": "The Delta Lake commit version that was last successfully synced.", + "required": false, + "output_only": true + } + } + }, + "database.NewPipelineSpec": { + "name": "NewPipelineSpec", + "package": "database", + "description": "Custom fields that user can set for pipeline while creating SyncedDatabaseTable.\nNote that other fields of pipeline are still inferred by table def internally", + "fields": { + "budget_policy_id": { + "name": "budget_policy_id", + "type": "string", + "description": "Budget policy to set on the newly created pipeline.", + "required": false + }, + "storage_catalog": { + "name": "storage_catalog", + "type": "any", + "description": "This field needs to be specified if the destination catalog is a managed postgres catalog.\n\nUC catalog for the pipeline to store intermediate files (checkpoints, event logs 
etc).\nThis needs to be a standard catalog where the user has permissions to create Delta tables.", + "required": false + }, + "storage_schema": { + "name": "storage_schema", + "type": "any", + "description": "This field needs to be specified if the destination catalog is a managed postgres catalog.\n\nUC schema for the pipeline to store intermediate files (checkpoints, event logs etc).\nThis needs to be in the standard catalog where the user has permissions to create Delta tables.", + "required": false + } + } + }, + "database.ProvisioningInfoState": { + "name": "ProvisioningInfoState", + "package": "database", + "description": "provisioning info state configuration.", + "fields": {} + }, + "database.ProvisioningPhase": { + "name": "ProvisioningPhase", + "package": "database", + "description": "provisioning phase configuration.", + "fields": {} + }, + "database.SyncedTableContinuousUpdateStatus": { + "name": "SyncedTableContinuousUpdateStatus", + "package": "database", + "description": "Detailed status of a synced table. Shown if the synced table is in the SYNCED_CONTINUOUS_UPDATE\nor the SYNCED_UPDATING_PIPELINE_RESOURCES state.", + "fields": { + "initial_pipeline_sync_progress": { + "name": "initial_pipeline_sync_progress", + "type": "any", + "description": "Progress of the initial data synchronization.", + "required": false, + "output_only": true + }, + "last_processed_commit_version": { + "name": "last_processed_commit_version", + "type": "any", + "description": "The last source table Delta version that was successfully synced to the synced table.", + "required": false, + "output_only": true + }, + "timestamp": { + "name": "timestamp", + "type": "any", + "description": "The end timestamp of the last time any data was synchronized from the source table to the synced\ntable. 
This is when the data is available in the synced table.", + "required": false, + "output_only": true + } + } + }, + "database.SyncedTableFailedStatus": { + "name": "SyncedTableFailedStatus", + "package": "database", + "description": "Detailed status of a synced table. Shown if the synced table is in the OFFLINE_FAILED or the\nSYNCED_PIPELINE_FAILED state.", + "fields": { + "last_processed_commit_version": { + "name": "last_processed_commit_version", + "type": "any", + "description": "The last source table Delta version that was successfully synced to the synced table.\nThe last source table Delta version that was synced to the synced table.\nOnly populated if the table is still\nsynced and available for serving.", + "required": false, + "output_only": true + }, + "timestamp": { + "name": "timestamp", + "type": "any", + "description": "The end timestamp of the last time any data was synchronized from the source table to the synced\ntable. Only populated if the table is still synced and available for serving.", + "required": false, + "output_only": true + } + } + }, + "database.SyncedTablePipelineProgress": { + "name": "SyncedTablePipelineProgress", + "package": "database", + "description": "Progress information of the Synced Table data synchronization pipeline.", + "fields": { + "estimated_completion_time_seconds": { + "name": "estimated_completion_time_seconds", + "type": "int", + "description": "The estimated time remaining to complete this update in seconds.", + "required": false, + "output_only": true + }, + "latest_version_currently_processing": { + "name": "latest_version_currently_processing", + "type": "any", + "description": "The source table Delta version that was last processed by the pipeline. 
The pipeline may not\nhave completely processed this version yet.", + "required": false, + "output_only": true + }, + "provisioning_phase": { + "name": "provisioning_phase", + "type": "any", + "description": "The current phase of the data synchronization pipeline.", + "required": false, + "output_only": true + }, + "sync_progress_completion": { + "name": "sync_progress_completion", + "type": "any", + "description": "The completion ratio of this update. This is a number between 0 and 1.", + "required": false, + "output_only": true + }, + "synced_row_count": { + "name": "synced_row_count", + "type": "int", + "description": "The number of rows that have been synced in this update.", + "required": false, + "output_only": true + }, + "total_row_count": { + "name": "total_row_count", + "type": "int", + "description": "The total number of rows that need to be synced in this update. This number may be an estimate.", + "required": false, + "output_only": true + } + } + }, + "database.SyncedTablePosition": { + "name": "SyncedTablePosition", + "package": "database", + "description": "synced table position configuration.", + "fields": { + "delta_table_sync_info": { + "name": "delta_table_sync_info", + "type": "any", + "description": "", + "required": false, + "output_only": true + }, + "sync_end_timestamp": { + "name": "sync_end_timestamp", + "type": "any", + "description": "The end timestamp of the most recent successful synchronization.\nThis is the time when the data is available in the synced table.", + "required": false, + "output_only": true + }, + "sync_start_timestamp": { + "name": "sync_start_timestamp", + "type": "any", + "description": "The starting timestamp of the most recent successful synchronization from the source table\nto the destination (synced) table.\nNote this is the starting timestamp of the sync operation, not the end time.\nE.g., for a batch, this is the time when the sync operation started.", + "required": false, + "output_only": true + } + } + }, + 
"database.SyncedTableProvisioningStatus": { + "name": "SyncedTableProvisioningStatus", + "package": "database", + "description": "Detailed status of a synced table. Shown if the synced table is in the\nPROVISIONING_PIPELINE_RESOURCES or the PROVISIONING_INITIAL_SNAPSHOT state.", + "fields": { + "initial_pipeline_sync_progress": { + "name": "initial_pipeline_sync_progress", + "type": "any", + "description": "Details about initial data synchronization. Only populated when in the\nPROVISIONING_INITIAL_SNAPSHOT state.", + "required": false, + "output_only": true + } + } + }, + "database.SyncedTableSchedulingPolicy": { + "name": "SyncedTableSchedulingPolicy", + "package": "database", + "description": "synced table scheduling policy configuration.", + "fields": {} + }, + "database.SyncedTableSpec": { + "name": "SyncedTableSpec", + "package": "database", + "description": "Specification of a synced database table.", + "fields": { + "create_database_objects_if_missing": { + "name": "create_database_objects_if_missing", + "type": "any", + "description": "If true, the synced table's logical database and schema resources in PG\nwill be created if they do not already exist.", + "required": false + }, + "existing_pipeline_id": { + "name": "existing_pipeline_id", + "type": "string", + "description": "At most one of existing_pipeline_id and new_pipeline_spec should be defined.\n\nIf existing_pipeline_id is defined, the synced table will be bin packed into the existing pipeline\nreferenced. This avoids creating a new pipeline and allows sharing existing compute.\nIn this case, the scheduling_policy of this synced table must match the scheduling policy of the existing pipeline.", + "required": false + }, + "new_pipeline_spec": { + "name": "new_pipeline_spec", + "type": "any", + "description": "At most one of existing_pipeline_id and new_pipeline_spec should be defined.\n\nIf new_pipeline_spec is defined, a new pipeline is created for this synced table. 
The location pointed to is used\nto store intermediate files (checkpoints, event logs etc). The caller must have write permissions to create Delta\ntables in the specified catalog and schema. Again, note this requires write permissions, whereas the source table\nonly requires read permissions.", + "required": false + }, + "primary_key_columns": { + "name": "primary_key_columns", + "type": "any", + "description": "Primary Key columns to be used for data insert/update in the destination.", + "required": false + }, + "scheduling_policy": { + "name": "scheduling_policy", + "type": "any", + "description": "Scheduling policy of the underlying pipeline.", + "required": false + }, + "source_table_full_name": { + "name": "source_table_full_name", + "type": "string", + "description": "Three-part (catalog, schema, table) name of the source Delta table.", + "required": false + }, + "timeseries_key": { + "name": "timeseries_key", + "type": "any", + "description": "Time series key to deduplicate (tie-break) rows with the same primary key.", + "required": false + } + } + }, + "database.SyncedTableState": { + "name": "SyncedTableState", + "package": "database", + "description": "The state of a synced table.", + "fields": {} + }, + "database.SyncedTableStatus": { + "name": "SyncedTableStatus", + "package": "database", + "description": "Status of a synced table.", + "fields": { + "continuous_update_status": { + "name": "continuous_update_status", + "type": "any", + "description": "Detailed status of a synced table. Shown if the synced table is in the SYNCED_CONTINUOUS_UPDATE\nor the SYNCED_UPDATING_PIPELINE_RESOURCES state.", + "required": false + }, + "detailed_state": { + "name": "detailed_state", + "type": "any", + "description": "The state of the synced table.", + "required": false, + "output_only": true + }, + "failed_status": { + "name": "failed_status", + "type": "any", + "description": "Detailed status of a synced table. 
Shown if the synced table is in the OFFLINE_FAILED or the\nSYNCED_PIPELINE_FAILED state.", + "required": false + }, + "last_sync": { + "name": "last_sync", + "type": "any", + "description": "Summary of the last successful synchronization from source to destination.\n\nWill always be present if there has been a successful sync. Even if the most recent syncs have failed.\n\nLimitation:\nThe only exception is if the synced table is doing a FULL REFRESH, then the last sync information\nwill not be available until the full refresh is complete. This limitation will be addressed in a future version.\n\nThis top-level field is a convenience for consumers who want easy access to last sync information\nwithout having to traverse detailed_status.", + "required": false, + "output_only": true + }, + "message": { + "name": "message", + "type": "any", + "description": "A text description of the current state of the synced table.", + "required": false, + "output_only": true + }, + "pipeline_id": { + "name": "pipeline_id", + "type": "string", + "description": "ID of the associated pipeline. The pipeline ID may have been provided by the client\n(in the case of bin packing), or generated by the server (when creating a new pipeline).", + "required": false, + "output_only": true + }, + "provisioning_status": { + "name": "provisioning_status", + "type": "any", + "description": "Detailed status of a synced table. Shown if the synced table is in the\nPROVISIONING_PIPELINE_RESOURCES or the PROVISIONING_INITIAL_SNAPSHOT state.", + "required": false + }, + "triggered_update_status": { + "name": "triggered_update_status", + "type": "any", + "description": "Detailed status of a synced table. Shown if the synced table is in the SYNCED_TRIGGERED_UPDATE\nor the SYNCED_NO_PENDING_UPDATE state.", + "required": false + } + } + }, + "database.SyncedTableTriggeredUpdateStatus": { + "name": "SyncedTableTriggeredUpdateStatus", + "package": "database", + "description": "Detailed status of a synced table. 
Shown if the synced table is in the SYNCED_TRIGGERED_UPDATE\nor the SYNCED_NO_PENDING_UPDATE state.", + "fields": { + "last_processed_commit_version": { + "name": "last_processed_commit_version", + "type": "any", + "description": "The last source table Delta version that was successfully synced to the synced table.", + "required": false, + "output_only": true + }, + "timestamp": { + "name": "timestamp", + "type": "any", + "description": "The end timestamp of the last time any data was synchronized from the source table to the synced\ntable. This is when the data is available in the synced table.", + "required": false, + "output_only": true + }, + "triggered_update_progress": { + "name": "triggered_update_progress", + "type": "any", + "description": "Progress of the active data synchronization pipeline.", + "required": false, + "output_only": true + } + } + }, + "jobs.AuthenticationMethod": { + "name": "AuthenticationMethod", + "package": "jobs", + "description": "authentication method configuration.", + "fields": {} + }, + "jobs.CleanRoomsNotebookTask": { + "name": "CleanRoomsNotebookTask", + "package": "jobs", + "description": "Clean Rooms notebook task for V1 Clean Room service (GA).\nReplaces the deprecated CleanRoomNotebookTask (defined above) which was for V0 service.", + "fields": { + "clean_room_name": { + "name": "clean_room_name", + "type": "string", + "description": "The clean room that the notebook belongs to.", + "required": false + }, + "etag": { + "name": "etag", + "type": "any", + "description": "Checksum to validate the freshness of the notebook resource (i.e. 
the notebook being run is the latest version).\nIt can be fetched by calling the :method:cleanroomassets/get API.", + "required": false + }, + "notebook_base_parameters": { + "name": "notebook_base_parameters", + "type": "any", + "description": "Base parameters to be used for the clean room notebook job.", + "required": false + }, + "notebook_name": { + "name": "notebook_name", + "type": "string", + "description": "Name of the notebook being run.", + "required": false + } + } + }, + "jobs.ComputeConfig": { + "name": "ComputeConfig", + "package": "jobs", + "description": "compute config configuration.", + "fields": { + "gpu_node_pool_id": { + "name": "gpu_node_pool_id", + "type": "string", + "description": "ID of the GPU pool to use.", + "required": false + }, + "gpu_type": { + "name": "gpu_type", + "type": "any", + "description": "GPU type.", + "required": false + }, + "num_gpus": { + "name": "num_gpus", + "type": "any", + "description": "Number of GPUs.", + "required": false + } + } + }, + "jobs.Condition": { + "name": "Condition", + "package": "jobs", + "description": "condition configuration.", + "fields": {} + }, + "jobs.ConditionTask": { + "name": "ConditionTask", + "package": "jobs", + "description": "condition task configuration.", + "fields": { + "left": { + "name": "left", + "type": "any", + "description": "The left operand of the condition task. Can be either a string value or a job state or parameter reference.", + "required": false + }, + "op": { + "name": "op", + "type": "any", + "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. 
`“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.", + "required": false + }, + "right": { + "name": "right", + "type": "any", + "description": "The right operand of the condition task. Can be either a string value or a job state or parameter reference.", + "required": false + } + } + }, + "jobs.ConditionTaskOp": { + "name": "ConditionTaskOp", + "package": "jobs", + "description": "* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” \u003e= “12”` will evaluate to `true`, `“10.0” \u003e= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.", + "fields": {} + }, + "jobs.Continuous": { + "name": "Continuous", + "package": "jobs", + "description": "continuous configuration.", + "fields": { + "pause_status": { + "name": "pause_status", + "type": "any", + "description": "Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED.", + "required": false + }, + "task_retry_mode": { + "name": "task_retry_mode", + "type": "any", + "description": "Indicate whether the continuous job is applying task level retries or not. 
Defaults to NEVER.", + "required": false + } + } + }, + "jobs.CronSchedule": { + "name": "CronSchedule", + "package": "jobs", + "description": "cron schedule configuration.", + "fields": { + "pause_status": { + "name": "pause_status", + "type": "any", + "description": "Indicate whether this schedule is paused or not.", + "required": false + }, + "quartz_cron_expression": { + "name": "quartz_cron_expression", + "type": "any", + "description": "A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required.", + "required": false + }, + "timezone_id": { + "name": "timezone_id", + "type": "string", + "description": "A Java timezone ID. The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required.", + "required": false + } + } + }, + "jobs.DashboardTask": { + "name": "DashboardTask", + "package": "jobs", + "description": "Configures the Lakeview Dashboard job task type.", + "fields": { + "dashboard_id": { + "name": "dashboard_id", + "type": "string", + "description": "The identifier of the dashboard to refresh.", + "required": false + }, + "subscription": { + "name": "subscription", + "type": "any", + "description": "Optional: subscription configuration for sending the dashboard snapshot.", + "required": false + }, + "warehouse_id": { + "name": "warehouse_id", + "type": "string", + "description": "Optional: The warehouse id to execute the dashboard with for the schedule.\nIf not specified, the default warehouse of the dashboard will be used.", + "required": false + } + } + }, + "jobs.DbtCloudTask": { + "name": "DbtCloudTask", + "package": "jobs", + "description": "Deprecated in favor of DbtPlatformTask", + "fields": { + "connection_resource_name": { + "name": "connection_resource_name", + 
"type": "string", + "description": "The resource name of the UC connection that authenticates the dbt Cloud for this task", + "required": false + }, + "dbt_cloud_job_id": { + "name": "dbt_cloud_job_id", + "type": "string", + "description": "Id of the dbt Cloud job to be triggered", + "required": false + } + } + }, + "jobs.DbtPlatformTask": { + "name": "DbtPlatformTask", + "package": "jobs", + "description": "dbt platform task configuration.", + "fields": { + "connection_resource_name": { + "name": "connection_resource_name", + "type": "string", + "description": "The resource name of the UC connection that authenticates the dbt platform for this task", + "required": false + }, + "dbt_platform_job_id": { + "name": "dbt_platform_job_id", + "type": "string", + "description": "Id of the dbt platform job to be triggered. Specified as a string for maximum compatibility with clients.", + "required": false + } + } + }, + "jobs.DbtTask": { + "name": "DbtTask", + "package": "jobs", + "description": "dbt task configuration.", + "fields": { + "catalog": { + "name": "catalog", + "type": "any", + "description": "Optional name of the catalog to use. The value is the top level in the 3-level namespace of Unity Catalog (catalog / schema / relation). The catalog value can only be specified if a warehouse_id is specified. Requires dbt-databricks \u003e= 1.1.1.", + "required": false + }, + "commands": { + "name": "commands", + "type": "any", + "description": "A list of dbt commands to execute. All commands must start with `dbt`. This parameter must not be empty. A maximum of up to 10 commands can be provided.", + "required": false + }, + "profiles_directory": { + "name": "profiles_directory", + "type": "any", + "description": "Optional (relative) path to the profiles directory. Can only be specified if no warehouse_id is specified. 
If no warehouse_id is specified and this folder is unset, the root directory is used.", + "required": false + }, + "project_directory": { + "name": "project_directory", + "type": "any", + "description": "Path to the project directory. Optional for Git sourced tasks, in which\ncase if no value is provided, the root of the Git repository is used.", + "required": false + }, + "schema": { + "name": "schema", + "type": "any", + "description": "Optional schema to write to. This parameter is only used when a warehouse_id is also provided. If not provided, the `default` schema is used.", + "required": false + }, + "source": { + "name": "source", + "type": "any", + "description": "Optional location type of the project directory. When set to `WORKSPACE`, the project will be retrieved\nfrom the local Databricks workspace. When set to `GIT`, the project will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: Project is located in Databricks workspace.\n* `GIT`: Project is located in cloud Git provider.", + "required": false + }, + "warehouse_id": { + "name": "warehouse_id", + "type": "string", + "description": "ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the profile and connection details to dbt. It can be overridden on a per-command basis by using the `--profiles-dir` command line argument.", + "required": false + } + } + }, + "jobs.FileArrivalTriggerConfiguration": { + "name": "FileArrivalTriggerConfiguration", + "package": "jobs", + "description": "file arrival trigger configuration.", + "fields": { + "min_time_between_triggers_seconds": { + "name": "min_time_between_triggers_seconds", + "type": "int", + "description": "If set, the trigger starts a run only after the specified amount of time passed since\nthe last time the trigger fired. 
The minimum allowed value is 60 seconds", + "required": false + }, + "url": { + "name": "url", + "type": "any", + "description": "URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location.", + "required": false + }, + "wait_after_last_change_seconds": { + "name": "wait_after_last_change_seconds", + "type": "int", + "description": "If set, the trigger starts a run only after no file activity has occurred for the specified amount of time.\nThis makes it possible to wait for a batch of incoming files to arrive before triggering a run. The\nminimum allowed value is 60 seconds.", + "required": false + } + } + }, + "jobs.ForEachTask": { + "name": "ForEachTask", + "package": "jobs", + "description": "for each task configuration.", + "fields": { + "concurrency": { + "name": "concurrency", + "type": "any", + "description": "An optional maximum allowed number of concurrent runs of the task.\nSet this value if you want to be able to execute multiple runs of the task concurrently.", + "required": false + }, + "inputs": { + "name": "inputs", + "type": "any", + "description": "Array for task to iterate on. This can be a JSON string or a reference to\nan array parameter.", + "required": false + }, + "task": { + "name": "task", + "type": "any", + "description": "Configuration for the task that will be run for each element in the array", + "required": false + } + } + }, + "jobs.Format": { + "name": "Format", + "package": "jobs", + "description": "format configuration.", + "fields": {} + }, + "jobs.GenAiComputeTask": { + "name": "GenAiComputeTask", + "package": "jobs", + "description": "gen ai compute task configuration.", + "fields": { + "command": { + "name": "command", + "type": "any", + "description": "Command launcher to run the actual script, e.g. 
bash, python etc.", + "required": false + }, + "compute": { + "name": "compute", + "type": "any", + "description": "", + "required": false + }, + "dl_runtime_image": { + "name": "dl_runtime_image", + "type": "any", + "description": "Runtime image", + "required": false + }, + "mlflow_experiment_name": { + "name": "mlflow_experiment_name", + "type": "string", + "description": "Optional string containing the name of the MLflow experiment to log the run to. If name is not\nfound, backend will create the mlflow experiment using the name.", + "required": false + }, + "source": { + "name": "source", + "type": "any", + "description": "Optional location type of the training script. When set to `WORKSPACE`, the script will be retrieved from the local Databricks workspace. When set to `GIT`, the script will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n* `WORKSPACE`: Script is located in Databricks workspace.\n* `GIT`: Script is located in cloud Git provider.", + "required": false + }, + "training_script_path": { + "name": "training_script_path", + "type": "string", + "description": "The training script file path to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. 
This field is required.", + "required": false + }, + "yaml_parameters": { + "name": "yaml_parameters", + "type": "any", + "description": "Optional string containing model parameters passed to the training script in yaml format.\nIf present, then the content in yaml_parameters_file_path will be ignored.", + "required": false + }, + "yaml_parameters_file_path": { + "name": "yaml_parameters_file_path", + "type": "string", + "description": "Optional path to a YAML file containing model parameters passed to the training script.", + "required": false + } + } + }, + "jobs.GitProvider": { + "name": "GitProvider", + "package": "jobs", + "description": "git provider configuration.", + "fields": {} + }, + "jobs.GitSnapshot": { + "name": "GitSnapshot", + "package": "jobs", + "description": "Read-only state of the remote repository at the time the job was run. This field is only included on job runs.", + "fields": { + "used_commit": { + "name": "used_commit", + "type": "any", + "description": "Commit that was used to execute the run. If git_branch was specified, this points to the HEAD of the branch at the time of the run; if git_tag was specified, this points to the commit the tag points to.", + "required": false + } + } + }, + "jobs.GitSource": { + "name": "GitSource", + "package": "jobs", + "description": "An optional specification for a remote Git repository containing the source code used by tasks. Version-controlled source code is supported by notebook, dbt, Python script, and SQL File tasks.\n\nIf `git_source` is set, these tasks retrieve the file from the remote repository by default. However, this behavior can be overridden by setting `source` to `WORKSPACE` on the task.\n\nNote: dbt and SQL File tasks support only version-controlled sources. If dbt or SQL File tasks are used, `git_source` must be defined on the job.", + "fields": { + "git_branch": { + "name": "git_branch", + "type": "any", + "description": "Name of the branch to be checked out and used by this job. 
This field cannot be specified in conjunction with git_tag or git_commit.", + "required": false + }, + "git_commit": { + "name": "git_commit", + "type": "any", + "description": "Commit to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_tag.", + "required": false + }, + "git_provider": { + "name": "git_provider", + "type": "any", + "description": "Unique identifier of the service used to host the Git repository. The value is case insensitive.", + "required": false + }, + "git_snapshot": { + "name": "git_snapshot", + "type": "any", + "description": "Read-only state of the remote repository at the time the job was run. This field is only included on job runs.", + "required": false + }, + "git_tag": { + "name": "git_tag", + "type": "any", + "description": "Name of the tag to be checked out and used by this job. This field cannot be specified in conjunction with git_branch or git_commit.", + "required": false + }, + "git_url": { + "name": "git_url", + "type": "string", + "description": "URL of the repository to be cloned by this job.", + "required": false + }, + "job_source": { + "name": "job_source", + "type": "any", + "description": "The source of the job specification in the remote repository when the job is source controlled.", + "required": false, + "deprecated": true + } + } + }, + "jobs.JobCluster": { + "name": "JobCluster", + "package": "jobs", + "description": "job cluster configuration.", + "fields": { + "job_cluster_key": { + "name": "job_cluster_key", + "type": "any", + "description": "A unique name for the job cluster. 
This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution.", + "required": false + }, + "new_cluster": { + "name": "new_cluster", + "type": "any", + "description": "If new_cluster, a description of a cluster that is created for each task.", + "required": false + } + } + }, + "jobs.JobDeployment": { + "name": "JobDeployment", + "package": "jobs", + "description": "job deployment configuration.", + "fields": { + "kind": { + "name": "kind", + "type": "any", + "description": "The kind of deployment that manages the job.\n\n* `BUNDLE`: The job is managed by Databricks Asset Bundle.", + "required": false + }, + "metadata_file_path": { + "name": "metadata_file_path", + "type": "string", + "description": "Path of the file that contains deployment metadata.", + "required": false + } + } + }, + "jobs.JobDeploymentKind": { + "name": "JobDeploymentKind", + "package": "jobs", + "description": "* `BUNDLE`: The job is managed by Databricks Asset Bundle.", + "fields": {} + }, + "jobs.JobEditMode": { + "name": "JobEditMode", + "package": "jobs", + "description": "Edit mode of the job.\n\n* `UI_LOCKED`: The job is in a locked UI state and cannot be modified.\n* `EDITABLE`: The job is in an editable state and can be modified.", + "fields": {} + }, + "jobs.JobEmailNotifications": { + "name": "JobEmailNotifications", + "package": "jobs", + "description": "job email notifications configuration.", + "fields": { + "no_alert_for_skipped_runs": { + "name": "no_alert_for_skipped_runs", + "type": "any", + "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped.\nThis field is `deprecated`. 
Please use the `notification_settings.no_alert_for_skipped_runs` field.", + "required": false, + "deprecated": true + }, + "on_duration_warning_threshold_exceeded": { + "name": "on_duration_warning_threshold_exceeded", + "type": "any", + "description": "A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.", + "required": false + }, + "on_failure": { + "name": "on_failure", + "type": "any", + "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", + "required": false + }, + "on_start": { + "name": "on_start", + "type": "any", + "description": "A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "required": false + }, + "on_streaming_backlog_exceeded": { + "name": "on_streaming_backlog_exceeded", + "type": "any", + "description": "A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. 
If the issue persists, notifications are resent every 30 minutes.", + "required": false + }, + "on_success": { + "name": "on_success", + "type": "any", + "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "required": false + } + } + }, + "jobs.JobEnvironment": { + "name": "JobEnvironment", + "package": "jobs", + "description": "job environment configuration.", + "fields": { + "environment_key": { + "name": "environment_key", + "type": "any", + "description": "The key of an environment. It has to be unique within a job.", + "required": false + }, + "spec": { + "name": "spec", + "type": "any", + "description": "The environment entity used to preserve serverless environment side panel, jobs' environment for non-notebook task, and DLT's environment for classic and serverless pipelines.\nIn this minimal environment spec, only pip dependencies are supported.", + "required": false + } + } + }, + "jobs.JobNotificationSettings": { + "name": "JobNotificationSettings", + "package": "jobs", + "description": "Configuration settings for job notification.", + "fields": { + "no_alert_for_canceled_runs": { + "name": "no_alert_for_canceled_runs", + "type": "any", + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled.", + "required": false + }, + "no_alert_for_skipped_runs": { + "name": "no_alert_for_skipped_runs", + "type": "any", + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped.", + "required": false + } + } + }, + "jobs.JobParameterDefinition": { + "name": "JobParameterDefinition", + "package": "jobs", + "description": "job parameter definition configuration.", + "fields": { + 
"default": { + "name": "default", + "type": "any", + "description": "Default value of the parameter.", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "The name of the defined parameter. May only contain alphanumeric characters, `_`, `-`, and `.`", + "required": false + } + } + }, + "jobs.JobRunAs": { + "name": "JobRunAs", + "package": "jobs", + "description": "Write-only setting. Specifies the user or service principal that the job runs as. If not specified, the job runs as the user who created the job.\n\nEither `user_name` or `service_principal_name` should be specified. If not, an error is thrown.", + "fields": { + "group_name": { + "name": "group_name", + "type": "string", + "description": "Group name of an account group assigned to the workspace. Setting this field requires being a member of the group.", + "required": false + }, + "service_principal_name": { + "name": "service_principal_name", + "type": "string", + "description": "Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role.", + "required": false + }, + "user_name": { + "name": "user_name", + "type": "string", + "description": "The email of an active workspace user. Non-admin users can only set this field to their own email.", + "required": false + } + } + }, + "jobs.JobSource": { + "name": "JobSource", + "package": "jobs", + "description": "The source of the job specification in the remote repository when the job is source controlled.", + "fields": { + "dirty_state": { + "name": "dirty_state", + "type": "any", + "description": "Dirty state indicates the job is not fully synced with the job specification in the remote repository.\n\nPossible values are:\n* `NOT_SYNCED`: The job is not yet synced with the remote job specification. 
Import the remote job specification from UI to make the job fully synced.\n* `DISCONNECTED`: The job is temporarily disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced.", + "required": false + }, + "import_from_git_branch": { + "name": "import_from_git_branch", + "type": "any", + "description": "Name of the branch which the job is imported from.", + "required": false + }, + "job_config_path": { + "name": "job_config_path", + "type": "string", + "description": "Path of the job YAML file that contains the job specification.", + "required": false + } + } + }, + "jobs.JobSourceDirtyState": { + "name": "JobSourceDirtyState", + "package": "jobs", + "description": "Dirty state indicates the job is not fully synced with the job specification\nin the remote repository.\n\nPossible values are:\n* `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced.\n* `DISCONNECTED`: The job is temporarily disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced.", + "fields": {} + }, + "jobs.JobsHealthMetric": { + "name": "JobsHealthMetric", + "package": "jobs", + "description": "Specifies the health metric that is being evaluated for a particular health rule.\n\n* `RUN_DURATION_SECONDS`: Expected total time for a run in seconds.\n* `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. 
This metric is in Public Preview.\n* `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. This metric is in Public Preview.", + "fields": {} + }, + "jobs.JobsHealthOperator": { + "name": "JobsHealthOperator", + "package": "jobs", + "description": "Specifies the operator used to compare the health metric value with the specified threshold.", + "fields": {} + }, + "jobs.JobsHealthRule": { + "name": "JobsHealthRule", + "package": "jobs", + "description": "jobs health rule configuration.", + "fields": { + "metric": { + "name": "metric", + "type": "any", + "description": "Specifies the health metric that is being evaluated for a particular health rule.\n\n* `RUN_DURATION_SECONDS`: Expected total time for a run in seconds.\n* `STREAMING_BACKLOG_BYTES`: An estimate of the maximum bytes of data waiting to be consumed across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_RECORDS`: An estimate of the maximum offset lag across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_SECONDS`: An estimate of the maximum consumer delay across all streams. This metric is in Public Preview.\n* `STREAMING_BACKLOG_FILES`: An estimate of the maximum number of outstanding files across all streams. 
This metric is in Public Preview.", + "required": false + }, + "op": { + "name": "op", + "type": "any", + "description": "Specifies the operator used to compare the health metric value with the specified threshold.", + "required": false + }, + "value": { + "name": "value", + "type": "any", + "description": "Specifies the threshold value that the health metric should obey to satisfy the health rule.", + "required": false + } + } + }, + "jobs.JobsHealthRules": { + "name": "JobsHealthRules", + "package": "jobs", + "description": "An optional set of health rules that can be defined for this job.", + "fields": { + "rules": { + "name": "rules", + "type": "any", + "description": "", + "required": false + } + } + }, + "jobs.ModelTriggerConfiguration": { + "name": "ModelTriggerConfiguration", + "package": "jobs", + "description": "model trigger configuration configuration.", + "fields": { + "aliases": { + "name": "aliases", + "type": "any", + "description": "Aliases of the model versions to monitor. Can only be used in conjunction with condition MODEL_ALIAS_SET.", + "required": false + }, + "condition": { + "name": "condition", + "type": "any", + "description": "The condition based on which to trigger a job run.", + "required": false + }, + "min_time_between_triggers_seconds": { + "name": "min_time_between_triggers_seconds", + "type": "int", + "description": "If set, the trigger starts a run only after the specified amount of time has passed since\nthe last time the trigger fired. 
The minimum allowed value is 60 seconds.", + "required": false + }, + "securable_name": { + "name": "securable_name", + "type": "string", + "description": "Name of the securable to monitor (\"mycatalog.myschema.mymodel\" in the case of model-level triggers,\n\"mycatalog.myschema\" in the case of schema-level triggers) or empty in the case of metastore-level triggers.", + "required": false + }, + "wait_after_last_change_seconds": { + "name": "wait_after_last_change_seconds", + "type": "int", + "description": "If set, the trigger starts a run only after no model updates have occurred for the specified time\nand can be used to wait for a series of model updates before triggering a run. The\nminimum allowed value is 60 seconds.", + "required": false + } + } + }, + "jobs.ModelTriggerConfigurationCondition": { + "name": "ModelTriggerConfigurationCondition", + "package": "jobs", + "description": "model trigger configuration condition configuration.", + "fields": {} + }, + "jobs.NotebookTask": { + "name": "NotebookTask", + "package": "jobs", + "description": "notebook task configuration.", + "fields": { + "base_parameters": { + "name": "base_parameters", + "type": "any", + "description": "Base parameters to be used for each run of this job. If the run is initiated by a call to :method:jobs/runNow\nwith parameters specified, the two parameters maps are merged. 
If the same key is specified in\n`base_parameters` and in `run-now`, the value from `run-now` is used.\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nIf the notebook takes a parameter that is not specified in the job’s `base_parameters` or the `run-now` override parameters,\nthe default value from the notebook is used.\n\nRetrieve these parameters in a notebook using [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html#dbutils-widgets).\n\nThe JSON representation of this field cannot exceed 1MB.", + "required": false + }, + "notebook_path": { + "name": "notebook_path", + "type": "string", + "description": "The path of the notebook to be run in the Databricks workspace or remote repository.\nFor notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.\nFor notebooks stored in a remote repository, the path must be relative. This field is required.", + "required": false + }, + "source": { + "name": "source", + "type": "any", + "description": "Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved from the local Databricks workspace. When set to `GIT`, the notebook will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n* `WORKSPACE`: Notebook is located in Databricks workspace.\n* `GIT`: Notebook is located in cloud Git provider.", + "required": false + }, + "warehouse_id": { + "name": "warehouse_id", + "type": "string", + "description": "Optional `warehouse_id` to run the notebook on a SQL warehouse. 
Classic SQL warehouses are NOT supported, please use serverless or pro SQL warehouses.\n\nNote that SQL warehouses only support SQL cells; if the notebook contains non-SQL cells, the run will fail.", + "required": false + } + } + }, + "jobs.PauseStatus": { + "name": "PauseStatus", + "package": "jobs", + "description": "pause status configuration.", + "fields": {} + }, + "jobs.PerformanceTarget": { + "name": "PerformanceTarget", + "package": "jobs", + "description": "PerformanceTarget defines how performant (lower latency) or cost efficient the execution of run on serverless compute should be.\nThe performance mode on the job or pipeline should map to a performance setting that is passed to Cluster Manager\n(see cluster-common PerformanceTarget).", + "fields": {} + }, + "jobs.PeriodicTriggerConfiguration": { + "name": "PeriodicTriggerConfiguration", + "package": "jobs", + "description": "periodic trigger configuration configuration.", + "fields": { + "interval": { + "name": "interval", + "type": "any", + "description": "The interval at which the trigger should run.", + "required": false + }, + "unit": { + "name": "unit", + "type": "any", + "description": "The unit of time for the interval.", + "required": false + } + } + }, + "jobs.PeriodicTriggerConfigurationTimeUnit": { + "name": "PeriodicTriggerConfigurationTimeUnit", + "package": "jobs", + "description": "periodic trigger configuration time unit configuration.", + "fields": {} + }, + "jobs.PipelineParams": { + "name": "PipelineParams", + "package": "jobs", + "description": "pipeline params configuration.", + "fields": { + "full_refresh": { + "name": "full_refresh", + "type": "any", + "description": "If true, triggers a full refresh on the delta live table.", + "required": false + } + } + }, + "jobs.PipelineTask": { + "name": "PipelineTask", + "package": "jobs", + "description": "pipeline task configuration.", + "fields": { + "full_refresh": { + "name": "full_refresh", + "type": "any", + "description": "If true, 
triggers a full refresh on the delta live table.", + "required": false + }, + "pipeline_id": { + "name": "pipeline_id", + "type": "string", + "description": "The full name of the pipeline task to execute.", + "required": false + } + } + }, + "jobs.PowerBiModel": { + "name": "PowerBiModel", + "package": "jobs", + "description": "power bi model configuration.", + "fields": { + "authentication_method": { + "name": "authentication_method", + "type": "any", + "description": "How the published Power BI model authenticates to Databricks", + "required": false + }, + "model_name": { + "name": "model_name", + "type": "string", + "description": "The name of the Power BI model", + "required": false + }, + "overwrite_existing": { + "name": "overwrite_existing", + "type": "any", + "description": "Whether to overwrite existing Power BI models", + "required": false + }, + "storage_mode": { + "name": "storage_mode", + "type": "any", + "description": "The default storage mode of the Power BI model", + "required": false + }, + "workspace_name": { + "name": "workspace_name", + "type": "string", + "description": "The name of the Power BI workspace of the model", + "required": false + } + } + }, + "jobs.PowerBiTable": { + "name": "PowerBiTable", + "package": "jobs", + "description": "power bi table configuration.", + "fields": { + "catalog": { + "name": "catalog", + "type": "any", + "description": "The catalog name in Databricks", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "The table name in Databricks", + "required": false + }, + "schema": { + "name": "schema", + "type": "any", + "description": "The schema name in Databricks", + "required": false + }, + "storage_mode": { + "name": "storage_mode", + "type": "any", + "description": "The Power BI storage mode of the table", + "required": false + } + } + }, + "jobs.PowerBiTask": { + "name": "PowerBiTask", + "package": "jobs", + "description": "power bi task configuration.", + "fields": { + 
"connection_resource_name": { + "name": "connection_resource_name", + "type": "string", + "description": "The resource name of the UC connection to authenticate from Databricks to Power BI", + "required": false + }, + "power_bi_model": { + "name": "power_bi_model", + "type": "any", + "description": "The semantic model to update", + "required": false + }, + "refresh_after_update": { + "name": "refresh_after_update", + "type": "any", + "description": "Whether the model should be refreshed after the update", + "required": false + }, + "tables": { + "name": "tables", + "type": "any", + "description": "The tables to be exported to Power BI", + "required": false + }, + "warehouse_id": { + "name": "warehouse_id", + "type": "string", + "description": "The SQL warehouse ID to use as the Power BI data source", + "required": false + } + } + }, + "jobs.PythonWheelTask": { + "name": "PythonWheelTask", + "package": "jobs", + "description": "python wheel task configuration.", + "fields": { + "entry_point": { + "name": "entry_point", + "type": "any", + "description": "Named entry point to use, if it does not exist in the metadata of the package it executes the function from the package directly using `$packageName.$entryPoint()`", + "required": false + }, + "named_parameters": { + "name": "named_parameters", + "type": "any", + "description": "Command-line parameters passed to Python wheel task in the form of `[\"--name=task\", \"--data=dbfs:/path/to/data.json\"]`. Leave it empty if `parameters` is not null.", + "required": false + }, + "package_name": { + "name": "package_name", + "type": "string", + "description": "Name of the package to execute", + "required": false + }, + "parameters": { + "name": "parameters", + "type": "any", + "description": "Command-line parameters passed to Python wheel task. 
Leave it empty if `named_parameters` is not null.", + "required": false + } + } + }, + "jobs.QueueSettings": { + "name": "QueueSettings", + "package": "jobs", + "description": "Configuration settings for queue.", + "fields": { + "enabled": { + "name": "enabled", + "type": "bool", + "description": "If true, enable queueing for the job. This is a required field.", + "required": false + } + } + }, + "jobs.RunIf": { + "name": "RunIf", + "package": "jobs", + "description": "An optional value indicating the condition that determines whether the task should be run once its dependencies have been completed. When omitted, defaults to `ALL_SUCCESS`.\n\nPossible values are:\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies have been completed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: All dependencies have failed", + "fields": {} + }, + "jobs.RunJobTask": { + "name": "RunJobTask", + "package": "jobs", + "description": "run job task configuration.", + "fields": { + "dbt_commands": { + "name": "dbt_commands", + "type": "any", + "description": "An array of commands to execute for jobs with the dbt task, for example `\"dbt_commands\": [\"dbt deps\", \"dbt seed\", \"dbt deps\", \"dbt seed\", \"dbt run\"]`\n\n⚠ **Deprecation note** Use [job parameters](https://docs.databricks.com/jobs/job-parameters.html#job-parameter-pushdown) to pass information down to tasks.", + "required": false, + "deprecated": true + }, + "jar_params": { + "name": "jar_params", + "type": "any", + "description": "A list of parameters for jobs with Spark JAR tasks, for example `\"jar_params\": [\"john doe\", \"35\"]`.\nThe parameters are used to invoke the main function of the main class specified in the Spark JAR task.\nIf not specified upon `run-now`, it defaults to an empty 
list.\njar_params cannot be specified in conjunction with notebook_params.\nThe JSON representation of this field (for example `{\"jar_params\":[\"john doe\",\"35\"]}`) cannot exceed 10,000 bytes.\n\n⚠ **Deprecation note** Use [job parameters](https://docs.databricks.com/jobs/job-parameters.html#job-parameter-pushdown) to pass information down to tasks.", + "required": false, + "deprecated": true + }, + "job_id": { + "name": "job_id", + "type": "string", + "description": "ID of the job to trigger.", + "required": false + }, + "job_parameters": { + "name": "job_parameters", + "type": "any", + "description": "Job-level parameters used to trigger the job.", + "required": false + }, + "notebook_params": { + "name": "notebook_params", + "type": "any", + "description": "A map from keys to values for jobs with notebook task, for example `\"notebook_params\": {\"name\": \"john doe\", \"age\": \"35\"}`.\nThe map is passed to the notebook and is accessible through the [dbutils.widgets.get](https://docs.databricks.com/dev-tools/databricks-utils.html) function.\n\nIf not specified upon `run-now`, the triggered run uses the job’s base parameters.\n\nnotebook_params cannot be specified in conjunction with jar_params.\n\n⚠ **Deprecation note** Use [job parameters](https://docs.databricks.com/jobs/job-parameters.html#job-parameter-pushdown) to pass information down to tasks.\n\nThe JSON representation of this field (for example `{\"notebook_params\":{\"name\":\"john doe\",\"age\":\"35\"}}`) cannot exceed 10,000 bytes.", + "required": false, + "deprecated": true + }, + "pipeline_params": { + "name": "pipeline_params", + "type": "any", + "description": "Controls whether the pipeline should perform a full refresh", + "required": false + }, + "python_named_params": { + "name": "python_named_params", + "type": "any", + "description": "", + "required": false, + "deprecated": true + }, + "python_params": { + "name": "python_params", + "type": "any", + "description": "A list of parameters 
for jobs with Python tasks, for example `\"python_params\": [\"john doe\", \"35\"]`.\nThe parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it would overwrite\nthe parameters specified in job setting. The JSON representation of this field (for example `{\"python_params\":[\"john doe\",\"35\"]}`)\ncannot exceed 10,000 bytes.\n\n⚠ **Deprecation note** Use [job parameters](https://docs.databricks.com/jobs/job-parameters.html#job-parameter-pushdown) to pass information down to tasks.\n\nImportant\n\nThese parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error.\nExamples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis.", + "required": false, + "deprecated": true + }, + "spark_submit_params": { + "name": "spark_submit_params", + "type": "any", + "description": "A list of parameters for jobs with spark submit task, for example `\"spark_submit_params\": [\"--class\", \"org.apache.spark.examples.SparkPi\"]`.\nThe parameters are passed to spark-submit script as command-line parameters. If specified upon `run-now`, it would overwrite the\nparameters specified in job setting. The JSON representation of this field (for example `{\"python_params\":[\"john doe\",\"35\"]}`)\ncannot exceed 10,000 bytes.\n\n⚠ **Deprecation note** Use [job parameters](https://docs.databricks.com/jobs/job-parameters.html#job-parameter-pushdown) to pass information down to tasks.\n\nImportant\n\nThese parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error.\nExamples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis.", + "required": false, + "deprecated": true + }, + "sql_params": { + "name": "sql_params", + "type": "any", + "description": "A map from keys to values for jobs with SQL task, for example `\"sql_params\": {\"name\": \"john doe\", \"age\": \"35\"}`. 
The SQL alert task does not support custom parameters.\n\n⚠ **Deprecation note** Use [job parameters](https://docs.databricks.com/jobs/job-parameters.html#job-parameter-pushdown) to pass information down to tasks.", + "required": false, + "deprecated": true + } + } + }, + "jobs.Source": { + "name": "Source", + "package": "jobs", + "description": "Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\nfrom the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: SQL file is located in Databricks workspace.\n* `GIT`: SQL file is located in cloud Git provider.", + "fields": {} + }, + "jobs.SparkJarTask": { + "name": "SparkJarTask", + "package": "jobs", + "description": "spark jar task configuration.", + "fields": { + "jar_uri": { + "name": "jar_uri", + "type": "any", + "description": "Deprecated since 04/2016. For classic compute, provide a `jar` through the `libraries` field instead. For serverless compute, provide a `jar` through the `java_dependencies` field inside the `environments` list.\n\nSee the examples of classic and serverless compute usage at the top of the page.", + "required": false, + "deprecated": true + }, + "main_class_name": { + "name": "main_class_name", + "type": "string", + "description": "The full name of the class containing the main method to be executed. 
This class must be contained in a JAR provided as a library.\n\nThe code must use `SparkContext.getOrCreate` to obtain a Spark context; otherwise, runs of the job fail.", + "required": false + }, + "parameters": { + "name": "parameters", + "type": "any", + "description": "Parameters passed to the main method.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", + "required": false + }, + "run_as_repl": { + "name": "run_as_repl", + "type": "any", + "description": "Deprecated. A value of `false` is no longer supported.", + "required": false, + "deprecated": true + } + } + }, + "jobs.SparkPythonTask": { + "name": "SparkPythonTask", + "package": "jobs", + "description": "spark python task configuration.", + "fields": { + "parameters": { + "name": "parameters", + "type": "any", + "description": "Command line parameters passed to the Python file.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", + "required": false + }, + "python_file": { + "name": "python_file", + "type": "any", + "description": "The Python file to be executed. Cloud file URIs (such as dbfs:/, s3:/, adls:/, gcs:/) and workspace paths are supported. For python files stored in the Databricks workspace, the path must be absolute and begin with `/`. For files stored in a remote repository, the path must be relative. This field is required.", + "required": false + }, + "source": { + "name": "source", + "type": "any", + "description": "Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved from the local\nDatabricks workspace or cloud location (if the `python_file` has a URI format). 
When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a Databricks workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository.", + "required": false + } + } + }, + "jobs.SparkSubmitTask": { + "name": "SparkSubmitTask", + "package": "jobs", + "description": "spark submit task configuration.", + "fields": { + "parameters": { + "name": "parameters", + "type": "any", + "description": "Command-line parameters passed to spark submit.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.", + "required": false + } + } + }, + "jobs.SqlTask": { + "name": "SqlTask", + "package": "jobs", + "description": "sql task configuration.", + "fields": { + "alert": { + "name": "alert", + "type": "any", + "description": "If alert, indicates that this job must refresh a SQL alert.", + "required": false + }, + "dashboard": { + "name": "dashboard", + "type": "any", + "description": "If dashboard, indicates that this job must refresh a SQL dashboard.", + "required": false + }, + "file": { + "name": "file", + "type": "any", + "description": "If file, indicates that this job runs a SQL file in a remote Git repository.", + "required": false + }, + "parameters": { + "name": "parameters", + "type": "any", + "description": "Parameters to be used for each run of this job. The SQL alert task does not support custom parameters.", + "required": false + }, + "query": { + "name": "query", + "type": "any", + "description": "If query, indicates that this job must execute a SQL query.", + "required": false + }, + "warehouse_id": { + "name": "warehouse_id", + "type": "string", + "description": "The canonical identifier of the SQL warehouse. Recommended to use with serverless or pro SQL warehouses. 
Classic SQL warehouses are only supported for SQL alert, dashboard and query tasks and are limited to scheduled single-task jobs.", + "required": false + } + } + }, + "jobs.SqlTaskAlert": { + "name": "SqlTaskAlert", + "package": "jobs", + "description": "sql task alert configuration.", + "fields": { + "alert_id": { + "name": "alert_id", + "type": "string", + "description": "The canonical identifier of the SQL alert.", + "required": false + }, + "pause_subscriptions": { + "name": "pause_subscriptions", + "type": "any", + "description": "If true, the alert notifications are not sent to subscribers.", + "required": false + }, + "subscriptions": { + "name": "subscriptions", + "type": "any", + "description": "If specified, alert notifications are sent to subscribers.", + "required": false + } + } + }, + "jobs.SqlTaskDashboard": { + "name": "SqlTaskDashboard", + "package": "jobs", + "description": "sql task dashboard configuration.", + "fields": { + "custom_subject": { + "name": "custom_subject", + "type": "any", + "description": "Subject of the email sent to subscribers of this task.", + "required": false + }, + "dashboard_id": { + "name": "dashboard_id", + "type": "string", + "description": "The canonical identifier of the SQL dashboard.", + "required": false + }, + "pause_subscriptions": { + "name": "pause_subscriptions", + "type": "any", + "description": "If true, the dashboard snapshot is not taken, and emails are not sent to subscribers.", + "required": false + }, + "subscriptions": { + "name": "subscriptions", + "type": "any", + "description": "If specified, dashboard snapshots are sent to subscriptions.", + "required": false + } + } + }, + "jobs.SqlTaskFile": { + "name": "SqlTaskFile", + "package": "jobs", + "description": "sql task file configuration.", + "fields": { + "path": { + "name": "path", + "type": "any", + "description": "Path of the SQL file. 
Must be relative if the source is a remote Git repository and absolute for workspace paths.", + "required": false + }, + "source": { + "name": "source", + "type": "any", + "description": "Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\nfrom the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: SQL file is located in Databricks workspace.\n* `GIT`: SQL file is located in cloud Git provider.", + "required": false + } + } + }, + "jobs.SqlTaskQuery": { + "name": "SqlTaskQuery", + "package": "jobs", + "description": "sql task query configuration.", + "fields": { + "query_id": { + "name": "query_id", + "type": "string", + "description": "The canonical identifier of the SQL query.", + "required": false + } + } + }, + "jobs.SqlTaskSubscription": { + "name": "SqlTaskSubscription", + "package": "jobs", + "description": "sql task subscription configuration.", + "fields": { + "destination_id": { + "name": "destination_id", + "type": "string", + "description": "The canonical identifier of the destination to receive email notification. This parameter is mutually exclusive with user_name. You cannot set both destination_id and user_name for subscription notifications.", + "required": false + }, + "user_name": { + "name": "user_name", + "type": "string", + "description": "The user name to receive the subscription email. This parameter is mutually exclusive with destination_id. 
You cannot set both destination_id and user_name for subscription notifications.", + "required": false + } + } + }, + "jobs.StorageMode": { + "name": "StorageMode", + "package": "jobs", + "description": "storage mode configuration.", + "fields": {} + }, + "jobs.Subscription": { + "name": "Subscription", + "package": "jobs", + "description": "subscription configuration.", + "fields": { + "custom_subject": { + "name": "custom_subject", + "type": "any", + "description": "Optional: Allows users to specify a custom subject line on the email sent\nto subscribers.", + "required": false + }, + "paused": { + "name": "paused", + "type": "any", + "description": "When true, the subscription will not send emails.", + "required": false + }, + "subscribers": { + "name": "subscribers", + "type": "any", + "description": "The list of subscribers to send the snapshot of the dashboard to.", + "required": false + } + } + }, + "jobs.SubscriptionSubscriber": { + "name": "SubscriptionSubscriber", + "package": "jobs", + "description": "subscription subscriber configuration.", + "fields": { + "destination_id": { + "name": "destination_id", + "type": "string", + "description": "A snapshot of the dashboard will be sent to the destination when the `destination_id` field is present.", + "required": false + }, + "user_name": { + "name": "user_name", + "type": "string", + "description": "A snapshot of the dashboard will be sent to the user's email when the `user_name` field is present.", + "required": false + } + } + }, + "jobs.TableUpdateTriggerConfiguration": { + "name": "TableUpdateTriggerConfiguration", + "package": "jobs", + "description": "table update trigger configuration configuration.", + "fields": { + "condition": { + "name": "condition", + "type": "any", + "description": "The table(s) condition based on which to trigger a job run.", + "required": false + }, + "min_time_between_triggers_seconds": { + "name": "min_time_between_triggers_seconds", + "type": "int", + "description": "If 
set, the trigger starts a run only after the specified amount of time has passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds.", + "required": false + }, + "table_names": { + "name": "table_names", + "type": "any", + "description": "A list of tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`.", + "required": false + }, + "wait_after_last_change_seconds": { + "name": "wait_after_last_change_seconds", + "type": "int", + "description": "If set, the trigger starts a run only after no table updates have occurred for the specified time\nand can be used to wait for a series of table updates before triggering a run. The\nminimum allowed value is 60 seconds.", + "required": false + } + } + }, + "jobs.Task": { + "name": "Task", + "package": "jobs", + "description": "task configuration.", + "fields": { + "clean_rooms_notebook_task": { + "name": "clean_rooms_notebook_task", + "type": "any", + "description": "The task runs a [clean rooms](https://docs.databricks.com/clean-rooms/index.html) notebook\nwhen the `clean_rooms_notebook_task` field is present.", + "required": false + }, + "condition_task": { + "name": "condition_task", + "type": "any", + "description": "The task evaluates a condition that can be used to control the execution of other tasks when the `condition_task` field is present.\nThe condition task does not require a cluster to execute and does not support retries or notifications.", + "required": false + }, + "dashboard_task": { + "name": "dashboard_task", + "type": "any", + "description": "The task refreshes a dashboard and sends a snapshot to subscribers.", + "required": false + }, + "dbt_cloud_task": { + "name": "dbt_cloud_task", + "type": "any", + "description": "Task type for dbt cloud, deprecated in favor of the new name dbt_platform_task", + "required": false, + "deprecated": true + }, + "dbt_platform_task": { + "name": "dbt_platform_task", + "type": "any", + 
"description": "", + "required": false + }, + "dbt_task": { + "name": "dbt_task", + "type": "any", + "description": "The task runs one or more dbt commands when the `dbt_task` field is present. The dbt task requires both Databricks SQL and the ability to use a serverless or a pro SQL warehouse.", + "required": false + }, + "depends_on": { + "name": "depends_on", + "type": "any", + "description": "An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. The task will run only if the `run_if` condition is true.\nThe key is `task_key`, and the value is the name assigned to the dependent task.", + "required": false + }, + "description": { + "name": "description", + "type": "string", + "description": "An optional description for this task.", + "required": false + }, + "disable_auto_optimization": { + "name": "disable_auto_optimization", + "type": "any", + "description": "An option to disable auto optimization in serverless", + "required": false + }, + "disabled": { + "name": "disabled", + "type": "any", + "description": "An optional flag to disable the task. If set to true, the task will not run even if it is part of a job.", + "required": false + }, + "email_notifications": { + "name": "email_notifications", + "type": "any", + "description": "An optional set of email addresses that is notified when runs of this task begin or complete as well as when this task is deleted. The default behavior is to not send any emails.", + "required": false + }, + "environment_key": { + "name": "environment_key", + "type": "any", + "description": "The key that references an environment spec in a job. 
This field is required for Python script, Python wheel and dbt tasks when using serverless compute.", + "required": false + }, + "existing_cluster_id": { + "name": "existing_cluster_id", + "type": "string", + "description": "If existing_cluster_id, the ID of an existing cluster that is used for all runs.\nWhen running jobs or tasks on an existing cluster, you may need to manually restart\nthe cluster if it stops responding. We suggest running jobs and tasks on new clusters for\ngreater reliability", + "required": false + }, + "for_each_task": { + "name": "for_each_task", + "type": "any", + "description": "The task executes a nested task for every input provided when the `for_each_task` field is present.", + "required": false + }, + "gen_ai_compute_task": { + "name": "gen_ai_compute_task", + "type": "any", + "description": "", + "required": false + }, + "health": { + "name": "health", + "type": "any", + "description": "An optional set of health rules that can be defined for this job.", + "required": false + }, + "job_cluster_key": { + "name": "job_cluster_key", + "type": "any", + "description": "If job_cluster_key, this task is executed reusing the cluster specified in `job.settings.job_clusters`.", + "required": false + }, + "libraries": { + "name": "libraries", + "type": "any", + "description": "An optional list of libraries to be installed on the cluster.\nThe default value is an empty list.", + "required": false + }, + "max_retries": { + "name": "max_retries", + "type": "any", + "description": "An optional maximum number of times to retry an unsuccessful run. A run is considered to be unsuccessful if it completes with the `FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. 
The value `-1` means to retry indefinitely and the value `0` means to never retry.", + "required": false + }, + "min_retry_interval_millis": { + "name": "min_retry_interval_millis", + "type": "any", + "description": "An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried.", + "required": false + }, + "new_cluster": { + "name": "new_cluster", + "type": "any", + "description": "If new_cluster, a description of a new cluster that is created for each run.", + "required": false + }, + "notebook_task": { + "name": "notebook_task", + "type": "any", + "description": "The task runs a notebook when the `notebook_task` field is present.", + "required": false + }, + "notification_settings": { + "name": "notification_settings", + "type": "any", + "description": "Optional notification settings that are used when sending notifications to each of the `email_notifications` and `webhook_notifications` for this task.", + "required": false + }, + "pipeline_task": { + "name": "pipeline_task", + "type": "any", + "description": "The task triggers a pipeline update when the `pipeline_task` field is present. Only pipelines configured to use triggered mode are supported.", + "required": false + }, + "power_bi_task": { + "name": "power_bi_task", + "type": "any", + "description": "The task triggers a Power BI semantic model update when the `power_bi_task` field is present.", + "required": false + }, + "python_wheel_task": { + "name": "python_wheel_task", + "type": "any", + "description": "The task runs a Python wheel when the `python_wheel_task` field is present.", + "required": false + }, + "retry_on_timeout": { + "name": "retry_on_timeout", + "type": "any", + "description": "An optional policy to specify whether to retry a job when it times out. 
The default behavior\nis to not retry on timeout.", + "required": false + }, + "run_if": { + "name": "run_if", + "type": "any", + "description": "An optional value specifying the condition determining whether the task is run once its dependencies have been completed.\n\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies have been completed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: All dependencies have failed", + "required": false + }, + "run_job_task": { + "name": "run_job_task", + "type": "any", + "description": "The task triggers another job when the `run_job_task` field is present.", + "required": false + }, + "spark_jar_task": { + "name": "spark_jar_task", + "type": "any", + "description": "The task runs a JAR when the `spark_jar_task` field is present.", + "required": false + }, + "spark_python_task": { + "name": "spark_python_task", + "type": "any", + "description": "The task runs a Python file when the `spark_python_task` field is present.", + "required": false + }, + "spark_submit_task": { + "name": "spark_submit_task", + "type": "any", + "description": "(Legacy) The task runs the spark-submit script when the spark_submit_task field is present. Databricks recommends using the spark_jar_task instead; see [Spark Submit task for jobs](/jobs/spark-submit).", + "required": false, + "deprecated": true + }, + "sql_task": { + "name": "sql_task", + "type": "any", + "description": "The task runs a SQL query or file, or it refreshes a SQL alert or a legacy SQL dashboard when the `sql_task` field is present.", + "required": false + }, + "task_key": { + "name": "task_key", + "type": "any", + "description": "A unique name for the task. 
This field is used to refer to this task from other tasks.\nThis field is required and must be unique within its parent job.\nOn Update or Reset, this field is used to reference the tasks to be updated or reset.", + "required": false + }, + "timeout_seconds": { + "name": "timeout_seconds", + "type": "int", + "description": "An optional timeout applied to each run of this job task. A value of `0` means no timeout.", + "required": false + }, + "webhook_notifications": { + "name": "webhook_notifications", + "type": "any", + "description": "A collection of system notification IDs to notify when runs of this task begin or complete. The default behavior is to not send any system notifications.", + "required": false + } + } + }, + "jobs.TaskDependency": { + "name": "TaskDependency", + "package": "jobs", + "description": "task dependency configuration.", + "fields": { + "outcome": { + "name": "outcome", + "type": "any", + "description": "Can only be specified on condition task dependencies. The outcome of the dependent task that must be met for this task to run.", + "required": false + }, + "task_key": { + "name": "task_key", + "type": "any", + "description": "The name of the task this task depends on.", + "required": false + } + } + }, + "jobs.TaskEmailNotifications": { + "name": "TaskEmailNotifications", + "package": "jobs", + "description": "task email notifications configuration.", + "fields": { + "no_alert_for_skipped_runs": { + "name": "no_alert_for_skipped_runs", + "type": "any", + "description": "If true, do not send email to recipients specified in `on_failure` if the run is skipped.\nThis field is `deprecated`. 
Please use the `notification_settings.no_alert_for_skipped_runs` field.", + "required": false, + "deprecated": true + }, + "on_duration_warning_threshold_exceeded": { + "name": "on_duration_warning_threshold_exceeded", + "type": "any", + "description": "A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.", + "required": false + }, + "on_failure": { + "name": "on_failure", + "type": "any", + "description": "A list of email addresses to be notified when a run unsuccessfully completes. A run is considered to have completed unsuccessfully if it ends with an `INTERNAL_ERROR` `life_cycle_state` or a `FAILED`, or `TIMED_OUT` result_state. If this is not specified on job creation, reset, or update the list is empty, and notifications are not sent.", + "required": false + }, + "on_start": { + "name": "on_start", + "type": "any", + "description": "A list of email addresses to be notified when a run begins. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "required": false + }, + "on_streaming_backlog_exceeded": { + "name": "on_streaming_backlog_exceeded", + "type": "any", + "description": "A list of email addresses to notify when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. 
If the issue persists, notifications are resent every 30 minutes.", + "required": false + }, + "on_success": { + "name": "on_success", + "type": "any", + "description": "A list of email addresses to be notified when a run successfully completes. A run is considered to have completed successfully if it ends with a `TERMINATED` `life_cycle_state` and a `SUCCESS` result_state. If not specified on job creation, reset, or update, the list is empty, and notifications are not sent.", + "required": false + } + } + }, + "jobs.TaskNotificationSettings": { + "name": "TaskNotificationSettings", + "package": "jobs", + "description": "Configuration settings for task notification.", + "fields": { + "alert_on_last_attempt": { + "name": "alert_on_last_attempt", + "type": "any", + "description": "If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run.", + "required": false + }, + "no_alert_for_canceled_runs": { + "name": "no_alert_for_canceled_runs", + "type": "any", + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is canceled.", + "required": false + }, + "no_alert_for_skipped_runs": { + "name": "no_alert_for_skipped_runs", + "type": "any", + "description": "If true, do not send notifications to recipients specified in `on_failure` if the run is skipped.", + "required": false + } + } + }, + "jobs.TaskRetryMode": { + "name": "TaskRetryMode", + "package": "jobs", + "description": "task retry mode of the continuous job\n* NEVER: The failed task will not be retried.\n* ON_FAILURE: Retry a failed task if at least one other task in the job is still running its first attempt.\nWhen this condition is no longer met or the retry limit is reached, the job run is cancelled and a new run is started.", + "fields": {} + }, + "jobs.TriggerSettings": { + "name": "TriggerSettings", + "package": "jobs", + 
"description": "Configuration settings for trigger.", + "fields": { + "file_arrival": { + "name": "file_arrival", + "type": "any", + "description": "File arrival trigger settings.", + "required": false + }, + "model": { + "name": "model", + "type": "any", + "description": "", + "required": false + }, + "pause_status": { + "name": "pause_status", + "type": "any", + "description": "Whether this trigger is paused or not.", + "required": false + }, + "periodic": { + "name": "periodic", + "type": "any", + "description": "Periodic trigger settings.", + "required": false + }, + "table_update": { + "name": "table_update", + "type": "any", + "description": "", + "required": false + } + } + }, + "jobs.Webhook": { + "name": "Webhook", + "package": "jobs", + "description": "webhook configuration.", + "fields": { + "id": { + "name": "id", + "type": "any", + "description": "", + "required": false + } + } + }, + "jobs.WebhookNotifications": { + "name": "WebhookNotifications", + "package": "jobs", + "description": "webhook notifications configuration.", + "fields": { + "on_duration_warning_threshold_exceeded": { + "name": "on_duration_warning_threshold_exceeded", + "type": "any", + "description": "An optional list of system notification IDs to call when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. A maximum of 3 destinations can be specified for the `on_duration_warning_threshold_exceeded` property.", + "required": false + }, + "on_failure": { + "name": "on_failure", + "type": "any", + "description": "An optional list of system notification IDs to call when the run fails. A maximum of 3 destinations can be specified for the `on_failure` property.", + "required": false + }, + "on_start": { + "name": "on_start", + "type": "any", + "description": "An optional list of system notification IDs to call when the run starts. 
A maximum of 3 destinations can be specified for the `on_start` property.", + "required": false + }, + "on_streaming_backlog_exceeded": { + "name": "on_streaming_backlog_exceeded", + "type": "any", + "description": "An optional list of system notification IDs to call when any streaming backlog thresholds are exceeded for any stream.\nStreaming backlog thresholds can be set in the `health` field using the following metrics: `STREAMING_BACKLOG_BYTES`, `STREAMING_BACKLOG_RECORDS`, `STREAMING_BACKLOG_SECONDS`, or `STREAMING_BACKLOG_FILES`.\nAlerting is based on the 10-minute average of these metrics. If the issue persists, notifications are resent every 30 minutes.\nA maximum of 3 destinations can be specified for the `on_streaming_backlog_exceeded` property.", + "required": false + }, + "on_success": { + "name": "on_success", + "type": "any", + "description": "An optional list of system notification IDs to call when the run completes successfully. A maximum of 3 destinations can be specified for the `on_success` property.", + "required": false + } + } + }, + "ml.ExperimentTag": { + "name": "ExperimentTag", + "package": "ml", + "description": "A tag for an experiment.", + "fields": { + "key": { + "name": "key", + "type": "any", + "description": "The tag key.", + "required": false + }, + "value": { + "name": "value", + "type": "any", + "description": "The tag value.", + "required": false + } + } + }, + "ml.ModelTag": { + "name": "ModelTag", + "package": "ml", + "description": "Tag for a registered model", + "fields": { + "key": { + "name": "key", + "type": "any", + "description": "The tag key.", + "required": false + }, + "value": { + "name": "value", + "type": "any", + "description": "The tag value.", + "required": false + } + } + }, + "pipelines.ConnectionParameters": { + "name": "ConnectionParameters", + "package": "pipelines", + "description": "connection parameters configuration.", + "fields": { + "source_catalog": { + "name": "source_catalog", + "type": "any", + 
"description": "Source catalog for initial connection.\nThis is necessary for schema exploration in some database systems like Oracle, and optional but nice-to-have\nin some other database systems like Postgres.\nFor Oracle databases, this maps to a service name.", + "required": false + } + } + }, + "pipelines.CronTrigger": { + "name": "CronTrigger", + "package": "pipelines", + "description": "cron trigger configuration.", + "fields": { + "quartz_cron_schedule": { + "name": "quartz_cron_schedule", + "type": "any", + "description": "", + "required": false + }, + "timezone_id": { + "name": "timezone_id", + "type": "string", + "description": "", + "required": false + } + } + }, + "pipelines.DayOfWeek": { + "name": "DayOfWeek", + "package": "pipelines", + "description": "Days of week in which the window is allowed to happen.\nIf not specified all days of the week will be used.", + "fields": {} + }, + "pipelines.DeploymentKind": { + "name": "DeploymentKind", + "package": "pipelines", + "description": "The deployment method that manages the pipeline:\n- BUNDLE: The pipeline is managed by a Databricks Asset Bundle.", + "fields": {} + }, + "pipelines.EventLogSpec": { + "name": "EventLogSpec", + "package": "pipelines", + "description": "Configurable event log parameters.", + "fields": { + "catalog": { + "name": "catalog", + "type": "any", + "description": "The UC catalog the event log is published under.", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "The name the event log is published to in UC.", + "required": false + }, + "schema": { + "name": "schema", + "type": "any", + "description": "The UC schema the event log is published under.", + "required": false + } + } + }, + "pipelines.FileLibrary": { + "name": "FileLibrary", + "package": "pipelines", + "description": "file library configuration.", + "fields": { + "path": { + "name": "path", + "type": "any", + "description": "The absolute path of the source code.", + "required": 
false + } + } + }, + "pipelines.Filters": { + "name": "Filters", + "package": "pipelines", + "description": "filters configuration.", + "fields": { + "exclude": { + "name": "exclude", + "type": "any", + "description": "Paths to exclude.", + "required": false + }, + "include": { + "name": "include", + "type": "any", + "description": "Paths to include.", + "required": false + } + } + }, + "pipelines.IngestionConfig": { + "name": "IngestionConfig", + "package": "pipelines", + "description": "ingestion config configuration.", + "fields": { + "report": { + "name": "report", + "type": "any", + "description": "Select a specific source report.", + "required": false + }, + "schema": { + "name": "schema", + "type": "any", + "description": "Select all tables from a specific source schema.", + "required": false + }, + "table": { + "name": "table", + "type": "any", + "description": "Select a specific source table.", + "required": false + } + } + }, + "pipelines.IngestionGatewayPipelineDefinition": { + "name": "IngestionGatewayPipelineDefinition", + "package": "pipelines", + "description": "ingestion gateway pipeline definition configuration.", + "fields": { + "connection_id": { + "name": "connection_id", + "type": "string", + "description": "[Deprecated, use connection_name instead] Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source.", + "required": false, + "deprecated": true + }, + "connection_name": { + "name": "connection_name", + "type": "string", + "description": "Immutable. The Unity Catalog connection that this gateway pipeline uses to communicate with the source.", + "required": false + }, + "connection_parameters": { + "name": "connection_parameters", + "type": "any", + "description": "Optional, Internal. 
Parameters required to establish an initial connection with the source.", + "required": false + }, + "gateway_storage_catalog": { + "name": "gateway_storage_catalog", + "type": "any", + "description": "Required, Immutable. The name of the catalog for the gateway pipeline's storage location.", + "required": false + }, + "gateway_storage_name": { + "name": "gateway_storage_name", + "type": "string", + "description": "Optional. The Unity Catalog-compatible name for the gateway storage location.\nThis is the destination to use for the data that is extracted by the gateway.\nSpark Declarative Pipelines system will automatically create the storage location under the catalog and schema.", + "required": false + }, + "gateway_storage_schema": { + "name": "gateway_storage_schema", + "type": "any", + "description": "Required, Immutable. The name of the schema for the gateway pipeline's storage location.", + "required": false + } + } + }, + "pipelines.IngestionPipelineDefinition": { + "name": "IngestionPipelineDefinition", + "package": "pipelines", + "description": "ingestion pipeline definition configuration.", + "fields": { + "connection_name": { + "name": "connection_name", + "type": "string", + "description": "Immutable. The Unity Catalog connection that this ingestion pipeline uses to communicate with the source. This is used with connectors for applications like Salesforce, Workday, and so on.", + "required": false + }, + "ingest_from_uc_foreign_catalog": { + "name": "ingest_from_uc_foreign_catalog", + "type": "any", + "description": "Immutable. If set to true, the pipeline will ingest tables from the\nUC foreign catalogs directly without the need to specify a UC connection or ingestion gateway.\nThe `source_catalog` fields in objects of IngestionConfig are interpreted as\nthe UC foreign catalogs to ingest from.", + "required": false + }, + "ingestion_gateway_id": { + "name": "ingestion_gateway_id", + "type": "string", + "description": "Immutable. 
Identifier for the gateway that is used by this ingestion pipeline to communicate with the source database. This is used with connectors to databases like SQL Server.", + "required": false + }, + "netsuite_jar_path": { + "name": "netsuite_jar_path", + "type": "string", + "description": "Netsuite only configuration. When the field is set for a netsuite connector,\nthe jar stored in the field will be validated and added to the classpath of\npipeline's cluster.", + "required": false + }, + "objects": { + "name": "objects", + "type": "any", + "description": "Required. Settings specifying tables to replicate and the destination for the replicated tables.", + "required": false + }, + "source_configurations": { + "name": "source_configurations", + "type": "any", + "description": "Top-level source configurations", + "required": false + }, + "source_type": { + "name": "source_type", + "type": "any", + "description": "The type of the foreign source.\nThe source type will be inferred from the source connection or ingestion gateway.\nThis field is output only and will be ignored if provided.", + "required": false, + "output_only": true + }, + "table_configuration": { + "name": "table_configuration", + "type": "any", + "description": "Configuration settings to control the ingestion of tables. 
These settings are applied to all tables in the pipeline.", + "required": false + } + } + }, + "pipelines.IngestionPipelineDefinitionTableSpecificConfigQueryBasedConnectorConfig": { + "name": "IngestionPipelineDefinitionTableSpecificConfigQueryBasedConnectorConfig", + "package": "pipelines", + "description": "Configurations that are only applicable for query-based ingestion connectors.", + "fields": { + "cursor_columns": { + "name": "cursor_columns", + "type": "any", + "description": "The names of the monotonically increasing columns in the source table that are used to enable\nthe table to be read and ingested incrementally through structured streaming.\nThe columns are allowed to have repeated values but have to be non-decreasing.\nIf the source data is merged into the destination (e.g., using SCD Type 1 or Type 2), these\ncolumns will implicitly define the `sequence_by` behavior. You can still explicitly set\n`sequence_by` to override this default.", + "required": false + }, + "deletion_condition": { + "name": "deletion_condition", + "type": "any", + "description": "Specifies a SQL WHERE condition that specifies that the source row has been deleted.\nThis is sometimes referred to as \"soft-deletes\".\nFor example: \"Operation = 'DELETE'\" or \"is_deleted = true\".\nThis field is orthogonal to `hard_deletion_sync_interval_in_seconds`,\none for soft-deletes and the other for hard-deletes.\nSee also the hard_deletion_sync_min_interval_in_seconds field for\nhandling of \"hard deletes\" where the source rows are physically removed from the table.", + "required": false + }, + "hard_deletion_sync_min_interval_in_seconds": { + "name": "hard_deletion_sync_min_interval_in_seconds", + "type": "int", + "description": "Specifies the minimum interval (in seconds) between snapshots on primary keys\nfor detecting and synchronizing hard deletions—i.e., rows that have been\nphysically removed from the source table.\nThis interval acts as a lower bound. 
If ingestion runs less frequently than\nthis value, hard deletion synchronization will align with the actual ingestion\nfrequency instead of happening more often.\nIf not set, hard deletion synchronization via snapshots is disabled.\nThis field is mutable and can be updated without triggering a full snapshot.", + "required": false + } + } + }, + "pipelines.IngestionPipelineDefinitionWorkdayReportParameters": { + "name": "IngestionPipelineDefinitionWorkdayReportParameters", + "package": "pipelines", + "description": "ingestion pipeline definition workday report parameters configuration.", + "fields": { + "incremental": { + "name": "incremental", + "type": "any", + "description": "(Optional) Marks the report as incremental.\nThis field is deprecated and should not be used. Use `parameters` instead. The incremental behavior is now\ncontrolled by the `parameters` field.", + "required": false, + "deprecated": true + }, + "parameters": { + "name": "parameters", + "type": "any", + "description": "Parameters for the Workday report. Each key represents the parameter name (e.g., \"start_date\", \"end_date\"),\nand the corresponding value is a SQL-like expression used to compute the parameter value at runtime.\nExample:\n{\n\"start_date\": \"{ coalesce(current_offset(), date(\\\"2025-02-01\\\")) }\",\n\"end_date\": \"{ current_date() - INTERVAL 1 DAY }\"\n}", + "required": false + }, + "report_parameters": { + "name": "report_parameters", + "type": "any", + "description": "(Optional) Additional custom parameters for Workday Report\nThis field is deprecated and should not be used. 
Use `parameters` instead.", + "required": false, + "deprecated": true + } + } + }, + "pipelines.IngestionPipelineDefinitionWorkdayReportParametersQueryKeyValue": { + "name": "IngestionPipelineDefinitionWorkdayReportParametersQueryKeyValue", + "package": "pipelines", + "description": "ingestion pipeline definition workday report parameters query key value configuration.", + "fields": { + "key": { + "name": "key", + "type": "any", + "description": "Key for the report parameter, can be a column name or other metadata", + "required": false + }, + "value": { + "name": "value", + "type": "any", + "description": "Value for the report parameter.\nPossible values it can take are these sql functions:\n1. coalesce(current_offset(), date(\"YYYY-MM-DD\")) -\u003e if current_offset() is null, then the passed date, else current_offset()\n2. current_date()\n3. date_sub(current_date(), x) -\u003e subtract x (some non-negative integer) days from current date", + "required": false + } + } + }, + "pipelines.IngestionSourceType": { + "name": "IngestionSourceType", + "package": "pipelines", + "description": "ingestion source type configuration.", + "fields": {} + }, + "pipelines.ManualTrigger": { + "name": "ManualTrigger", + "package": "pipelines", + "description": "manual trigger configuration.", + "fields": {} + }, + "pipelines.NotebookLibrary": { + "name": "NotebookLibrary", + "package": "pipelines", + "description": "notebook library configuration.", + "fields": { + "path": { + "name": "path", + "type": "any", + "description": "The absolute path of the source code.", + "required": false + } + } + }, + "pipelines.Notifications": { + "name": "Notifications", + "package": "pipelines", + "description": "notifications configuration.", + "fields": { + "alerts": { + "name": "alerts", + "type": "any", + "description": "A list of alerts that trigger the sending of notifications to the configured\ndestinations. 
The supported alerts are:\n\n* `on-update-success`: A pipeline update completes successfully.\n* `on-update-failure`: Each time a pipeline update fails.\n* `on-update-fatal-failure`: A pipeline update fails with a non-retryable (fatal) error.\n* `on-flow-failure`: A single data flow fails.", + "required": false + }, + "email_recipients": { + "name": "email_recipients", + "type": "any", + "description": "A list of email addresses notified when a configured alert is triggered.", + "required": false + } + } + }, + "pipelines.PathPattern": { + "name": "PathPattern", + "package": "pipelines", + "description": "path pattern configuration.", + "fields": { + "include": { + "name": "include", + "type": "any", + "description": "The source code to include for pipelines", + "required": false + } + } + }, + "pipelines.PipelineCluster": { + "name": "PipelineCluster", + "package": "pipelines", + "description": "pipeline cluster configuration.", + "fields": { + "apply_policy_default_values": { + "name": "apply_policy_default_values", + "type": "any", + "description": "Note: This field won't be persisted. 
Only API users will check this field.", + "required": false + }, + "autoscale": { + "name": "autoscale", + "type": "any", + "description": "Parameters needed in order to automatically scale clusters up and down based on load.\nNote: autoscaling works best with DB runtime versions 3.0 or later.", + "required": false + }, + "aws_attributes": { + "name": "aws_attributes", + "type": "any", + "description": "Attributes related to clusters running on Amazon Web Services.\nIf not specified at cluster creation, a set of default values will be used.", + "required": false + }, + "azure_attributes": { + "name": "azure_attributes", + "type": "any", + "description": "Attributes related to clusters running on Microsoft Azure.\nIf not specified at cluster creation, a set of default values will be used.", + "required": false + }, + "cluster_log_conf": { + "name": "cluster_log_conf", + "type": "any", + "description": "The configuration for delivering spark logs to a long-term storage destination.\nOnly dbfs destinations are supported. Only one destination can be specified\nfor one cluster. If the conf is given, the logs will be delivered to the destination every\n`5 mins`. The destination of driver logs is `$destination/$clusterId/driver`, while\nthe destination of executor logs is `$destination/$clusterId/executor`.", + "required": false + }, + "custom_tags": { + "name": "custom_tags", + "type": "map[string]string", + "description": "Additional tags for cluster resources. Databricks will tag all cluster resources (e.g., AWS\ninstances and EBS volumes) with these tags in addition to `default_tags`. 
Notes:\n\n- Currently, Databricks allows at most 45 custom tags\n\n- Clusters can only reuse cloud resources if the resources' tags are a subset of the cluster tags", + "required": false + }, + "driver_instance_pool_id": { + "name": "driver_instance_pool_id", + "type": "string", + "description": "The optional ID of the instance pool for the driver of the cluster belongs.\nThe pool cluster uses the instance pool with id (instance_pool_id) if the driver pool is not\nassigned.", + "required": false + }, + "driver_node_type_id": { + "name": "driver_node_type_id", + "type": "string", + "description": "The node type of the Spark driver.\nNote that this field is optional; if unset, the driver node type will be set as the same value\nas `node_type_id` defined above.", + "required": false + }, + "enable_local_disk_encryption": { + "name": "enable_local_disk_encryption", + "type": "bool", + "description": "Whether to enable local disk encryption for the cluster.", + "required": false + }, + "gcp_attributes": { + "name": "gcp_attributes", + "type": "any", + "description": "Attributes related to clusters running on Google Cloud Platform.\nIf not specified at cluster creation, a set of default values will be used.", + "required": false + }, + "init_scripts": { + "name": "init_scripts", + "type": "any", + "description": "The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. 
If `cluster_log_conf` is specified, init script logs are sent to `\u003cdestination\u003e/\u003ccluster-ID\u003e/init_scripts`.", + "required": false + }, + "instance_pool_id": { + "name": "instance_pool_id", + "type": "string", + "description": "The optional ID of the instance pool to which the cluster belongs.", + "required": false + }, + "label": { + "name": "label", + "type": "any", + "description": "A label for the cluster specification, either `default` to configure the default cluster, or `maintenance` to configure the maintenance cluster. This field is optional. The default value is `default`.", + "required": false + }, + "node_type_id": { + "name": "node_type_id", + "type": "string", + "description": "This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.", + "required": false + }, + "num_workers": { + "name": "num_workers", + "type": "any", + "description": "Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. 
For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned.", + "required": false + }, + "policy_id": { + "name": "policy_id", + "type": "string", + "description": "The ID of the cluster policy used to create the cluster if applicable.", + "required": false + }, + "spark_conf": { + "name": "spark_conf", + "type": "any", + "description": "An object containing a set of optional, user-specified Spark configuration key-value pairs.\nSee :method:clusters/create for more details.", + "required": false + }, + "spark_env_vars": { + "name": "spark_env_vars", + "type": "any", + "description": "An object containing a set of optional, user-specified environment variable key-value pairs.\nPlease note that key-value pair of the form (X,Y) will be exported as is (i.e.,\n`export X='Y'`) while launching the driver and workers.\n\nIn order to specify an additional set of `SPARK_DAEMON_JAVA_OPTS`, we recommend appending\nthem to `$SPARK_DAEMON_JAVA_OPTS` as shown in the example below. This ensures that all\ndefault databricks managed environmental variables are included as well.\n\nExample Spark environment variables:\n`{\"SPARK_WORKER_MEMORY\": \"28000m\", \"SPARK_LOCAL_DIRS\": \"/local_disk0\"}` or\n`{\"SPARK_DAEMON_JAVA_OPTS\": \"$SPARK_DAEMON_JAVA_OPTS -Dspark.shuffle.service.enabled=true\"}`", + "required": false + }, + "ssh_public_keys": { + "name": "ssh_public_keys", + "type": "any", + "description": "SSH public key contents that will be added to each Spark node in this cluster. 
The\ncorresponding private keys can be used to login with the user name `ubuntu` on port `2200`.\nUp to 10 keys can be specified.", + "required": false + } + } + }, + "pipelines.PipelineClusterAutoscale": { + "name": "PipelineClusterAutoscale", + "package": "pipelines", + "description": "pipeline cluster autoscale configuration.", + "fields": { + "max_workers": { + "name": "max_workers", + "type": "any", + "description": "The maximum number of workers to which the cluster can scale up when overloaded. `max_workers` must be strictly greater than `min_workers`.", + "required": false + }, + "min_workers": { + "name": "min_workers", + "type": "any", + "description": "The minimum number of workers the cluster can scale down to when underutilized.\nIt is also the initial number of workers the cluster will have after creation.", + "required": false + }, + "mode": { + "name": "mode", + "type": "any", + "description": "Databricks Enhanced Autoscaling optimizes cluster utilization by automatically\nallocating cluster resources based on workload volume, with minimal impact to\nthe data processing latency of your pipelines. Enhanced Autoscaling is available\nfor `updates` clusters only. The legacy autoscaling feature is used for `maintenance`\nclusters.", + "required": false + } + } + }, + "pipelines.PipelineClusterAutoscaleMode": { + "name": "PipelineClusterAutoscaleMode", + "package": "pipelines", + "description": "Databricks Enhanced Autoscaling optimizes cluster utilization by automatically\nallocating cluster resources based on workload volume, with minimal impact to\nthe data processing latency of your pipelines. Enhanced Autoscaling is available\nfor `updates` clusters only. 
The legacy autoscaling feature is used for `maintenance`\nclusters.", + "fields": {} + }, + "pipelines.PipelineDeployment": { + "name": "PipelineDeployment", + "package": "pipelines", + "description": "pipeline deployment configuration.", + "fields": { + "kind": { + "name": "kind", + "type": "any", + "description": "The deployment method that manages the pipeline.", + "required": false + }, + "metadata_file_path": { + "name": "metadata_file_path", + "type": "string", + "description": "The path to the file containing metadata about the deployment.", + "required": false + } + } + }, + "pipelines.PipelineLibrary": { + "name": "PipelineLibrary", + "package": "pipelines", + "description": "pipeline library configuration.", + "fields": { + "file": { + "name": "file", + "type": "any", + "description": "The path to a file that defines a pipeline and is stored in the Databricks Repos.", + "required": false + }, + "glob": { + "name": "glob", + "type": "any", + "description": "The unified field to include source codes.\nEach entry can be a notebook path, a file path, or a folder path that ends `/**`.\nThis field cannot be used together with `notebook` or `file`.", + "required": false + }, + "jar": { + "name": "jar", + "type": "any", + "description": "URI of the jar to be installed. 
Currently only DBFS is supported.", + "required": false + }, + "maven": { + "name": "maven", + "type": "any", + "description": "Specification of a maven library to be installed.", + "required": false + }, + "notebook": { + "name": "notebook", + "type": "any", + "description": "The path to a notebook that defines a pipeline and is stored in the Databricks workspace.", + "required": false + }, + "whl": { + "name": "whl", + "type": "any", + "description": "URI of the whl to be installed.", + "required": false, + "deprecated": true + } + } + }, + "pipelines.PipelineTrigger": { + "name": "PipelineTrigger", + "package": "pipelines", + "description": "pipeline trigger configuration.", + "fields": { + "cron": { + "name": "cron", + "type": "any", + "description": "", + "required": false + }, + "manual": { + "name": "manual", + "type": "any", + "description": "", + "required": false + } + } + }, + "pipelines.PipelinesEnvironment": { + "name": "PipelinesEnvironment", + "package": "pipelines", + "description": "The environment entity used to preserve serverless environment side panel, jobs' environment for non-notebook task, and DLT's environment for classic and serverless pipelines.\nIn this minimal environment spec, only pip dependencies are supported.", + "fields": { + "dependencies": { + "name": "dependencies", + "type": "any", + "description": "List of pip dependencies, as supported by the version of pip in this environment.\nEach dependency is a pip requirement file line https://pip.pypa.io/en/stable/reference/requirements-file-format/\nAllowed dependency could be \u003crequirement specifier\u003e, \u003carchive url/path\u003e, \u003clocal project path\u003e(WSFS or Volumes in Databricks), \u003cvcs project url\u003e", + "required": false + } + } + }, + "pipelines.PostgresCatalogConfig": { + "name": "PostgresCatalogConfig", + "package": "pipelines", + "description": "PG-specific catalog-level configuration parameters", + "fields": { + "slot_config": { + "name": 
"slot_config", + "type": "any", + "description": "Optional. The Postgres slot configuration to use for logical replication", + "required": false + } + } + }, + "pipelines.PostgresSlotConfig": { + "name": "PostgresSlotConfig", + "package": "pipelines", + "description": "PostgresSlotConfig contains the configuration for a Postgres logical replication slot", + "fields": { + "publication_name": { + "name": "publication_name", + "type": "string", + "description": "The name of the publication to use for the Postgres source", + "required": false + }, + "slot_name": { + "name": "slot_name", + "type": "string", + "description": "The name of the logical replication slot to use for the Postgres source", + "required": false + } + } + }, + "pipelines.ReportSpec": { + "name": "ReportSpec", + "package": "pipelines", + "description": "Specification for report.", + "fields": { + "destination_catalog": { + "name": "destination_catalog", + "type": "any", + "description": "Required. Destination catalog to store table.", + "required": false + }, + "destination_schema": { + "name": "destination_schema", + "type": "any", + "description": "Required. Destination schema to store table.", + "required": false + }, + "destination_table": { + "name": "destination_table", + "type": "any", + "description": "Required. Destination table name. The pipeline fails if a table with that name already exists.", + "required": false + }, + "source_url": { + "name": "source_url", + "type": "string", + "description": "Required. Report URL in the source system.", + "required": false + }, + "table_configuration": { + "name": "table_configuration", + "type": "any", + "description": "Configuration settings to control the ingestion of tables. 
These settings override the table_configuration defined in the IngestionPipelineDefinition object.", + "required": false + } + } + }, + "pipelines.RestartWindow": { + "name": "RestartWindow", + "package": "pipelines", + "description": "restart window configuration.", + "fields": { + "days_of_week": { + "name": "days_of_week", + "type": "any", + "description": "Days of week in which the restart is allowed to happen (within a five-hour window starting at start_hour).\nIf not specified all days of the week will be used.", + "required": false + }, + "start_hour": { + "name": "start_hour", + "type": "any", + "description": "An integer between 0 and 23 denoting the start hour for the restart window in the 24-hour day.\nContinuous pipeline restart is triggered only within a five-hour window starting at this hour.", + "required": false + }, + "time_zone_id": { + "name": "time_zone_id", + "type": "string", + "description": "Time zone id of restart window. See https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details.\nIf not specified, UTC will be used.", + "required": false + } + } + }, + "pipelines.RunAs": { + "name": "RunAs", + "package": "pipelines", + "description": "Write-only setting, available only in Create/Update calls. Specifies the user or service principal that the pipeline runs as. If not specified, the pipeline runs as the user who created the pipeline.\n\nOnly `user_name` or `service_principal_name` can be specified. If both are specified, an error is thrown.", + "fields": { + "service_principal_name": { + "name": "service_principal_name", + "type": "string", + "description": "Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role.", + "required": false + }, + "user_name": { + "name": "user_name", + "type": "string", + "description": "The email of an active workspace user. 
Users can only set this field to their own email.", + "required": false + } + } + }, + "pipelines.SchemaSpec": { + "name": "SchemaSpec", + "package": "pipelines", + "description": "Specification for schema.", + "fields": { + "destination_catalog": { + "name": "destination_catalog", + "type": "any", + "description": "Required. Destination catalog to store tables.", + "required": false + }, + "destination_schema": { + "name": "destination_schema", + "type": "any", + "description": "Required. Destination schema to store tables in. Tables with the same name as the source tables are created in this destination schema. The pipeline fails If a table with the same name already exists.", + "required": false + }, + "source_catalog": { + "name": "source_catalog", + "type": "any", + "description": "The source catalog name. Might be optional depending on the type of source.", + "required": false + }, + "source_schema": { + "name": "source_schema", + "type": "any", + "description": "Required. Schema name in the source database.", + "required": false + }, + "table_configuration": { + "name": "table_configuration", + "type": "any", + "description": "Configuration settings to control the ingestion of tables. 
These settings are applied to all tables in this schema and override the table_configuration defined in the IngestionPipelineDefinition object.", + "required": false + } + } + }, + "pipelines.SourceCatalogConfig": { + "name": "SourceCatalogConfig", + "package": "pipelines", + "description": "SourceCatalogConfig contains catalog-level custom configuration parameters for each source", + "fields": { + "postgres": { + "name": "postgres", + "type": "any", + "description": "Postgres-specific catalog-level configuration parameters", + "required": false + }, + "source_catalog": { + "name": "source_catalog", + "type": "any", + "description": "Source catalog name", + "required": false + } + } + }, + "pipelines.SourceConfig": { + "name": "SourceConfig", + "package": "pipelines", + "description": "source config configuration.", + "fields": { + "catalog": { + "name": "catalog", + "type": "any", + "description": "Catalog-level source configuration parameters", + "required": false + } + } + }, + "pipelines.TableSpec": { + "name": "TableSpec", + "package": "pipelines", + "description": "Specification for table.", + "fields": { + "destination_catalog": { + "name": "destination_catalog", + "type": "any", + "description": "Required. Destination catalog to store table.", + "required": false + }, + "destination_schema": { + "name": "destination_schema", + "type": "any", + "description": "Required. Destination schema to store table.", + "required": false + }, + "destination_table": { + "name": "destination_table", + "type": "any", + "description": "Optional. Destination table name. The pipeline fails if a table with that name already exists. If not set, the source table name is used.", + "required": false + }, + "source_catalog": { + "name": "source_catalog", + "type": "any", + "description": "Source catalog name. 
Might be optional depending on the type of source.", + "required": false + }, + "source_schema": { + "name": "source_schema", + "type": "any", + "description": "Schema name in the source database. Might be optional depending on the type of source.", + "required": false + }, + "source_table": { + "name": "source_table", + "type": "any", + "description": "Required. Table name in the source database.", + "required": false + }, + "table_configuration": { + "name": "table_configuration", + "type": "any", + "description": "Configuration settings to control the ingestion of tables. These settings override the table_configuration defined in the IngestionPipelineDefinition object and the SchemaSpec.", + "required": false + } + } + }, + "pipelines.TableSpecificConfig": { + "name": "TableSpecificConfig", + "package": "pipelines", + "description": "table specific config configuration.", + "fields": { + "exclude_columns": { + "name": "exclude_columns", + "type": "any", + "description": "A list of column names to be excluded for the ingestion.\nWhen not specified, include_columns fully controls what columns to be ingested.\nWhen specified, all other columns including future ones will be automatically included for ingestion.\nThis field in mutually exclusive with `include_columns`.", + "required": false + }, + "include_columns": { + "name": "include_columns", + "type": "any", + "description": "A list of column names to be included for the ingestion.\nWhen not specified, all columns except ones in exclude_columns will be included. 
Future\ncolumns will be automatically included.\nWhen specified, all other future columns will be automatically excluded from ingestion.\nThis field in mutually exclusive with `exclude_columns`.", + "required": false + }, + "primary_keys": { + "name": "primary_keys", + "type": "any", + "description": "The primary key of the table used to apply changes.", + "required": false + }, + "query_based_connector_config": { + "name": "query_based_connector_config", + "type": "any", + "description": "Configurations that are only applicable for query-based ingestion connectors.", + "required": false + }, + "row_filter": { + "name": "row_filter", + "type": "any", + "description": "(Optional, Immutable) The row filter condition to be applied to the table.\nIt must not contain the WHERE keyword, only the actual filter condition.\nIt must be in DBSQL format.", + "required": false + }, + "salesforce_include_formula_fields": { + "name": "salesforce_include_formula_fields", + "type": "any", + "description": "If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector", + "required": false + }, + "scd_type": { + "name": "scd_type", + "type": "any", + "description": "The SCD type to use to ingest the table.", + "required": false + }, + "sequence_by": { + "name": "sequence_by", + "type": "any", + "description": "The column names specifying the logical order of events in the source data. 
Spark Declarative Pipelines uses this sequencing to handle change events that arrive out of order.", + "required": false + }, + "workday_report_parameters": { + "name": "workday_report_parameters", + "type": "any", + "description": "(Optional) Additional custom parameters for Workday Report", + "required": false + } + } + }, + "pipelines.TableSpecificConfigScdType": { + "name": "TableSpecificConfigScdType", + "package": "pipelines", + "description": "The SCD type to use to ingest the table.", + "fields": {} + }, + "serving.Ai21LabsConfig": { + "name": "Ai21LabsConfig", + "package": "serving", + "description": "ai21 labs config configuration.", + "fields": { + "ai21labs_api_key": { + "name": "ai21labs_api_key", + "type": "any", + "description": "The Databricks secret key reference for an AI21 Labs API key. If you\nprefer to paste your API key directly, see `ai21labs_api_key_plaintext`.\nYou must provide an API key using one of the following fields:\n`ai21labs_api_key` or `ai21labs_api_key_plaintext`.", + "required": false + }, + "ai21labs_api_key_plaintext": { + "name": "ai21labs_api_key_plaintext", + "type": "any", + "description": "An AI21 Labs API key provided as a plaintext string. If you prefer to\nreference your key using Databricks Secrets, see `ai21labs_api_key`. 
You\nmust provide an API key using one of the following fields:\n`ai21labs_api_key` or `ai21labs_api_key_plaintext`.", + "required": false + } + } + }, + "serving.AiGatewayConfig": { + "name": "AiGatewayConfig", + "package": "serving", + "description": "ai gateway config configuration.", + "fields": { + "fallback_config": { + "name": "fallback_config", + "type": "any", + "description": "Configuration for traffic fallback which auto fallbacks to other served entities if the request to a served\nentity fails with certain error codes, to increase availability.", + "required": false + }, + "guardrails": { + "name": "guardrails", + "type": "any", + "description": "Configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses.", + "required": false + }, + "inference_table_config": { + "name": "inference_table_config", + "type": "any", + "description": "Configuration for payload logging using inference tables.\nUse these tables to monitor and audit data being sent to and received from model APIs and to improve model quality.", + "required": false + }, + "rate_limits": { + "name": "rate_limits", + "type": "any", + "description": "Configuration for rate limits which can be set to limit endpoint traffic.", + "required": false + }, + "usage_tracking_config": { + "name": "usage_tracking_config", + "type": "any", + "description": "Configuration to enable usage tracking using system tables.\nThese tables allow you to monitor operational usage on endpoints and their associated costs.", + "required": false + } + } + }, + "serving.AiGatewayGuardrailParameters": { + "name": "AiGatewayGuardrailParameters", + "package": "serving", + "description": "ai gateway guardrail parameters configuration.", + "fields": { + "invalid_keywords": { + "name": "invalid_keywords", + "type": "any", + "description": "List of invalid keywords.\nAI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.", + 
"required": false, + "deprecated": true + }, + "pii": { + "name": "pii", + "type": "any", + "description": "Configuration for guardrail PII filter.", + "required": false + }, + "safety": { + "name": "safety", + "type": "any", + "description": "Indicates whether the safety filter is enabled.", + "required": false + }, + "valid_topics": { + "name": "valid_topics", + "type": "any", + "description": "The list of allowed topics.\nGiven a chat request, this guardrail flags the request if its topic is not in the allowed topics.", + "required": false, + "deprecated": true + } + } + }, + "serving.AiGatewayGuardrailPiiBehavior": { + "name": "AiGatewayGuardrailPiiBehavior", + "package": "serving", + "description": "ai gateway guardrail pii behavior configuration.", + "fields": { + "behavior": { + "name": "behavior", + "type": "any", + "description": "Configuration for input guardrail filters.", + "required": false + } + } + }, + "serving.AiGatewayGuardrailPiiBehaviorBehavior": { + "name": "AiGatewayGuardrailPiiBehaviorBehavior", + "package": "serving", + "description": "ai gateway guardrail pii behavior behavior configuration.", + "fields": {} + }, + "serving.AiGatewayGuardrails": { + "name": "AiGatewayGuardrails", + "package": "serving", + "description": "ai gateway guardrails configuration.", + "fields": { + "input": { + "name": "input", + "type": "any", + "description": "Configuration for input guardrail filters.", + "required": false + }, + "output": { + "name": "output", + "type": "any", + "description": "Configuration for output guardrail filters.", + "required": false + } + } + }, + "serving.AiGatewayInferenceTableConfig": { + "name": "AiGatewayInferenceTableConfig", + "package": "serving", + "description": "ai gateway inference table config configuration.", + "fields": { + "catalog_name": { + "name": "catalog_name", + "type": "string", + "description": "The name of the catalog in Unity Catalog. 
Required when enabling inference tables.\nNOTE: On update, you have to disable inference table first in order to change the catalog name.", + "required": false + }, + "enabled": { + "name": "enabled", + "type": "bool", + "description": "Indicates whether the inference table is enabled.", + "required": false + }, + "schema_name": { + "name": "schema_name", + "type": "string", + "description": "The name of the schema in Unity Catalog. Required when enabling inference tables.\nNOTE: On update, you have to disable inference table first in order to change the schema name.", + "required": false + }, + "table_name_prefix": { + "name": "table_name_prefix", + "type": "any", + "description": "The prefix of the table in Unity Catalog.\nNOTE: On update, you have to disable inference table first in order to change the prefix name.", + "required": false + } + } + }, + "serving.AiGatewayRateLimit": { + "name": "AiGatewayRateLimit", + "package": "serving", + "description": "ai gateway rate limit configuration.", + "fields": { + "calls": { + "name": "calls", + "type": "any", + "description": "Used to specify how many calls are allowed for a key within the renewal_period.", + "required": false + }, + "key": { + "name": "key", + "type": "any", + "description": "Key field for a rate limit. Currently, 'user', 'user_group, 'service_principal', and 'endpoint' are supported,\nwith 'endpoint' being the default if not specified.", + "required": false + }, + "principal": { + "name": "principal", + "type": "any", + "description": "Principal field for a user, user group, or service principal to apply rate limiting to. Accepts a user email, group name, or service principal application ID.", + "required": false + }, + "renewal_period": { + "name": "renewal_period", + "type": "any", + "description": "Renewal period field for a rate limit. 
Currently, only 'minute' is supported.", + "required": false + }, + "tokens": { + "name": "tokens", + "type": "any", + "description": "Used to specify how many tokens are allowed for a key within the renewal_period.", + "required": false + } + } + }, + "serving.AiGatewayRateLimitKey": { + "name": "AiGatewayRateLimitKey", + "package": "serving", + "description": "ai gateway rate limit key configuration.", + "fields": {} + }, + "serving.AiGatewayRateLimitRenewalPeriod": { + "name": "AiGatewayRateLimitRenewalPeriod", + "package": "serving", + "description": "ai gateway rate limit renewal period configuration.", + "fields": {} + }, + "serving.AiGatewayUsageTrackingConfig": { + "name": "AiGatewayUsageTrackingConfig", + "package": "serving", + "description": "ai gateway usage tracking config configuration.", + "fields": { + "enabled": { + "name": "enabled", + "type": "bool", + "description": "Whether to enable usage tracking.", + "required": false + } + } + }, + "serving.AmazonBedrockConfig": { + "name": "AmazonBedrockConfig", + "package": "serving", + "description": "amazon bedrock config configuration.", + "fields": { + "aws_access_key_id": { + "name": "aws_access_key_id", + "type": "string", + "description": "The Databricks secret key reference for an AWS access key ID with\npermissions to interact with Bedrock services. If you prefer to paste\nyour API key directly, see `aws_access_key_id_plaintext`. You must provide an API\nkey using one of the following fields: `aws_access_key_id` or\n`aws_access_key_id_plaintext`.", + "required": false + }, + "aws_access_key_id_plaintext": { + "name": "aws_access_key_id_plaintext", + "type": "any", + "description": "An AWS access key ID with permissions to interact with Bedrock services\nprovided as a plaintext string. If you prefer to reference your key using\nDatabricks Secrets, see `aws_access_key_id`. 
You must provide an API key\nusing one of the following fields: `aws_access_key_id` or\n`aws_access_key_id_plaintext`.", + "required": false + }, + "aws_region": { + "name": "aws_region", + "type": "any", + "description": "The AWS region to use. Bedrock has to be enabled there.", + "required": false + }, + "aws_secret_access_key": { + "name": "aws_secret_access_key", + "type": "any", + "description": "The Databricks secret key reference for an AWS secret access key paired\nwith the access key ID, with permissions to interact with Bedrock\nservices. If you prefer to paste your API key directly, see\n`aws_secret_access_key_plaintext`. You must provide an API key using one\nof the following fields: `aws_secret_access_key` or\n`aws_secret_access_key_plaintext`.", + "required": false + }, + "aws_secret_access_key_plaintext": { + "name": "aws_secret_access_key_plaintext", + "type": "any", + "description": "An AWS secret access key paired with the access key ID, with permissions\nto interact with Bedrock services provided as a plaintext string. If you\nprefer to reference your key using Databricks Secrets, see\n`aws_secret_access_key`. You must provide an API key using one of the\nfollowing fields: `aws_secret_access_key` or\n`aws_secret_access_key_plaintext`.", + "required": false + }, + "bedrock_provider": { + "name": "bedrock_provider", + "type": "any", + "description": "The underlying provider in Amazon Bedrock. 
Supported values (case\ninsensitive) include: Anthropic, Cohere, AI21Labs, Amazon.", + "required": false + }, + "instance_profile_arn": { + "name": "instance_profile_arn", + "type": "any", + "description": "ARN of the instance profile that the external model will use to access AWS resources.\nYou must authenticate using an instance profile or access keys.\nIf you prefer to authenticate using access keys, see `aws_access_key_id`,\n`aws_access_key_id_plaintext`, `aws_secret_access_key` and `aws_secret_access_key_plaintext`.", + "required": false + } + } + }, + "serving.AmazonBedrockConfigBedrockProvider": { + "name": "AmazonBedrockConfigBedrockProvider", + "package": "serving", + "description": "amazon bedrock config bedrock provider configuration.", + "fields": {} + }, + "serving.AnthropicConfig": { + "name": "AnthropicConfig", + "package": "serving", + "description": "anthropic config configuration.", + "fields": { + "anthropic_api_key": { + "name": "anthropic_api_key", + "type": "any", + "description": "The Databricks secret key reference for an Anthropic API key. If you\nprefer to paste your API key directly, see `anthropic_api_key_plaintext`.\nYou must provide an API key using one of the following fields:\n`anthropic_api_key` or `anthropic_api_key_plaintext`.", + "required": false + }, + "anthropic_api_key_plaintext": { + "name": "anthropic_api_key_plaintext", + "type": "any", + "description": "The Anthropic API key provided as a plaintext string. If you prefer to\nreference your key using Databricks Secrets, see `anthropic_api_key`. 
You\nmust provide an API key using one of the following fields:\n`anthropic_api_key` or `anthropic_api_key_plaintext`.", + "required": false + } + } + }, + "serving.ApiKeyAuth": { + "name": "ApiKeyAuth", + "package": "serving", + "description": "api key auth configuration.", + "fields": { + "key": { + "name": "key", + "type": "any", + "description": "The name of the API key parameter used for authentication.", + "required": false + }, + "value": { + "name": "value", + "type": "any", + "description": "The Databricks secret key reference for an API Key.\nIf you prefer to paste your token directly, see `value_plaintext`.", + "required": false + }, + "value_plaintext": { + "name": "value_plaintext", + "type": "any", + "description": "The API Key provided as a plaintext string. If you prefer to reference your\ntoken using Databricks Secrets, see `value`.", + "required": false + } + } + }, + "serving.AutoCaptureConfigInput": { + "name": "AutoCaptureConfigInput", + "package": "serving", + "description": "auto capture config input configuration.", + "fields": { + "catalog_name": { + "name": "catalog_name", + "type": "string", + "description": "The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if the inference table is already enabled.", + "required": false + }, + "enabled": { + "name": "enabled", + "type": "bool", + "description": "Indicates whether the inference table is enabled.", + "required": false + }, + "schema_name": { + "name": "schema_name", + "type": "string", + "description": "The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if the inference table is already enabled.", + "required": false + }, + "table_name_prefix": { + "name": "table_name_prefix", + "type": "any", + "description": "The prefix of the table in Unity Catalog. 
NOTE: On update, you cannot change the prefix name if the inference table is already enabled.", + "required": false + } + } + }, + "serving.BearerTokenAuth": { + "name": "BearerTokenAuth", + "package": "serving", + "description": "bearer token auth configuration.", + "fields": { + "token": { + "name": "token", + "type": "any", + "description": "The Databricks secret key reference for a token.\nIf you prefer to paste your token directly, see `token_plaintext`.", + "required": false + }, + "token_plaintext": { + "name": "token_plaintext", + "type": "any", + "description": "The token provided as a plaintext string. If you prefer to reference your\ntoken using Databricks Secrets, see `token`.", + "required": false + } + } + }, + "serving.CohereConfig": { + "name": "CohereConfig", + "package": "serving", + "description": "cohere config configuration.", + "fields": { + "cohere_api_base": { + "name": "cohere_api_base", + "type": "any", + "description": "This is an optional field to provide a customized base URL for the Cohere\nAPI. If left unspecified, the standard Cohere base URL is used.", + "required": false + }, + "cohere_api_key": { + "name": "cohere_api_key", + "type": "any", + "description": "The Databricks secret key reference for a Cohere API key. If you prefer\nto paste your API key directly, see `cohere_api_key_plaintext`. You must\nprovide an API key using one of the following fields: `cohere_api_key` or\n`cohere_api_key_plaintext`.", + "required": false + }, + "cohere_api_key_plaintext": { + "name": "cohere_api_key_plaintext", + "type": "any", + "description": "The Cohere API key provided as a plaintext string. If you prefer to\nreference your key using Databricks Secrets, see `cohere_api_key`. 
You\nmust provide an API key using one of the following fields:\n`cohere_api_key` or `cohere_api_key_plaintext`.", + "required": false + } + } + }, + "serving.CustomProviderConfig": { + "name": "CustomProviderConfig", + "package": "serving", + "description": "Configs needed to create a custom provider model route.", + "fields": { + "api_key_auth": { + "name": "api_key_auth", + "type": "any", + "description": "This is a field to provide API key authentication for the custom provider API.\nYou can only specify one authentication method.", + "required": false + }, + "bearer_token_auth": { + "name": "bearer_token_auth", + "type": "any", + "description": "This is a field to provide bearer token authentication for the custom provider API.\nYou can only specify one authentication method.", + "required": false + }, + "custom_provider_url": { + "name": "custom_provider_url", + "type": "string", + "description": "This is a field to provide the URL of the custom provider API.", + "required": false + } + } + }, + "serving.DatabricksModelServingConfig": { + "name": "DatabricksModelServingConfig", + "package": "serving", + "description": "databricks model serving config configuration.", + "fields": { + "databricks_api_token": { + "name": "databricks_api_token", + "type": "any", + "description": "The Databricks secret key reference for a Databricks API token that\ncorresponds to a user or service principal with Can Query access to the\nmodel serving endpoint pointed to by this external model. If you prefer\nto paste your API key directly, see `databricks_api_token_plaintext`. 
You\nmust provide an API key using one of the following fields:\n`databricks_api_token` or `databricks_api_token_plaintext`.", + "required": false + }, + "databricks_api_token_plaintext": { + "name": "databricks_api_token_plaintext", + "type": "any", + "description": "The Databricks API token that corresponds to a user or service principal\nwith Can Query access to the model serving endpoint pointed to by this\nexternal model provided as a plaintext string. If you prefer to reference\nyour key using Databricks Secrets, see `databricks_api_token`. You must\nprovide an API key using one of the following fields:\n`databricks_api_token` or `databricks_api_token_plaintext`.", + "required": false + }, + "databricks_workspace_url": { + "name": "databricks_workspace_url", + "type": "string", + "description": "The URL of the Databricks workspace containing the model serving endpoint\npointed to by this external model.", + "required": false + } + } + }, + "serving.EmailNotifications": { + "name": "EmailNotifications", + "package": "serving", + "description": "email notifications configuration.", + "fields": { + "on_update_failure": { + "name": "on_update_failure", + "type": "any", + "description": "A list of email addresses to be notified when an endpoint fails to update its configuration or state.", + "required": false + }, + "on_update_success": { + "name": "on_update_success", + "type": "any", + "description": "A list of email addresses to be notified when an endpoint successfully updates its configuration or state.", + "required": false + } + } + }, + "serving.EndpointCoreConfigInput": { + "name": "EndpointCoreConfigInput", + "package": "serving", + "description": "endpoint core config input configuration.", + "fields": { + "auto_capture_config": { + "name": "auto_capture_config", + "type": "any", + "description": "Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.\nNote: this field is deprecated for creating new 
provisioned throughput endpoints,\nor updating existing provisioned throughput endpoints that never have inference table configured;\nin these cases please use AI Gateway to manage inference tables.", + "required": false + }, + "served_entities": { + "name": "served_entities", + "type": "any", + "description": "The list of served entities under the serving endpoint config.", + "required": false + }, + "served_models": { + "name": "served_models", + "type": "any", + "description": "(Deprecated, use served_entities instead) The list of served models under the serving endpoint config.", + "required": false + }, + "traffic_config": { + "name": "traffic_config", + "type": "any", + "description": "The traffic configuration associated with the serving endpoint config.", + "required": false + } + } + }, + "serving.EndpointTag": { + "name": "EndpointTag", + "package": "serving", + "description": "endpoint tag configuration.", + "fields": { + "key": { + "name": "key", + "type": "any", + "description": "Key field for a serving endpoint tag.", + "required": false + }, + "value": { + "name": "value", + "type": "any", + "description": "Optional value field for a serving endpoint tag.", + "required": false + } + } + }, + "serving.ExternalModel": { + "name": "ExternalModel", + "package": "serving", + "description": "external model configuration.", + "fields": { + "ai21labs_config": { + "name": "ai21labs_config", + "type": "any", + "description": "AI21Labs Config. Only required if the provider is 'ai21labs'.", + "required": false + }, + "amazon_bedrock_config": { + "name": "amazon_bedrock_config", + "type": "any", + "description": "Amazon Bedrock Config. Only required if the provider is 'amazon-bedrock'.", + "required": false + }, + "anthropic_config": { + "name": "anthropic_config", + "type": "any", + "description": "Anthropic Config. 
Only required if the provider is 'anthropic'.", + "required": false + }, + "cohere_config": { + "name": "cohere_config", + "type": "any", + "description": "Cohere Config. Only required if the provider is 'cohere'.", + "required": false + }, + "custom_provider_config": { + "name": "custom_provider_config", + "type": "any", + "description": "Custom Provider Config. Only required if the provider is 'custom'.", + "required": false + }, + "databricks_model_serving_config": { + "name": "databricks_model_serving_config", + "type": "any", + "description": "Databricks Model Serving Config. Only required if the provider is 'databricks-model-serving'.", + "required": false + }, + "google_cloud_vertex_ai_config": { + "name": "google_cloud_vertex_ai_config", + "type": "any", + "description": "Google Cloud Vertex AI Config. Only required if the provider is 'google-cloud-vertex-ai'.", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "The name of the external model.", + "required": false + }, + "openai_config": { + "name": "openai_config", + "type": "any", + "description": "OpenAI Config. Only required if the provider is 'openai'.", + "required": false + }, + "palm_config": { + "name": "palm_config", + "type": "any", + "description": "PaLM Config. Only required if the provider is 'palm'.", + "required": false + }, + "provider": { + "name": "provider", + "type": "any", + "description": "The name of the provider for the external model. 
Currently, the supported providers are 'ai21labs', 'anthropic', 'amazon-bedrock', 'cohere', 'databricks-model-serving', 'google-cloud-vertex-ai', 'openai', 'palm', and 'custom'.", + "required": false + }, + "task": { + "name": "task", + "type": "any", + "description": "The task type of the external model.", + "required": false + } + } + }, + "serving.ExternalModelProvider": { + "name": "ExternalModelProvider", + "package": "serving", + "description": "external model provider configuration.", + "fields": {} + }, + "serving.FallbackConfig": { + "name": "FallbackConfig", + "package": "serving", + "description": "fallback config configuration.", + "fields": { + "enabled": { + "name": "enabled", + "type": "bool", + "description": "Whether to enable traffic fallback. When a served entity in the serving endpoint returns specific error\ncodes (e.g. 500), the request will automatically be round-robin attempted with other served entities in the same\nendpoint, following the order of served entity list, until a successful response is returned.\nIf all attempts fail, return the last response with the error code.", + "required": false + } + } + }, + "serving.GoogleCloudVertexAiConfig": { + "name": "GoogleCloudVertexAiConfig", + "package": "serving", + "description": "google cloud vertex ai config configuration.", + "fields": { + "private_key": { + "name": "private_key", + "type": "any", + "description": "The Databricks secret key reference for a private key for the service\naccount which has access to the Google Cloud Vertex AI Service. See [Best\npractices for managing service account keys]. If you prefer to paste your\nAPI key directly, see `private_key_plaintext`. 
You must provide an API\nkey using one of the following fields: `private_key` or\n`private_key_plaintext`\n\n[Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys", + "required": false + }, + "private_key_plaintext": { + "name": "private_key_plaintext", + "type": "any", + "description": "The private key for the service account which has access to the Google\nCloud Vertex AI Service provided as a plaintext secret. See [Best\npractices for managing service account keys]. If you prefer to reference\nyour key using Databricks Secrets, see `private_key`. You must provide an\nAPI key using one of the following fields: `private_key` or\n`private_key_plaintext`.\n\n[Best practices for managing service account keys]: https://cloud.google.com/iam/docs/best-practices-for-managing-service-account-keys", + "required": false + }, + "project_id": { + "name": "project_id", + "type": "string", + "description": "This is the Google Cloud project id that the service account is\nassociated with.", + "required": false + }, + "region": { + "name": "region", + "type": "any", + "description": "This is the region for the Google Cloud Vertex AI Service. See [supported\nregions] for more details. 
Some models are only available in specific\nregions.\n\n[supported regions]: https://cloud.google.com/vertex-ai/docs/general/locations", + "required": false + } + } + }, + "serving.OpenAiConfig": { + "name": "OpenAiConfig", + "package": "serving", + "description": "Configs needed to create an OpenAI model route.", + "fields": { + "microsoft_entra_client_id": { + "name": "microsoft_entra_client_id", + "type": "string", + "description": "This field is only required for Azure AD OpenAI and is the Microsoft\nEntra Client ID.", + "required": false + }, + "microsoft_entra_client_secret": { + "name": "microsoft_entra_client_secret", + "type": "any", + "description": "The Databricks secret key reference for a client secret used for\nMicrosoft Entra ID authentication. If you prefer to paste your client\nsecret directly, see `microsoft_entra_client_secret_plaintext`. You must\nprovide an API key using one of the following fields:\n`microsoft_entra_client_secret` or\n`microsoft_entra_client_secret_plaintext`.", + "required": false + }, + "microsoft_entra_client_secret_plaintext": { + "name": "microsoft_entra_client_secret_plaintext", + "type": "any", + "description": "The client secret used for Microsoft Entra ID authentication provided as\na plaintext string. If you prefer to reference your key using Databricks\nSecrets, see `microsoft_entra_client_secret`. You must provide an API key\nusing one of the following fields: `microsoft_entra_client_secret` or\n`microsoft_entra_client_secret_plaintext`.", + "required": false + }, + "microsoft_entra_tenant_id": { + "name": "microsoft_entra_tenant_id", + "type": "string", + "description": "This field is only required for Azure AD OpenAI and is the Microsoft\nEntra Tenant ID.", + "required": false + }, + "openai_api_base": { + "name": "openai_api_base", + "type": "any", + "description": "This is a field to provide a customized base URL for the OpenAI API. 
For\nAzure OpenAI, this field is required, and is the base URL for the Azure\nOpenAI API service provided by Azure. For other OpenAI API types, this\nfield is optional, and if left unspecified, the standard OpenAI base URL\nis used.", + "required": false + }, + "openai_api_key": { + "name": "openai_api_key", + "type": "any", + "description": "The Databricks secret key reference for an OpenAI API key using the\nOpenAI or Azure service. If you prefer to paste your API key directly,\nsee `openai_api_key_plaintext`. You must provide an API key using one of\nthe following fields: `openai_api_key` or `openai_api_key_plaintext`.", + "required": false + }, + "openai_api_key_plaintext": { + "name": "openai_api_key_plaintext", + "type": "any", + "description": "The OpenAI API key using the OpenAI or Azure service provided as a\nplaintext string. If you prefer to reference your key using Databricks\nSecrets, see `openai_api_key`. You must provide an API key using one of\nthe following fields: `openai_api_key` or `openai_api_key_plaintext`.", + "required": false + }, + "openai_api_type": { + "name": "openai_api_type", + "type": "any", + "description": "This is an optional field to specify the type of OpenAI API to use. For\nAzure OpenAI, this field is required, and adjust this parameter to\nrepresent the preferred security access validation protocol. For access\ntoken validation, use azure. For authentication using Azure Active\nDirectory (Azure AD) use, azuread.", + "required": false + }, + "openai_api_version": { + "name": "openai_api_version", + "type": "any", + "description": "This is an optional field to specify the OpenAI API version. 
For Azure\nOpenAI, this field is required, and is the version of the Azure OpenAI\nservice to utilize, specified by a date.", + "required": false + }, + "openai_deployment_name": { + "name": "openai_deployment_name", + "type": "string", + "description": "This field is only required for Azure OpenAI and is the name of the\ndeployment resource for the Azure OpenAI service.", + "required": false + }, + "openai_organization": { + "name": "openai_organization", + "type": "any", + "description": "This is an optional field to specify the organization in OpenAI or Azure\nOpenAI.", + "required": false + } + } + }, + "serving.PaLmConfig": { + "name": "PaLmConfig", + "package": "serving", + "description": "pa lm config configuration.", + "fields": { + "palm_api_key": { + "name": "palm_api_key", + "type": "any", + "description": "The Databricks secret key reference for a PaLM API key. If you prefer to\npaste your API key directly, see `palm_api_key_plaintext`. You must\nprovide an API key using one of the following fields: `palm_api_key` or\n`palm_api_key_plaintext`.", + "required": false + }, + "palm_api_key_plaintext": { + "name": "palm_api_key_plaintext", + "type": "any", + "description": "The PaLM API key provided as a plaintext string. If you prefer to\nreference your key using Databricks Secrets, see `palm_api_key`. You must\nprovide an API key using one of the following fields: `palm_api_key` or\n`palm_api_key_plaintext`.", + "required": false + } + } + }, + "serving.RateLimit": { + "name": "RateLimit", + "package": "serving", + "description": "rate limit configuration.", + "fields": { + "calls": { + "name": "calls", + "type": "any", + "description": "Used to specify how many calls are allowed for a key within the renewal_period.", + "required": false + }, + "key": { + "name": "key", + "type": "any", + "description": "Key field for a serving endpoint rate limit. 
Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.", + "required": false + }, + "renewal_period": { + "name": "renewal_period", + "type": "any", + "description": "Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported.", + "required": false + } + } + }, + "serving.RateLimitKey": { + "name": "RateLimitKey", + "package": "serving", + "description": "rate limit key configuration.", + "fields": {} + }, + "serving.RateLimitRenewalPeriod": { + "name": "RateLimitRenewalPeriod", + "package": "serving", + "description": "rate limit renewal period configuration.", + "fields": {} + }, + "serving.Route": { + "name": "Route", + "package": "serving", + "description": "route configuration.", + "fields": { + "served_entity_name": { + "name": "served_entity_name", + "type": "string", + "description": "", + "required": false + }, + "served_model_name": { + "name": "served_model_name", + "type": "string", + "description": "The name of the served model this route configures traffic for.", + "required": false + }, + "traffic_percentage": { + "name": "traffic_percentage", + "type": "any", + "description": "The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.", + "required": false + } + } + }, + "serving.ServedEntityInput": { + "name": "ServedEntityInput", + "package": "serving", + "description": "served entity input configuration.", + "fields": { + "entity_name": { + "name": "entity_name", + "type": "string", + "description": "The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type FEATURE_SPEC in the UC. 
If it is a UC object, the full name of the object should be given in the form of **catalog_name.schema_name.model_name**.", + "required": false + }, + "entity_version": { + "name": "entity_version", + "type": "any", + "description": "", + "required": false + }, + "environment_vars": { + "name": "environment_vars", + "type": "any", + "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. Example entity environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`", + "required": false + }, + "external_model": { + "name": "external_model", + "type": "any", + "description": "The external model to be served. NOTE: Only one of external_model and (entity_name, entity_version, workload_size, workload_type, and scale_to_zero_enabled) can be specified with the latter set being used for custom model serving for a Databricks registered model. For an existing endpoint with external_model, it cannot be updated to an endpoint without external_model. If the endpoint is created without external_model, users cannot update it to add external_model later. The task type of all external models within an endpoint must be the same.", + "required": false + }, + "instance_profile_arn": { + "name": "instance_profile_arn", + "type": "any", + "description": "ARN of the instance profile that the served entity uses to access AWS resources.", + "required": false + }, + "max_provisioned_concurrency": { + "name": "max_provisioned_concurrency", + "type": "any", + "description": "The maximum provisioned concurrency that the endpoint can scale up to. 
Do not use if workload_size is specified.", + "required": false + }, + "max_provisioned_throughput": { + "name": "max_provisioned_throughput", + "type": "any", + "description": "The maximum tokens per second that the endpoint can scale up to.", + "required": false + }, + "min_provisioned_concurrency": { + "name": "min_provisioned_concurrency", + "type": "any", + "description": "The minimum provisioned concurrency that the endpoint can scale down to. Do not use if workload_size is specified.", + "required": false + }, + "min_provisioned_throughput": { + "name": "min_provisioned_throughput", + "type": "any", + "description": "The minimum tokens per second that the endpoint can scale down to.", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version.", + "required": false + }, + "provisioned_model_units": { + "name": "provisioned_model_units", + "type": "any", + "description": "The number of model units provisioned.", + "required": false + }, + "scale_to_zero_enabled": { + "name": "scale_to_zero_enabled", + "type": "bool", + "description": "Whether the compute resources for the served entity should scale down to zero.", + "required": false + }, + "workload_size": { + "name": "workload_size", + "type": "int", + "description": "The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. 
Valid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency). Additional custom workload sizes can also be used when available in the workspace. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified.", + "required": false + }, + "workload_type": { + "name": "workload_type", + "type": "any", + "description": "The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is \"CPU\". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the available [GPU types](https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).", + "required": false + } + } + }, + "serving.ServedModelInput": { + "name": "ServedModelInput", + "package": "serving", + "description": "served model input configuration.", + "fields": { + "environment_vars": { + "name": "environment_vars", + "type": "any", + "description": "An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and subject to change. 
Example entity environment variables that refer to Databricks secrets: `{\"OPENAI_API_KEY\": \"{{secrets/my_scope/my_key}}\", \"DATABRICKS_TOKEN\": \"{{secrets/my_scope2/my_key2}}\"}`", + "required": false + }, + "instance_profile_arn": { + "name": "instance_profile_arn", + "type": "any", + "description": "ARN of the instance profile that the served entity uses to access AWS resources.", + "required": false + }, + "max_provisioned_concurrency": { + "name": "max_provisioned_concurrency", + "type": "any", + "description": "The maximum provisioned concurrency that the endpoint can scale up to. Do not use if workload_size is specified.", + "required": false + }, + "max_provisioned_throughput": { + "name": "max_provisioned_throughput", + "type": "any", + "description": "The maximum tokens per second that the endpoint can scale up to.", + "required": false + }, + "min_provisioned_concurrency": { + "name": "min_provisioned_concurrency", + "type": "any", + "description": "The minimum provisioned concurrency that the endpoint can scale down to. Do not use if workload_size is specified.", + "required": false + }, + "min_provisioned_throughput": { + "name": "min_provisioned_throughput", + "type": "any", + "description": "The minimum tokens per second that the endpoint can scale down to.", + "required": false + }, + "model_name": { + "name": "model_name", + "type": "string", + "description": "", + "required": false + }, + "model_version": { + "name": "model_version", + "type": "any", + "description": "", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to external_model.name, with '.' 
and ':' replaced with '-', and if not specified for other entities, it defaults to entity_name-entity_version.", + "required": false + }, + "provisioned_model_units": { + "name": "provisioned_model_units", + "type": "any", + "description": "The number of model units provisioned.", + "required": false + }, + "scale_to_zero_enabled": { + "name": "scale_to_zero_enabled", + "type": "bool", + "description": "Whether the compute resources for the served entity should scale down to zero.", + "required": false + }, + "workload_size": { + "name": "workload_size", + "type": "int", + "description": "The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are \"Small\" (4 - 4 provisioned concurrency), \"Medium\" (8 - 16 provisioned concurrency), and \"Large\" (16 - 64 provisioned concurrency). Additional custom workload sizes can also be used when available in the workspace. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0. Do not use if min_provisioned_concurrency and max_provisioned_concurrency are specified.", + "required": false + }, + "workload_type": { + "name": "workload_type", + "type": "any", + "description": "The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is \"CPU\". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. 
See the available [GPU types](https://docs.databricks.com/en/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).", + "required": false + } + } + }, + "serving.ServedModelInputWorkloadType": { + "name": "ServedModelInputWorkloadType", + "package": "serving", + "description": "Please keep this in sync with workload types in InferenceEndpointEntities.scala", + "fields": {} + }, + "serving.ServingModelWorkloadType": { + "name": "ServingModelWorkloadType", + "package": "serving", + "description": "Please keep this in sync with workload types in InferenceEndpointEntities.scala", + "fields": {} + }, + "serving.TrafficConfig": { + "name": "TrafficConfig", + "package": "serving", + "description": "traffic config configuration.", + "fields": { + "routes": { + "name": "routes", + "type": "any", + "description": "The list of routes that define traffic to each served entity.", + "required": false + } + } + }, + "sql.Aggregation": { + "name": "Aggregation", + "package": "sql", + "description": "aggregation configuration.", + "fields": {} + }, + "sql.AlertEvaluationState": { + "name": "AlertEvaluationState", + "package": "sql", + "description": "UNSPECIFIED - default unspecified value for proto enum, do not use it in the code\nUNKNOWN - alert not yet evaluated\nTRIGGERED - alert is triggered\nOK - alert is not triggered\nERROR - alert evaluation failed", + "fields": {} + }, + "sql.AlertLifecycleState": { + "name": "AlertLifecycleState", + "package": "sql", + "description": "alert lifecycle state configuration.", + "fields": {} + }, + "sql.AlertV2Evaluation": { + "name": "AlertV2Evaluation", + "package": "sql", + "description": "alert v2 evaluation configuration.", + "fields": { + "comparison_operator": { + "name": "comparison_operator", + "type": "any", + "description": "Operator used for comparison in alert evaluation.", + "required": false + }, + "empty_result_state": { + "name": "empty_result_state", + "type": "any", + 
"description": "Alert state if result is empty. Please avoid setting this field to be `UNKNOWN` because `UNKNOWN` state is planned to be deprecated.", + "required": false + }, + "last_evaluated_at": { + "name": "last_evaluated_at", + "type": "string (timestamp)", + "description": "Timestamp of the last evaluation.", + "required": false, + "output_only": true + }, + "notification": { + "name": "notification", + "type": "any", + "description": "User or Notification Destination to notify when alert is triggered.", + "required": false + }, + "source": { + "name": "source", + "type": "any", + "description": "Source column from result to use to evaluate alert", + "required": false + }, + "state": { + "name": "state", + "type": "any", + "description": "Latest state of alert evaluation.", + "required": false, + "output_only": true + }, + "threshold": { + "name": "threshold", + "type": "any", + "description": "Threshold to user for alert evaluation, can be a column or a value.", + "required": false + } + } + }, + "sql.AlertV2Notification": { + "name": "AlertV2Notification", + "package": "sql", + "description": "alert v2 notification configuration.", + "fields": { + "notify_on_ok": { + "name": "notify_on_ok", + "type": "any", + "description": "Whether to notify alert subscribers when alert returns back to normal.", + "required": false + }, + "retrigger_seconds": { + "name": "retrigger_seconds", + "type": "int", + "description": "Number of seconds an alert waits after being triggered before it is allowed to send another notification.\nIf set to 0 or omitted, the alert will not send any further notifications after the first trigger\nSetting this value to 1 allows the alert to send a notification on every evaluation where the condition is met, effectively making it always retrigger for notification purposes.", + "required": false + }, + "subscriptions": { + "name": "subscriptions", + "type": "any", + "description": "", + "required": false + } + } + }, + "sql.AlertV2Operand": { 
+ "name": "AlertV2Operand", + "package": "sql", + "description": "alert v2 operand configuration.", + "fields": { + "column": { + "name": "column", + "type": "any", + "description": "", + "required": false + }, + "value": { + "name": "value", + "type": "any", + "description": "", + "required": false + } + } + }, + "sql.AlertV2OperandColumn": { + "name": "AlertV2OperandColumn", + "package": "sql", + "description": "alert v2 operand column configuration.", + "fields": { + "aggregation": { + "name": "aggregation", + "type": "any", + "description": "If not set, the behavior is equivalent to using `First row` in the UI.", + "required": false + }, + "display": { + "name": "display", + "type": "any", + "description": "", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "", + "required": false + } + } + }, + "sql.AlertV2OperandValue": { + "name": "AlertV2OperandValue", + "package": "sql", + "description": "alert v2 operand value configuration.", + "fields": { + "bool_value": { + "name": "bool_value", + "type": "any", + "description": "", + "required": false + }, + "double_value": { + "name": "double_value", + "type": "any", + "description": "", + "required": false + }, + "string_value": { + "name": "string_value", + "type": "any", + "description": "", + "required": false + } + } + }, + "sql.AlertV2RunAs": { + "name": "AlertV2RunAs", + "package": "sql", + "description": "alert v2 run as configuration.", + "fields": { + "service_principal_name": { + "name": "service_principal_name", + "type": "string", + "description": "Application ID of an active service principal. Setting this field requires the `servicePrincipal/user` role.", + "required": false + }, + "user_name": { + "name": "user_name", + "type": "string", + "description": "The email of an active workspace user. 
Can only set this field to their own email.", + "required": false + } + } + }, + "sql.AlertV2Subscription": { + "name": "AlertV2Subscription", + "package": "sql", + "description": "alert v2 subscription configuration.", + "fields": { + "destination_id": { + "name": "destination_id", + "type": "string", + "description": "", + "required": false + }, + "user_email": { + "name": "user_email", + "type": "any", + "description": "", + "required": false + } + } + }, + "sql.Channel": { + "name": "Channel", + "package": "sql", + "description": "Configures the channel name and DBSQL version of the warehouse. CHANNEL_NAME_CUSTOM should be chosen only when `dbsql_version` is specified.", + "fields": { + "dbsql_version": { + "name": "dbsql_version", + "type": "any", + "description": "", + "required": false + }, + "name": { + "name": "name", + "type": "any", + "description": "", + "required": false + } + } + }, + "sql.ChannelName": { + "name": "ChannelName", + "package": "sql", + "description": "channel name configuration.", + "fields": {} + }, + "sql.ComparisonOperator": { + "name": "ComparisonOperator", + "package": "sql", + "description": "comparison operator configuration.", + "fields": {} + }, + "sql.CreateWarehouseRequestWarehouseType": { + "name": "CreateWarehouseRequestWarehouseType", + "package": "sql", + "description": "create warehouse request warehouse type configuration.", + "fields": {} + }, + "sql.CronSchedule": { + "name": "CronSchedule", + "package": "sql", + "description": "cron schedule configuration.", + "fields": { + "pause_status": { + "name": "pause_status", + "type": "any", + "description": "Indicate whether this schedule is paused or not.", + "required": false + }, + "quartz_cron_schedule": { + "name": "quartz_cron_schedule", + "type": "any", + "description": "A cron expression using quartz syntax that specifies the schedule for this pipeline.\nShould use the quartz format described here: 
http://www.quartz-scheduler.org/documentation/quartz-2.1.7/tutorials/tutorial-lesson-06.html", + "required": false + }, + "timezone_id": { + "name": "timezone_id", + "type": "string", + "description": "A Java timezone id. The schedule will be resolved using this timezone.\nThis will be combined with the quartz_cron_schedule to determine the schedule.\nSee https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html for details.", + "required": false + } + } + }, + "sql.EndpointTagPair": { + "name": "EndpointTagPair", + "package": "sql", + "description": "endpoint tag pair configuration.", + "fields": { + "key": { + "name": "key", + "type": "any", + "description": "", + "required": false + }, + "value": { + "name": "value", + "type": "any", + "description": "", + "required": false + } + } + }, + "sql.EndpointTags": { + "name": "EndpointTags", + "package": "sql", + "description": "endpoint tags configuration.", + "fields": { + "custom_tags": { + "name": "custom_tags", + "type": "map[string]string", + "description": "", + "required": false + } + } + }, + "sql.SchedulePauseStatus": { + "name": "SchedulePauseStatus", + "package": "sql", + "description": "schedule pause status configuration.", + "fields": {} + }, + "sql.SpotInstancePolicy": { + "name": "SpotInstancePolicy", + "package": "sql", + "description": "EndpointSpotInstancePolicy configures whether the endpoint should use spot\ninstances.\n\nThe breakdown of how the EndpointSpotInstancePolicy converts to per cloud\nconfigurations is:\n\n+-------+--------------------------------------+--------------------------------+\n| Cloud | COST_OPTIMIZED | RELIABILITY_OPTIMIZED |\n+-------+--------------------------------------+--------------------------------+\n| AWS | On Demand Driver with Spot Executors | On Demand Driver and\nExecutors | | AZURE | On Demand Driver and Executors | On Demand Driver\nand Executors 
|\n+-------+--------------------------------------+--------------------------------+\n\nWhile including \"spot\" in the enum name may limit the future\nextensibility of this field because it limits this enum to denoting \"spot or\nnot\", this is the field that PM recommends after discussion with customers\nper SC-48783.", + "fields": {} + }, + "workspace.AzureKeyVaultSecretScopeMetadata": { + "name": "AzureKeyVaultSecretScopeMetadata", + "package": "workspace", + "description": "The metadata of the Azure KeyVault for a secret scope of type `AZURE_KEYVAULT`", + "fields": { + "dns_name": { + "name": "dns_name", + "type": "string", + "description": "The DNS of the KeyVault", + "required": false + }, + "resource_id": { + "name": "resource_id", + "type": "string", + "description": "The resource id of the azure KeyVault that user wants to associate the scope with.", + "required": false + } + } + }, + "workspace.ScopeBackendType": { + "name": "ScopeBackendType", + "package": "workspace", + "description": "The types of secret scope backends in the Secret Manager. 
Azure KeyVault backed secret scopes\nwill be supported in a later release.", + "fields": {} + } + }, + "enums": { + "compute.State": { + "name": "State", + "package": "compute", + "description": "The state of a cluster.", + "values": [ + "ERROR", + "PENDING", + "RESIZING", + "RESTARTING", + "RUNNING", + "TERMINATED", + "TERMINATING", + "UNKNOWN" + ] + }, + "jobs.RunLifeCycleState": { + "name": "RunLifeCycleState", + "package": "jobs", + "description": "The current state of the run lifecycle.", + "values": [ + "INTERNAL_ERROR", + "PENDING", + "RUNNING", + "SKIPPED", + "TERMINATED", + "TERMINATING" + ] + }, + "pipelines.PipelineState": { + "name": "PipelineState", + "package": "pipelines", + "description": "The state of a pipeline.", + "values": [ + "DELETED", + "FAILED", + "IDLE", + "RECOVERING", + "RESETTING", + "RUNNING", + "STARTING", + "STOPPING" + ] + } + } +} \ No newline at end of file diff --git a/experimental/aitools/lib/providers/sdkdocs/search.go b/experimental/aitools/lib/providers/sdkdocs/search.go new file mode 100644 index 0000000000..8428df29ac --- /dev/null +++ b/experimental/aitools/lib/providers/sdkdocs/search.go @@ -0,0 +1,265 @@ +package sdkdocs + +import ( + "sort" + "strings" +) + +// SearchResult represents a single search result. +type SearchResult struct { + Type string `json:"type"` // "service", "method", "type", "enum" + Name string `json:"name"` + Path string `json:"path"` + Service string `json:"service,omitempty"` + Description string `json:"description"` + Score float64 `json:"score"` +} + +// SearchOptions configures the search behavior. +type SearchOptions struct { + Query string + Category string // "services", "methods", "types", "enums", or empty for all + Service string // filter by specific service + Limit int +} + +// Search performs a search across the SDK documentation index. 
+func (idx *SDKDocsIndex) Search(opts SearchOptions) []SearchResult { + if opts.Limit <= 0 { + opts.Limit = 10 + } + if opts.Limit > 50 { + opts.Limit = 50 + } + + query := strings.ToLower(strings.TrimSpace(opts.Query)) + if query == "" { + return nil + } + + terms := tokenize(query) + var results []SearchResult + + // Search services + if opts.Category == "" || opts.Category == "services" { + for name, service := range idx.Services { + if opts.Service != "" && name != opts.Service { + continue + } + score := computeScore(terms, name, service.Name, service.Description) + if score > 0 { + results = append(results, SearchResult{ + Type: "service", + Name: service.Name, + Path: name, + Description: truncate(service.Description, 200), + Score: score, + }) + } + } + } + + // Search methods + if opts.Category == "" || opts.Category == "methods" { + for serviceName, service := range idx.Services { + if opts.Service != "" && serviceName != opts.Service { + continue + } + for methodName, method := range service.Methods { + score := computeScore(terms, methodName, method.Name, method.Description) + // Boost score if query contains the service name + if containsAny(query, serviceName, service.Name) { + score *= 1.5 + } + if score > 0 { + results = append(results, SearchResult{ + Type: "method", + Name: methodName, + Path: serviceName + "." 
+ methodName, + Service: serviceName, + Description: truncate(method.Description, 200), + Score: score, + }) + } + } + } + } + + // Search types + if opts.Category == "" || opts.Category == "types" { + for typePath, typeDoc := range idx.Types { + if opts.Service != "" && !strings.HasPrefix(typePath, opts.Service+".") { + continue + } + score := computeScore(terms, typeDoc.Name, typePath, typeDoc.Description) + if score > 0 { + results = append(results, SearchResult{ + Type: "type", + Name: typeDoc.Name, + Path: typePath, + Service: typeDoc.Package, + Description: truncate(typeDoc.Description, 200), + Score: score, + }) + } + } + } + + // Search enums + if opts.Category == "" || opts.Category == "enums" { + for enumPath, enumDoc := range idx.Enums { + if opts.Service != "" && !strings.HasPrefix(enumPath, opts.Service+".") { + continue + } + // Include enum values in search + valuesStr := strings.Join(enumDoc.Values, " ") + score := computeScore(terms, enumDoc.Name, enumPath, enumDoc.Description+" "+valuesStr) + if score > 0 { + results = append(results, SearchResult{ + Type: "enum", + Name: enumDoc.Name, + Path: enumPath, + Service: enumDoc.Package, + Description: truncate(enumDoc.Description, 200), + Score: score, + }) + } + } + } + + // Sort by score descending + sort.Slice(results, func(i, j int) bool { + if results[i].Score != results[j].Score { + return results[i].Score > results[j].Score + } + // Secondary sort by name for stability + return results[i].Name < results[j].Name + }) + + // Apply limit + if len(results) > opts.Limit { + results = results[:opts.Limit] + } + + return results +} + +// tokenize splits a query into searchable terms. 
// It lower-cases the query, treats "_", "-", ".", ",", "?" and "!" as word
// separators, and drops words shorter than two bytes as well as common
// English filler words. The returned slice is never nil.
func tokenize(query string) []string {
	separators := strings.NewReplacer(
		"_", " ",
		"-", " ",
		".", " ",
		",", " ",
		"?", " ",
		"!", " ",
	)
	words := strings.Fields(separators.Replace(strings.ToLower(query)))

	terms := make([]string, 0, len(words))
	for _, word := range words {
		// One-byte words ("a", "i", ...) carry no signal.
		if len(word) < 2 {
			continue
		}
		// Stop words. "get" is deliberately NOT listed: Get is a method name
		// in the index, and filtering it made a query of "get" return nothing
		// and kept "get job" from preferring Get methods.
		switch word {
		case "an", "the", "is", "are", "to", "for", "in", "on", "of",
			"how", "do", "can", "what", "use", "with", "from":
			continue
		}
		terms = append(terms, word)
	}
	return terms
}

// computeScore calculates a relevance score for a document.
//
// names holds the searchable texts of the document; names[0] is treated as
// its identifier. Every query term contributes
//
//   - 10 when it equals a whitespace-delimited word of the lower-cased texts,
//   - 5 when it only occurs as a substring,
//   - 0 otherwise,
//
// multiplied by 1.5 when the term also occurs in names[0]. The sum is scaled
// by the fraction of terms that matched, so documents matching every term
// rank above partial matches. The result is 0 when queryTerms is empty or
// when no term matches. Terms are compared as given; callers are expected to
// pass lower-case terms.
//
// Words are split on whitespace only, so punctuation stays attached: "job."
// at the end of a sentence is a substring match for "job", not a word match.
func computeScore(queryTerms []string, names ...string) float64 {
	const (
		exactWordScore  = 10.0
		substringScore  = 5.0
		identifierBoost = 1.5
	)

	if len(queryTerms) == 0 {
		return 0
	}

	combined := strings.ToLower(strings.Join(names, " "))

	// Split the text once for all terms instead of once per term.
	wordSet := make(map[string]struct{})
	for _, w := range strings.Fields(combined) {
		wordSet[w] = struct{}{}
	}

	hasIdentifier := len(names) > 0
	identifier := ""
	if hasIdentifier {
		identifier = strings.ToLower(names[0])
	}

	var totalScore float64
	matchedTerms := 0
	for _, term := range queryTerms {
		var termScore float64
		if _, isWord := wordSet[term]; isWord {
			termScore = exactWordScore
		} else if strings.Contains(combined, term) {
			// No separate prefix tier is needed: a term that prefixes a word
			// is by definition a substring of the combined text.
			termScore = substringScore
		} else {
			continue
		}
		matchedTerms++

		if hasIdentifier && strings.Contains(identifier, term) {
			termScore *= identifierBoost
		}
		totalScore += termScore
	}

	if matchedTerms == 0 {
		return 0
	}

	matchRatio := float64(matchedTerms) / float64(len(queryTerms))
	return totalScore * matchRatio
}

//
containsWord checks if text contains word as a complete word. +func containsWord(text, word string) bool { + words := strings.Fields(text) + for _, w := range words { + if w == word { + return true + } + } + return false +} + +// containsAny checks if text contains any of the given substrings. +func containsAny(text string, substrs ...string) bool { + text = strings.ToLower(text) + for _, s := range substrs { + if strings.Contains(text, strings.ToLower(s)) { + return true + } + } + return false +} + +// truncate shortens a string to the specified length. +func truncate(s string, maxLen int) string { + if len(s) <= maxLen { + return s + } + // Find last space before maxLen to avoid cutting words + if idx := strings.LastIndex(s[:maxLen], " "); idx > maxLen/2 { + return s[:idx] + "..." + } + return s[:maxLen-3] + "..." +} diff --git a/experimental/aitools/lib/providers/sdkdocs/search_test.go b/experimental/aitools/lib/providers/sdkdocs/search_test.go new file mode 100644 index 0000000000..1a4d45feb0 --- /dev/null +++ b/experimental/aitools/lib/providers/sdkdocs/search_test.go @@ -0,0 +1,274 @@ +package sdkdocs + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTokenize(t *testing.T) { + tests := []struct { + name string + input string + expected []string + }{ + { + name: "simple query", + input: "create job", + expected: []string{"create", "job"}, + }, + { + name: "query with stop words", + input: "how do I create a job", + expected: []string{"create", "job"}, + }, + { + name: "query with underscores", + input: "cluster_name field", + expected: []string{"cluster", "name", "field"}, + }, + { + name: "empty query", + input: "", + expected: []string{}, + }, + { + name: "only stop words", + input: "how do I", + expected: []string{}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := tokenize(tt.input) + assert.Equal(t, tt.expected, result) + }) + } +} + +func 
TestComputeScore(t *testing.T) { + tests := []struct { + name string + queryTerms []string + names []string + expectZero bool + }{ + { + name: "exact match", + queryTerms: []string{"create"}, + names: []string{"Create", "Create a new job"}, + expectZero: false, + }, + { + name: "no match", + queryTerms: []string{"delete"}, + names: []string{"Create", "Create a new job"}, + expectZero: true, + }, + { + name: "partial match", + queryTerms: []string{"job"}, + names: []string{"CreateJob", "Creates a job"}, + expectZero: false, + }, + { + name: "empty query", + queryTerms: []string{}, + names: []string{"Create", "Create a new job"}, + expectZero: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + score := computeScore(tt.queryTerms, tt.names...) + if tt.expectZero { + assert.Equal(t, float64(0), score) + } else { + assert.Greater(t, score, float64(0)) + } + }) + } +} + +func TestTruncate(t *testing.T) { + tests := []struct { + name string + input string + maxLen int + expected string + }{ + { + name: "short string", + input: "hello", + maxLen: 10, + expected: "hello", + }, + { + name: "exact length", + input: "hello", + maxLen: 5, + expected: "hello", + }, + { + name: "long string truncated at word boundary", + input: "hello world this is a long string", + maxLen: 15, + expected: "hello world...", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := truncate(tt.input, tt.maxLen) + assert.Equal(t, tt.expected, result) + }) + } +} + +func TestSearch(t *testing.T) { + // Create a test index + index := &SDKDocsIndex{ + Version: "1.0", + Services: map[string]*ServiceDoc{ + "jobs": { + Name: "Jobs", + Description: "The Jobs API allows you to create, edit, and delete jobs.", + Package: "github.com/databricks/databricks-sdk-go/service/jobs", + Methods: map[string]*MethodDoc{ + "Create": { + Name: "Create", + Description: "Create a new job.", + Signature: "Create(ctx context.Context, request CreateJob) 
(*CreateResponse, error)", + }, + "List": { + Name: "List", + Description: "List all jobs.", + Signature: "List(ctx context.Context, request ListJobsRequest) listing.Iterator[BaseJob]", + }, + "Delete": { + Name: "Delete", + Description: "Delete a job.", + Signature: "Delete(ctx context.Context, request DeleteJob) error", + }, + }, + }, + "compute": { + Name: "Clusters", + Description: "The Clusters API allows you to create and manage clusters.", + Package: "github.com/databricks/databricks-sdk-go/service/compute", + Methods: map[string]*MethodDoc{ + "Create": { + Name: "Create", + Description: "Create a new cluster.", + Signature: "Create(ctx context.Context, request CreateCluster) (*CreateClusterResponse, error)", + }, + }, + }, + }, + Types: map[string]*TypeDoc{ + "jobs.CreateJob": { + Name: "CreateJob", + Package: "jobs", + Description: "Job creation settings.", + Fields: map[string]*FieldDoc{ + "name": { + Name: "name", + Type: "string", + Description: "The job name.", + }, + }, + }, + }, + Enums: map[string]*EnumDoc{ + "jobs.RunLifeCycleState": { + Name: "RunLifeCycleState", + Package: "jobs", + Description: "The current state of the run lifecycle.", + Values: []string{"PENDING", "RUNNING", "TERMINATED"}, + }, + }, + } + + t.Run("search for create job", func(t *testing.T) { + results := index.Search(SearchOptions{ + Query: "create job", + Limit: 10, + }) + + require.NotEmpty(t, results) + // Should find the Jobs.Create method + found := false + for _, r := range results { + if r.Type == "method" && r.Name == "Create" && r.Service == "jobs" { + found = true + break + } + } + assert.True(t, found, "Should find Jobs.Create method") + }) + + t.Run("search with service filter", func(t *testing.T) { + results := index.Search(SearchOptions{ + Query: "create", + Service: "jobs", + Limit: 10, + }) + + for _, r := range results { + if r.Type == "method" { + assert.Equal(t, "jobs", r.Service, "All method results should be from jobs service") + } + } + }) + + 
t.Run("search with category filter", func(t *testing.T) { + results := index.Search(SearchOptions{ + Query: "job", + Category: "types", + Limit: 10, + }) + + for _, r := range results { + assert.Equal(t, "type", r.Type, "All results should be types") + } + }) + + t.Run("search for enum values", func(t *testing.T) { + results := index.Search(SearchOptions{ + Query: "lifecycle state", + Category: "enums", + Limit: 10, + }) + + require.NotEmpty(t, results) + found := false + for _, r := range results { + if r.Name == "RunLifeCycleState" { + found = true + break + } + } + assert.True(t, found, "Should find RunLifeCycleState enum") + }) + + t.Run("empty query returns no results", func(t *testing.T) { + results := index.Search(SearchOptions{ + Query: "", + Limit: 10, + }) + + assert.Empty(t, results) + }) + + t.Run("limit is enforced", func(t *testing.T) { + results := index.Search(SearchOptions{ + Query: "create", + Limit: 1, + }) + + assert.LessOrEqual(t, len(results), 1) + }) +} diff --git a/experimental/aitools/lib/server/server.go b/experimental/aitools/lib/server/server.go index ff0016f4be..fd081dad31 100644 --- a/experimental/aitools/lib/server/server.go +++ b/experimental/aitools/lib/server/server.go @@ -8,6 +8,7 @@ import ( mcpsdk "github.com/databricks/cli/experimental/aitools/lib/mcp" "github.com/databricks/cli/experimental/aitools/lib/middlewares" "github.com/databricks/cli/experimental/aitools/lib/providers/clitools" + "github.com/databricks/cli/experimental/aitools/lib/providers/sdkdocs" "github.com/databricks/cli/experimental/aitools/lib/session" "github.com/databricks/cli/experimental/aitools/lib/trajectory" "github.com/databricks/cli/internal/build" @@ -77,6 +78,11 @@ func (s *Server) RegisterTools(ctx context.Context) error { return err } + // Register SDK docs provider + if err := s.registerSDKDocsProvider(ctx); err != nil { + return err + } + return nil } @@ -96,6 +102,22 @@ func (s *Server) registerCLIToolsProvider(ctx context.Context) error { return 
nil } +// registerSDKDocsProvider registers the SDK documentation provider +func (s *Server) registerSDKDocsProvider(ctx context.Context) error { + log.Info(ctx, "Registering SDK docs provider") + + provider, err := sdkdocs.NewProvider(ctx, s.config, s.session) + if err != nil { + return err + } + + if err := provider.RegisterTools(s.server); err != nil { + return err + } + + return nil +} + // Run starts the MCP server with STDIO transport and blocks until the context is cancelled. // The server communicates via standard input/output following the MCP protocol. func (s *Server) Run(ctx context.Context) error { diff --git a/tools/gen_sdk_docs_index.go b/tools/gen_sdk_docs_index.go new file mode 100644 index 0000000000..8bd7bb2ace --- /dev/null +++ b/tools/gen_sdk_docs_index.go @@ -0,0 +1,664 @@ +// Package main generates SDK documentation index for MCP tools. +// +// Usage: +// +// go run tools/gen_sdk_docs_index.go -output experimental/aitools/lib/providers/sdkdocs/ +// +// This tool parses the annotations_openapi.yml file and Go SDK interfaces to generate +// a comprehensive SDK documentation index that is embedded into the CLI binary. +package main + +import ( + "encoding/json" + "flag" + "fmt" + "os" + "path/filepath" + "regexp" + "sort" + "strings" + "time" + + "gopkg.in/yaml.v3" +) + +// SDKDocsIndex represents the complete SDK documentation index. +type SDKDocsIndex struct { + Version string `json:"version"` + GeneratedAt string `json:"generated_at"` + Services map[string]*ServiceDoc `json:"services"` + Types map[string]*TypeDoc `json:"types"` + Enums map[string]*EnumDoc `json:"enums"` +} + +// ServiceDoc represents documentation for an API service. +type ServiceDoc struct { + Name string `json:"name"` + Description string `json:"description"` + Package string `json:"package"` + Methods map[string]*MethodDoc `json:"methods"` +} + +// MethodDoc represents documentation for an API method. 
// Returns, Example, HTTPMethod and HTTPPath are tagged omitempty and are left
// out of the JSON when unset. Parameters is not, so a nil slice is written as
// null.
type MethodDoc struct {
	Name        string     `json:"name"`
	Description string     `json:"description"`
	Signature   string     `json:"signature"`
	Parameters  []ParamDoc `json:"parameters"`
	Returns     *ReturnDoc `json:"returns,omitempty"`
	Example     string     `json:"example,omitempty"`
	HTTPMethod  string     `json:"http_method,omitempty"`
	HTTPPath    string     `json:"http_path,omitempty"`
}

// ParamDoc represents documentation for a method parameter. All four fields
// are always written to the JSON, including Required when it is false.
type ParamDoc struct {
	Name        string `json:"name"`
	Type        string `json:"type"`
	Description string `json:"description"`
	Required    bool   `json:"required"`
}

// ReturnDoc represents documentation for a method return type.
type ReturnDoc struct {
	Type        string `json:"type"`
	Description string `json:"description"`
}

// TypeDoc represents documentation for a data type. generateIndex stores each
// field in Fields under the field's name and fills Package from the last path
// segment of the annotated type's full name.
type TypeDoc struct {
	Name        string               `json:"name"`
	Package     string               `json:"package"`
	Description string               `json:"description"`
	Fields      map[string]*FieldDoc `json:"fields"`
}

// FieldDoc represents documentation for a struct field.
//
// In indexes produced by generateIndex, Type is a guess derived from the field
// name by inferFieldType (falling back to "any"), not an authoritative Go
// type, and Required is never set, so it is always serialized as false.
// OutputOnly and Deprecated are left out of the JSON when false.
type FieldDoc struct {
	Name        string `json:"name"`
	Type        string `json:"type"`
	Description string `json:"description"`
	Required    bool   `json:"required"`
	OutputOnly  bool   `json:"output_only,omitempty"`
	Deprecated  bool   `json:"deprecated,omitempty"`
}

// EnumDoc represents documentation for an enum type.
+type EnumDoc struct { + Name string `json:"name"` + Package string `json:"package"` + Description string `json:"description"` + Values []string `json:"values"` +} + +// AnnotationsFile represents the structure of annotations_openapi.yml +type AnnotationsFile map[string]map[string]FieldAnnotation + +// FieldAnnotation represents annotations for a single field +type FieldAnnotation struct { + Description string `yaml:"description"` + OutputOnly string `yaml:"x-databricks-field-behaviors_output_only"` + DeprecationMessage string `yaml:"deprecation_message"` +} + +func main() { + outputDir := flag.String("output", "experimental/aitools/lib/providers/sdkdocs/", "Output directory for generated index") + annotationsPath := flag.String("annotations", "bundle/internal/schema/annotations_openapi.yml", "Path to annotations file") + flag.Parse() + + // Find project root + projectRoot, err := findProjectRoot() + if err != nil { + fmt.Fprintf(os.Stderr, "Error finding project root: %v\n", err) + os.Exit(1) + } + + // Load annotations + annotations, err := loadAnnotations(filepath.Join(projectRoot, *annotationsPath)) + if err != nil { + fmt.Fprintf(os.Stderr, "Error loading annotations: %v\n", err) + os.Exit(1) + } + + // Generate index + index := generateIndex(annotations) + + // Write output + outputPath := filepath.Join(projectRoot, *outputDir, "sdk_docs_index.json") + if err := writeIndex(index, outputPath); err != nil { + fmt.Fprintf(os.Stderr, "Error writing index: %v\n", err) + os.Exit(1) + } + + fmt.Printf("Generated SDK docs index: %s\n", outputPath) + fmt.Printf(" Services: %d\n", len(index.Services)) + fmt.Printf(" Types: %d\n", len(index.Types)) + fmt.Printf(" Enums: %d\n", len(index.Enums)) +} + +func findProjectRoot() (string, error) { + dir, err := os.Getwd() + if err != nil { + return "", err + } + + for { + if _, err := os.Stat(filepath.Join(dir, "go.mod")); err == nil { + return dir, nil + } + parent := filepath.Dir(dir) + if parent == dir { + return "", 
fmt.Errorf("could not find project root (go.mod)") + } + dir = parent + } +} + +func loadAnnotations(path string) (AnnotationsFile, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("failed to read annotations file: %w", err) + } + + var annotations AnnotationsFile + if err := yaml.Unmarshal(data, &annotations); err != nil { + return nil, fmt.Errorf("failed to parse annotations: %w", err) + } + + return annotations, nil +} + +func generateIndex(annotations AnnotationsFile) *SDKDocsIndex { + index := &SDKDocsIndex{ + Version: "1.0", + GeneratedAt: time.Now().UTC().Format(time.RFC3339), + Services: make(map[string]*ServiceDoc), + Types: make(map[string]*TypeDoc), + Enums: make(map[string]*EnumDoc), + } + + // Extract types from annotations + for fullTypeName, fields := range annotations { + typeName := extractTypeName(fullTypeName) + packageName := extractPackageName(fullTypeName) + + if typeName == "" { + continue + } + + typeDoc := &TypeDoc{ + Name: typeName, + Package: packageName, + Description: inferTypeDescription(typeName), + Fields: make(map[string]*FieldDoc), + } + + for fieldName, annotation := range fields { + if fieldName == "_" { + // Type-level description + if annotation.Description != "" { + typeDoc.Description = annotation.Description + } + continue + } + + fieldDoc := &FieldDoc{ + Name: fieldName, + Type: inferFieldType(fieldName), + Description: annotation.Description, + OutputOnly: annotation.OutputOnly == "true", + Deprecated: annotation.DeprecationMessage != "", + } + typeDoc.Fields[fieldName] = fieldDoc + } + + // Determine the service this type belongs to + service := inferServiceFromPackage(packageName) + typePath := service + "." 
// extractTypeName returns the part of fullPath after its last ".", e.g.
// "Alert" for "github.com/databricks/cli/bundle/config/resources.Alert".
// A path that contains no "." is returned unchanged.
func extractTypeName(fullPath string) string {
	if dot := strings.LastIndex(fullPath, "."); dot >= 0 {
		return fullPath[dot+1:]
	}
	return fullPath
}

// extractPackageName returns the package component of a qualified type path,
// e.g. "resources" for "github.com/databricks/cli/bundle/config/resources.Alert".
//
// It takes the segment after the last "/" and cuts it at the first "." that is
// not the segment's first character; a segment without such a "." is returned
// whole.
func extractPackageName(fullPath string) string {
	last := fullPath
	if slash := strings.LastIndex(fullPath, "/"); slash >= 0 {
		last = fullPath[slash+1:]
	}
	if dot := strings.Index(last, "."); dot > 0 {
		return last[:dot]
	}
	return last
}

// inferServiceFromPackage maps a Go package name to the service name used as
// the key prefix in the index.
//
// Only two packages are renamed ("resources" -> "bundle", "clusters" ->
// "compute"); every other package name (jobs, pipelines, catalog, sql, ...) is
// already its own service name and is returned as is.
func inferServiceFromPackage(packageName string) string {
	switch packageName {
	case "resources":
		return "bundle"
	case "clusters":
		return "compute"
	default:
		return packageName
	}
}

// trimTypeSuffix reports whether name ends in suffix with a non-empty stem in
// front of it, and returns that stem.
func trimTypeSuffix(name, suffix string) (string, bool) {
	if len(name) > len(suffix) && strings.HasSuffix(name, suffix) {
		return strings.TrimSuffix(name, suffix), true
	}
	return "", false
}

// inferTypeDescription generates a fallback description from the naming
// pattern of typeName (…Request, …Response, …Settings, …Spec).
//
// A name that consists of the suffix alone (for example "Request") has no stem
// to describe, so it falls through to the generic "<name> configuration." form
// instead of producing a sentence with a hole in it.
func inferTypeDescription(typeName string) string {
	if base, ok := trimTypeSuffix(typeName, "Request"); ok {
		return fmt.Sprintf("Request parameters for %s operation.", toSentenceCase(base))
	}
	if base, ok := trimTypeSuffix(typeName, "Response"); ok {
		return fmt.Sprintf("Response from %s operation.", toSentenceCase(base))
	}
	if base, ok := trimTypeSuffix(typeName, "Settings"); ok {
		return fmt.Sprintf("Configuration settings for %s.", toSentenceCase(base))
	}
	if base, ok := trimTypeSuffix(typeName, "Spec"); ok {
		return fmt.Sprintf("Specification for %s.", toSentenceCase(base))
	}
	return fmt.Sprintf("%s configuration.", toSentenceCase(typeName))
}

// fieldTypeRule pairs a field-name pattern with the type reported for field
// names that match it.
type fieldTypeRule struct {
	pattern  *regexp.Regexp
	typeName string
}

// fieldTypeRules lists the naming heuristics used by inferFieldType.
//
// The rules are compiled once and kept in a slice, not a map: a field name
// such as "enable_path" matches more than one pattern, and ranging over a map
// would pick a different winner from run to run. The first matching rule wins.
var fieldTypeRules = []fieldTypeRule{
	{regexp.MustCompile(`(?i)_id$`), "string"},
	{regexp.MustCompile(`(?i)_ids$`), "[]string"},
	{regexp.MustCompile(`(?i)_time$`), "string (timestamp)"},
	{regexp.MustCompile(`(?i)_at$`), "string (timestamp)"},
	{regexp.MustCompile(`(?i)^is_`), "bool"},
	{regexp.MustCompile(`(?i)^has_`), "bool"},
	{regexp.MustCompile(`(?i)^enable`), "bool"},
	{regexp.MustCompile(`(?i)_enabled$`), "bool"},
	{regexp.MustCompile(`(?i)_count$`), "int"},
	{regexp.MustCompile(`(?i)_size$`), "int"},
	{regexp.MustCompile(`(?i)_minutes$`), "int"},
	{regexp.MustCompile(`(?i)_seconds$`), "int"},
	{regexp.MustCompile(`(?i)_name$`), "string"},
	{regexp.MustCompile(`(?i)_path$`), "string"},
	{regexp.MustCompile(`(?i)_url$`), "string"},
	{regexp.MustCompile(`(?i)description`), "string"},
	{regexp.MustCompile(`(?i)tags$`), "map[string]string"},
}

// inferFieldType guesses a field's type from common naming patterns and
// returns "any" when no pattern applies. The result depends only on fieldName.
func inferFieldType(fieldName string) string {
	for _, rule := range fieldTypeRules {
		if rule.pattern.MatchString(fieldName) {
			return rule.typeName
		}
	}
	return "any"
}

// toSentenceCase converts a CamelCase identifier to lower-case words by
// inserting a space before every ASCII upper-case letter except the first
// character, e.g. "JobSettings" -> "job settings".
func toSentenceCase(s string) string {
	var words strings.Builder
	words.Grow(len(s) + 4)
	for i, r := range s {
		if i > 0 && r >= 'A' && r <= 'Z' {
			words.WriteRune(' ')
		}
		words.WriteRune(r)
	}
	return strings.ToLower(words.String())
}
Jobs are the primary unit of scheduled execution in Databricks.", + Package: "github.com/databricks/databricks-sdk-go/service/jobs", + Methods: map[string]*MethodDoc{ + "Create": { + Name: "Create", + Description: "Create a new job.", + Signature: "Create(ctx context.Context, request CreateJob) (*CreateResponse, error)", + Parameters: []ParamDoc{ + {Name: "request", Type: "CreateJob", Description: "Job creation parameters including name, tasks, and schedule", Required: true}, + }, + Returns: &ReturnDoc{Type: "*CreateResponse", Description: "Contains the job_id of the created job"}, + Example: "resp, err := w.Jobs.Create(ctx, jobs.CreateJob{\n Name: \"my-job\",\n Tasks: []jobs.Task{{TaskKey: \"main\", ...}},\n})", + }, + "List": { + Name: "List", + Description: "Retrieves a list of jobs.", + Signature: "List(ctx context.Context, request ListJobsRequest) listing.Iterator[BaseJob]", + Parameters: []ParamDoc{ + {Name: "request", Type: "ListJobsRequest", Description: "Filter and pagination parameters", Required: false}, + }, + Returns: &ReturnDoc{Type: "listing.Iterator[BaseJob]", Description: "Iterator over jobs matching the filter"}, + }, + "Get": { + Name: "Get", + Description: "Retrieves the details for a single job.", + Signature: "Get(ctx context.Context, request GetJobRequest) (*Job, error)", + Parameters: []ParamDoc{ + {Name: "request", Type: "GetJobRequest", Description: "Contains job_id to retrieve", Required: true}, + }, + Returns: &ReturnDoc{Type: "*Job", Description: "Full job details including settings and run history"}, + }, + "Delete": { + Name: "Delete", + Description: "Deletes a job.", + Signature: "Delete(ctx context.Context, request DeleteJob) error", + Parameters: []ParamDoc{ + {Name: "request", Type: "DeleteJob", Description: "Contains job_id to delete", Required: true}, + }, + }, + "RunNow": { + Name: "RunNow", + Description: "Triggers an immediate run of a job.", + Signature: "RunNow(ctx context.Context, request RunNow) (*RunNowResponse, error)", 
+ Parameters: []ParamDoc{ + {Name: "request", Type: "RunNow", Description: "Job ID and optional parameters for the run", Required: true}, + }, + Returns: &ReturnDoc{Type: "*RunNowResponse", Description: "Contains run_id of the triggered run"}, + }, + }, + } + + // Clusters/Compute service + index.Services["compute"] = &ServiceDoc{ + Name: "Clusters", + Description: "The Clusters API allows you to create, start, edit, and terminate clusters. Clusters are managed cloud resources for running Spark workloads.", + Package: "github.com/databricks/databricks-sdk-go/service/compute", + Methods: map[string]*MethodDoc{ + "Create": { + Name: "Create", + Description: "Create a new Spark cluster.", + Signature: "Create(ctx context.Context, request CreateCluster) (*CreateClusterResponse, error)", + Parameters: []ParamDoc{ + {Name: "request", Type: "CreateCluster", Description: "Cluster configuration including node types, autoscaling, and Spark version", Required: true}, + }, + Returns: &ReturnDoc{Type: "*CreateClusterResponse", Description: "Contains cluster_id of the created cluster"}, + }, + "List": { + Name: "List", + Description: "Returns information about all clusters.", + Signature: "List(ctx context.Context, request ListClustersRequest) listing.Iterator[ClusterDetails]", + Returns: &ReturnDoc{Type: "listing.Iterator[ClusterDetails]", Description: "Iterator over cluster details"}, + }, + "Get": { + Name: "Get", + Description: "Retrieves the information for a cluster given its identifier.", + Signature: "Get(ctx context.Context, request GetClusterRequest) (*ClusterDetails, error)", + Parameters: []ParamDoc{ + {Name: "request", Type: "GetClusterRequest", Description: "Contains cluster_id", Required: true}, + }, + Returns: &ReturnDoc{Type: "*ClusterDetails", Description: "Full cluster configuration and state"}, + }, + "Start": { + Name: "Start", + Description: "Starts a terminated cluster.", + Signature: "Start(ctx context.Context, request StartCluster) error", + Parameters: 
[]ParamDoc{ + {Name: "request", Type: "StartCluster", Description: "Contains cluster_id to start", Required: true}, + }, + }, + "Delete": { + Name: "Delete", + Description: "Permanently deletes a Spark cluster.", + Signature: "Delete(ctx context.Context, request DeleteCluster) error", + Parameters: []ParamDoc{ + {Name: "request", Type: "DeleteCluster", Description: "Contains cluster_id to delete", Required: true}, + }, + }, + }, + } + + // Pipelines service + index.Services["pipelines"] = &ServiceDoc{ + Name: "Pipelines", + Description: "The Delta Live Tables API allows you to create, edit, and run pipelines for data transformation and ingestion.", + Package: "github.com/databricks/databricks-sdk-go/service/pipelines", + Methods: map[string]*MethodDoc{ + "Create": { + Name: "Create", + Description: "Creates a new data processing pipeline.", + Signature: "Create(ctx context.Context, request CreatePipeline) (*CreatePipelineResponse, error)", + Parameters: []ParamDoc{ + {Name: "request", Type: "CreatePipeline", Description: "Pipeline configuration including clusters, libraries, and target", Required: true}, + }, + Returns: &ReturnDoc{Type: "*CreatePipelineResponse", Description: "Contains pipeline_id of the created pipeline"}, + }, + "List": { + Name: "List", + Description: "Lists pipelines defined in the workspace.", + Signature: "List(ctx context.Context, request ListPipelinesRequest) listing.Iterator[PipelineStateInfo]", + Returns: &ReturnDoc{Type: "listing.Iterator[PipelineStateInfo]", Description: "Iterator over pipeline info"}, + }, + "StartUpdate": { + Name: "StartUpdate", + Description: "Starts a new update for the pipeline.", + Signature: "StartUpdate(ctx context.Context, request StartUpdate) (*StartUpdateResponse, error)", + Parameters: []ParamDoc{ + {Name: "request", Type: "StartUpdate", Description: "Pipeline ID and update options", Required: true}, + }, + Returns: &ReturnDoc{Type: "*StartUpdateResponse", Description: "Contains update_id of the started 
update"}, + }, + }, + } + + // Catalog service + index.Services["catalog"] = &ServiceDoc{ + Name: "Catalog", + Description: "Unity Catalog APIs for managing catalogs, schemas, tables, and other data assets.", + Package: "github.com/databricks/databricks-sdk-go/service/catalog", + Methods: map[string]*MethodDoc{ + "ListCatalogs": { + Name: "ListCatalogs", + Description: "Lists all catalogs in the metastore.", + Signature: "List(ctx context.Context, request ListCatalogsRequest) listing.Iterator[CatalogInfo]", + Returns: &ReturnDoc{Type: "listing.Iterator[CatalogInfo]", Description: "Iterator over catalog information"}, + }, + "ListSchemas": { + Name: "ListSchemas", + Description: "Lists all schemas in a catalog.", + Signature: "List(ctx context.Context, request ListSchemasRequest) listing.Iterator[SchemaInfo]", + Parameters: []ParamDoc{ + {Name: "request", Type: "ListSchemasRequest", Description: "Contains catalog_name to list schemas from", Required: true}, + }, + Returns: &ReturnDoc{Type: "listing.Iterator[SchemaInfo]", Description: "Iterator over schema information"}, + }, + "ListTables": { + Name: "ListTables", + Description: "Lists all tables in a schema.", + Signature: "List(ctx context.Context, request ListTablesRequest) listing.Iterator[TableInfo]", + Parameters: []ParamDoc{ + {Name: "request", Type: "ListTablesRequest", Description: "Contains catalog_name and schema_name", Required: true}, + }, + Returns: &ReturnDoc{Type: "listing.Iterator[TableInfo]", Description: "Iterator over table information"}, + }, + }, + } + + // Apps service + index.Services["apps"] = &ServiceDoc{ + Name: "Apps", + Description: "Databricks Apps API for deploying and managing web applications on Databricks.", + Package: "github.com/databricks/databricks-sdk-go/service/apps", + Methods: map[string]*MethodDoc{ + "Create": { + Name: "Create", + Description: "Creates a new app.", + Signature: "Create(ctx context.Context, request CreateAppRequest) (*App, error)", + Parameters: []ParamDoc{ 
+ {Name: "request", Type: "CreateAppRequest", Description: "App configuration including name and description", Required: true}, + }, + Returns: &ReturnDoc{Type: "*App", Description: "The created app details"}, + }, + "Deploy": { + Name: "Deploy", + Description: "Deploys an app to Databricks Apps.", + Signature: "Deploy(ctx context.Context, request CreateAppDeploymentRequest) (*AppDeployment, error)", + Parameters: []ParamDoc{ + {Name: "request", Type: "CreateAppDeploymentRequest", Description: "Deployment configuration", Required: true}, + }, + Returns: &ReturnDoc{Type: "*AppDeployment", Description: "Deployment status and details"}, + }, + "List": { + Name: "List", + Description: "Lists all apps in the workspace.", + Signature: "List(ctx context.Context, request ListAppsRequest) listing.Iterator[App]", + Returns: &ReturnDoc{Type: "listing.Iterator[App]", Description: "Iterator over apps"}, + }, + }, + } + + // SQL service + index.Services["sql"] = &ServiceDoc{ + Name: "SQL", + Description: "Databricks SQL APIs for managing warehouses, queries, and dashboards.", + Package: "github.com/databricks/databricks-sdk-go/service/sql", + Methods: map[string]*MethodDoc{ + "ExecuteStatement": { + Name: "ExecuteStatement", + Description: "Execute a SQL statement and return results.", + Signature: "ExecuteStatement(ctx context.Context, request ExecuteStatementRequest) (*ExecuteStatementResponse, error)", + Parameters: []ParamDoc{ + {Name: "request", Type: "ExecuteStatementRequest", Description: "SQL statement, warehouse ID, and execution options", Required: true}, + }, + Returns: &ReturnDoc{Type: "*ExecuteStatementResponse", Description: "Query results or statement ID for async execution"}, + }, + "ListWarehouses": { + Name: "ListWarehouses", + Description: "Lists all SQL warehouses.", + Signature: "List(ctx context.Context, request ListWarehousesRequest) listing.Iterator[EndpointInfo]", + Returns: &ReturnDoc{Type: "listing.Iterator[EndpointInfo]", Description: "Iterator over 
warehouse information"}, + }, + }, + } + + // Workspace service + index.Services["workspace"] = &ServiceDoc{ + Name: "Workspace", + Description: "Workspace API for managing notebooks, folders, and other workspace objects.", + Package: "github.com/databricks/databricks-sdk-go/service/workspace", + Methods: map[string]*MethodDoc{ + "List": { + Name: "List", + Description: "Lists the contents of a directory.", + Signature: "List(ctx context.Context, request ListWorkspaceRequest) listing.Iterator[ObjectInfo]", + Parameters: []ParamDoc{ + {Name: "request", Type: "ListWorkspaceRequest", Description: "Contains path to list", Required: true}, + }, + Returns: &ReturnDoc{Type: "listing.Iterator[ObjectInfo]", Description: "Iterator over workspace objects"}, + }, + "GetStatus": { + Name: "GetStatus", + Description: "Gets the status of a workspace object.", + Signature: "GetStatus(ctx context.Context, request GetStatusRequest) (*ObjectInfo, error)", + Parameters: []ParamDoc{ + {Name: "request", Type: "GetStatusRequest", Description: "Contains path to get status for", Required: true}, + }, + Returns: &ReturnDoc{Type: "*ObjectInfo", Description: "Object information including type and path"}, + }, + "Import": { + Name: "Import", + Description: "Imports a notebook or file into the workspace.", + Signature: "Import(ctx context.Context, request Import) error", + Parameters: []ParamDoc{ + {Name: "request", Type: "Import", Description: "Path, content, and format of the object to import", Required: true}, + }, + }, + }, + } + + // Add some common enums + index.Enums["jobs.RunLifeCycleState"] = &EnumDoc{ + Name: "RunLifeCycleState", + Package: "jobs", + Description: "The current state of the run lifecycle.", + Values: []string{"PENDING", "RUNNING", "TERMINATING", "TERMINATED", "SKIPPED", "INTERNAL_ERROR"}, + } + + index.Enums["compute.State"] = &EnumDoc{ + Name: "State", + Package: "compute", + Description: "The state of a cluster.", + Values: []string{"PENDING", "RUNNING", "RESTARTING", 
"RESIZING", "TERMINATING", "TERMINATED", "ERROR", "UNKNOWN"}, + } + + index.Enums["pipelines.PipelineState"] = &EnumDoc{ + Name: "PipelineState", + Package: "pipelines", + Description: "The state of a pipeline.", + Values: []string{"IDLE", "RUNNING", "STARTING", "STOPPING", "DELETED", "RECOVERING", "FAILED", "RESETTING"}, + } +} + +func writeIndex(index *SDKDocsIndex, path string) error { + // Ensure directory exists + if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil { + return fmt.Errorf("failed to create output directory: %w", err) + } + + // Sort maps for deterministic output + sortIndex(index) + + // Marshal with indentation for readability + data, err := json.MarshalIndent(index, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal index: %w", err) + } + + if err := os.WriteFile(path, data, 0644); err != nil { + return fmt.Errorf("failed to write index file: %w", err) + } + + return nil +} + +func sortIndex(index *SDKDocsIndex) { + // Sort service methods + for _, service := range index.Services { + // Methods are already in a map, which will be sorted by JSON marshaling + _ = service + } + + // Sort type fields + for _, typeDoc := range index.Types { + // Sort fields by converting to sorted slice would require changing structure + // For now, rely on JSON marshaling order + _ = typeDoc + } + + // Sort enum values + for _, enumDoc := range index.Enums { + sort.Strings(enumDoc.Values) + } +} From 142b8c92b17fea698adbc4e008a426dcae0309b5 Mon Sep 17 00:00:00 2001 From: Evgenii Kniazev Date: Mon, 12 Jan 2026 15:56:00 +0000 Subject: [PATCH 2/2] Add Claude Code skill for SDK documentation queries Adds a skill that helps Claude Code users discover and use the databricks_query_sdk_docs MCP tool effectively when asking about SDK methods, types, and parameters. 
Co-Authored-By: Claude Opus 4.5 --- .claude/skills/sdk-docs/SKILL.md | 62 +++++++++++ .claude/skills/sdk-docs/query-sdk-docs.sh | 129 ++++++++++++++++++++++ 2 files changed, 191 insertions(+) create mode 100644 .claude/skills/sdk-docs/SKILL.md create mode 100755 .claude/skills/sdk-docs/query-sdk-docs.sh diff --git a/.claude/skills/sdk-docs/SKILL.md b/.claude/skills/sdk-docs/SKILL.md new file mode 100644 index 0000000000..287055816a --- /dev/null +++ b/.claude/skills/sdk-docs/SKILL.md @@ -0,0 +1,62 @@ +--- +name: databricks-sdk-docs +description: Use this skill when the user asks about Databricks SDK methods, API signatures, parameter types, return types, or how to use specific Databricks APIs programmatically. Triggers on questions like "how do I create a job", "what parameters does X take", "SDK method for Y", or "JobSettings fields". +allowed-tools: mcp__databricks-mcp__databricks_query_sdk_docs +--- + +# Databricks SDK Documentation Skill + +When users ask about Databricks SDK usage, API methods, or type definitions, use the `databricks_query_sdk_docs` MCP tool to find accurate documentation. + +## When to Use This Skill + +- User asks "how do I create a job/cluster/pipeline using the SDK?" +- User needs method signatures: "what's the signature for Jobs.Create?" +- User asks about type fields: "what fields does CreateJob have?" +- User needs enum values: "what are the possible run lifecycle states?" +- User is confused about SDK API parameters or return types + +## How to Query + +Use the `databricks_query_sdk_docs` tool with these parameters: + +```json +{ + "query": "search terms", + "category": "methods|types|enums|services", // optional filter + "service": "jobs|compute|pipelines|...", // optional filter + "limit": 10 // default 10, max 50 +} +``` + +## Example Queries + +| User Question | Tool Query | +|---------------|------------| +| "How do I create a job?" | `{"query": "create job", "category": "methods"}` | +| "What fields does JobSettings have?" 
| `{"query": "JobSettings", "category": "types"}` | +| "What are the run states?" | `{"query": "run lifecycle state", "category": "enums"}` | +| "List all jobs API methods" | `{"query": "jobs", "service": "jobs", "category": "methods"}` | + +## Response Guidelines + +After querying, provide: +1. The method signature with parameter types +2. A brief description of what the method does +3. Key parameters the user likely needs +4. A simple code example if applicable + +Keep responses focused on what the user asked - don't dump all documentation. + +## CLI Fallback + +If MCP is unavailable, use the helper script: + +```bash +# From the CLI repo root +.claude/skills/sdk-docs/query-sdk-docs.sh "create job" +.claude/skills/sdk-docs/query-sdk-docs.sh "JobSettings" types +.claude/skills/sdk-docs/query-sdk-docs.sh "list" methods jobs 20 +``` + +The script searches the embedded SDK docs index directly using `jq`. diff --git a/.claude/skills/sdk-docs/query-sdk-docs.sh b/.claude/skills/sdk-docs/query-sdk-docs.sh new file mode 100755 index 0000000000..dc36bb7d9d --- /dev/null +++ b/.claude/skills/sdk-docs/query-sdk-docs.sh @@ -0,0 +1,129 @@ +#!/usr/bin/env bash +# +# Query Databricks SDK documentation from the command line. +# Usage: ./query-sdk-docs.sh <query> [category] [service] [limit] +# +# Examples: +# ./query-sdk-docs.sh "create job" +# ./query-sdk-docs.sh "JobSettings" types +# ./query-sdk-docs.sh "list" methods jobs +# ./query-sdk-docs.sh "cluster" methods compute 20 +# +# Categories: methods, types, enums, services +# Services: jobs, compute, pipelines, workspace, etc. 
set -euo pipefail

# Positional arguments: <query> [category] [service] [limit]
QUERY="${1:-}"
CATEGORY="${2:-}"
SERVICE="${3:-}"
LIMIT="${4:-10}"

if [[ -z "$QUERY" ]]; then
    echo "Usage: $0 <query> [category] [service] [limit]"
    echo ""
    echo "Examples:"
    echo " $0 'create job' # Search for 'create job'"
    echo " $0 'JobSettings' types # Search types for 'JobSettings'"
    echo " $0 'list' methods jobs # Search jobs service methods for 'list'"
    echo ""
    echo "Categories: methods, types, enums, services"
    exit 1
fi

# Reject values that would otherwise fail later with a confusing message
# (a non-numeric limit) or silently print nothing (an unknown category).
if ! [[ "$LIMIT" =~ ^[1-9][0-9]*$ ]]; then
    echo "Error: limit must be a positive integer, got '$LIMIT'" >&2
    exit 1
fi

case "$CATEGORY" in
    "" | methods | types | enums | services) ;;
    *)
        echo "Error: unknown category '$CATEGORY' (expected methods, types, enums or services)" >&2
        exit 1
        ;;
esac

# Escape a string for use inside a double-quoted JSON string.
json_escape() {
    local s="$1"
    s="${s//\\/\\\\}"
    s="${s//\"/\\\"}"
    s="${s//$'\n'/\\n}"
    s="${s//$'\r'/\\r}"
    s="${s//$'\t'/\\t}"
    printf '%s' "$s"
}

# Build the JSON input for the MCP tool
build_json_input() {
    local json
    json="{\"query\": \"$(json_escape "$QUERY")\""

    if [[ -n "$CATEGORY" ]]; then
        json+=", \"category\": \"$(json_escape "$CATEGORY")\""
    fi

    if [[ -n "$SERVICE" ]]; then
        json+=", \"service\": \"$(json_escape "$SERVICE")\""
    fi

    json+=", \"limit\": $LIMIT}"
    echo "$json"
}

# Try to find the SDK docs index file for direct search
SDK_DOCS_INDEX="${SDK_DOCS_INDEX:-}"
if [[ -z "$SDK_DOCS_INDEX" ]]; then
    # Look for the index in common locations
    SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
    CLI_ROOT="$(cd "$SCRIPT_DIR/../../.." && pwd)"

    POSSIBLE_PATHS=(
        "$CLI_ROOT/experimental/aitools/lib/providers/sdkdocs/sdk_docs_index.json"
        "./sdk_docs_index.json"
    )

    for path in "${POSSIBLE_PATHS[@]}"; do
        if [[ -f "$path" ]]; then
            SDK_DOCS_INDEX="$path"
            break
        fi
    done
fi

# jq program fragments shared by every section.
#
# The query is split into whitespace-separated terms and an entry matches when
# every term occurs (case-insensitively) in its key or description, so
# "create job" finds "Create: Create a new job.". Results are collected into
# an array and truncated inside jq; piping into `head` would close the pipe
# early and, under `pipefail`, turn a successful search into a failure.
JQ_HEAD='($q | ascii_downcase | split(" ") | map(select(length > 0))) as $terms | ['
JQ_TAIL='] | .[:$limit][]'
JQ_MATCH='((.key + " " + (.value.description // "")) | ascii_downcase) as $hay
    | select(all($terms[]; . as $t | $hay | contains($t)))'

# search_section TITLE FILTER
# Runs FILTER (a jq generator of result lines) against the index and prints
# the matches under a "## TITLE" heading, or " (no matches)" when there are none.
search_section() {
    local title="$1" filter="$2" results

    echo ""
    echo "## $title"
    if ! results="$(jq -r --arg q "$QUERY" --arg svc "$SERVICE" --argjson limit "$LIMIT" \
        "${JQ_HEAD}${filter}${JQ_TAIL}" "$SDK_DOCS_INDEX")"; then
        echo "Error: jq failed while searching $SDK_DOCS_INDEX" >&2
        exit 1
    fi

    if [[ -n "$results" ]]; then
        printf '%s\n' "$results"
    else
        echo " (no matches)"
    fi
}

# If we have jq and the index file, do a direct search
if command -v jq &>/dev/null && [[ -n "$SDK_DOCS_INDEX" && -f "$SDK_DOCS_INDEX" ]]; then
    echo "Searching SDK docs for: $QUERY"
    echo "---"

    # Search services
    if [[ -z "$CATEGORY" || "$CATEGORY" == "services" ]]; then
        search_section "Services" '
            .services // {} | to_entries[]
            | select($svc == "" or .key == $svc)
            | '"$JQ_MATCH"'
            | "- \(.key): \(.value.description // "No description")"
        '
    fi

    # Search methods
    if [[ -z "$CATEGORY" || "$CATEGORY" == "methods" ]]; then
        search_section "Methods" '
            .services // {} | to_entries[]
            | select($svc == "" or .key == $svc)
            | .key as $service
            | .value.methods // {} | to_entries[]
            | '"$JQ_MATCH"'
            | "- \($service).\(.key): \(.value.description // "No description")[signature: \(.value.signature // "N/A")]"
        '
    fi

    # Search types (keys look like "<service>.<TypeName>")
    if [[ -z "$CATEGORY" || "$CATEGORY" == "types" ]]; then
        search_section "Types" '
            .types // {} | to_entries[]
            | select($svc == "" or (.key | startswith($svc + ".")))
            | '"$JQ_MATCH"'
            | "- \(.key): \(.value.description // "No description")"
        '
    fi

    # Search enums (keys look like "<service>.<EnumName>")
    if [[ -z "$CATEGORY" || "$CATEGORY" == "enums" ]]; then
        search_section "Enums" '
            .enums // {} | to_entries[]
            | select($svc == "" or (.key | startswith($svc + ".")))
            | '"$JQ_MATCH"'
            | "- \(.key): \(.value.values // [] | join(", "))"
        '
    fi
else
    # Fallback: show how to use the MCP tool
    if ! command -v jq &>/dev/null; then
        echo "jq is not installed, so the index cannot be searched locally. Use the MCP tool instead:"
    else
        echo "SDK docs index not found locally. Use the MCP tool instead:"
    fi
    echo ""
    echo "databricks_query_sdk_docs with input:"
    build_json_input
    echo ""
    echo "Or set SDK_DOCS_INDEX environment variable to point to sdk_docs_index.json"
fi