databricks.ModelServing
Explore with Pulumi AI
This resource allows you to manage Model Serving endpoints in Databricks.
If you replace
served_models
with served_entities
in an existing serving endpoint, the serving endpoint will briefly go into an update state (~30 seconds) and increment the config version.
Example Usage
import * as pulumi from "@pulumi/pulumi";
import * as databricks from "@pulumi/databricks";
const _this = new databricks.ModelServing("this", {
name: "ads-serving-endpoint",
config: {
servedEntities: [
{
name: "prod_model",
entityName: "ads-model",
entityVersion: "2",
workloadSize: "Small",
scaleToZeroEnabled: true,
},
{
name: "candidate_model",
entityName: "ads-model",
entityVersion: "4",
workloadSize: "Small",
scaleToZeroEnabled: false,
},
],
trafficConfig: {
routes: [
{
servedModelName: "prod_model",
trafficPercentage: 90,
},
{
servedModelName: "candidate_model",
trafficPercentage: 10,
},
],
},
},
});
import pulumi
import pulumi_databricks as databricks
this = databricks.ModelServing("this",
name="ads-serving-endpoint",
config={
"served_entities": [
{
"name": "prod_model",
"entity_name": "ads-model",
"entity_version": "2",
"workload_size": "Small",
"scale_to_zero_enabled": True,
},
{
"name": "candidate_model",
"entity_name": "ads-model",
"entity_version": "4",
"workload_size": "Small",
"scale_to_zero_enabled": False,
},
],
"traffic_config": {
"routes": [
{
"served_model_name": "prod_model",
"traffic_percentage": 90,
},
{
"served_model_name": "candidate_model",
"traffic_percentage": 10,
},
],
},
})
package main
import (
"github.com/pulumi/pulumi-databricks/sdk/go/databricks"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := databricks.NewModelServing(ctx, "this", &databricks.ModelServingArgs{
Name: pulumi.String("ads-serving-endpoint"),
Config: &databricks.ModelServingConfigArgs{
ServedEntities: databricks.ModelServingConfigServedEntityArray{
&databricks.ModelServingConfigServedEntityArgs{
Name: pulumi.String("prod_model"),
EntityName: pulumi.String("ads-model"),
EntityVersion: pulumi.String("2"),
WorkloadSize: pulumi.String("Small"),
ScaleToZeroEnabled: pulumi.Bool(true),
},
&databricks.ModelServingConfigServedEntityArgs{
Name: pulumi.String("candidate_model"),
EntityName: pulumi.String("ads-model"),
EntityVersion: pulumi.String("4"),
WorkloadSize: pulumi.String("Small"),
ScaleToZeroEnabled: pulumi.Bool(false),
},
},
TrafficConfig: &databricks.ModelServingConfigTrafficConfigArgs{
Routes: databricks.ModelServingConfigTrafficConfigRouteArray{
&databricks.ModelServingConfigTrafficConfigRouteArgs{
ServedModelName: pulumi.String("prod_model"),
TrafficPercentage: pulumi.Int(90),
},
&databricks.ModelServingConfigTrafficConfigRouteArgs{
ServedModelName: pulumi.String("candidate_model"),
TrafficPercentage: pulumi.Int(10),
},
},
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Databricks = Pulumi.Databricks;
return await Deployment.RunAsync(() =>
{
var @this = new Databricks.ModelServing("this", new()
{
Name = "ads-serving-endpoint",
Config = new Databricks.Inputs.ModelServingConfigArgs
{
ServedEntities = new[]
{
new Databricks.Inputs.ModelServingConfigServedEntityArgs
{
Name = "prod_model",
EntityName = "ads-model",
EntityVersion = "2",
WorkloadSize = "Small",
ScaleToZeroEnabled = true,
},
new Databricks.Inputs.ModelServingConfigServedEntityArgs
{
Name = "candidate_model",
EntityName = "ads-model",
EntityVersion = "4",
WorkloadSize = "Small",
ScaleToZeroEnabled = false,
},
},
TrafficConfig = new Databricks.Inputs.ModelServingConfigTrafficConfigArgs
{
Routes = new[]
{
new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
{
ServedModelName = "prod_model",
TrafficPercentage = 90,
},
new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
{
ServedModelName = "candidate_model",
TrafficPercentage = 10,
},
},
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.databricks.ModelServing;
import com.pulumi.databricks.ModelServingArgs;
import com.pulumi.databricks.inputs.ModelServingConfigArgs;
import com.pulumi.databricks.inputs.ModelServingConfigTrafficConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var this_ = new ModelServing("this", ModelServingArgs.builder()
.name("ads-serving-endpoint")
.config(ModelServingConfigArgs.builder()
.servedEntities(
ModelServingConfigServedEntityArgs.builder()
.name("prod_model")
.entityName("ads-model")
.entityVersion("2")
.workloadSize("Small")
.scaleToZeroEnabled(true)
.build(),
ModelServingConfigServedEntityArgs.builder()
.name("candidate_model")
.entityName("ads-model")
.entityVersion("4")
.workloadSize("Small")
.scaleToZeroEnabled(false)
.build())
.trafficConfig(ModelServingConfigTrafficConfigArgs.builder()
.routes(
ModelServingConfigTrafficConfigRouteArgs.builder()
.servedModelName("prod_model")
.trafficPercentage(90)
.build(),
ModelServingConfigTrafficConfigRouteArgs.builder()
.servedModelName("candidate_model")
.trafficPercentage(10)
.build())
.build())
.build())
.build());
}
}
resources:
this:
type: databricks:ModelServing
properties:
name: ads-serving-endpoint
config:
servedEntities:
- name: prod_model
entityName: ads-model
entityVersion: '2'
workloadSize: Small
scaleToZeroEnabled: true
- name: candidate_model
entityName: ads-model
entityVersion: '4'
workloadSize: Small
scaleToZeroEnabled: false
trafficConfig:
routes:
- servedModelName: prod_model
trafficPercentage: 90
- servedModelName: candidate_model
trafficPercentage: 10
Access Control
- databricks.Permissions can control which groups or individual users can Manage, Query or View individual serving endpoints.
Related Resources
The following resources are often used in the same context:
- databricks.RegisteredModel to create Models in Unity Catalog in Databricks.
- End to end workspace management guide.
- databricks.Directory to manage directories in Databricks Workspace.
- databricks.MlflowModel to create models in the workspace model registry in Databricks.
- databricks.Notebook to manage Databricks Notebooks.
- databricks.Notebook data to export a notebook from Databricks Workspace.
- databricks.Repo to manage Databricks Repos.
Create ModelServing Resource
Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.
Constructor syntax
new ModelServing(name: string, args: ModelServingArgs, opts?: CustomResourceOptions);
@overload
def ModelServing(resource_name: str,
args: ModelServingArgs,
opts: Optional[ResourceOptions] = None)
@overload
def ModelServing(resource_name: str,
opts: Optional[ResourceOptions] = None,
config: Optional[ModelServingConfigArgs] = None,
ai_gateway: Optional[ModelServingAiGatewayArgs] = None,
name: Optional[str] = None,
rate_limits: Optional[Sequence[ModelServingRateLimitArgs]] = None,
route_optimized: Optional[bool] = None,
tags: Optional[Sequence[ModelServingTagArgs]] = None)
func NewModelServing(ctx *Context, name string, args ModelServingArgs, opts ...ResourceOption) (*ModelServing, error)
public ModelServing(string name, ModelServingArgs args, CustomResourceOptions? opts = null)
public ModelServing(String name, ModelServingArgs args)
public ModelServing(String name, ModelServingArgs args, CustomResourceOptions options)
type: databricks:ModelServing
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
Parameters
- name string
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
Constructor example
The following reference example uses placeholder values for all input properties.
var modelServingResource = new Databricks.ModelServing("modelServingResource", new()
{
Config = new Databricks.Inputs.ModelServingConfigArgs
{
AutoCaptureConfig = new Databricks.Inputs.ModelServingConfigAutoCaptureConfigArgs
{
CatalogName = "string",
Enabled = false,
SchemaName = "string",
TableNamePrefix = "string",
},
ServedEntities = new[]
{
new Databricks.Inputs.ModelServingConfigServedEntityArgs
{
EntityName = "string",
EntityVersion = "string",
EnvironmentVars =
{
{ "string", "string" },
},
ExternalModel = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelArgs
{
Name = "string",
Provider = "string",
Task = "string",
Ai21labsConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelAi21labsConfigArgs
{
Ai21labsApiKey = "string",
Ai21labsApiKeyPlaintext = "string",
},
AmazonBedrockConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs
{
AwsRegion = "string",
BedrockProvider = "string",
AwsAccessKeyId = "string",
AwsAccessKeyIdPlaintext = "string",
AwsSecretAccessKey = "string",
AwsSecretAccessKeyPlaintext = "string",
},
AnthropicConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelAnthropicConfigArgs
{
AnthropicApiKey = "string",
AnthropicApiKeyPlaintext = "string",
},
CohereConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelCohereConfigArgs
{
CohereApiBase = "string",
CohereApiKey = "string",
CohereApiKeyPlaintext = "string",
},
DatabricksModelServingConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs
{
DatabricksWorkspaceUrl = "string",
DatabricksApiToken = "string",
DatabricksApiTokenPlaintext = "string",
},
GoogleCloudVertexAiConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfigArgs
{
PrivateKey = "string",
PrivateKeyPlaintext = "string",
ProjectId = "string",
Region = "string",
},
OpenaiConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelOpenaiConfigArgs
{
MicrosoftEntraClientId = "string",
MicrosoftEntraClientSecret = "string",
MicrosoftEntraClientSecretPlaintext = "string",
MicrosoftEntraTenantId = "string",
OpenaiApiBase = "string",
OpenaiApiKey = "string",
OpenaiApiKeyPlaintext = "string",
OpenaiApiType = "string",
OpenaiApiVersion = "string",
OpenaiDeploymentName = "string",
OpenaiOrganization = "string",
},
PalmConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelPalmConfigArgs
{
PalmApiKey = "string",
PalmApiKeyPlaintext = "string",
},
},
InstanceProfileArn = "string",
MaxProvisionedThroughput = 0,
MinProvisionedThroughput = 0,
Name = "string",
ScaleToZeroEnabled = false,
WorkloadSize = "string",
WorkloadType = "string",
},
},
TrafficConfig = new Databricks.Inputs.ModelServingConfigTrafficConfigArgs
{
Routes = new[]
{
new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
{
ServedModelName = "string",
TrafficPercentage = 0,
},
},
},
},
AiGateway = new Databricks.Inputs.ModelServingAiGatewayArgs
{
Guardrails = new Databricks.Inputs.ModelServingAiGatewayGuardrailsArgs
{
Input = new Databricks.Inputs.ModelServingAiGatewayGuardrailsInputArgs
{
InvalidKeywords = new[]
{
"string",
},
Pii = new Databricks.Inputs.ModelServingAiGatewayGuardrailsInputPiiArgs
{
Behavior = "string",
},
Safety = false,
ValidTopics = new[]
{
"string",
},
},
Output = new Databricks.Inputs.ModelServingAiGatewayGuardrailsOutputArgs
{
InvalidKeywords = new[]
{
"string",
},
Pii = new Databricks.Inputs.ModelServingAiGatewayGuardrailsOutputPiiArgs
{
Behavior = "string",
},
Safety = false,
ValidTopics = new[]
{
"string",
},
},
},
InferenceTableConfig = new Databricks.Inputs.ModelServingAiGatewayInferenceTableConfigArgs
{
CatalogName = "string",
Enabled = false,
SchemaName = "string",
TableNamePrefix = "string",
},
RateLimits = new[]
{
new Databricks.Inputs.ModelServingAiGatewayRateLimitArgs
{
Calls = 0,
RenewalPeriod = "string",
Key = "string",
},
},
UsageTrackingConfig = new Databricks.Inputs.ModelServingAiGatewayUsageTrackingConfigArgs
{
Enabled = false,
},
},
Name = "string",
RateLimits = new[]
{
new Databricks.Inputs.ModelServingRateLimitArgs
{
Calls = 0,
RenewalPeriod = "string",
Key = "string",
},
},
RouteOptimized = false,
Tags = new[]
{
new Databricks.Inputs.ModelServingTagArgs
{
Key = "string",
Value = "string",
},
},
});
example, err := databricks.NewModelServing(ctx, "modelServingResource", &databricks.ModelServingArgs{
Config: &databricks.ModelServingConfigArgs{
AutoCaptureConfig: &databricks.ModelServingConfigAutoCaptureConfigArgs{
CatalogName: pulumi.String("string"),
Enabled: pulumi.Bool(false),
SchemaName: pulumi.String("string"),
TableNamePrefix: pulumi.String("string"),
},
ServedEntities: databricks.ModelServingConfigServedEntityArray{
&databricks.ModelServingConfigServedEntityArgs{
EntityName: pulumi.String("string"),
EntityVersion: pulumi.String("string"),
EnvironmentVars: pulumi.StringMap{
"string": pulumi.String("string"),
},
ExternalModel: &databricks.ModelServingConfigServedEntityExternalModelArgs{
Name: pulumi.String("string"),
Provider: pulumi.String("string"),
Task: pulumi.String("string"),
Ai21labsConfig: &databricks.ModelServingConfigServedEntityExternalModelAi21labsConfigArgs{
Ai21labsApiKey: pulumi.String("string"),
Ai21labsApiKeyPlaintext: pulumi.String("string"),
},
AmazonBedrockConfig: &databricks.ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs{
AwsRegion: pulumi.String("string"),
BedrockProvider: pulumi.String("string"),
AwsAccessKeyId: pulumi.String("string"),
AwsAccessKeyIdPlaintext: pulumi.String("string"),
AwsSecretAccessKey: pulumi.String("string"),
AwsSecretAccessKeyPlaintext: pulumi.String("string"),
},
AnthropicConfig: &databricks.ModelServingConfigServedEntityExternalModelAnthropicConfigArgs{
AnthropicApiKey: pulumi.String("string"),
AnthropicApiKeyPlaintext: pulumi.String("string"),
},
CohereConfig: &databricks.ModelServingConfigServedEntityExternalModelCohereConfigArgs{
CohereApiBase: pulumi.String("string"),
CohereApiKey: pulumi.String("string"),
CohereApiKeyPlaintext: pulumi.String("string"),
},
DatabricksModelServingConfig: &databricks.ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs{
DatabricksWorkspaceUrl: pulumi.String("string"),
DatabricksApiToken: pulumi.String("string"),
DatabricksApiTokenPlaintext: pulumi.String("string"),
},
GoogleCloudVertexAiConfig: &databricks.ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfigArgs{
PrivateKey: pulumi.String("string"),
PrivateKeyPlaintext: pulumi.String("string"),
ProjectId: pulumi.String("string"),
Region: pulumi.String("string"),
},
OpenaiConfig: &databricks.ModelServingConfigServedEntityExternalModelOpenaiConfigArgs{
MicrosoftEntraClientId: pulumi.String("string"),
MicrosoftEntraClientSecret: pulumi.String("string"),
MicrosoftEntraClientSecretPlaintext: pulumi.String("string"),
MicrosoftEntraTenantId: pulumi.String("string"),
OpenaiApiBase: pulumi.String("string"),
OpenaiApiKey: pulumi.String("string"),
OpenaiApiKeyPlaintext: pulumi.String("string"),
OpenaiApiType: pulumi.String("string"),
OpenaiApiVersion: pulumi.String("string"),
OpenaiDeploymentName: pulumi.String("string"),
OpenaiOrganization: pulumi.String("string"),
},
PalmConfig: &databricks.ModelServingConfigServedEntityExternalModelPalmConfigArgs{
PalmApiKey: pulumi.String("string"),
PalmApiKeyPlaintext: pulumi.String("string"),
},
},
InstanceProfileArn: pulumi.String("string"),
MaxProvisionedThroughput: pulumi.Int(0),
MinProvisionedThroughput: pulumi.Int(0),
Name: pulumi.String("string"),
ScaleToZeroEnabled: pulumi.Bool(false),
WorkloadSize: pulumi.String("string"),
WorkloadType: pulumi.String("string"),
},
},
TrafficConfig: &databricks.ModelServingConfigTrafficConfigArgs{
Routes: databricks.ModelServingConfigTrafficConfigRouteArray{
&databricks.ModelServingConfigTrafficConfigRouteArgs{
ServedModelName: pulumi.String("string"),
TrafficPercentage: pulumi.Int(0),
},
},
},
},
AiGateway: &databricks.ModelServingAiGatewayArgs{
Guardrails: &databricks.ModelServingAiGatewayGuardrailsArgs{
Input: &databricks.ModelServingAiGatewayGuardrailsInputTypeArgs{
InvalidKeywords: pulumi.StringArray{
pulumi.String("string"),
},
Pii: &databricks.ModelServingAiGatewayGuardrailsInputPiiArgs{
Behavior: pulumi.String("string"),
},
Safety: pulumi.Bool(false),
ValidTopics: pulumi.StringArray{
pulumi.String("string"),
},
},
Output: &databricks.ModelServingAiGatewayGuardrailsOutputTypeArgs{
InvalidKeywords: pulumi.StringArray{
pulumi.String("string"),
},
Pii: &databricks.ModelServingAiGatewayGuardrailsOutputPiiArgs{
Behavior: pulumi.String("string"),
},
Safety: pulumi.Bool(false),
ValidTopics: pulumi.StringArray{
pulumi.String("string"),
},
},
},
InferenceTableConfig: &databricks.ModelServingAiGatewayInferenceTableConfigArgs{
CatalogName: pulumi.String("string"),
Enabled: pulumi.Bool(false),
SchemaName: pulumi.String("string"),
TableNamePrefix: pulumi.String("string"),
},
RateLimits: databricks.ModelServingAiGatewayRateLimitArray{
&databricks.ModelServingAiGatewayRateLimitArgs{
Calls: pulumi.Int(0),
RenewalPeriod: pulumi.String("string"),
Key: pulumi.String("string"),
},
},
UsageTrackingConfig: &databricks.ModelServingAiGatewayUsageTrackingConfigArgs{
Enabled: pulumi.Bool(false),
},
},
Name: pulumi.String("string"),
RateLimits: databricks.ModelServingRateLimitArray{
&databricks.ModelServingRateLimitArgs{
Calls: pulumi.Int(0),
RenewalPeriod: pulumi.String("string"),
Key: pulumi.String("string"),
},
},
RouteOptimized: pulumi.Bool(false),
Tags: databricks.ModelServingTagArray{
&databricks.ModelServingTagArgs{
Key: pulumi.String("string"),
Value: pulumi.String("string"),
},
},
})
var modelServingResource = new ModelServing("modelServingResource", ModelServingArgs.builder()
.config(ModelServingConfigArgs.builder()
.autoCaptureConfig(ModelServingConfigAutoCaptureConfigArgs.builder()
.catalogName("string")
.enabled(false)
.schemaName("string")
.tableNamePrefix("string")
.build())
.servedEntities(ModelServingConfigServedEntityArgs.builder()
.entityName("string")
.entityVersion("string")
.environmentVars(Map.of("string", "string"))
.externalModel(ModelServingConfigServedEntityExternalModelArgs.builder()
.name("string")
.provider("string")
.task("string")
.ai21labsConfig(ModelServingConfigServedEntityExternalModelAi21labsConfigArgs.builder()
.ai21labsApiKey("string")
.ai21labsApiKeyPlaintext("string")
.build())
.amazonBedrockConfig(ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs.builder()
.awsRegion("string")
.bedrockProvider("string")
.awsAccessKeyId("string")
.awsAccessKeyIdPlaintext("string")
.awsSecretAccessKey("string")
.awsSecretAccessKeyPlaintext("string")
.build())
.anthropicConfig(ModelServingConfigServedEntityExternalModelAnthropicConfigArgs.builder()
.anthropicApiKey("string")
.anthropicApiKeyPlaintext("string")
.build())
.cohereConfig(ModelServingConfigServedEntityExternalModelCohereConfigArgs.builder()
.cohereApiBase("string")
.cohereApiKey("string")
.cohereApiKeyPlaintext("string")
.build())
.databricksModelServingConfig(ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs.builder()
.databricksWorkspaceUrl("string")
.databricksApiToken("string")
.databricksApiTokenPlaintext("string")
.build())
.googleCloudVertexAiConfig(ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfigArgs.builder()
.privateKey("string")
.privateKeyPlaintext("string")
.projectId("string")
.region("string")
.build())
.openaiConfig(ModelServingConfigServedEntityExternalModelOpenaiConfigArgs.builder()
.microsoftEntraClientId("string")
.microsoftEntraClientSecret("string")
.microsoftEntraClientSecretPlaintext("string")
.microsoftEntraTenantId("string")
.openaiApiBase("string")
.openaiApiKey("string")
.openaiApiKeyPlaintext("string")
.openaiApiType("string")
.openaiApiVersion("string")
.openaiDeploymentName("string")
.openaiOrganization("string")
.build())
.palmConfig(ModelServingConfigServedEntityExternalModelPalmConfigArgs.builder()
.palmApiKey("string")
.palmApiKeyPlaintext("string")
.build())
.build())
.instanceProfileArn("string")
.maxProvisionedThroughput(0)
.minProvisionedThroughput(0)
.name("string")
.scaleToZeroEnabled(false)
.workloadSize("string")
.workloadType("string")
.build())
.trafficConfig(ModelServingConfigTrafficConfigArgs.builder()
.routes(ModelServingConfigTrafficConfigRouteArgs.builder()
.servedModelName("string")
.trafficPercentage(0)
.build())
.build())
.build())
.aiGateway(ModelServingAiGatewayArgs.builder()
.guardrails(ModelServingAiGatewayGuardrailsArgs.builder()
.input(ModelServingAiGatewayGuardrailsInputArgs.builder()
.invalidKeywords("string")
.pii(ModelServingAiGatewayGuardrailsInputPiiArgs.builder()
.behavior("string")
.build())
.safety(false)
.validTopics("string")
.build())
.output(ModelServingAiGatewayGuardrailsOutputArgs.builder()
.invalidKeywords("string")
.pii(ModelServingAiGatewayGuardrailsOutputPiiArgs.builder()
.behavior("string")
.build())
.safety(false)
.validTopics("string")
.build())
.build())
.inferenceTableConfig(ModelServingAiGatewayInferenceTableConfigArgs.builder()
.catalogName("string")
.enabled(false)
.schemaName("string")
.tableNamePrefix("string")
.build())
.rateLimits(ModelServingAiGatewayRateLimitArgs.builder()
.calls(0)
.renewalPeriod("string")
.key("string")
.build())
.usageTrackingConfig(ModelServingAiGatewayUsageTrackingConfigArgs.builder()
.enabled(false)
.build())
.build())
.name("string")
.rateLimits(ModelServingRateLimitArgs.builder()
.calls(0)
.renewalPeriod("string")
.key("string")
.build())
.routeOptimized(false)
.tags(ModelServingTagArgs.builder()
.key("string")
.value("string")
.build())
.build());
model_serving_resource = databricks.ModelServing("modelServingResource",
config={
"auto_capture_config": {
"catalog_name": "string",
"enabled": False,
"schema_name": "string",
"table_name_prefix": "string",
},
"served_entities": [{
"entity_name": "string",
"entity_version": "string",
"environment_vars": {
"string": "string",
},
"external_model": {
"name": "string",
"provider": "string",
"task": "string",
"ai21labs_config": {
"ai21labs_api_key": "string",
"ai21labs_api_key_plaintext": "string",
},
"amazon_bedrock_config": {
"aws_region": "string",
"bedrock_provider": "string",
"aws_access_key_id": "string",
"aws_access_key_id_plaintext": "string",
"aws_secret_access_key": "string",
"aws_secret_access_key_plaintext": "string",
},
"anthropic_config": {
"anthropic_api_key": "string",
"anthropic_api_key_plaintext": "string",
},
"cohere_config": {
"cohere_api_base": "string",
"cohere_api_key": "string",
"cohere_api_key_plaintext": "string",
},
"databricks_model_serving_config": {
"databricks_workspace_url": "string",
"databricks_api_token": "string",
"databricks_api_token_plaintext": "string",
},
"google_cloud_vertex_ai_config": {
"private_key": "string",
"private_key_plaintext": "string",
"project_id": "string",
"region": "string",
},
"openai_config": {
"microsoft_entra_client_id": "string",
"microsoft_entra_client_secret": "string",
"microsoft_entra_client_secret_plaintext": "string",
"microsoft_entra_tenant_id": "string",
"openai_api_base": "string",
"openai_api_key": "string",
"openai_api_key_plaintext": "string",
"openai_api_type": "string",
"openai_api_version": "string",
"openai_deployment_name": "string",
"openai_organization": "string",
},
"palm_config": {
"palm_api_key": "string",
"palm_api_key_plaintext": "string",
},
},
"instance_profile_arn": "string",
"max_provisioned_throughput": 0,
"min_provisioned_throughput": 0,
"name": "string",
"scale_to_zero_enabled": False,
"workload_size": "string",
"workload_type": "string",
}],
"traffic_config": {
"routes": [{
"served_model_name": "string",
"traffic_percentage": 0,
}],
},
},
ai_gateway={
"guardrails": {
"input": {
"invalid_keywords": ["string"],
"pii": {
"behavior": "string",
},
"safety": False,
"valid_topics": ["string"],
},
"output": {
"invalid_keywords": ["string"],
"pii": {
"behavior": "string",
},
"safety": False,
"valid_topics": ["string"],
},
},
"inference_table_config": {
"catalog_name": "string",
"enabled": False,
"schema_name": "string",
"table_name_prefix": "string",
},
"rate_limits": [{
"calls": 0,
"renewal_period": "string",
"key": "string",
}],
"usage_tracking_config": {
"enabled": False,
},
},
name="string",
rate_limits=[{
"calls": 0,
"renewal_period": "string",
"key": "string",
}],
route_optimized=False,
tags=[{
"key": "string",
"value": "string",
}])
const modelServingResource = new databricks.ModelServing("modelServingResource", {
config: {
autoCaptureConfig: {
catalogName: "string",
enabled: false,
schemaName: "string",
tableNamePrefix: "string",
},
servedEntities: [{
entityName: "string",
entityVersion: "string",
environmentVars: {
string: "string",
},
externalModel: {
name: "string",
provider: "string",
task: "string",
ai21labsConfig: {
ai21labsApiKey: "string",
ai21labsApiKeyPlaintext: "string",
},
amazonBedrockConfig: {
awsRegion: "string",
bedrockProvider: "string",
awsAccessKeyId: "string",
awsAccessKeyIdPlaintext: "string",
awsSecretAccessKey: "string",
awsSecretAccessKeyPlaintext: "string",
},
anthropicConfig: {
anthropicApiKey: "string",
anthropicApiKeyPlaintext: "string",
},
cohereConfig: {
cohereApiBase: "string",
cohereApiKey: "string",
cohereApiKeyPlaintext: "string",
},
databricksModelServingConfig: {
databricksWorkspaceUrl: "string",
databricksApiToken: "string",
databricksApiTokenPlaintext: "string",
},
googleCloudVertexAiConfig: {
privateKey: "string",
privateKeyPlaintext: "string",
projectId: "string",
region: "string",
},
openaiConfig: {
microsoftEntraClientId: "string",
microsoftEntraClientSecret: "string",
microsoftEntraClientSecretPlaintext: "string",
microsoftEntraTenantId: "string",
openaiApiBase: "string",
openaiApiKey: "string",
openaiApiKeyPlaintext: "string",
openaiApiType: "string",
openaiApiVersion: "string",
openaiDeploymentName: "string",
openaiOrganization: "string",
},
palmConfig: {
palmApiKey: "string",
palmApiKeyPlaintext: "string",
},
},
instanceProfileArn: "string",
maxProvisionedThroughput: 0,
minProvisionedThroughput: 0,
name: "string",
scaleToZeroEnabled: false,
workloadSize: "string",
workloadType: "string",
}],
trafficConfig: {
routes: [{
servedModelName: "string",
trafficPercentage: 0,
}],
},
},
aiGateway: {
guardrails: {
input: {
invalidKeywords: ["string"],
pii: {
behavior: "string",
},
safety: false,
validTopics: ["string"],
},
output: {
invalidKeywords: ["string"],
pii: {
behavior: "string",
},
safety: false,
validTopics: ["string"],
},
},
inferenceTableConfig: {
catalogName: "string",
enabled: false,
schemaName: "string",
tableNamePrefix: "string",
},
rateLimits: [{
calls: 0,
renewalPeriod: "string",
key: "string",
}],
usageTrackingConfig: {
enabled: false,
},
},
name: "string",
rateLimits: [{
calls: 0,
renewalPeriod: "string",
key: "string",
}],
routeOptimized: false,
tags: [{
key: "string",
value: "string",
}],
});
type: databricks:ModelServing
properties:
aiGateway:
guardrails:
input:
invalidKeywords:
- string
pii:
behavior: string
safety: false
validTopics:
- string
output:
invalidKeywords:
- string
pii:
behavior: string
safety: false
validTopics:
- string
inferenceTableConfig:
catalogName: string
enabled: false
schemaName: string
tableNamePrefix: string
rateLimits:
- calls: 0
key: string
renewalPeriod: string
usageTrackingConfig:
enabled: false
config:
autoCaptureConfig:
catalogName: string
enabled: false
schemaName: string
tableNamePrefix: string
servedEntities:
- entityName: string
entityVersion: string
environmentVars:
string: string
externalModel:
ai21labsConfig:
ai21labsApiKey: string
ai21labsApiKeyPlaintext: string
amazonBedrockConfig:
awsAccessKeyId: string
awsAccessKeyIdPlaintext: string
awsRegion: string
awsSecretAccessKey: string
awsSecretAccessKeyPlaintext: string
bedrockProvider: string
anthropicConfig:
anthropicApiKey: string
anthropicApiKeyPlaintext: string
cohereConfig:
cohereApiBase: string
cohereApiKey: string
cohereApiKeyPlaintext: string
databricksModelServingConfig:
databricksApiToken: string
databricksApiTokenPlaintext: string
databricksWorkspaceUrl: string
googleCloudVertexAiConfig:
privateKey: string
privateKeyPlaintext: string
projectId: string
region: string
name: string
openaiConfig:
microsoftEntraClientId: string
microsoftEntraClientSecret: string
microsoftEntraClientSecretPlaintext: string
microsoftEntraTenantId: string
openaiApiBase: string
openaiApiKey: string
openaiApiKeyPlaintext: string
openaiApiType: string
openaiApiVersion: string
openaiDeploymentName: string
openaiOrganization: string
palmConfig:
palmApiKey: string
palmApiKeyPlaintext: string
provider: string
task: string
instanceProfileArn: string
maxProvisionedThroughput: 0
minProvisionedThroughput: 0
name: string
scaleToZeroEnabled: false
workloadSize: string
workloadType: string
trafficConfig:
routes:
- servedModelName: string
trafficPercentage: 0
name: string
rateLimits:
- calls: 0
key: string
renewalPeriod: string
routeOptimized: false
tags:
- key: string
value: string
ModelServing Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.
The ModelServing resource accepts the following input properties:
- Config
Model
Serving Config - The model serving endpoint configuration.
- Ai
Gateway ModelServing Ai Gateway - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- Name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- Rate
Limits List<ModelServing Rate Limit> - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- Route
Optimized bool - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- Tags List<Model
Serving Tag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- Config
Model
Serving Config Args - The model serving endpoint configuration.
- Ai
Gateway ModelServing Ai Gateway Args - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- Name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- Rate
Limits []ModelServing Rate Limit Args - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- Route
Optimized bool - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- Tags
[]Model Serving Tag Args - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config
Model
Serving Config - The model serving endpoint configuration.
- ai
Gateway ModelServing Ai Gateway - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- name String
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits List<ModelServing Rate Limit> - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- route
Optimized Boolean - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- tags
List<Model Serving Tag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config
Model
Serving Config - The model serving endpoint configuration.
- ai
Gateway ModelServing Ai Gateway - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits ModelServing Rate Limit[] - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- route
Optimized boolean - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- tags
Model Serving Tag[] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config
Model
Serving Config Args - The model serving endpoint configuration.
- ai_
gateway ModelServing Ai Gateway Args - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- name str
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate_
limits Sequence[ModelServing Rate Limit Args] - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- route_
optimized bool - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- tags
Sequence[Model Serving Tag Args] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config Property Map
- The model serving endpoint configuration.
- ai
Gateway Property Map - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- name String
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits List<Property Map> - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- route
Optimized Boolean - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- tags List<Property Map>
- Tags to be attached to the serving endpoint and automatically propagated to billing logs.
Outputs
All input properties are implicitly available as output properties. Additionally, the ModelServing resource produces the following output properties:
- Id string
- The provider-assigned unique ID for this managed resource.
- Serving
Endpoint Id string - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- Id string
- The provider-assigned unique ID for this managed resource.
- Serving
Endpoint Id string - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id String
- The provider-assigned unique ID for this managed resource.
- serving
Endpoint Id String - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id string
- The provider-assigned unique ID for this managed resource.
- serving
Endpoint Id string - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id str
- The provider-assigned unique ID for this managed resource.
- serving_
endpoint_ id str - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id String
- The provider-assigned unique ID for this managed resource.
- serving
Endpoint Id String - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
Look up Existing ModelServing Resource
Get an existing ModelServing resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: ModelServingState, opts?: CustomResourceOptions): ModelServing
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
ai_gateway: Optional[ModelServingAiGatewayArgs] = None,
config: Optional[ModelServingConfigArgs] = None,
name: Optional[str] = None,
rate_limits: Optional[Sequence[ModelServingRateLimitArgs]] = None,
route_optimized: Optional[bool] = None,
serving_endpoint_id: Optional[str] = None,
tags: Optional[Sequence[ModelServingTagArgs]] = None) -> ModelServing
func GetModelServing(ctx *Context, name string, id IDInput, state *ModelServingState, opts ...ResourceOption) (*ModelServing, error)
public static ModelServing Get(string name, Input<string> id, ModelServingState? state, CustomResourceOptions? opts = null)
public static ModelServing get(String name, Output<String> id, ModelServingState state, CustomResourceOptions options)
Resource lookup is not supported in YAML
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- Ai
Gateway ModelServing Ai Gateway - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- Config
Model
Serving Config - The model serving endpoint configuration.
- Name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- Rate
Limits List<ModelServing Rate Limit> - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- Route
Optimized bool - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- Serving
Endpoint Id string - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- Tags
List<Model Serving Tag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- Ai
Gateway ModelServing Ai Gateway Args - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- Config
Model
Serving Config Args - The model serving endpoint configuration.
- Name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- Rate
Limits []ModelServing Rate Limit Args - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- Route
Optimized bool - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- Serving
Endpoint Id string - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- Tags
[]Model Serving Tag Args - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- ai
Gateway ModelServing Ai Gateway - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- config
Model
Serving Config - The model serving endpoint configuration.
- name String
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits List<ModelServing Rate Limit> - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- route
Optimized Boolean - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- serving
Endpoint Id String - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- tags
List<Model Serving Tag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- ai
Gateway ModelServing Ai Gateway - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- config
Model
Serving Config - The model serving endpoint configuration.
- name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits ModelServing Rate Limit[] - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- route
Optimized boolean - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- serving
Endpoint Id string - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- tags
Model Serving Tag[] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- ai_
gateway ModelServing Ai Gateway Args - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- config
Model
Serving Config Args - The model serving endpoint configuration.
- name str
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate_
limits Sequence[ModelServing Rate Limit Args] - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- route_
optimized bool - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- serving_
endpoint_ id str - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- tags
Sequence[Model Serving Tag Args] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- ai
Gateway Property Map - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- config Property Map
- The model serving endpoint configuration.
- name String
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits List<Property Map> - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- route
Optimized Boolean - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- serving
Endpoint Id String - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- tags List<Property Map>
- Tags to be attached to the serving endpoint and automatically propagated to billing logs.
Supporting Types
ModelServingAiGateway, ModelServingAiGatewayArgs
- Guardrails
Model
Serving Ai Gateway Guardrails - Block with configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. Consists of the following attributes:
- Inference
Table Config Model Serving Ai Gateway Inference Table Config - Block describing the configuration of usage tracking. Consists of the following attributes:
- Rate
Limits List<ModelServing Ai Gateway Rate Limit> - Block describing rate limits for AI gateway. For details see the description of
rate_limits
block above. - Usage
Tracking Config Model Serving Ai Gateway Usage Tracking Config - Block with configuration for payload logging using inference tables. For details see the description of
auto_capture_config
block above.
- Guardrails
Model
Serving Ai Gateway Guardrails - Block with configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. Consists of the following attributes:
- Inference
Table Config Model Serving Ai Gateway Inference Table Config - Block describing the configuration of usage tracking. Consists of the following attributes:
- Rate
Limits []ModelServing Ai Gateway Rate Limit - Block describing rate limits for AI gateway. For details see the description of
rate_limits
block above. - Usage
Tracking Config Model Serving Ai Gateway Usage Tracking Config - Block with configuration for payload logging using inference tables. For details see the description of
auto_capture_config
block above.
- guardrails
Model
Serving Ai Gateway Guardrails - Block with configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. Consists of the following attributes:
- inference
Table Config Model Serving Ai Gateway Inference Table Config - Block describing the configuration of usage tracking. Consists of the following attributes:
- rate
Limits List<ModelServing Ai Gateway Rate Limit> - Block describing rate limits for AI gateway. For details see the description of
rate_limits
block above. - usage
Tracking Config Model Serving Ai Gateway Usage Tracking Config - Block with configuration for payload logging using inference tables. For details see the description of
auto_capture_config
block above.
- guardrails
Model
Serving Ai Gateway Guardrails - Block with configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. Consists of the following attributes:
- inference
Table Config Model Serving Ai Gateway Inference Table Config - Block describing the configuration of usage tracking. Consists of the following attributes:
- rate
Limits ModelServing Ai Gateway Rate Limit[] - Block describing rate limits for AI gateway. For details see the description of
rate_limits
block above. - usage
Tracking Config Model Serving Ai Gateway Usage Tracking Config - Block with configuration for payload logging using inference tables. For details see the description of
auto_capture_config
block above.
- guardrails
Model
Serving Ai Gateway Guardrails - Block with configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. Consists of the following attributes:
- inference_
table_ config Model Serving Ai Gateway Inference Table Config - Block describing the configuration of usage tracking. Consists of the following attributes:
- rate_
limits Sequence[ModelServing Ai Gateway Rate Limit] - Block describing rate limits for AI gateway. For details see the description of
rate_limits
block above. - usage_
tracking_ config Model Serving Ai Gateway Usage Tracking Config - Block with configuration for payload logging using inference tables. For details see the description of
auto_capture_config
block above.
- guardrails Property Map
- Block with configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. Consists of the following attributes:
- inference
Table Config Property Map - Block describing the configuration of usage tracking. Consists of the following attributes:
- rate
Limits List<Property Map> - Block describing rate limits for AI gateway. For details see the description of
rate_limits
block above. - usage
Tracking Config Property Map - Block with configuration for payload logging using inference tables. For details see the description of
auto_capture_config
block above.
ModelServingAiGatewayGuardrails, ModelServingAiGatewayGuardrailsArgs
- Input
Model
Serving Ai Gateway Guardrails Input - A block with configuration for input guardrail filters:
- Output
Model
Serving Ai Gateway Guardrails Output - A block with configuration for output guardrail filters. Has the same structure as
input
block.
- Input
Model
Serving Ai Gateway Guardrails Input Type - A block with configuration for input guardrail filters:
- Output
Model
Serving Ai Gateway Guardrails Output Type - A block with configuration for output guardrail filters. Has the same structure as
input
block.
- input
Model
Serving Ai Gateway Guardrails Input - A block with configuration for input guardrail filters:
- output
Model
Serving Ai Gateway Guardrails Output - A block with configuration for output guardrail filters. Has the same structure as
input
block.
- input
Model
Serving Ai Gateway Guardrails Input - A block with configuration for input guardrail filters:
- output
Model
Serving Ai Gateway Guardrails Output - A block with configuration for output guardrail filters. Has the same structure as
input
block.
- input
Model
Serving Ai Gateway Guardrails Input - A block with configuration for input guardrail filters:
- output
Model
Serving Ai Gateway Guardrails Output - A block with configuration for output guardrail filters. Has the same structure as
input
block.
- input Property Map
- A block with configuration for input guardrail filters:
- output Property Map
- A block with configuration for output guardrail filters. Has the same structure as
input
block.
ModelServingAiGatewayGuardrailsInput, ModelServingAiGatewayGuardrailsInputArgs
- Invalid
Keywords List<string> - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- Pii
Model
Serving Ai Gateway Guardrails Input Pii - Block with configuration for guardrail PII filter:
- Safety bool
- the boolean flag that indicates whether the safety filter is enabled.
- Valid
Topics List<string> - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- Invalid
Keywords []string - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- Pii
Model
Serving Ai Gateway Guardrails Input Pii - Block with configuration for guardrail PII filter:
- Safety bool
- the boolean flag that indicates whether the safety filter is enabled.
- Valid
Topics []string - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- invalid
Keywords List<String> - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- pii
Model
Serving Ai Gateway Guardrails Input Pii - Block with configuration for guardrail PII filter:
- safety Boolean
- the boolean flag that indicates whether the safety filter is enabled.
- valid
Topics List<String> - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- invalid
Keywords string[] - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- pii
Model
Serving Ai Gateway Guardrails Input Pii - Block with configuration for guardrail PII filter:
- safety boolean
- the boolean flag that indicates whether the safety filter is enabled.
- valid
Topics string[] - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- invalid_
keywords Sequence[str] - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- pii
Model
Serving Ai Gateway Guardrails Input Pii - Block with configuration for guardrail PII filter:
- safety bool
- the boolean flag that indicates whether the safety filter is enabled.
- valid_
topics Sequence[str] - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- invalid
Keywords List<String> - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- pii Property Map
- Block with configuration for guardrail PII filter:
- safety Boolean
- the boolean flag that indicates whether the safety filter is enabled.
- valid
Topics List<String> - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
ModelServingAiGatewayGuardrailsInputPii, ModelServingAiGatewayGuardrailsInputPiiArgs
- Behavior string
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- Behavior string
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- behavior String
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- behavior string
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- behavior str
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- behavior String
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
ModelServingAiGatewayGuardrailsOutput, ModelServingAiGatewayGuardrailsOutputArgs
- Invalid
Keywords List<string> - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- Pii
Model
Serving Ai Gateway Guardrails Output Pii - Block with configuration for guardrail PII filter:
- Safety bool
- the boolean flag that indicates whether the safety filter is enabled.
- Valid
Topics List<string> - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- Invalid
Keywords []string - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- Pii
Model
Serving Ai Gateway Guardrails Output Pii - Block with configuration for guardrail PII filter:
- Safety bool
- the boolean flag that indicates whether the safety filter is enabled.
- Valid
Topics []string - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- invalid
Keywords List<String> - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- pii
Model
Serving Ai Gateway Guardrails Output Pii - Block with configuration for guardrail PII filter:
- safety Boolean
- the boolean flag that indicates whether the safety filter is enabled.
- valid
Topics List<String> - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- invalid
Keywords string[] - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- pii
Model
Serving Ai Gateway Guardrails Output Pii - Block with configuration for guardrail PII filter:
- safety boolean
- the boolean flag that indicates whether the safety filter is enabled.
- valid
Topics string[] - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- invalid_
keywords Sequence[str] - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- pii
Model
Serving Ai Gateway Guardrails Output Pii - Block with configuration for guardrail PII filter:
- safety bool
- the boolean flag that indicates whether the safety filter is enabled.
- valid_
topics Sequence[str] - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- invalid
Keywords List<String> - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- pii Property Map
- Block with configuration for guardrail PII filter:
- safety Boolean
- the boolean flag that indicates whether the safety filter is enabled.
- valid
Topics List<String> - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
ModelServingAiGatewayGuardrailsOutputPii, ModelServingAiGatewayGuardrailsOutputPiiArgs
- Behavior string
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- Behavior string
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- behavior String
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- behavior string
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- behavior str
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- behavior String
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
ModelServingAiGatewayInferenceTableConfig, ModelServingAiGatewayInferenceTableConfigArgs
- Catalog
Name string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- Enabled bool
- boolean flag specifying if usage tracking is enabled.
- Schema
Name string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- Table
Name Prefix string - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- Catalog
Name string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- Enabled bool
- boolean flag specifying if usage tracking is enabled.
- Schema
Name string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- Table
Name Prefix string - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog
Name String - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled Boolean
- boolean flag specifying if usage tracking is enabled.
- schema
Name String - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table
Name Prefix String - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog
Name string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled boolean
- boolean flag specifying if usage tracking is enabled.
- schema
Name string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table
Name Prefix string - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog_
name str - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled bool
- boolean flag specifying if usage tracking is enabled.
- schema_
name str - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table_
name_ prefix str - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog
Name String - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled Boolean
- boolean flag specifying if usage tracking is enabled.
- schema
Name String - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table
Name Prefix String - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
ModelServingAiGatewayRateLimit, ModelServingAiGatewayRateLimitArgs
- Calls int
- Used to specify how many calls are allowed for a key within the renewal_period.
- Renewal
Period string - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - Key string
- Key field for a serving endpoint rate limit. Currently, only
user
andendpoint
are supported, withendpoint
being the default if not specified.
- Calls int
- Used to specify how many calls are allowed for a key within the renewal_period.
- Renewal
Period string - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - Key string
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls Integer
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal
Period String - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key String
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls number
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal
Period string - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key string
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls int
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal_
period str - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key str
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls Number
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal
Period String - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key String
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
ModelServingAiGatewayUsageTrackingConfig, ModelServingAiGatewayUsageTrackingConfigArgs
- Enabled bool
- Enabled bool
- enabled Boolean
- enabled boolean
- enabled bool
- enabled Boolean
ModelServingConfig, ModelServingConfigArgs
- AutoCaptureConfig ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- ServedEntities List<ModelServingConfigServedEntity> - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- ServedModels List<ModelServingConfigServedModel> - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- TrafficConfig ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- AutoCaptureConfig ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- ServedEntities []ModelServingConfigServedEntity - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- ServedModels []ModelServingConfigServedModel - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- TrafficConfig ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- autoCaptureConfig ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- servedEntities List<ModelServingConfigServedEntity> - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- servedModels List<ModelServingConfigServedModel> - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- trafficConfig ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- autoCaptureConfig ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- servedEntities ModelServingConfigServedEntity[] - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- servedModels ModelServingConfigServedModel[] - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- trafficConfig ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- auto_capture_config ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- served_entities Sequence[ModelServingConfigServedEntity] - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- served_models Sequence[ModelServingConfigServedModel] - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- traffic_config ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- autoCaptureConfig Property Map - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- servedEntities List<Property Map> - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- servedModels List<Property Map> - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- trafficConfig Property Map - A single block represents the traffic split configuration amongst the served models.
ModelServingConfigAutoCaptureConfig, ModelServingConfigAutoCaptureConfigArgs
- Catalog
Name string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- Enabled bool
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable it again.
- Schema
Name string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- TableNamePrefix string - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- Catalog
Name string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- Enabled bool
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable it again.
- Schema
Name string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- TableNamePrefix string - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog
Name String - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled Boolean
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable it again.
- schema
Name String - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- tableNamePrefix String - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog
Name string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled boolean
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable it again.
- schema
Name string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- tableNamePrefix string - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog_
name str - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled bool
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable it again.
- schema_
name str - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table_name_prefix str - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog
Name String - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled Boolean
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable it again.
- schema
Name String - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- tableNamePrefix String - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
ModelServingConfigServedEntity, ModelServingConfigServedEntityArgs
- Entity
Name string - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - Entity
Version string - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - Environment
Vars Dictionary<string, string> - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and is subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- External
Model ModelServing Config Served Entity External Model - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. An existing endpoint withexternal_model
can not be updated to an endpoint withoutexternal_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - InstanceProfileArn string - ARN of the instance profile that the served entity uses to access AWS resources.
- MaxProvisionedThroughput int - The maximum tokens per second that the endpoint can scale up to.
- MinProvisionedThroughput int - The minimum tokens per second that the endpoint can scale down to.
- Name string
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - ScaleToZeroEnabled bool - Whether the compute resources for the served entity should scale down to zero.
- Workload
Size string - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - Workload
Type string - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types likeGPU_SMALL
and others. See the available GPU types.
- Entity
Name string - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - Entity
Version string - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - Environment
Vars map[string]string - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and is subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- External
Model ModelServing Config Served Entity External Model - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. An existing endpoint withexternal_model
can not be updated to an endpoint withoutexternal_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - InstanceProfileArn string - ARN of the instance profile that the served entity uses to access AWS resources.
- MaxProvisionedThroughput int - The maximum tokens per second that the endpoint can scale up to.
- MinProvisionedThroughput int - The minimum tokens per second that the endpoint can scale down to.
- Name string
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - ScaleToZeroEnabled bool - Whether the compute resources for the served entity should scale down to zero.
- Workload
Size string - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - Workload
Type string - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types likeGPU_SMALL
and others. See the available GPU types.
- entity
Name String - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - entity
Version String - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - environment
Vars Map<String,String> - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and is subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- external
Model ModelServing Config Served Entity External Model - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. An existing endpoint withexternal_model
can not be updated to an endpoint withoutexternal_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - instanceProfileArn String - ARN of the instance profile that the served entity uses to access AWS resources.
- maxProvisionedThroughput Integer - The maximum tokens per second that the endpoint can scale up to.
- minProvisionedThroughput Integer - The minimum tokens per second that the endpoint can scale down to.
- name String
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - scaleToZeroEnabled Boolean - Whether the compute resources for the served entity should scale down to zero.
- workload
Size String - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - workload
Type String - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types likeGPU_SMALL
and others. See the available GPU types.
- entity
Name string - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - entity
Version string - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - environment
Vars {[key: string]: string} - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and is subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- external
Model ModelServing Config Served Entity External Model - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. An existing endpoint withexternal_model
can not be updated to an endpoint withoutexternal_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - instanceProfileArn string - ARN of the instance profile that the served entity uses to access AWS resources.
- maxProvisionedThroughput number - The maximum tokens per second that the endpoint can scale up to.
- minProvisionedThroughput number - The minimum tokens per second that the endpoint can scale down to.
- name string
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - scaleToZeroEnabled boolean - Whether the compute resources for the served entity should scale down to zero.
- workload
Size string - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - workload
Type string - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types likeGPU_SMALL
and others. See the available GPU types.
- entity_
name str - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - entity_
version str - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - environment_
vars Mapping[str, str] - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and is subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- external_
model ModelServing Config Served Entity External Model - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. An existing endpoint withexternal_model
can not be updated to an endpoint withoutexternal_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - instance_profile_arn str - ARN of the instance profile that the served entity uses to access AWS resources.
- max_provisioned_throughput int - The maximum tokens per second that the endpoint can scale up to.
- min_provisioned_throughput int - The minimum tokens per second that the endpoint can scale down to.
- name str
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - scale_to_zero_enabled bool - Whether the compute resources for the served entity should scale down to zero.
- workload_
size str - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - workload_
type str - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types likeGPU_SMALL
and others. See the available GPU types.
- entity
Name String - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - entity
Version String - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - environment
Vars Map<String> - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and is subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- external
Model Property Map - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. An existing endpoint withexternal_model
can not be updated to an endpoint withoutexternal_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - instanceProfileArn String - ARN of the instance profile that the served entity uses to access AWS resources.
- maxProvisionedThroughput Number - The maximum tokens per second that the endpoint can scale up to.
- minProvisionedThroughput Number - The minimum tokens per second that the endpoint can scale down to.
- name String
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - scaleToZeroEnabled Boolean - Whether the compute resources for the served entity should scale down to zero.
- workload
Size String - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - workload
Type String - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types likeGPU_SMALL
and others. See the available GPU types.
ModelServingConfigServedEntityExternalModel, ModelServingConfigServedEntityExternalModelArgs
- Name string
- The name of the external model.
- Provider string
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
,anthropic
,amazon-bedrock
,cohere
,databricks-model-serving
,google-cloud-vertex-ai
,openai
, andpalm
. - Task string
- The task type of the external model.
- Ai21labsConfig ModelServingConfigServedEntityExternalModelAi21labsConfig - AI21Labs Config
- AmazonBedrockConfig ModelServingConfigServedEntityExternalModelAmazonBedrockConfig - Amazon Bedrock Config
- AnthropicConfig ModelServingConfigServedEntityExternalModelAnthropicConfig - Anthropic Config
- CohereConfig ModelServingConfigServedEntityExternalModelCohereConfig - Cohere Config
- DatabricksModelServingConfig ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig - Databricks Model Serving Config
- GoogleCloudVertexAiConfig ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig - Google Cloud Vertex AI Config.
- OpenaiConfig ModelServingConfigServedEntityExternalModelOpenaiConfig - OpenAI Config
- PalmConfig ModelServingConfigServedEntityExternalModelPalmConfig - PaLM Config
- Name string
- The name of the external model.
- Provider string
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
,anthropic
,amazon-bedrock
,cohere
,databricks-model-serving
,google-cloud-vertex-ai
,openai
, andpalm
. - Task string
- The task type of the external model.
- Ai21labsConfig ModelServingConfigServedEntityExternalModelAi21labsConfig - AI21Labs Config
- AmazonBedrockConfig ModelServingConfigServedEntityExternalModelAmazonBedrockConfig - Amazon Bedrock Config
- AnthropicConfig ModelServingConfigServedEntityExternalModelAnthropicConfig - Anthropic Config
- CohereConfig ModelServingConfigServedEntityExternalModelCohereConfig - Cohere Config
- DatabricksModelServingConfig ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig - Databricks Model Serving Config
- GoogleCloudVertexAiConfig ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig - Google Cloud Vertex AI Config.
- OpenaiConfig ModelServingConfigServedEntityExternalModelOpenaiConfig - OpenAI Config
- PalmConfig ModelServingConfigServedEntityExternalModelPalmConfig - PaLM Config
- name String
- The name of the external model.
- provider String
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
,anthropic
,amazon-bedrock
,cohere
,databricks-model-serving
,google-cloud-vertex-ai
,openai
, andpalm
. - task String
- The task type of the external model.
- ai21labsConfig ModelServingConfigServedEntityExternalModelAi21labsConfig - AI21Labs Config
- amazonBedrockConfig ModelServingConfigServedEntityExternalModelAmazonBedrockConfig - Amazon Bedrock Config
- anthropicConfig ModelServingConfigServedEntityExternalModelAnthropicConfig - Anthropic Config
- cohereConfig ModelServingConfigServedEntityExternalModelCohereConfig - Cohere Config
- databricksModelServingConfig ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig - Databricks Model Serving Config
- googleCloudVertexAiConfig ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig - Google Cloud Vertex AI Config.
- openaiConfig ModelServingConfigServedEntityExternalModelOpenaiConfig - OpenAI Config
- palmConfig ModelServingConfigServedEntityExternalModelPalmConfig - PaLM Config
- name string
- The name of the external model.
- provider string
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
,anthropic
,amazon-bedrock
,cohere
,databricks-model-serving
,google-cloud-vertex-ai
,openai
, andpalm
. - task string
- The task type of the external model.
- ai21labsConfig ModelServingConfigServedEntityExternalModelAi21labsConfig - AI21Labs Config
- amazonBedrockConfig ModelServingConfigServedEntityExternalModelAmazonBedrockConfig - Amazon Bedrock Config
- anthropicConfig ModelServingConfigServedEntityExternalModelAnthropicConfig - Anthropic Config
- cohereConfig ModelServingConfigServedEntityExternalModelCohereConfig - Cohere Config
- databricksModelServingConfig ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig - Databricks Model Serving Config
- googleCloudVertexAiConfig ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig - Google Cloud Vertex AI Config.
- openaiConfig ModelServingConfigServedEntityExternalModelOpenaiConfig - OpenAI Config
- palmConfig ModelServingConfigServedEntityExternalModelPalmConfig - PaLM Config
- name str
- The name of the external model.
- provider str
- The name of the provider for the external model. Currently, the supported providers are
`ai21labs`, `anthropic`, `amazon-bedrock`, `cohere`, `databricks-model-serving`, `google-cloud-vertex-ai`, `openai`, and `palm`.
- task str
- The task type of the external model.
- ai21labs_config ModelServingConfigServedEntityExternalModelAi21labsConfig - AI21Labs Config
- amazon_bedrock_config ModelServingConfigServedEntityExternalModelAmazonBedrockConfig - Amazon Bedrock Config
- anthropic_config ModelServingConfigServedEntityExternalModelAnthropicConfig - Anthropic Config
- cohere_config ModelServingConfigServedEntityExternalModelCohereConfig - Cohere Config
- databricks_model_serving_config ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig - Databricks Model Serving Config
- google_cloud_vertex_ai_config ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig - Google Cloud Vertex AI Config.
- openai_config ModelServingConfigServedEntityExternalModelOpenaiConfig - OpenAI Config
- palm_config ModelServingConfigServedEntityExternalModelPalmConfig - PaLM Config
- name String
- The name of the external model.
- provider String
- The name of the provider for the external model. Currently, the supported providers are
`ai21labs`, `anthropic`, `amazon-bedrock`, `cohere`, `databricks-model-serving`, `google-cloud-vertex-ai`, `openai`, and `palm`.
- task String
- The task type of the external model.
- ai21labsConfig Property Map - AI21Labs Config
- amazonBedrockConfig Property Map - Amazon Bedrock Config
- anthropicConfig Property Map - Anthropic Config
- cohereConfig Property Map - Cohere Config
- databricksModelServingConfig Property Map - Databricks Model Serving Config
- googleCloudVertexAiConfig Property Map - Google Cloud Vertex AI Config.
- openaiConfig Property Map - OpenAI Config
- palmConfig Property Map - PaLM Config
ModelServingConfigServedEntityExternalModelAi21labsConfig, ModelServingConfigServedEntityExternalModelAi21labsConfigArgs
- Ai21labsApiKey string - The Databricks secret key reference for an AI21Labs API key.
- Ai21labsApiKeyPlaintext string - An AI21 Labs API key provided as a plaintext string.
- Ai21labsApiKey string - The Databricks secret key reference for an AI21Labs API key.
- Ai21labsApiKeyPlaintext string - An AI21 Labs API key provided as a plaintext string.
- ai21labsApiKey String - The Databricks secret key reference for an AI21Labs API key.
- ai21labsApiKeyPlaintext String - An AI21 Labs API key provided as a plaintext string.
- ai21labsApiKey string - The Databricks secret key reference for an AI21Labs API key.
- ai21labsApiKeyPlaintext string - An AI21 Labs API key provided as a plaintext string.
- ai21labs_api_key str - The Databricks secret key reference for an AI21Labs API key.
- ai21labs_api_key_plaintext str - An AI21 Labs API key provided as a plaintext string.
- ai21labsApiKey String - The Databricks secret key reference for an AI21Labs API key.
- ai21labsApiKeyPlaintext String - An AI21 Labs API key provided as a plaintext string.
ModelServingConfigServedEntityExternalModelAmazonBedrockConfig, ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs
- AwsRegion string - The AWS region to use. Bedrock has to be enabled there.
- BedrockProvider string - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: `Anthropic`, `Cohere`, `AI21Labs`, `Amazon`.
- AwsAccessKeyId string - The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
- AwsAccessKeyIdPlaintext string - An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string.
- AwsSecretAccessKey string - The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
- AwsSecretAccessKeyPlaintext string - An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string.
- AwsRegion string - The AWS region to use. Bedrock has to be enabled there.
- BedrockProvider string - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: `Anthropic`, `Cohere`, `AI21Labs`, `Amazon`.
- AwsAccessKeyId string - The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
- AwsAccessKeyIdPlaintext string - An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string.
- AwsSecretAccessKey string - The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
- AwsSecretAccessKeyPlaintext string - An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string.
- awsRegion String - The AWS region to use. Bedrock has to be enabled there.
- bedrockProvider String - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: `Anthropic`, `Cohere`, `AI21Labs`, `Amazon`.
- awsAccessKeyId String - The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
- awsAccessKeyIdPlaintext String - An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string.
- awsSecretAccessKey String - The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
- awsSecretAccessKeyPlaintext String - An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string.
- awsRegion string - The AWS region to use. Bedrock has to be enabled there.
- bedrockProvider string - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: `Anthropic`, `Cohere`, `AI21Labs`, `Amazon`.
- awsAccessKeyId string - The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
- awsAccessKeyIdPlaintext string - An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string.
- awsSecretAccessKey string - The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
- awsSecretAccessKeyPlaintext string - An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string.
- aws_region str - The AWS region to use. Bedrock has to be enabled there.
- bedrock_provider str - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: `Anthropic`, `Cohere`, `AI21Labs`, `Amazon`.
- aws_access_key_id str - The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
- aws_access_key_id_plaintext str - An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string.
- aws_secret_access_key str - The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
- aws_secret_access_key_plaintext str - An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string.
- awsRegion String - The AWS region to use. Bedrock has to be enabled there.
- bedrockProvider String - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: `Anthropic`, `Cohere`, `AI21Labs`, `Amazon`.
- awsAccessKeyId String - The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
- awsAccessKeyIdPlaintext String - An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string.
- awsSecretAccessKey String - The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
- awsSecretAccessKeyPlaintext String - An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string.
ModelServingConfigServedEntityExternalModelAnthropicConfig, ModelServingConfigServedEntityExternalModelAnthropicConfigArgs
- AnthropicApiKey string - The Databricks secret key reference for an Anthropic API key.
- AnthropicApiKeyPlaintext string - The Anthropic API key provided as a plaintext string.
- AnthropicApiKey string - The Databricks secret key reference for an Anthropic API key.
- AnthropicApiKeyPlaintext string - The Anthropic API key provided as a plaintext string.
- anthropicApiKey String - The Databricks secret key reference for an Anthropic API key.
- anthropicApiKeyPlaintext String - The Anthropic API key provided as a plaintext string.
- anthropicApiKey string - The Databricks secret key reference for an Anthropic API key.
- anthropicApiKeyPlaintext string - The Anthropic API key provided as a plaintext string.
- anthropic_api_key str - The Databricks secret key reference for an Anthropic API key.
- anthropic_api_key_plaintext str - The Anthropic API key provided as a plaintext string.
- anthropicApiKey String - The Databricks secret key reference for an Anthropic API key.
- anthropicApiKeyPlaintext String - The Anthropic API key provided as a plaintext string.
ModelServingConfigServedEntityExternalModelCohereConfig, ModelServingConfigServedEntityExternalModelCohereConfigArgs
- CohereApiBase string
- CohereApiKey string - The Databricks secret key reference for a Cohere API key.
- CohereApiKeyPlaintext string - The Cohere API key provided as a plaintext string.
- CohereApiBase string
- CohereApiKey string - The Databricks secret key reference for a Cohere API key.
- CohereApiKeyPlaintext string - The Cohere API key provided as a plaintext string.
- cohereApiBase String
- cohereApiKey String - The Databricks secret key reference for a Cohere API key.
- cohereApiKeyPlaintext String - The Cohere API key provided as a plaintext string.
- cohereApiBase string
- cohereApiKey string - The Databricks secret key reference for a Cohere API key.
- cohereApiKeyPlaintext string - The Cohere API key provided as a plaintext string.
- cohere_api_base str
- cohere_api_key str - The Databricks secret key reference for a Cohere API key.
- cohere_api_key_plaintext str - The Cohere API key provided as a plaintext string.
- cohereApiBase String
- cohereApiKey String - The Databricks secret key reference for a Cohere API key.
- cohereApiKeyPlaintext String - The Cohere API key provided as a plaintext string.
ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig, ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs
- DatabricksWorkspaceUrl string - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
- DatabricksApiToken string - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
- DatabricksApiTokenPlaintext string - The Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.
- DatabricksWorkspaceUrl string - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
- DatabricksApiToken string - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
- DatabricksApiTokenPlaintext string - The Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.
- databricksWorkspaceUrl String - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
- databricksApiToken String - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
- databricksApiTokenPlaintext String - The Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.
- databricksWorkspaceUrl string - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
- databricksApiToken string - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
- databricksApiTokenPlaintext string - The Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.
- databricks_workspace_url str - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
- databricks_api_token str - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
- databricks_api_token_plaintext str - The Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.
- databricksWorkspaceUrl String - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
- databricksApiToken String - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
- databricksApiTokenPlaintext String - The Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.
ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig, ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfigArgs
- PrivateKey string - The Databricks secret key reference for a private key for the service account that has access to the Google Cloud Vertex AI Service.
- PrivateKeyPlaintext string - The private key for the service account that has access to the Google Cloud Vertex AI Service is provided as a plaintext secret.
- ProjectId string - This is the Google Cloud project id that the service account is associated with.
- Region string - This is the region for the Google Cloud Vertex AI Service.
- PrivateKey string - The Databricks secret key reference for a private key for the service account that has access to the Google Cloud Vertex AI Service.
- PrivateKeyPlaintext string - The private key for the service account that has access to the Google Cloud Vertex AI Service is provided as a plaintext secret.
- ProjectId string - This is the Google Cloud project id that the service account is associated with.
- Region string - This is the region for the Google Cloud Vertex AI Service.
- privateKey String - The Databricks secret key reference for a private key for the service account that has access to the Google Cloud Vertex AI Service.
- privateKeyPlaintext String - The private key for the service account that has access to the Google Cloud Vertex AI Service is provided as a plaintext secret.
- projectId String - This is the Google Cloud project id that the service account is associated with.
- region String - This is the region for the Google Cloud Vertex AI Service.
- privateKey string - The Databricks secret key reference for a private key for the service account that has access to the Google Cloud Vertex AI Service.
- privateKeyPlaintext string - The private key for the service account that has access to the Google Cloud Vertex AI Service is provided as a plaintext secret.
- projectId string - This is the Google Cloud project id that the service account is associated with.
- region string - This is the region for the Google Cloud Vertex AI Service.
- private_key str - The Databricks secret key reference for a private key for the service account that has access to the Google Cloud Vertex AI Service.
- private_key_plaintext str - The private key for the service account that has access to the Google Cloud Vertex AI Service is provided as a plaintext secret.
- project_id str - This is the Google Cloud project id that the service account is associated with.
- region str - This is the region for the Google Cloud Vertex AI Service.
- privateKey String - The Databricks secret key reference for a private key for the service account that has access to the Google Cloud Vertex AI Service.
- privateKeyPlaintext String - The private key for the service account that has access to the Google Cloud Vertex AI Service is provided as a plaintext secret.
- projectId String - This is the Google Cloud project id that the service account is associated with.
- region String - This is the region for the Google Cloud Vertex AI Service.
ModelServingConfigServedEntityExternalModelOpenaiConfig, ModelServingConfigServedEntityExternalModelOpenaiConfigArgs
- MicrosoftEntraClientId string - This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.
- MicrosoftEntraClientSecret string - The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.
- MicrosoftEntraClientSecretPlaintext string - The client secret used for Microsoft Entra ID authentication provided as a plaintext string.
- MicrosoftEntraTenantId string - This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.
- OpenaiApiBase string - This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required and is the base URL for the Azure OpenAI API service provided by Azure.
- OpenaiApiKey string - The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
- OpenaiApiKeyPlaintext string - The OpenAI API key using the OpenAI or Azure service provided as a plaintext string.
- OpenaiApiType string - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and this parameter represents the preferred security access validation protocol. For access token validation, use `azure`. For authentication using Azure Active Directory (Azure AD), use `azuread`.
- OpenaiApiVersion string - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required and is the version of the Azure OpenAI service to utilize, specified by a date.
- OpenaiDeploymentName string - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
- OpenaiOrganization string - This is an optional field to specify the organization in OpenAI or Azure OpenAI.
- MicrosoftEntraClientId string - This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.
- MicrosoftEntraClientSecret string - The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.
- MicrosoftEntraClientSecretPlaintext string - The client secret used for Microsoft Entra ID authentication provided as a plaintext string.
- MicrosoftEntraTenantId string - This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.
- OpenaiApiBase string - This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required and is the base URL for the Azure OpenAI API service provided by Azure.
- OpenaiApiKey string - The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
- OpenaiApiKeyPlaintext string - The OpenAI API key using the OpenAI or Azure service provided as a plaintext string.
- OpenaiApiType string - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and this parameter represents the preferred security access validation protocol. For access token validation, use `azure`. For authentication using Azure Active Directory (Azure AD), use `azuread`.
- OpenaiApiVersion string - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required and is the version of the Azure OpenAI service to utilize, specified by a date.
- OpenaiDeploymentName string - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
- OpenaiOrganization string - This is an optional field to specify the organization in OpenAI or Azure OpenAI.
- microsoftEntraClientId String - This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.
- microsoftEntraClientSecret String - The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.
- microsoftEntraClientSecretPlaintext String - The client secret used for Microsoft Entra ID authentication provided as a plaintext string.
- microsoftEntraTenantId String - This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.
- openaiApiBase String - This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required and is the base URL for the Azure OpenAI API service provided by Azure.
- openaiApiKey String - The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
- openaiApiKeyPlaintext String - The OpenAI API key using the OpenAI or Azure service provided as a plaintext string.
- openaiApiType String - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and this parameter represents the preferred security access validation protocol. For access token validation, use `azure`. For authentication using Azure Active Directory (Azure AD), use `azuread`.
- openaiApiVersion String - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required and is the version of the Azure OpenAI service to utilize, specified by a date.
- openaiDeploymentName String - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
- openaiOrganization String - This is an optional field to specify the organization in OpenAI or Azure OpenAI.
- microsoftEntraClientId string - This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.
- microsoftEntraClientSecret string - The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.
- microsoftEntraClientSecretPlaintext string - The client secret used for Microsoft Entra ID authentication provided as a plaintext string.
- microsoftEntraTenantId string - This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.
- openaiApiBase string - This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required and is the base URL for the Azure OpenAI API service provided by Azure.
- openaiApiKey string - The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
- openaiApiKeyPlaintext string - The OpenAI API key using the OpenAI or Azure service provided as a plaintext string.
- openaiApiType string - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and this parameter represents the preferred security access validation protocol. For access token validation, use `azure`. For authentication using Azure Active Directory (Azure AD), use `azuread`.
- openaiApiVersion string - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required and is the version of the Azure OpenAI service to utilize, specified by a date.
- openaiDeploymentName string - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
- openaiOrganization string - This is an optional field to specify the organization in OpenAI or Azure OpenAI.
- microsoft_entra_client_id str - This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.
- microsoft_entra_client_secret str - The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.
- microsoft_entra_client_secret_plaintext str - The client secret used for Microsoft Entra ID authentication provided as a plaintext string.
- microsoft_entra_tenant_id str - This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.
- openai_api_base str - This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required and is the base URL for the Azure OpenAI API service provided by Azure.
- openai_api_key str - The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
- openai_api_key_plaintext str - The OpenAI API key using the OpenAI or Azure service provided as a plaintext string.
- openai_api_type str - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and this parameter represents the preferred security access validation protocol. For access token validation, use `azure`. For authentication using Azure Active Directory (Azure AD), use `azuread`.
- openai_api_version str - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required and is the version of the Azure OpenAI service to utilize, specified by a date.
- openai_deployment_name str - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
- openai_organization str - This is an optional field to specify the organization in OpenAI or Azure OpenAI.
- microsoftEntraClientId String - This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.
- microsoftEntraClientSecret String - The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.
- microsoftEntraClientSecretPlaintext String - The client secret used for Microsoft Entra ID authentication provided as a plaintext string.
- microsoftEntraTenantId String - This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.
- openaiApiBase String - This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required and is the base URL for the Azure OpenAI API service provided by Azure.
- openaiApiKey String - The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
- openaiApiKeyPlaintext String - The OpenAI API key using the OpenAI or Azure service provided as a plaintext string.
- openaiApiType String - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and this parameter represents the preferred security access validation protocol. For access token validation, use `azure`. For authentication using Azure Active Directory (Azure AD), use `azuread`.
- openaiApiVersion String - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required and is the version of the Azure OpenAI service to utilize, specified by a date.
- openaiDeploymentName String - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
- openaiOrganization String - This is an optional field to specify the organization in OpenAI or Azure OpenAI.
ModelServingConfigServedEntityExternalModelPalmConfig, ModelServingConfigServedEntityExternalModelPalmConfigArgs
- PalmApiKey string - The Databricks secret key reference for a PaLM API key.
- PalmApiKeyPlaintext string - The PaLM API key provided as a plaintext string.
- PalmApiKey string - The Databricks secret key reference for a PaLM API key.
- PalmApiKeyPlaintext string - The PaLM API key provided as a plaintext string.
- palmApiKey String - The Databricks secret key reference for a PaLM API key.
- palmApiKeyPlaintext String - The PaLM API key provided as a plaintext string.
- palmApiKey string - The Databricks secret key reference for a PaLM API key.
- palmApiKeyPlaintext string - The PaLM API key provided as a plaintext string.
- palm_api_key str - The Databricks secret key reference for a PaLM API key.
- palm_api_key_plaintext str - The PaLM API key provided as a plaintext string.
- palmApiKey String - The Databricks secret key reference for a PaLM API key.
- palmApiKeyPlaintext String - The PaLM API key provided as a plaintext string.
ModelServingConfigServedModel, ModelServingConfigServedModelArgs
- ModelName string - The name of the model in Databricks Model Registry to be served.
- ModelVersion string - The version of the model in Databricks Model Registry to be served.
- EnvironmentVars Dictionary&lt;string, string&gt; - a map of environment variable names/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax: `{{secrets/secret_scope/secret_key}}`.
- InstanceProfileArn string - ARN of the instance profile that the served model will use to access AWS resources.
- MaxProvisionedThroughput int - The maximum tokens per second that the endpoint can scale up to.
- MinProvisionedThroughput int - The minimum tokens per second that the endpoint can scale down to.
- Name string - The name of a served model. It must be unique across an endpoint. If not specified, this field will default to `modelname-modelversion`. A served model name can consist of alphanumeric characters, dashes, and underscores.
- ScaleToZeroEnabled bool - Whether the compute resources for the served model should scale down to zero. If `scale-to-zero` is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is `true`.
- WorkloadSize string - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are `Small` (4 - 4 provisioned concurrency), `Medium` (8 - 16 provisioned concurrency), and `Large` (16 - 64 provisioned concurrency).
- WorkloadType string - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like `GPU_SMALL` and others. See the documentation for all options. The default value is `CPU`.
- ModelName string - The name of the model in Databricks Model Registry to be served.
- ModelVersion string - The version of the model in Databricks Model Registry to be served.
- EnvironmentVars map[string]string - a map of environment variable names/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax: {{secrets/secret_scope/secret_key}}.
- InstanceProfileArn string - ARN of the instance profile that the served model will use to access AWS resources.
- MaxProvisionedThroughput int - The maximum tokens per second that the endpoint can scale up to.
- MinProvisionedThroughput int - The minimum tokens per second that the endpoint can scale down to.
- Name string - The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.
- ScaleToZeroEnabled bool - Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.
- WorkloadSize string - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency).
- WorkloadType string - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the documentation for all options. The default value is CPU.
- modelName String - The name of the model in Databricks Model Registry to be served.
- modelVersion String - The version of the model in Databricks Model Registry to be served.
- environmentVars Map<String,String> - a map of environment variable names/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax: {{secrets/secret_scope/secret_key}}.
- instanceProfileArn String - ARN of the instance profile that the served model will use to access AWS resources.
- maxProvisionedThroughput Integer - The maximum tokens per second that the endpoint can scale up to.
- minProvisionedThroughput Integer - The minimum tokens per second that the endpoint can scale down to.
- name String - The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.
- scaleToZeroEnabled Boolean - Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.
- workloadSize String - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency).
- workloadType String - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the documentation for all options. The default value is CPU.
- modelName string - The name of the model in Databricks Model Registry to be served.
- modelVersion string - The version of the model in Databricks Model Registry to be served.
- environmentVars {[key: string]: string} - a map of environment variable names/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax: {{secrets/secret_scope/secret_key}}.
- instanceProfileArn string - ARN of the instance profile that the served model will use to access AWS resources.
- maxProvisionedThroughput number - The maximum tokens per second that the endpoint can scale up to.
- minProvisionedThroughput number - The minimum tokens per second that the endpoint can scale down to.
- name string - The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.
- scaleToZeroEnabled boolean - Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.
- workloadSize string - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency).
- workloadType string - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the documentation for all options. The default value is CPU.
- model_name str - The name of the model in Databricks Model Registry to be served.
- model_version str - The version of the model in Databricks Model Registry to be served.
- environment_vars Mapping[str, str] - a map of environment variable names/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax: {{secrets/secret_scope/secret_key}}.
- instance_profile_arn str - ARN of the instance profile that the served model will use to access AWS resources.
- max_provisioned_throughput int - The maximum tokens per second that the endpoint can scale up to.
- min_provisioned_throughput int - The minimum tokens per second that the endpoint can scale down to.
- name str - The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.
- scale_to_zero_enabled bool - Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.
- workload_size str - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency).
- workload_type str - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the documentation for all options. The default value is CPU.
- modelName String - The name of the model in Databricks Model Registry to be served.
- modelVersion String - The version of the model in Databricks Model Registry to be served.
- environmentVars Map<String> - a map of environment variable names/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax: {{secrets/secret_scope/secret_key}}.
- instanceProfileArn String - ARN of the instance profile that the served model will use to access AWS resources.
- maxProvisionedThroughput Number - The maximum tokens per second that the endpoint can scale up to.
- minProvisionedThroughput Number - The minimum tokens per second that the endpoint can scale down to.
- name String - The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.
- scaleToZeroEnabled Boolean - Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.
- workloadSize String - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency).
- workloadType String - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the documentation for all options. The default value is CPU.
ModelServingConfigTrafficConfig, ModelServingConfigTrafficConfigArgs
- Routes List<ModelServingConfigTrafficConfigRoute> - Each block represents a route that defines traffic to each served entity. Each served_entity block needs to have a corresponding routes block.
- Routes []ModelServingConfigTrafficConfigRoute - Each block represents a route that defines traffic to each served entity. Each served_entity block needs to have a corresponding routes block.
- routes List<ModelServingConfigTrafficConfigRoute> - Each block represents a route that defines traffic to each served entity. Each served_entity block needs to have a corresponding routes block.
- routes ModelServingConfigTrafficConfigRoute[] - Each block represents a route that defines traffic to each served entity. Each served_entity block needs to have a corresponding routes block.
- routes Sequence[ModelServingConfigTrafficConfigRoute] - Each block represents a route that defines traffic to each served entity. Each served_entity block needs to have a corresponding routes block.
- routes List<Property Map> - Each block represents a route that defines traffic to each served entity. Each served_entity block needs to have a corresponding routes block.
ModelServingConfigTrafficConfigRoute, ModelServingConfigTrafficConfigRouteArgs
- ServedModelName string
- TrafficPercentage int - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- ServedModelName string
- TrafficPercentage int - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- servedModelName String
- trafficPercentage Integer - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- servedModelName string
- trafficPercentage number - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- served_model_name str
- traffic_percentage int - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- servedModelName String
- trafficPercentage Number - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
ModelServingRateLimit, ModelServingRateLimitArgs
- Calls int - Used to specify how many calls are allowed for a key within the renewal_period.
- RenewalPeriod string - Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
- Key string - Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
- Calls int - Used to specify how many calls are allowed for a key within the renewal_period.
- RenewalPeriod string - Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
- Key string - Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
- calls Integer - Used to specify how many calls are allowed for a key within the renewal_period.
- renewalPeriod String - Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
- key String - Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
- calls number - Used to specify how many calls are allowed for a key within the renewal_period.
- renewalPeriod string - Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
- key string - Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
- calls int - Used to specify how many calls are allowed for a key within the renewal_period.
- renewal_period str - Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
- key str - Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
- calls Number - Used to specify how many calls are allowed for a key within the renewal_period.
- renewalPeriod String - Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
- key String - Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
ModelServingTag, ModelServingTagArgs
Import
The model serving resource can be imported using the name of the endpoint.
bash
$ pulumi import databricks:index/modelServing:ModelServing this <model-serving-endpoint-name>
To learn more about importing existing cloud resources, see Importing resources.
Package Details
- Repository
- databricks pulumi/pulumi-databricks
- License
- Apache-2.0
- Notes
- This Pulumi package is based on the
databricks
Terraform Provider.