databricks.ModelServing
Explore with Pulumi AI
This resource allows you to manage Model Serving endpoints in Databricks.
If you replace
served_models
with served_entities
in an existing serving endpoint, the serving endpoint will briefly go into an update state (~30 seconds) and increment the config version.
Example Usage
import * as pulumi from "@pulumi/pulumi";
import * as databricks from "@pulumi/databricks";
const _this = new databricks.ModelServing("this", {
name: "ads-serving-endpoint",
config: {
servedEntities: [
{
name: "prod_model",
entityName: "ads-model",
entityVersion: "2",
workloadSize: "Small",
scaleToZeroEnabled: true,
},
{
name: "candidate_model",
entityName: "ads-model",
entityVersion: "4",
workloadSize: "Small",
scaleToZeroEnabled: false,
},
],
trafficConfig: {
routes: [
{
servedModelName: "prod_model",
trafficPercentage: 90,
},
{
servedModelName: "candidate_model",
trafficPercentage: 10,
},
],
},
},
});
import pulumi
import pulumi_databricks as databricks
this = databricks.ModelServing("this",
name="ads-serving-endpoint",
config={
"served_entities": [
{
"name": "prod_model",
"entity_name": "ads-model",
"entity_version": "2",
"workload_size": "Small",
"scale_to_zero_enabled": True,
},
{
"name": "candidate_model",
"entity_name": "ads-model",
"entity_version": "4",
"workload_size": "Small",
"scale_to_zero_enabled": False,
},
],
"traffic_config": {
"routes": [
{
"served_model_name": "prod_model",
"traffic_percentage": 90,
},
{
"served_model_name": "candidate_model",
"traffic_percentage": 10,
},
],
},
})
package main
import (
"github.com/pulumi/pulumi-databricks/sdk/go/databricks"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := databricks.NewModelServing(ctx, "this", &databricks.ModelServingArgs{
Name: pulumi.String("ads-serving-endpoint"),
Config: &databricks.ModelServingConfigArgs{
ServedEntities: databricks.ModelServingConfigServedEntityArray{
&databricks.ModelServingConfigServedEntityArgs{
Name: pulumi.String("prod_model"),
EntityName: pulumi.String("ads-model"),
EntityVersion: pulumi.String("2"),
WorkloadSize: pulumi.String("Small"),
ScaleToZeroEnabled: pulumi.Bool(true),
},
&databricks.ModelServingConfigServedEntityArgs{
Name: pulumi.String("candidate_model"),
EntityName: pulumi.String("ads-model"),
EntityVersion: pulumi.String("4"),
WorkloadSize: pulumi.String("Small"),
ScaleToZeroEnabled: pulumi.Bool(false),
},
},
TrafficConfig: &databricks.ModelServingConfigTrafficConfigArgs{
Routes: databricks.ModelServingConfigTrafficConfigRouteArray{
&databricks.ModelServingConfigTrafficConfigRouteArgs{
ServedModelName: pulumi.String("prod_model"),
TrafficPercentage: pulumi.Int(90),
},
&databricks.ModelServingConfigTrafficConfigRouteArgs{
ServedModelName: pulumi.String("candidate_model"),
TrafficPercentage: pulumi.Int(10),
},
},
},
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Databricks = Pulumi.Databricks;
return await Deployment.RunAsync(() =>
{
var @this = new Databricks.ModelServing("this", new()
{
Name = "ads-serving-endpoint",
Config = new Databricks.Inputs.ModelServingConfigArgs
{
ServedEntities = new[]
{
new Databricks.Inputs.ModelServingConfigServedEntityArgs
{
Name = "prod_model",
EntityName = "ads-model",
EntityVersion = "2",
WorkloadSize = "Small",
ScaleToZeroEnabled = true,
},
new Databricks.Inputs.ModelServingConfigServedEntityArgs
{
Name = "candidate_model",
EntityName = "ads-model",
EntityVersion = "4",
WorkloadSize = "Small",
ScaleToZeroEnabled = false,
},
},
TrafficConfig = new Databricks.Inputs.ModelServingConfigTrafficConfigArgs
{
Routes = new[]
{
new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
{
ServedModelName = "prod_model",
TrafficPercentage = 90,
},
new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
{
ServedModelName = "candidate_model",
TrafficPercentage = 10,
},
},
},
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.databricks.ModelServing;
import com.pulumi.databricks.ModelServingArgs;
import com.pulumi.databricks.inputs.ModelServingConfigArgs;
import com.pulumi.databricks.inputs.ModelServingConfigTrafficConfigArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var this_ = new ModelServing("this", ModelServingArgs.builder()
.name("ads-serving-endpoint")
.config(ModelServingConfigArgs.builder()
.servedEntities(
ModelServingConfigServedEntityArgs.builder()
.name("prod_model")
.entityName("ads-model")
.entityVersion("2")
.workloadSize("Small")
.scaleToZeroEnabled(true)
.build(),
ModelServingConfigServedEntityArgs.builder()
.name("candidate_model")
.entityName("ads-model")
.entityVersion("4")
.workloadSize("Small")
.scaleToZeroEnabled(false)
.build())
.trafficConfig(ModelServingConfigTrafficConfigArgs.builder()
.routes(
ModelServingConfigTrafficConfigRouteArgs.builder()
.servedModelName("prod_model")
.trafficPercentage(90)
.build(),
ModelServingConfigTrafficConfigRouteArgs.builder()
.servedModelName("candidate_model")
.trafficPercentage(10)
.build())
.build())
.build())
.build());
}
}
resources:
this:
type: databricks:ModelServing
properties:
name: ads-serving-endpoint
config:
servedEntities:
- name: prod_model
entityName: ads-model
entityVersion: '2'
workloadSize: Small
scaleToZeroEnabled: true
- name: candidate_model
entityName: ads-model
entityVersion: '4'
workloadSize: Small
scaleToZeroEnabled: false
trafficConfig:
routes:
- servedModelName: prod_model
trafficPercentage: 90
- servedModelName: candidate_model
trafficPercentage: 10
Access Control
- databricks.Permissions can control which groups or individual users can Manage, Query or View individual serving endpoints.
Related Resources
The following resources are often used in the same context:
- databricks.RegisteredModel to create Models in Unity Catalog in Databricks.
- End to end workspace management guide.
- databricks.Directory to manage directories in Databricks Workspace.
- databricks.MlflowModel to create models in the workspace model registry in Databricks.
- databricks.Notebook to manage Databricks Notebooks.
- databricks.Notebook data to export a notebook from Databricks Workspace.
- databricks.Repo to manage Databricks Repos.
Create ModelServing Resource
Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.
Constructor syntax
new ModelServing(name: string, args: ModelServingArgs, opts?: CustomResourceOptions);
@overload
def ModelServing(resource_name: str,
args: ModelServingArgs,
opts: Optional[ResourceOptions] = None)
@overload
def ModelServing(resource_name: str,
opts: Optional[ResourceOptions] = None,
config: Optional[ModelServingConfigArgs] = None,
ai_gateway: Optional[ModelServingAiGatewayArgs] = None,
name: Optional[str] = None,
rate_limits: Optional[Sequence[ModelServingRateLimitArgs]] = None,
route_optimized: Optional[bool] = None,
tags: Optional[Sequence[ModelServingTagArgs]] = None)
func NewModelServing(ctx *Context, name string, args ModelServingArgs, opts ...ResourceOption) (*ModelServing, error)
public ModelServing(string name, ModelServingArgs args, CustomResourceOptions? opts = null)
public ModelServing(String name, ModelServingArgs args)
public ModelServing(String name, ModelServingArgs args, CustomResourceOptions options)
type: databricks:ModelServing
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
Parameters
- name string
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args ModelServingArgs
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
Constructor example
The following reference example uses placeholder values for all input properties.
var modelServingResource = new Databricks.ModelServing("modelServingResource", new()
{
Config = new Databricks.Inputs.ModelServingConfigArgs
{
AutoCaptureConfig = new Databricks.Inputs.ModelServingConfigAutoCaptureConfigArgs
{
CatalogName = "string",
Enabled = false,
SchemaName = "string",
TableNamePrefix = "string",
},
ServedEntities = new[]
{
new Databricks.Inputs.ModelServingConfigServedEntityArgs
{
EntityName = "string",
EntityVersion = "string",
EnvironmentVars =
{
{ "string", "string" },
},
ExternalModel = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelArgs
{
Name = "string",
Provider = "string",
Task = "string",
Ai21labsConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelAi21labsConfigArgs
{
Ai21labsApiKey = "string",
Ai21labsApiKeyPlaintext = "string",
},
AmazonBedrockConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs
{
AwsRegion = "string",
BedrockProvider = "string",
AwsAccessKeyId = "string",
AwsAccessKeyIdPlaintext = "string",
AwsSecretAccessKey = "string",
AwsSecretAccessKeyPlaintext = "string",
},
AnthropicConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelAnthropicConfigArgs
{
AnthropicApiKey = "string",
AnthropicApiKeyPlaintext = "string",
},
CohereConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelCohereConfigArgs
{
CohereApiBase = "string",
CohereApiKey = "string",
CohereApiKeyPlaintext = "string",
},
DatabricksModelServingConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs
{
DatabricksWorkspaceUrl = "string",
DatabricksApiToken = "string",
DatabricksApiTokenPlaintext = "string",
},
GoogleCloudVertexAiConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfigArgs
{
PrivateKey = "string",
PrivateKeyPlaintext = "string",
ProjectId = "string",
Region = "string",
},
OpenaiConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelOpenaiConfigArgs
{
MicrosoftEntraClientId = "string",
MicrosoftEntraClientSecret = "string",
MicrosoftEntraClientSecretPlaintext = "string",
MicrosoftEntraTenantId = "string",
OpenaiApiBase = "string",
OpenaiApiKey = "string",
OpenaiApiKeyPlaintext = "string",
OpenaiApiType = "string",
OpenaiApiVersion = "string",
OpenaiDeploymentName = "string",
OpenaiOrganization = "string",
},
PalmConfig = new Databricks.Inputs.ModelServingConfigServedEntityExternalModelPalmConfigArgs
{
PalmApiKey = "string",
PalmApiKeyPlaintext = "string",
},
},
InstanceProfileArn = "string",
MaxProvisionedThroughput = 0,
MinProvisionedThroughput = 0,
Name = "string",
ScaleToZeroEnabled = false,
WorkloadSize = "string",
WorkloadType = "string",
},
},
TrafficConfig = new Databricks.Inputs.ModelServingConfigTrafficConfigArgs
{
Routes = new[]
{
new Databricks.Inputs.ModelServingConfigTrafficConfigRouteArgs
{
ServedModelName = "string",
TrafficPercentage = 0,
},
},
},
},
AiGateway = new Databricks.Inputs.ModelServingAiGatewayArgs
{
Guardrails = new Databricks.Inputs.ModelServingAiGatewayGuardrailsArgs
{
Input = new Databricks.Inputs.ModelServingAiGatewayGuardrailsInputArgs
{
InvalidKeywords = new[]
{
"string",
},
Pii = new Databricks.Inputs.ModelServingAiGatewayGuardrailsInputPiiArgs
{
Behavior = "string",
},
Safety = false,
ValidTopics = new[]
{
"string",
},
},
Output = new Databricks.Inputs.ModelServingAiGatewayGuardrailsOutputArgs
{
InvalidKeywords = new[]
{
"string",
},
Pii = new Databricks.Inputs.ModelServingAiGatewayGuardrailsOutputPiiArgs
{
Behavior = "string",
},
Safety = false,
ValidTopics = new[]
{
"string",
},
},
},
InferenceTableConfig = new Databricks.Inputs.ModelServingAiGatewayInferenceTableConfigArgs
{
CatalogName = "string",
Enabled = false,
SchemaName = "string",
TableNamePrefix = "string",
},
RateLimits = new[]
{
new Databricks.Inputs.ModelServingAiGatewayRateLimitArgs
{
Calls = 0,
RenewalPeriod = "string",
Key = "string",
},
},
UsageTrackingConfig = new Databricks.Inputs.ModelServingAiGatewayUsageTrackingConfigArgs
{
Enabled = false,
},
},
Name = "string",
RateLimits = new[]
{
new Databricks.Inputs.ModelServingRateLimitArgs
{
Calls = 0,
RenewalPeriod = "string",
Key = "string",
},
},
RouteOptimized = false,
Tags = new[]
{
new Databricks.Inputs.ModelServingTagArgs
{
Key = "string",
Value = "string",
},
},
});
example, err := databricks.NewModelServing(ctx, "modelServingResource", &databricks.ModelServingArgs{
Config: &databricks.ModelServingConfigArgs{
AutoCaptureConfig: &databricks.ModelServingConfigAutoCaptureConfigArgs{
CatalogName: pulumi.String("string"),
Enabled: pulumi.Bool(false),
SchemaName: pulumi.String("string"),
TableNamePrefix: pulumi.String("string"),
},
ServedEntities: databricks.ModelServingConfigServedEntityArray{
&databricks.ModelServingConfigServedEntityArgs{
EntityName: pulumi.String("string"),
EntityVersion: pulumi.String("string"),
EnvironmentVars: pulumi.StringMap{
"string": pulumi.String("string"),
},
ExternalModel: &databricks.ModelServingConfigServedEntityExternalModelArgs{
Name: pulumi.String("string"),
Provider: pulumi.String("string"),
Task: pulumi.String("string"),
Ai21labsConfig: &databricks.ModelServingConfigServedEntityExternalModelAi21labsConfigArgs{
Ai21labsApiKey: pulumi.String("string"),
Ai21labsApiKeyPlaintext: pulumi.String("string"),
},
AmazonBedrockConfig: &databricks.ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs{
AwsRegion: pulumi.String("string"),
BedrockProvider: pulumi.String("string"),
AwsAccessKeyId: pulumi.String("string"),
AwsAccessKeyIdPlaintext: pulumi.String("string"),
AwsSecretAccessKey: pulumi.String("string"),
AwsSecretAccessKeyPlaintext: pulumi.String("string"),
},
AnthropicConfig: &databricks.ModelServingConfigServedEntityExternalModelAnthropicConfigArgs{
AnthropicApiKey: pulumi.String("string"),
AnthropicApiKeyPlaintext: pulumi.String("string"),
},
CohereConfig: &databricks.ModelServingConfigServedEntityExternalModelCohereConfigArgs{
CohereApiBase: pulumi.String("string"),
CohereApiKey: pulumi.String("string"),
CohereApiKeyPlaintext: pulumi.String("string"),
},
DatabricksModelServingConfig: &databricks.ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs{
DatabricksWorkspaceUrl: pulumi.String("string"),
DatabricksApiToken: pulumi.String("string"),
DatabricksApiTokenPlaintext: pulumi.String("string"),
},
GoogleCloudVertexAiConfig: &databricks.ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfigArgs{
PrivateKey: pulumi.String("string"),
PrivateKeyPlaintext: pulumi.String("string"),
ProjectId: pulumi.String("string"),
Region: pulumi.String("string"),
},
OpenaiConfig: &databricks.ModelServingConfigServedEntityExternalModelOpenaiConfigArgs{
MicrosoftEntraClientId: pulumi.String("string"),
MicrosoftEntraClientSecret: pulumi.String("string"),
MicrosoftEntraClientSecretPlaintext: pulumi.String("string"),
MicrosoftEntraTenantId: pulumi.String("string"),
OpenaiApiBase: pulumi.String("string"),
OpenaiApiKey: pulumi.String("string"),
OpenaiApiKeyPlaintext: pulumi.String("string"),
OpenaiApiType: pulumi.String("string"),
OpenaiApiVersion: pulumi.String("string"),
OpenaiDeploymentName: pulumi.String("string"),
OpenaiOrganization: pulumi.String("string"),
},
PalmConfig: &databricks.ModelServingConfigServedEntityExternalModelPalmConfigArgs{
PalmApiKey: pulumi.String("string"),
PalmApiKeyPlaintext: pulumi.String("string"),
},
},
InstanceProfileArn: pulumi.String("string"),
MaxProvisionedThroughput: pulumi.Int(0),
MinProvisionedThroughput: pulumi.Int(0),
Name: pulumi.String("string"),
ScaleToZeroEnabled: pulumi.Bool(false),
WorkloadSize: pulumi.String("string"),
WorkloadType: pulumi.String("string"),
},
},
TrafficConfig: &databricks.ModelServingConfigTrafficConfigArgs{
Routes: databricks.ModelServingConfigTrafficConfigRouteArray{
&databricks.ModelServingConfigTrafficConfigRouteArgs{
ServedModelName: pulumi.String("string"),
TrafficPercentage: pulumi.Int(0),
},
},
},
},
AiGateway: &databricks.ModelServingAiGatewayArgs{
Guardrails: &databricks.ModelServingAiGatewayGuardrailsArgs{
Input: &databricks.ModelServingAiGatewayGuardrailsInputTypeArgs{
InvalidKeywords: pulumi.StringArray{
pulumi.String("string"),
},
Pii: &databricks.ModelServingAiGatewayGuardrailsInputPiiArgs{
Behavior: pulumi.String("string"),
},
Safety: pulumi.Bool(false),
ValidTopics: pulumi.StringArray{
pulumi.String("string"),
},
},
Output: &databricks.ModelServingAiGatewayGuardrailsOutputTypeArgs{
InvalidKeywords: pulumi.StringArray{
pulumi.String("string"),
},
Pii: &databricks.ModelServingAiGatewayGuardrailsOutputPiiArgs{
Behavior: pulumi.String("string"),
},
Safety: pulumi.Bool(false),
ValidTopics: pulumi.StringArray{
pulumi.String("string"),
},
},
},
InferenceTableConfig: &databricks.ModelServingAiGatewayInferenceTableConfigArgs{
CatalogName: pulumi.String("string"),
Enabled: pulumi.Bool(false),
SchemaName: pulumi.String("string"),
TableNamePrefix: pulumi.String("string"),
},
RateLimits: databricks.ModelServingAiGatewayRateLimitArray{
&databricks.ModelServingAiGatewayRateLimitArgs{
Calls: pulumi.Int(0),
RenewalPeriod: pulumi.String("string"),
Key: pulumi.String("string"),
},
},
UsageTrackingConfig: &databricks.ModelServingAiGatewayUsageTrackingConfigArgs{
Enabled: pulumi.Bool(false),
},
},
Name: pulumi.String("string"),
RateLimits: databricks.ModelServingRateLimitArray{
&databricks.ModelServingRateLimitArgs{
Calls: pulumi.Int(0),
RenewalPeriod: pulumi.String("string"),
Key: pulumi.String("string"),
},
},
RouteOptimized: pulumi.Bool(false),
Tags: databricks.ModelServingTagArray{
&databricks.ModelServingTagArgs{
Key: pulumi.String("string"),
Value: pulumi.String("string"),
},
},
})
var modelServingResource = new ModelServing("modelServingResource", ModelServingArgs.builder()
.config(ModelServingConfigArgs.builder()
.autoCaptureConfig(ModelServingConfigAutoCaptureConfigArgs.builder()
.catalogName("string")
.enabled(false)
.schemaName("string")
.tableNamePrefix("string")
.build())
.servedEntities(ModelServingConfigServedEntityArgs.builder()
.entityName("string")
.entityVersion("string")
.environmentVars(Map.of("string", "string"))
.externalModel(ModelServingConfigServedEntityExternalModelArgs.builder()
.name("string")
.provider("string")
.task("string")
.ai21labsConfig(ModelServingConfigServedEntityExternalModelAi21labsConfigArgs.builder()
.ai21labsApiKey("string")
.ai21labsApiKeyPlaintext("string")
.build())
.amazonBedrockConfig(ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs.builder()
.awsRegion("string")
.bedrockProvider("string")
.awsAccessKeyId("string")
.awsAccessKeyIdPlaintext("string")
.awsSecretAccessKey("string")
.awsSecretAccessKeyPlaintext("string")
.build())
.anthropicConfig(ModelServingConfigServedEntityExternalModelAnthropicConfigArgs.builder()
.anthropicApiKey("string")
.anthropicApiKeyPlaintext("string")
.build())
.cohereConfig(ModelServingConfigServedEntityExternalModelCohereConfigArgs.builder()
.cohereApiBase("string")
.cohereApiKey("string")
.cohereApiKeyPlaintext("string")
.build())
.databricksModelServingConfig(ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs.builder()
.databricksWorkspaceUrl("string")
.databricksApiToken("string")
.databricksApiTokenPlaintext("string")
.build())
.googleCloudVertexAiConfig(ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfigArgs.builder()
.privateKey("string")
.privateKeyPlaintext("string")
.projectId("string")
.region("string")
.build())
.openaiConfig(ModelServingConfigServedEntityExternalModelOpenaiConfigArgs.builder()
.microsoftEntraClientId("string")
.microsoftEntraClientSecret("string")
.microsoftEntraClientSecretPlaintext("string")
.microsoftEntraTenantId("string")
.openaiApiBase("string")
.openaiApiKey("string")
.openaiApiKeyPlaintext("string")
.openaiApiType("string")
.openaiApiVersion("string")
.openaiDeploymentName("string")
.openaiOrganization("string")
.build())
.palmConfig(ModelServingConfigServedEntityExternalModelPalmConfigArgs.builder()
.palmApiKey("string")
.palmApiKeyPlaintext("string")
.build())
.build())
.instanceProfileArn("string")
.maxProvisionedThroughput(0)
.minProvisionedThroughput(0)
.name("string")
.scaleToZeroEnabled(false)
.workloadSize("string")
.workloadType("string")
.build())
.trafficConfig(ModelServingConfigTrafficConfigArgs.builder()
.routes(ModelServingConfigTrafficConfigRouteArgs.builder()
.servedModelName("string")
.trafficPercentage(0)
.build())
.build())
.build())
.aiGateway(ModelServingAiGatewayArgs.builder()
.guardrails(ModelServingAiGatewayGuardrailsArgs.builder()
.input(ModelServingAiGatewayGuardrailsInputArgs.builder()
.invalidKeywords("string")
.pii(ModelServingAiGatewayGuardrailsInputPiiArgs.builder()
.behavior("string")
.build())
.safety(false)
.validTopics("string")
.build())
.output(ModelServingAiGatewayGuardrailsOutputArgs.builder()
.invalidKeywords("string")
.pii(ModelServingAiGatewayGuardrailsOutputPiiArgs.builder()
.behavior("string")
.build())
.safety(false)
.validTopics("string")
.build())
.build())
.inferenceTableConfig(ModelServingAiGatewayInferenceTableConfigArgs.builder()
.catalogName("string")
.enabled(false)
.schemaName("string")
.tableNamePrefix("string")
.build())
.rateLimits(ModelServingAiGatewayRateLimitArgs.builder()
.calls(0)
.renewalPeriod("string")
.key("string")
.build())
.usageTrackingConfig(ModelServingAiGatewayUsageTrackingConfigArgs.builder()
.enabled(false)
.build())
.build())
.name("string")
.rateLimits(ModelServingRateLimitArgs.builder()
.calls(0)
.renewalPeriod("string")
.key("string")
.build())
.routeOptimized(false)
.tags(ModelServingTagArgs.builder()
.key("string")
.value("string")
.build())
.build());
model_serving_resource = databricks.ModelServing("modelServingResource",
config={
"auto_capture_config": {
"catalog_name": "string",
"enabled": False,
"schema_name": "string",
"table_name_prefix": "string",
},
"served_entities": [{
"entity_name": "string",
"entity_version": "string",
"environment_vars": {
"string": "string",
},
"external_model": {
"name": "string",
"provider": "string",
"task": "string",
"ai21labs_config": {
"ai21labs_api_key": "string",
"ai21labs_api_key_plaintext": "string",
},
"amazon_bedrock_config": {
"aws_region": "string",
"bedrock_provider": "string",
"aws_access_key_id": "string",
"aws_access_key_id_plaintext": "string",
"aws_secret_access_key": "string",
"aws_secret_access_key_plaintext": "string",
},
"anthropic_config": {
"anthropic_api_key": "string",
"anthropic_api_key_plaintext": "string",
},
"cohere_config": {
"cohere_api_base": "string",
"cohere_api_key": "string",
"cohere_api_key_plaintext": "string",
},
"databricks_model_serving_config": {
"databricks_workspace_url": "string",
"databricks_api_token": "string",
"databricks_api_token_plaintext": "string",
},
"google_cloud_vertex_ai_config": {
"private_key": "string",
"private_key_plaintext": "string",
"project_id": "string",
"region": "string",
},
"openai_config": {
"microsoft_entra_client_id": "string",
"microsoft_entra_client_secret": "string",
"microsoft_entra_client_secret_plaintext": "string",
"microsoft_entra_tenant_id": "string",
"openai_api_base": "string",
"openai_api_key": "string",
"openai_api_key_plaintext": "string",
"openai_api_type": "string",
"openai_api_version": "string",
"openai_deployment_name": "string",
"openai_organization": "string",
},
"palm_config": {
"palm_api_key": "string",
"palm_api_key_plaintext": "string",
},
},
"instance_profile_arn": "string",
"max_provisioned_throughput": 0,
"min_provisioned_throughput": 0,
"name": "string",
"scale_to_zero_enabled": False,
"workload_size": "string",
"workload_type": "string",
}],
"traffic_config": {
"routes": [{
"served_model_name": "string",
"traffic_percentage": 0,
}],
},
},
ai_gateway={
"guardrails": {
"input": {
"invalid_keywords": ["string"],
"pii": {
"behavior": "string",
},
"safety": False,
"valid_topics": ["string"],
},
"output": {
"invalid_keywords": ["string"],
"pii": {
"behavior": "string",
},
"safety": False,
"valid_topics": ["string"],
},
},
"inference_table_config": {
"catalog_name": "string",
"enabled": False,
"schema_name": "string",
"table_name_prefix": "string",
},
"rate_limits": [{
"calls": 0,
"renewal_period": "string",
"key": "string",
}],
"usage_tracking_config": {
"enabled": False,
},
},
name="string",
rate_limits=[{
"calls": 0,
"renewal_period": "string",
"key": "string",
}],
route_optimized=False,
tags=[{
"key": "string",
"value": "string",
}])
const modelServingResource = new databricks.ModelServing("modelServingResource", {
config: {
autoCaptureConfig: {
catalogName: "string",
enabled: false,
schemaName: "string",
tableNamePrefix: "string",
},
servedEntities: [{
entityName: "string",
entityVersion: "string",
environmentVars: {
string: "string",
},
externalModel: {
name: "string",
provider: "string",
task: "string",
ai21labsConfig: {
ai21labsApiKey: "string",
ai21labsApiKeyPlaintext: "string",
},
amazonBedrockConfig: {
awsRegion: "string",
bedrockProvider: "string",
awsAccessKeyId: "string",
awsAccessKeyIdPlaintext: "string",
awsSecretAccessKey: "string",
awsSecretAccessKeyPlaintext: "string",
},
anthropicConfig: {
anthropicApiKey: "string",
anthropicApiKeyPlaintext: "string",
},
cohereConfig: {
cohereApiBase: "string",
cohereApiKey: "string",
cohereApiKeyPlaintext: "string",
},
databricksModelServingConfig: {
databricksWorkspaceUrl: "string",
databricksApiToken: "string",
databricksApiTokenPlaintext: "string",
},
googleCloudVertexAiConfig: {
privateKey: "string",
privateKeyPlaintext: "string",
projectId: "string",
region: "string",
},
openaiConfig: {
microsoftEntraClientId: "string",
microsoftEntraClientSecret: "string",
microsoftEntraClientSecretPlaintext: "string",
microsoftEntraTenantId: "string",
openaiApiBase: "string",
openaiApiKey: "string",
openaiApiKeyPlaintext: "string",
openaiApiType: "string",
openaiApiVersion: "string",
openaiDeploymentName: "string",
openaiOrganization: "string",
},
palmConfig: {
palmApiKey: "string",
palmApiKeyPlaintext: "string",
},
},
instanceProfileArn: "string",
maxProvisionedThroughput: 0,
minProvisionedThroughput: 0,
name: "string",
scaleToZeroEnabled: false,
workloadSize: "string",
workloadType: "string",
}],
trafficConfig: {
routes: [{
servedModelName: "string",
trafficPercentage: 0,
}],
},
},
aiGateway: {
guardrails: {
input: {
invalidKeywords: ["string"],
pii: {
behavior: "string",
},
safety: false,
validTopics: ["string"],
},
output: {
invalidKeywords: ["string"],
pii: {
behavior: "string",
},
safety: false,
validTopics: ["string"],
},
},
inferenceTableConfig: {
catalogName: "string",
enabled: false,
schemaName: "string",
tableNamePrefix: "string",
},
rateLimits: [{
calls: 0,
renewalPeriod: "string",
key: "string",
}],
usageTrackingConfig: {
enabled: false,
},
},
name: "string",
rateLimits: [{
calls: 0,
renewalPeriod: "string",
key: "string",
}],
routeOptimized: false,
tags: [{
key: "string",
value: "string",
}],
});
type: databricks:ModelServing
properties:
aiGateway:
guardrails:
input:
invalidKeywords:
- string
pii:
behavior: string
safety: false
validTopics:
- string
output:
invalidKeywords:
- string
pii:
behavior: string
safety: false
validTopics:
- string
inferenceTableConfig:
catalogName: string
enabled: false
schemaName: string
tableNamePrefix: string
rateLimits:
- calls: 0
key: string
renewalPeriod: string
usageTrackingConfig:
enabled: false
config:
autoCaptureConfig:
catalogName: string
enabled: false
schemaName: string
tableNamePrefix: string
servedEntities:
- entityName: string
entityVersion: string
environmentVars:
string: string
externalModel:
ai21labsConfig:
ai21labsApiKey: string
ai21labsApiKeyPlaintext: string
amazonBedrockConfig:
awsAccessKeyId: string
awsAccessKeyIdPlaintext: string
awsRegion: string
awsSecretAccessKey: string
awsSecretAccessKeyPlaintext: string
bedrockProvider: string
anthropicConfig:
anthropicApiKey: string
anthropicApiKeyPlaintext: string
cohereConfig:
cohereApiBase: string
cohereApiKey: string
cohereApiKeyPlaintext: string
databricksModelServingConfig:
databricksApiToken: string
databricksApiTokenPlaintext: string
databricksWorkspaceUrl: string
googleCloudVertexAiConfig:
privateKey: string
privateKeyPlaintext: string
projectId: string
region: string
name: string
openaiConfig:
microsoftEntraClientId: string
microsoftEntraClientSecret: string
microsoftEntraClientSecretPlaintext: string
microsoftEntraTenantId: string
openaiApiBase: string
openaiApiKey: string
openaiApiKeyPlaintext: string
openaiApiType: string
openaiApiVersion: string
openaiDeploymentName: string
openaiOrganization: string
palmConfig:
palmApiKey: string
palmApiKeyPlaintext: string
provider: string
task: string
instanceProfileArn: string
maxProvisionedThroughput: 0
minProvisionedThroughput: 0
name: string
scaleToZeroEnabled: false
workloadSize: string
workloadType: string
trafficConfig:
routes:
- servedModelName: string
trafficPercentage: 0
name: string
rateLimits:
- calls: 0
key: string
renewalPeriod: string
routeOptimized: false
tags:
- key: string
value: string
ModelServing Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.
The ModelServing resource accepts the following input properties:
- Config
Model
Serving Config - The model serving endpoint configuration.
- Ai
Gateway ModelServing Ai Gateway - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- Name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- Rate
Limits List<ModelServing Rate Limit> - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- Route
Optimized bool - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- Tags List<Model
Serving Tag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- Config
Model
Serving Config Args - The model serving endpoint configuration.
- Ai
Gateway ModelServing Ai Gateway Args - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- Name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- Rate
Limits []ModelServing Rate Limit Args - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- Route
Optimized bool - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- Tags
[]Model Serving Tag Args - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config
Model
Serving Config - The model serving endpoint configuration.
- ai
Gateway ModelServing Ai Gateway - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- name String
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits List<ModelServing Rate Limit> - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- route
Optimized Boolean - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- tags
List<Model Serving Tag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config
Model
Serving Config - The model serving endpoint configuration.
- ai
Gateway ModelServing Ai Gateway - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits ModelServing Rate Limit[] - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- route
Optimized boolean - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- tags
Model Serving Tag[] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config
Model
Serving Config Args - The model serving endpoint configuration.
- ai_
gateway ModelServing Ai Gateway Args - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- name str
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate_
limits Sequence[ModelServing Rate Limit Args] - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- route_
optimized bool - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- tags
Sequence[Model Serving Tag Args] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- config Property Map
- The model serving endpoint configuration.
- ai
Gateway Property Map - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- name String
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits List<Property Map> - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- route
Optimized Boolean - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- tags List<Property Map>
- Tags to be attached to the serving endpoint and automatically propagated to billing logs.
Outputs
All input properties are implicitly available as output properties. Additionally, the ModelServing resource produces the following output properties:
- Id string
- The provider-assigned unique ID for this managed resource.
- Serving
Endpoint Id string - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- Id string
- The provider-assigned unique ID for this managed resource.
- Serving
Endpoint Id string - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id String
- The provider-assigned unique ID for this managed resource.
- serving
Endpoint Id String - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id string
- The provider-assigned unique ID for this managed resource.
- serving
Endpoint Id string - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id str
- The provider-assigned unique ID for this managed resource.
- serving_
endpoint_ id str - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- id String
- The provider-assigned unique ID for this managed resource.
- serving
Endpoint Id String - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
Look up Existing ModelServing Resource
Get an existing ModelServing resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: ModelServingState, opts?: CustomResourceOptions): ModelServing
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
ai_gateway: Optional[ModelServingAiGatewayArgs] = None,
config: Optional[ModelServingConfigArgs] = None,
name: Optional[str] = None,
rate_limits: Optional[Sequence[ModelServingRateLimitArgs]] = None,
route_optimized: Optional[bool] = None,
serving_endpoint_id: Optional[str] = None,
tags: Optional[Sequence[ModelServingTagArgs]] = None) -> ModelServing
func GetModelServing(ctx *Context, name string, id IDInput, state *ModelServingState, opts ...ResourceOption) (*ModelServing, error)
public static ModelServing Get(string name, Input<string> id, ModelServingState? state, CustomResourceOptions? opts = null)
public static ModelServing get(String name, Output<String> id, ModelServingState state, CustomResourceOptions options)
Resource lookup is not supported in YAML
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- Ai
Gateway ModelServing Ai Gateway - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- Config
Model
Serving Config - The model serving endpoint configuration.
- Name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- Rate
Limits List<ModelServing Rate Limit> - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- Route
Optimized bool - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- Serving
Endpoint Id string - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- Tags
List<Model Serving Tag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- Ai
Gateway ModelServing Ai Gateway Args - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- Config
Model
Serving Config Args - The model serving endpoint configuration.
- Name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- Rate
Limits []ModelServing Rate Limit Args - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- Route
Optimized bool - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- Serving
Endpoint Id string - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- Tags
[]Model Serving Tag Args - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- ai
Gateway ModelServing Ai Gateway - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- config
Model
Serving Config - The model serving endpoint configuration.
- name String
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits List<ModelServing Rate Limit> - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- route
Optimized Boolean - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- serving
Endpoint Id String - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- tags
List<Model Serving Tag> - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- ai
Gateway ModelServing Ai Gateway - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- config
Model
Serving Config - The model serving endpoint configuration.
- name string
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits ModelServing Rate Limit[] - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- route
Optimized boolean - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- serving
Endpoint Id string - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- tags
Model Serving Tag[] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- ai_
gateway ModelServing Ai Gateway Args - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- config
Model
Serving Config Args - The model serving endpoint configuration.
- name str
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate_
limits Sequence[ModelServing Rate Limit Args] - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- route_
optimized bool - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- serving_
endpoint_ id str - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- tags
Sequence[Model Serving Tag Args] - Tags to be attached to the serving endpoint and automatically propagated to billing logs.
- ai
Gateway Property Map - A block with AI Gateway configuration for the serving endpoint. Note: only external model endpoints are supported as of now.
- config Property Map
- The model serving endpoint configuration.
- name String
- The name of the model serving endpoint. This field is required and must be unique across a workspace. An endpoint name can consist of alphanumeric characters, dashes, and underscores. NOTE: Changing this name will delete the existing endpoint and create a new endpoint with the updated name.
- rate
Limits List<Property Map> - A list of rate limit blocks to be applied to the serving endpoint. Note: only external and foundation model endpoints are supported as of now.
- route
Optimized Boolean - A boolean enabling route optimization for the endpoint. Note: only available for custom models.
- serving
Endpoint Id String - Unique identifier of the serving endpoint primarily used to set permissions and refer to this instance for other operations.
- tags List<Property Map>
- Tags to be attached to the serving endpoint and automatically propagated to billing logs.
Supporting Types
ModelServingAiGateway, ModelServingAiGatewayArgs
- Guardrails
Model
Serving Ai Gateway Guardrails - Block with configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. Consists of the following attributes:
- Inference
Table Config Model Serving Ai Gateway Inference Table Config - Block describing the configuration of usage tracking. Consists of the following attributes:
- Rate
Limits List<ModelServing Ai Gateway Rate Limit> - Block describing rate limits for AI gateway. For details see the description of
rate_limits
block above. - Usage
Tracking Config Model Serving Ai Gateway Usage Tracking Config - Block with configuration for payload logging using inference tables. For details see the description of
auto_capture_config
block above.
- Guardrails
Model
Serving Ai Gateway Guardrails - Block with configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. Consists of the following attributes:
- Inference
Table Config Model Serving Ai Gateway Inference Table Config - Block describing the configuration of usage tracking. Consists of the following attributes:
- Rate
Limits []ModelServing Ai Gateway Rate Limit - Block describing rate limits for AI gateway. For details see the description of
rate_limits
block above. - Usage
Tracking Config Model Serving Ai Gateway Usage Tracking Config - Block with configuration for payload logging using inference tables. For details see the description of
auto_capture_config
block above.
- guardrails
Model
Serving Ai Gateway Guardrails - Block with configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. Consists of the following attributes:
- inference
Table Config Model Serving Ai Gateway Inference Table Config - Block describing the configuration of usage tracking. Consists of the following attributes:
- rate
Limits List<ModelServing Ai Gateway Rate Limit> - Block describing rate limits for AI gateway. For details see the description of
rate_limits
block above. - usage
Tracking Config Model Serving Ai Gateway Usage Tracking Config - Block with configuration for payload logging using inference tables. For details see the description of
auto_capture_config
block above.
- guardrails
Model
Serving Ai Gateway Guardrails - Block with configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. Consists of the following attributes:
- inference
Table Config Model Serving Ai Gateway Inference Table Config - Block describing the configuration of usage tracking. Consists of the following attributes:
- rate
Limits ModelServing Ai Gateway Rate Limit[] - Block describing rate limits for AI gateway. For details see the description of
rate_limits
block above. - usage
Tracking Config Model Serving Ai Gateway Usage Tracking Config - Block with configuration for payload logging using inference tables. For details see the description of
auto_capture_config
block above.
- guardrails
Model
Serving Ai Gateway Guardrails - Block with configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. Consists of the following attributes:
- inference_
table_ config Model Serving Ai Gateway Inference Table Config - Block describing the configuration of usage tracking. Consists of the following attributes:
- rate_
limits Sequence[ModelServing Ai Gateway Rate Limit] - Block describing rate limits for AI gateway. For details see the description of
rate_limits
block above. - usage_
tracking_ config Model Serving Ai Gateway Usage Tracking Config - Block with configuration for payload logging using inference tables. For details see the description of
auto_capture_config
block above.
- guardrails Property Map
- Block with configuration for AI Guardrails to prevent unwanted data and unsafe data in requests and responses. Consists of the following attributes:
- inference
Table Config Property Map - Block describing the configuration of usage tracking. Consists of the following attributes:
- rate
Limits List<Property Map> - Block describing rate limits for AI gateway. For details see the description of
rate_limits
block above. - usage
Tracking Config Property Map - Block with configuration for payload logging using inference tables. For details see the description of
auto_capture_config
block above.
ModelServingAiGatewayGuardrails, ModelServingAiGatewayGuardrailsArgs
- Input
Model
Serving Ai Gateway Guardrails Input - A block with configuration for input guardrail filters:
- Output
Model
Serving Ai Gateway Guardrails Output - A block with configuration for output guardrail filters. Has the same structure as
input
block.
- Input
Model
Serving Ai Gateway Guardrails Input Type - A block with configuration for input guardrail filters:
- Output
Model
Serving Ai Gateway Guardrails Output Type - A block with configuration for output guardrail filters. Has the same structure as
input
block.
- input
Model
Serving Ai Gateway Guardrails Input - A block with configuration for input guardrail filters:
- output
Model
Serving Ai Gateway Guardrails Output - A block with configuration for output guardrail filters. Has the same structure as
input
block.
- input
Model
Serving Ai Gateway Guardrails Input - A block with configuration for input guardrail filters:
- output
Model
Serving Ai Gateway Guardrails Output - A block with configuration for output guardrail filters. Has the same structure as
input
block.
- input
Model
Serving Ai Gateway Guardrails Input - A block with configuration for input guardrail filters:
- output
Model
Serving Ai Gateway Guardrails Output - A block with configuration for output guardrail filters. Has the same structure as
input
block.
- input Property Map
- A block with configuration for input guardrail filters:
- output Property Map
- A block with configuration for output guardrail filters. Has the same structure as
input
block.
ModelServingAiGatewayGuardrailsInput, ModelServingAiGatewayGuardrailsInputArgs
- Invalid
Keywords List<string> - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- Pii
Model
Serving Ai Gateway Guardrails Input Pii - Block with configuration for guardrail PII filter:
- Safety bool
- the boolean flag that indicates whether the safety filter is enabled.
- Valid
Topics List<string> - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- Invalid
Keywords []string - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- Pii
Model
Serving Ai Gateway Guardrails Input Pii - Block with configuration for guardrail PII filter:
- Safety bool
- the boolean flag that indicates whether the safety filter is enabled.
- Valid
Topics []string - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- invalid
Keywords List<String> - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- pii
Model
Serving Ai Gateway Guardrails Input Pii - Block with configuration for guardrail PII filter:
- safety Boolean
- the boolean flag that indicates whether the safety filter is enabled.
- valid
Topics List<String> - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- invalid
Keywords string[] - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- pii
Model
Serving Ai Gateway Guardrails Input Pii - Block with configuration for guardrail PII filter:
- safety boolean
- the boolean flag that indicates whether the safety filter is enabled.
- valid
Topics string[] - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- invalid_
keywords Sequence[str] - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- pii
Model
Serving Ai Gateway Guardrails Input Pii - Block with configuration for guardrail PII filter:
- safety bool
- the boolean flag that indicates whether the safety filter is enabled.
- valid_
topics Sequence[str] - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- invalid
Keywords List<String> - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- pii Property Map
- Block with configuration for guardrail PII filter:
- safety Boolean
- the boolean flag that indicates whether the safety filter is enabled.
- valid
Topics List<String> - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
ModelServingAiGatewayGuardrailsInputPii, ModelServingAiGatewayGuardrailsInputPiiArgs
- Behavior string
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- Behavior string
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- behavior String
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- behavior string
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- behavior str
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- behavior String
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
ModelServingAiGatewayGuardrailsOutput, ModelServingAiGatewayGuardrailsOutputArgs
- Invalid
Keywords List<string> - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- Pii
Model
Serving Ai Gateway Guardrails Output Pii - Block with configuration for guardrail PII filter:
- Safety bool
- the boolean flag that indicates whether the safety filter is enabled.
- Valid
Topics List<string> - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- Invalid
Keywords []string - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- Pii
Model
Serving Ai Gateway Guardrails Output Pii - Block with configuration for guardrail PII filter:
- Safety bool
- the boolean flag that indicates whether the safety filter is enabled.
- Valid
Topics []string - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- invalid
Keywords List<String> - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- pii
Model
Serving Ai Gateway Guardrails Output Pii - Block with configuration for guardrail PII filter:
- safety Boolean
- the boolean flag that indicates whether the safety filter is enabled.
- valid
Topics List<String> - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- invalid
Keywords string[] - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- pii
Model
Serving Ai Gateway Guardrails Output Pii - Block with configuration for guardrail PII filter:
- safety boolean
- the boolean flag that indicates whether the safety filter is enabled.
- valid
Topics string[] - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- invalid_
keywords Sequence[str] - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- pii
Model
Serving Ai Gateway Guardrails Output Pii - Block with configuration for guardrail PII filter:
- safety bool
- the boolean flag that indicates whether the safety filter is enabled.
- valid_
topics Sequence[str] - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
- invalid
Keywords List<String> - List of invalid keywords. AI guardrail uses keyword or string matching to decide if the keyword exists in the request or response content.
- pii Property Map
- Block with configuration for guardrail PII filter:
- safety Boolean
- the boolean flag that indicates whether the safety filter is enabled.
- valid
Topics List<String> - The list of allowed topics. Given a chat request, this guardrail flags the request if its topic is not in the allowed topics.
ModelServingAiGatewayGuardrailsOutputPii, ModelServingAiGatewayGuardrailsOutputPiiArgs
- Behavior string
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- Behavior string
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- behavior String
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- behavior string
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- behavior str
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
- behavior String
- a string that describes the behavior for PII filter. Currently only
BLOCK
value is supported.
ModelServingAiGatewayInferenceTableConfig, ModelServingAiGatewayInferenceTableConfigArgs
- Catalog
Name string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- Enabled bool
- boolean flag specifying if usage tracking is enabled.
- Schema
Name string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- Table
Name Prefix string - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- Catalog
Name string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- Enabled bool
- boolean flag specifying if usage tracking is enabled.
- Schema
Name string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- Table
Name Prefix string - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog
Name String - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled Boolean
- boolean flag specifying if usage tracking is enabled.
- schema
Name String - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table
Name Prefix String - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog
Name string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled boolean
- boolean flag specifying if usage tracking is enabled.
- schema
Name string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table
Name Prefix string - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog_
name str - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled bool
- boolean flag specifying if usage tracking is enabled.
- schema_
name str - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table_
name_ prefix str - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog
Name String - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled Boolean
- boolean flag specifying if usage tracking is enabled.
- schema
Name String - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table
Name Prefix String - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
ModelServingAiGatewayRateLimit, ModelServingAiGatewayRateLimitArgs
- Calls int
- Used to specify how many calls are allowed for a key within the renewal_period.
- Renewal
Period string - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - Key string
- Key field for a serving endpoint rate limit. Currently, only
user
andendpoint
are supported, withendpoint
being the default if not specified.
- Calls int
- Used to specify how many calls are allowed for a key within the renewal_period.
- Renewal
Period string - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - Key string
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls Integer
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal
Period String - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key String
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls number
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal
Period string - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key string
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls int
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal_
period str - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key str
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
- calls Number
- Used to specify how many calls are allowed for a key within the renewal_period.
- renewal
Period String - Renewal period field for a serving endpoint rate limit. Currently, only
minute
is supported. - key String
- Key field for a serving endpoint rate limit. Currently, only
user
and endpoint
are supported, with endpoint
being the default if not specified.
ModelServingAiGatewayUsageTrackingConfig, ModelServingAiGatewayUsageTrackingConfigArgs
- Enabled bool
- Enabled bool
- enabled Boolean
- enabled boolean
- enabled bool
- enabled Boolean
ModelServingConfig, ModelServingConfigArgs
- AutoCaptureConfig ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- ServedEntities List<ModelServingConfigServedEntity> - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- ServedModels List<ModelServingConfigServedModel> - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- TrafficConfig ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- AutoCaptureConfig ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- ServedEntities []ModelServingConfigServedEntity - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- ServedModels []ModelServingConfigServedModel - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- TrafficConfig ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- autoCaptureConfig ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- servedEntities List<ModelServingConfigServedEntity> - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- servedModels List<ModelServingConfigServedModel> - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- trafficConfig ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- autoCaptureConfig ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- servedEntities ModelServingConfigServedEntity[] - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- servedModels ModelServingConfigServedModel[] - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- trafficConfig ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- auto_capture_config ModelServingConfigAutoCaptureConfig - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- served_entities Sequence[ModelServingConfigServedEntity] - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- served_models Sequence[ModelServingConfigServedModel] - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- traffic_config ModelServingConfigTrafficConfig - A single block represents the traffic split configuration amongst the served models.
- autoCaptureConfig Property Map - Configuration for Inference Tables which automatically logs requests and responses to Unity Catalog.
- servedEntities List<Property Map> - A list of served entities for the endpoint to serve. A serving endpoint can have up to 10 served entities.
- servedModels List<Property Map> - Each block represents a served model for the endpoint to serve. A model serving endpoint can have up to 10 served models.
- trafficConfig Property Map - A single block represents the traffic split configuration amongst the served models.
ModelServingConfigAutoCaptureConfig, ModelServingConfigAutoCaptureConfigArgs
- Catalog
Name string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- Enabled bool
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable it again.
- Schema
Name string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- TableNamePrefix string - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- Catalog
Name string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- Enabled bool
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable it again.
- Schema
Name string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- TableNamePrefix string - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog
Name String - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled Boolean
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable it again.
- schema
Name String - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- tableNamePrefix String - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog
Name string - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled boolean
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable it again.
- schema
Name string - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- tableNamePrefix string - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog_
name str - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled bool
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable it again.
- schema_
name str - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- table_name_prefix str - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
- catalog
Name String - The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.
- enabled Boolean
- If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable it again.
- schema
Name String - The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.
- tableNamePrefix String - The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.
ModelServingConfigServedEntity, ModelServingConfigServedEntityArgs
- Entity
Name string - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - Entity
Version string - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - Environment
Vars Dictionary<string, string> - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and is subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- External
Model ModelServing Config Served Entity External Model - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. An existing endpoint withexternal_model
can not be updated to an endpoint withoutexternal_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - InstanceProfileArn string - ARN of the instance profile that the served entity uses to access AWS resources.
- MaxProvisionedThroughput int - The maximum tokens per second that the endpoint can scale up to.
- MinProvisionedThroughput int - The minimum tokens per second that the endpoint can scale down to.
- Name string
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - ScaleToZeroEnabled bool - Whether the compute resources for the served entity should scale down to zero.
- Workload
Size string - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - Workload
Type string - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types likeGPU_SMALL
and others. See the available GPU types.
- Entity
Name string - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - Entity
Version string - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - Environment
Vars map[string]string - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and is subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- External
Model ModelServing Config Served Entity External Model - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. An existing endpoint withexternal_model
can not be updated to an endpoint withoutexternal_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - InstanceProfileArn string - ARN of the instance profile that the served entity uses to access AWS resources.
- MaxProvisionedThroughput int - The maximum tokens per second that the endpoint can scale up to.
- MinProvisionedThroughput int - The minimum tokens per second that the endpoint can scale down to.
- Name string
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - ScaleToZeroEnabled bool - Whether the compute resources for the served entity should scale down to zero.
- Workload
Size string - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - Workload
Type string - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types likeGPU_SMALL
and others. See the available GPU types.
- entity
Name String - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - entity
Version String - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - environment
Vars Map<String,String> - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and is subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- external
Model ModelServing Config Served Entity External Model - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. An existing endpoint withexternal_model
can not be updated to an endpoint withoutexternal_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - instanceProfileArn String - ARN of the instance profile that the served entity uses to access AWS resources.
- maxProvisionedThroughput Integer - The maximum tokens per second that the endpoint can scale up to.
- minProvisionedThroughput Integer - The minimum tokens per second that the endpoint can scale down to.
- name String
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - scaleToZeroEnabled Boolean - Whether the compute resources for the served entity should scale down to zero.
- workload
Size String - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - workload
Type String - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types likeGPU_SMALL
and others. See the available GPU types.
- entity
Name string - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - entity
Version string - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - environment
Vars {[key: string]: string} - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and is subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- external
Model ModelServing Config Served Entity External Model - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. An existing endpoint withexternal_model
can not be updated to an endpoint withoutexternal_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - instanceProfileArn string - ARN of the instance profile that the served entity uses to access AWS resources.
- maxProvisionedThroughput number - The maximum tokens per second that the endpoint can scale up to.
- minProvisionedThroughput number - The minimum tokens per second that the endpoint can scale down to.
- name string
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - scaleToZeroEnabled boolean - Whether the compute resources for the served entity should scale down to zero.
- workload
Size string - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - workload
Type string - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types likeGPU_SMALL
and others. See the available GPU types.
- entity_
name str - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - entity_
version str - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - environment_
vars Mapping[str, str] - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and is subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- external_
model ModelServing Config Served Entity External Model - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. An existing endpoint withexternal_model
can not be updated to an endpoint withoutexternal_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - instance_profile_arn str - ARN of the instance profile that the served entity uses to access AWS resources.
- max_provisioned_throughput int - The maximum tokens per second that the endpoint can scale up to.
- min_provisioned_throughput int - The minimum tokens per second that the endpoint can scale down to.
- name str
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - scale_to_zero_enabled bool - Whether the compute resources for the served entity should scale down to zero.
- workload_
size str - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - workload_
type str - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types likeGPU_SMALL
and others. See the available GPU types.
- entity
Name String - The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC), or a function of type
FEATURE_SPEC
in the UC. If it is a UC object, the full name of the object should be given in the form ofcatalog_name.schema_name.model_name
. - entity
Version String - The version of the model in Databricks Model Registry to be served or empty if the entity is a
FEATURE_SPEC
. - environment
Vars Map<String> - An object containing a set of optional, user-specified environment variable key-value pairs used for serving this entity. Note: this is an experimental feature and is subject to change. Example entity environment variables that refer to Databricks secrets:
{"OPENAI_API_KEY": "{{secrets/my_scope/my_key}}", "DATABRICKS_TOKEN": "{{secrets/my_scope2/my_key2}}"}
- external
Model Property Map - The external model to be served. NOTE: Only one of
external_model
and (entity_name
,entity_version
,workload_size
,workload_type
, andscale_to_zero_enabled
) can be specified with the latter set being used for custom model serving for a Databricks registered model. When anexternal_model
is present, the served entities list can only have oneserved_entity
object. An existing endpoint withexternal_model
can not be updated to an endpoint withoutexternal_model
. If the endpoint is created withoutexternal_model
, users cannot update it to addexternal_model
later. - instanceProfileArn String - ARN of the instance profile that the served entity uses to access AWS resources.
- maxProvisionedThroughput Number - The maximum tokens per second that the endpoint can scale up to.
- minProvisionedThroughput Number - The minimum tokens per second that the endpoint can scale down to.
- name String
- The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores. If not specified for an external model, this field defaults to
external_model.name
, with '.' and ':' replaced with '-', and if not specified for other entities, it defaults to -. - scaleToZeroEnabled Boolean - Whether the compute resources for the served entity should scale down to zero.
- workload
Size String - The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are
Small
(4 - 4 provisioned concurrency),Medium
(8 - 16 provisioned concurrency), andLarge
(16 - 64 provisioned concurrency). Ifscale-to-zero
is enabled, the lower bound of the provisioned concurrency for each workload size is 0. - workload
Type String - The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is
CPU
. For deep learning workloads, GPU acceleration is available by selecting workload types likeGPU_SMALL
and others. See the available GPU types.
ModelServingConfigServedEntityExternalModel, ModelServingConfigServedEntityExternalModelArgs
- Name string
- The name of the external model.
- Provider string
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
,anthropic
,amazon-bedrock
,cohere
,databricks-model-serving
,google-cloud-vertex-ai
,openai
, andpalm
. - Task string
- The task type of the external model.
- Ai21labsConfig ModelServingConfigServedEntityExternalModelAi21labsConfig - AI21Labs Config
- AmazonBedrockConfig ModelServingConfigServedEntityExternalModelAmazonBedrockConfig - Amazon Bedrock Config
- AnthropicConfig ModelServingConfigServedEntityExternalModelAnthropicConfig - Anthropic Config
- CohereConfig ModelServingConfigServedEntityExternalModelCohereConfig - Cohere Config
- DatabricksModelServingConfig ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig - Databricks Model Serving Config
- GoogleCloudVertexAiConfig ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig - Google Cloud Vertex AI Config.
- OpenaiConfig ModelServingConfigServedEntityExternalModelOpenaiConfig - OpenAI Config
- PalmConfig ModelServingConfigServedEntityExternalModelPalmConfig - PaLM Config
- Name string
- The name of the external model.
- Provider string
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
,anthropic
,amazon-bedrock
,cohere
,databricks-model-serving
,google-cloud-vertex-ai
,openai
, andpalm
. - Task string
- The task type of the external model.
- Ai21labsConfig ModelServingConfigServedEntityExternalModelAi21labsConfig - AI21Labs Config
- AmazonBedrockConfig ModelServingConfigServedEntityExternalModelAmazonBedrockConfig - Amazon Bedrock Config
- AnthropicConfig ModelServingConfigServedEntityExternalModelAnthropicConfig - Anthropic Config
- CohereConfig ModelServingConfigServedEntityExternalModelCohereConfig - Cohere Config
- DatabricksModelServingConfig ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig - Databricks Model Serving Config
- GoogleCloudVertexAiConfig ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig - Google Cloud Vertex AI Config.
- OpenaiConfig ModelServingConfigServedEntityExternalModelOpenaiConfig - OpenAI Config
- PalmConfig ModelServingConfigServedEntityExternalModelPalmConfig - PaLM Config
- name String
- The name of the external model.
- provider String
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
,anthropic
,amazon-bedrock
,cohere
,databricks-model-serving
,google-cloud-vertex-ai
,openai
, andpalm
. - task String
- The task type of the external model.
- ai21labsConfig ModelServingConfigServedEntityExternalModelAi21labsConfig - AI21Labs Config
- amazonBedrockConfig ModelServingConfigServedEntityExternalModelAmazonBedrockConfig - Amazon Bedrock Config
- anthropicConfig ModelServingConfigServedEntityExternalModelAnthropicConfig - Anthropic Config
- cohereConfig ModelServingConfigServedEntityExternalModelCohereConfig - Cohere Config
- databricksModelServingConfig ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig - Databricks Model Serving Config
- googleCloudVertexAiConfig ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig - Google Cloud Vertex AI Config.
- openaiConfig ModelServingConfigServedEntityExternalModelOpenaiConfig - OpenAI Config
- palmConfig ModelServingConfigServedEntityExternalModelPalmConfig - PaLM Config
- name string
- The name of the external model.
- provider string
- The name of the provider for the external model. Currently, the supported providers are
ai21labs
,anthropic
,amazon-bedrock
,cohere
,databricks-model-serving
,google-cloud-vertex-ai
,openai
, andpalm
. - task string
- The task type of the external model.
- ai21labsConfig ModelServingConfigServedEntityExternalModelAi21labsConfig - AI21Labs Config
- amazonBedrockConfig ModelServingConfigServedEntityExternalModelAmazonBedrockConfig - Amazon Bedrock Config
- anthropicConfig ModelServingConfigServedEntityExternalModelAnthropicConfig - Anthropic Config
- cohereConfig ModelServingConfigServedEntityExternalModelCohereConfig - Cohere Config
- databricksModelServingConfig ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig - Databricks Model Serving Config
- googleCloudVertexAiConfig ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig - Google Cloud Vertex AI Config.
- openaiConfig ModelServingConfigServedEntityExternalModelOpenaiConfig - OpenAI Config
- palmConfig ModelServingConfigServedEntityExternalModelPalmConfig - PaLM Config
- name str
- The name of the external model.
- provider str
- The name of the provider for the external model. Currently, the supported providers are
`ai21labs`, `anthropic`, `amazon-bedrock`, `cohere`, `databricks-model-serving`, `google-cloud-vertex-ai`, `openai`, and `palm`.
- task str
- The task type of the external model.
- ai21labs_config ModelServingConfigServedEntityExternalModelAi21labsConfig - AI21Labs Config
- amazon_bedrock_config ModelServingConfigServedEntityExternalModelAmazonBedrockConfig - Amazon Bedrock Config
- anthropic_config ModelServingConfigServedEntityExternalModelAnthropicConfig - Anthropic Config
- cohere_config ModelServingConfigServedEntityExternalModelCohereConfig - Cohere Config
- databricks_model_serving_config ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig - Databricks Model Serving Config
- google_cloud_vertex_ai_config ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig - Google Cloud Vertex AI Config.
- openai_config ModelServingConfigServedEntityExternalModelOpenaiConfig - OpenAI Config
- palm_config ModelServingConfigServedEntityExternalModelPalmConfig - PaLM Config
- name String
- The name of the external model.
- provider String
- The name of the provider for the external model. Currently, the supported providers are
`ai21labs`, `anthropic`, `amazon-bedrock`, `cohere`, `databricks-model-serving`, `google-cloud-vertex-ai`, `openai`, and `palm`.
- task String
- The task type of the external model.
- ai21labsConfig Property Map - AI21Labs Config
- amazonBedrockConfig Property Map - Amazon Bedrock Config
- anthropicConfig Property Map - Anthropic Config
- cohereConfig Property Map - Cohere Config
- databricksModelServingConfig Property Map - Databricks Model Serving Config
- googleCloudVertexAiConfig Property Map - Google Cloud Vertex AI Config.
- openaiConfig Property Map - OpenAI Config
- palmConfig Property Map - PaLM Config
ModelServingConfigServedEntityExternalModelAi21labsConfig, ModelServingConfigServedEntityExternalModelAi21labsConfigArgs
- Ai21labsApiKey string - The Databricks secret key reference for an AI21Labs API key.
- Ai21labsApiKeyPlaintext string - An AI21 Labs API key provided as a plaintext string.
- Ai21labsApiKey string - The Databricks secret key reference for an AI21Labs API key.
- Ai21labsApiKeyPlaintext string - An AI21 Labs API key provided as a plaintext string.
- ai21labsApiKey String - The Databricks secret key reference for an AI21Labs API key.
- ai21labsApiKeyPlaintext String - An AI21 Labs API key provided as a plaintext string.
- ai21labsApiKey string - The Databricks secret key reference for an AI21Labs API key.
- ai21labsApiKeyPlaintext string - An AI21 Labs API key provided as a plaintext string.
- ai21labs_api_key str - The Databricks secret key reference for an AI21Labs API key.
- ai21labs_api_key_plaintext str - An AI21 Labs API key provided as a plaintext string.
- ai21labsApiKey String - The Databricks secret key reference for an AI21Labs API key.
- ai21labsApiKeyPlaintext String - An AI21 Labs API key provided as a plaintext string.
ModelServingConfigServedEntityExternalModelAmazonBedrockConfig, ModelServingConfigServedEntityExternalModelAmazonBedrockConfigArgs
- AwsRegion string - The AWS region to use. Bedrock has to be enabled there.
- BedrockProvider string - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: `Anthropic`, `Cohere`, `AI21Labs`, `Amazon`.
- AwsAccessKeyId string - The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
- AwsAccessKeyIdPlaintext string - An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string.
- AwsSecretAccessKey string - The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
- AwsSecretAccessKeyPlaintext string - An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string.
- AwsRegion string - The AWS region to use. Bedrock has to be enabled there.
- BedrockProvider string - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: `Anthropic`, `Cohere`, `AI21Labs`, `Amazon`.
- AwsAccessKeyId string - The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
- AwsAccessKeyIdPlaintext string - An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string.
- AwsSecretAccessKey string - The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
- AwsSecretAccessKeyPlaintext string - An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string.
- awsRegion String - The AWS region to use. Bedrock has to be enabled there.
- bedrockProvider String - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: `Anthropic`, `Cohere`, `AI21Labs`, `Amazon`.
- awsAccessKeyId String - The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
- awsAccessKeyIdPlaintext String - An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string.
- awsSecretAccessKey String - The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
- awsSecretAccessKeyPlaintext String - An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string.
- awsRegion string - The AWS region to use. Bedrock has to be enabled there.
- bedrockProvider string - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: `Anthropic`, `Cohere`, `AI21Labs`, `Amazon`.
- awsAccessKeyId string - The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
- awsAccessKeyIdPlaintext string - An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string.
- awsSecretAccessKey string - The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
- awsSecretAccessKeyPlaintext string - An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string.
- aws_region str - The AWS region to use. Bedrock has to be enabled there.
- bedrock_provider str - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: `Anthropic`, `Cohere`, `AI21Labs`, `Amazon`.
- aws_access_key_id str - The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
- aws_access_key_id_plaintext str - An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string.
- aws_secret_access_key str - The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
- aws_secret_access_key_plaintext str - An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string.
- awsRegion String - The AWS region to use. Bedrock has to be enabled there.
- bedrockProvider String - The underlying provider in Amazon Bedrock. Supported values (case insensitive) include: `Anthropic`, `Cohere`, `AI21Labs`, `Amazon`.
- awsAccessKeyId String - The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.
- awsAccessKeyIdPlaintext String - An AWS access key ID with permissions to interact with Bedrock services provided as a plaintext string.
- awsSecretAccessKey String - The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.
- awsSecretAccessKeyPlaintext String - An AWS secret access key paired with the access key ID, with permissions to interact with Bedrock services provided as a plaintext string.
ModelServingConfigServedEntityExternalModelAnthropicConfig, ModelServingConfigServedEntityExternalModelAnthropicConfigArgs
- AnthropicApiKey string - The Databricks secret key reference for an Anthropic API key.
- AnthropicApiKeyPlaintext string - The Anthropic API key provided as a plaintext string.
- AnthropicApiKey string - The Databricks secret key reference for an Anthropic API key.
- AnthropicApiKeyPlaintext string - The Anthropic API key provided as a plaintext string.
- anthropicApiKey String - The Databricks secret key reference for an Anthropic API key.
- anthropicApiKeyPlaintext String - The Anthropic API key provided as a plaintext string.
- anthropicApiKey string - The Databricks secret key reference for an Anthropic API key.
- anthropicApiKeyPlaintext string - The Anthropic API key provided as a plaintext string.
- anthropic_api_key str - The Databricks secret key reference for an Anthropic API key.
- anthropic_api_key_plaintext str - The Anthropic API key provided as a plaintext string.
- anthropicApiKey String - The Databricks secret key reference for an Anthropic API key.
- anthropicApiKeyPlaintext String - The Anthropic API key provided as a plaintext string.
ModelServingConfigServedEntityExternalModelCohereConfig, ModelServingConfigServedEntityExternalModelCohereConfigArgs
- CohereApiBase string
- CohereApiKey string - The Databricks secret key reference for a Cohere API key.
- CohereApiKeyPlaintext string - The Cohere API key provided as a plaintext string.
- CohereApiBase string
- CohereApiKey string - The Databricks secret key reference for a Cohere API key.
- CohereApiKeyPlaintext string - The Cohere API key provided as a plaintext string.
- cohereApiBase String
- cohereApiKey String - The Databricks secret key reference for a Cohere API key.
- cohereApiKeyPlaintext String - The Cohere API key provided as a plaintext string.
- cohereApiBase string
- cohereApiKey string - The Databricks secret key reference for a Cohere API key.
- cohereApiKeyPlaintext string - The Cohere API key provided as a plaintext string.
- cohere_api_base str
- cohere_api_key str - The Databricks secret key reference for a Cohere API key.
- cohere_api_key_plaintext str - The Cohere API key provided as a plaintext string.
- cohereApiBase String
- cohereApiKey String - The Databricks secret key reference for a Cohere API key.
- cohereApiKeyPlaintext String - The Cohere API key provided as a plaintext string.
ModelServingConfigServedEntityExternalModelDatabricksModelServingConfig, ModelServingConfigServedEntityExternalModelDatabricksModelServingConfigArgs
- DatabricksWorkspaceUrl string - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
- DatabricksApiToken string - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
- DatabricksApiTokenPlaintext string - The Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.
- DatabricksWorkspaceUrl string - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
- DatabricksApiToken string - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
- DatabricksApiTokenPlaintext string - The Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.
- databricksWorkspaceUrl String - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
- databricksApiToken String - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
- databricksApiTokenPlaintext String - The Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.
- databricksWorkspaceUrl string - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
- databricksApiToken string - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
- databricksApiTokenPlaintext string - The Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.
- databricks_workspace_url str - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
- databricks_api_token str - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
- databricks_api_token_plaintext str - The Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.
- databricksWorkspaceUrl String - The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.
- databricksApiToken String - The Databricks secret key reference for a Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model.
- databricksApiTokenPlaintext String - The Databricks API token that corresponds to a user or service principal with Can Query access to the model serving endpoint pointed to by this external model provided as a plaintext string.
ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfig, ModelServingConfigServedEntityExternalModelGoogleCloudVertexAiConfigArgs
- PrivateKey string - The Databricks secret key reference for a private key for the service account that has access to the Google Cloud Vertex AI Service.
- PrivateKeyPlaintext string - The private key for the service account that has access to the Google Cloud Vertex AI Service is provided as a plaintext secret.
- ProjectId string - This is the Google Cloud project id that the service account is associated with.
- Region string - This is the region for the Google Cloud Vertex AI Service.
- PrivateKey string - The Databricks secret key reference for a private key for the service account that has access to the Google Cloud Vertex AI Service.
- PrivateKeyPlaintext string - The private key for the service account that has access to the Google Cloud Vertex AI Service is provided as a plaintext secret.
- ProjectId string - This is the Google Cloud project id that the service account is associated with.
- Region string - This is the region for the Google Cloud Vertex AI Service.
- privateKey String - The Databricks secret key reference for a private key for the service account that has access to the Google Cloud Vertex AI Service.
- privateKeyPlaintext String - The private key for the service account that has access to the Google Cloud Vertex AI Service is provided as a plaintext secret.
- projectId String - This is the Google Cloud project id that the service account is associated with.
- region String - This is the region for the Google Cloud Vertex AI Service.
- privateKey string - The Databricks secret key reference for a private key for the service account that has access to the Google Cloud Vertex AI Service.
- privateKeyPlaintext string - The private key for the service account that has access to the Google Cloud Vertex AI Service is provided as a plaintext secret.
- projectId string - This is the Google Cloud project id that the service account is associated with.
- region string - This is the region for the Google Cloud Vertex AI Service.
- private_key str - The Databricks secret key reference for a private key for the service account that has access to the Google Cloud Vertex AI Service.
- private_key_plaintext str - The private key for the service account that has access to the Google Cloud Vertex AI Service is provided as a plaintext secret.
- project_id str - This is the Google Cloud project id that the service account is associated with.
- region str - This is the region for the Google Cloud Vertex AI Service.
- privateKey String - The Databricks secret key reference for a private key for the service account that has access to the Google Cloud Vertex AI Service.
- privateKeyPlaintext String - The private key for the service account that has access to the Google Cloud Vertex AI Service is provided as a plaintext secret.
- projectId String - This is the Google Cloud project id that the service account is associated with.
- region String - This is the region for the Google Cloud Vertex AI Service.
ModelServingConfigServedEntityExternalModelOpenaiConfig, ModelServingConfigServedEntityExternalModelOpenaiConfigArgs
- MicrosoftEntraClientId string - This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.
- MicrosoftEntraClientSecret string - The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.
- MicrosoftEntraClientSecretPlaintext string - The client secret used for Microsoft Entra ID authentication provided as a plaintext string.
- MicrosoftEntraTenantId string - This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.
- OpenaiApiBase string - This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required and is the base URL for the Azure OpenAI API service provided by Azure.
- OpenaiApiKey string - The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
- OpenaiApiKeyPlaintext string - The OpenAI API key using the OpenAI or Azure service provided as a plaintext string.
- OpenaiApiType string - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and this parameter represents the preferred security access validation protocol. For access token validation, use `azure`. For authentication using Azure Active Directory (Azure AD), use `azuread`.
- OpenaiApiVersion string - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required and is the version of the Azure OpenAI service to utilize, specified by a date.
- OpenaiDeploymentName string - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
- OpenaiOrganization string - This is an optional field to specify the organization in OpenAI or Azure OpenAI.
- MicrosoftEntraClientId string - This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.
- MicrosoftEntraClientSecret string - The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.
- MicrosoftEntraClientSecretPlaintext string - The client secret used for Microsoft Entra ID authentication provided as a plaintext string.
- MicrosoftEntraTenantId string - This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.
- OpenaiApiBase string - This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required and is the base URL for the Azure OpenAI API service provided by Azure.
- OpenaiApiKey string - The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
- OpenaiApiKeyPlaintext string - The OpenAI API key using the OpenAI or Azure service provided as a plaintext string.
- OpenaiApiType string - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and this parameter represents the preferred security access validation protocol. For access token validation, use `azure`. For authentication using Azure Active Directory (Azure AD), use `azuread`.
- OpenaiApiVersion string - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required and is the version of the Azure OpenAI service to utilize, specified by a date.
- OpenaiDeploymentName string - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
- OpenaiOrganization string - This is an optional field to specify the organization in OpenAI or Azure OpenAI.
- microsoftEntraClientId String - This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.
- microsoftEntraClientSecret String - The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.
- microsoftEntraClientSecretPlaintext String - The client secret used for Microsoft Entra ID authentication provided as a plaintext string.
- microsoftEntraTenantId String - This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.
- openaiApiBase String - This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required and is the base URL for the Azure OpenAI API service provided by Azure.
- openaiApiKey String - The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
- openaiApiKeyPlaintext String - The OpenAI API key using the OpenAI or Azure service provided as a plaintext string.
- openaiApiType String - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and this parameter represents the preferred security access validation protocol. For access token validation, use `azure`. For authentication using Azure Active Directory (Azure AD), use `azuread`.
- openaiApiVersion String - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required and is the version of the Azure OpenAI service to utilize, specified by a date.
- openaiDeploymentName String - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
- openaiOrganization String - This is an optional field to specify the organization in OpenAI or Azure OpenAI.
- microsoftEntraClientId string - This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.
- microsoftEntraClientSecret string - The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.
- microsoftEntraClientSecretPlaintext string - The client secret used for Microsoft Entra ID authentication provided as a plaintext string.
- microsoftEntraTenantId string - This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.
- openaiApiBase string - This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required and is the base URL for the Azure OpenAI API service provided by Azure.
- openaiApiKey string - The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
- openaiApiKeyPlaintext string - The OpenAI API key using the OpenAI or Azure service provided as a plaintext string.
- openaiApiType string - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and this parameter represents the preferred security access validation protocol. For access token validation, use `azure`. For authentication using Azure Active Directory (Azure AD), use `azuread`.
- openaiApiVersion string - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required and is the version of the Azure OpenAI service to utilize, specified by a date.
- openaiDeploymentName string - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
- openaiOrganization string - This is an optional field to specify the organization in OpenAI or Azure OpenAI.
- microsoft_entra_client_id str - This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.
- microsoft_entra_client_secret str - The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.
- microsoft_entra_client_secret_plaintext str - The client secret used for Microsoft Entra ID authentication provided as a plaintext string.
- microsoft_entra_tenant_id str - This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.
- openai_api_base str - This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required and is the base URL for the Azure OpenAI API service provided by Azure.
- openai_api_key str - The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
- openai_api_key_plaintext str - The OpenAI API key using the OpenAI or Azure service provided as a plaintext string.
- openai_api_type str - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and this parameter represents the preferred security access validation protocol. For access token validation, use `azure`. For authentication using Azure Active Directory (Azure AD), use `azuread`.
- openai_api_version str - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required and is the version of the Azure OpenAI service to utilize, specified by a date.
- openai_deployment_name str - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
- openai_organization str - This is an optional field to specify the organization in OpenAI or Azure OpenAI.
- microsoftEntraClientId String - This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.
- microsoftEntraClientSecret String - The Databricks secret key reference for a client secret used for Microsoft Entra ID authentication.
- microsoftEntraClientSecretPlaintext String - The client secret used for Microsoft Entra ID authentication provided as a plaintext string.
- microsoftEntraTenantId String - This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.
- openaiApiBase String - This is the base URL for the OpenAI API (default: "https://api.openai.com/v1"). For Azure OpenAI, this field is required and is the base URL for the Azure OpenAI API service provided by Azure.
- openaiApiKey String - The Databricks secret key reference for an OpenAI or Azure OpenAI API key.
- openaiApiKeyPlaintext String - The OpenAI API key using the OpenAI or Azure service provided as a plaintext string.
- openaiApiType String - This is an optional field to specify the type of OpenAI API to use. For Azure OpenAI, this field is required, and this parameter represents the preferred security access validation protocol. For access token validation, use `azure`. For authentication using Azure Active Directory (Azure AD), use `azuread`.
- openaiApiVersion String - This is an optional field to specify the OpenAI API version. For Azure OpenAI, this field is required and is the version of the Azure OpenAI service to utilize, specified by a date.
- openaiDeploymentName String - This field is only required for Azure OpenAI and is the name of the deployment resource for the Azure OpenAI service.
- openaiOrganization String - This is an optional field to specify the organization in OpenAI or Azure OpenAI.
ModelServingConfigServedEntityExternalModelPalmConfig, ModelServingConfigServedEntityExternalModelPalmConfigArgs
- PalmApiKey string - The Databricks secret key reference for a PaLM API key.
- PalmApiKeyPlaintext string - The PaLM API key provided as a plaintext string.
- PalmApiKey string - The Databricks secret key reference for a PaLM API key.
- PalmApiKeyPlaintext string - The PaLM API key provided as a plaintext string.
- palmApiKey String - The Databricks secret key reference for a PaLM API key.
- palmApiKeyPlaintext String - The PaLM API key provided as a plaintext string.
- palmApiKey string - The Databricks secret key reference for a PaLM API key.
- palmApiKeyPlaintext string - The PaLM API key provided as a plaintext string.
- palm_api_key str - The Databricks secret key reference for a PaLM API key.
- palm_api_key_plaintext str - The PaLM API key provided as a plaintext string.
- palmApiKey String - The Databricks secret key reference for a PaLM API key.
- palmApiKeyPlaintext String - The PaLM API key provided as a plaintext string.
ModelServingConfigServedModel, ModelServingConfigServedModelArgs
- ModelName string - The name of the model in Databricks Model Registry to be served.
- ModelVersion string - The version of the model in Databricks Model Registry to be served.
- EnvironmentVars Dictionary&lt;string, string&gt; - a map of environment variable names/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax: `{{secrets/secret_scope/secret_key}}`.
- InstanceProfileArn string - ARN of the instance profile that the served model will use to access AWS resources.
- MaxProvisionedThroughput int - The maximum tokens per second that the endpoint can scale up to.
- MinProvisionedThroughput int - The minimum tokens per second that the endpoint can scale down to.
- Name string - The name of a served model. It must be unique across an endpoint. If not specified, this field will default to `modelname-modelversion`. A served model name can consist of alphanumeric characters, dashes, and underscores.
- ScaleToZeroEnabled bool - Whether the compute resources for the served model should scale down to zero. If `scale-to-zero` is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is `true`.
- WorkloadSize string - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are `Small` (4 - 4 provisioned concurrency), `Medium` (8 - 16 provisioned concurrency), and `Large` (16 - 64 provisioned concurrency).
- WorkloadType string - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like `GPU_SMALL` and others. See the documentation for all options. The default value is `CPU`.
- ModelName string - The name of the model in Databricks Model Registry to be served.
- ModelVersion string - The version of the model in Databricks Model Registry to be served.
- EnvironmentVars map[string]string - a map of environment variable names/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax: {{secrets/secret_scope/secret_key}}.
- InstanceProfileArn string - ARN of the instance profile that the served model will use to access AWS resources.
- MaxProvisionedThroughput int - The maximum tokens per second that the endpoint can scale up to.
- MinProvisionedThroughput int - The minimum tokens per second that the endpoint can scale down to.
- Name string - The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.
- ScaleToZeroEnabled bool - Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.
- WorkloadSize string - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency).
- WorkloadType string - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the documentation for all options. The default value is CPU.
- modelName String - The name of the model in Databricks Model Registry to be served.
- modelVersion String - The version of the model in Databricks Model Registry to be served.
- environmentVars Map<String,String> - a map of environment variable names/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax: {{secrets/secret_scope/secret_key}}.
- instanceProfileArn String - ARN of the instance profile that the served model will use to access AWS resources.
- maxProvisionedThroughput Integer - The maximum tokens per second that the endpoint can scale up to.
- minProvisionedThroughput Integer - The minimum tokens per second that the endpoint can scale down to.
- name String - The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.
- scaleToZeroEnabled Boolean - Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.
- workloadSize String - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency).
- workloadType String - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the documentation for all options. The default value is CPU.
- modelName string - The name of the model in Databricks Model Registry to be served.
- modelVersion string - The version of the model in Databricks Model Registry to be served.
- environmentVars {[key: string]: string} - a map of environment variable names/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax: {{secrets/secret_scope/secret_key}}.
- instanceProfileArn string - ARN of the instance profile that the served model will use to access AWS resources.
- maxProvisionedThroughput number - The maximum tokens per second that the endpoint can scale up to.
- minProvisionedThroughput number - The minimum tokens per second that the endpoint can scale down to.
- name string - The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.
- scaleToZeroEnabled boolean - Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.
- workloadSize string - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency).
- workloadType string - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the documentation for all options. The default value is CPU.
- model_name str - The name of the model in Databricks Model Registry to be served.
- model_version str - The version of the model in Databricks Model Registry to be served.
- environment_vars Mapping[str, str] - a map of environment variable names/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax: {{secrets/secret_scope/secret_key}}.
- instance_profile_arn str - ARN of the instance profile that the served model will use to access AWS resources.
- max_provisioned_throughput int - The maximum tokens per second that the endpoint can scale up to.
- min_provisioned_throughput int - The minimum tokens per second that the endpoint can scale down to.
- name str - The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.
- scale_to_zero_enabled bool - Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.
- workload_size str - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency).
- workload_type str - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the documentation for all options. The default value is CPU.
- modelName String - The name of the model in Databricks Model Registry to be served.
- modelVersion String - The version of the model in Databricks Model Registry to be served.
- environmentVars Map<String> - a map of environment variable names/values that will be used for serving this model. Environment variables may refer to Databricks secrets using the standard syntax: {{secrets/secret_scope/secret_key}}.
- instanceProfileArn String - ARN of the instance profile that the served model will use to access AWS resources.
- maxProvisionedThroughput Number - The maximum tokens per second that the endpoint can scale up to.
- minProvisionedThroughput Number - The minimum tokens per second that the endpoint can scale down to.
- name String - The name of a served model. It must be unique across an endpoint. If not specified, this field will default to modelname-modelversion. A served model name can consist of alphanumeric characters, dashes, and underscores.
- scaleToZeroEnabled Boolean - Whether the compute resources for the served model should scale down to zero. If scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size will be 0. The default value is true.
- workloadSize String - The workload size of the served model. The workload size corresponds to a range of provisioned concurrency that the compute will autoscale between. A single unit of provisioned concurrency can process one request at a time. Valid workload sizes are Small (4 - 4 provisioned concurrency), Medium (8 - 16 provisioned concurrency), and Large (16 - 64 provisioned concurrency).
- workloadType String - The workload type of the served model. The workload type selects which type of compute to use in the endpoint. For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See the documentation for all options. The default value is CPU.
ModelServingConfigTrafficConfig, ModelServingConfigTrafficConfigArgs
- Routes List<ModelServingConfigTrafficConfigRoute> - Each block represents a route that defines traffic to each served entity. Each served_entity block needs to have a corresponding routes block.
- Routes []ModelServingConfigTrafficConfigRoute - Each block represents a route that defines traffic to each served entity. Each served_entity block needs to have a corresponding routes block.
- routes List<ModelServingConfigTrafficConfigRoute> - Each block represents a route that defines traffic to each served entity. Each served_entity block needs to have a corresponding routes block.
- routes ModelServingConfigTrafficConfigRoute[] - Each block represents a route that defines traffic to each served entity. Each served_entity block needs to have a corresponding routes block.
- routes Sequence[ModelServingConfigTrafficConfigRoute] - Each block represents a route that defines traffic to each served entity. Each served_entity block needs to have a corresponding routes block.
- routes List<Property Map> - Each block represents a route that defines traffic to each served entity. Each served_entity block needs to have a corresponding routes block.
ModelServingConfigTrafficConfigRoute, ModelServingConfigTrafficConfigRouteArgs
- ServedModelName string
- TrafficPercentage int - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- ServedModelName string
- TrafficPercentage int - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- servedModelName String
- trafficPercentage Integer - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- servedModelName string
- trafficPercentage number - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- served_model_name str
- traffic_percentage int - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
- servedModelName String
- trafficPercentage Number - The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.
ModelServingRateLimit, ModelServingRateLimitArgs
- Calls int - Used to specify how many calls are allowed for a key within the renewal_period.
- RenewalPeriod string - Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
- Key string - Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
- Calls int - Used to specify how many calls are allowed for a key within the renewal_period.
- RenewalPeriod string - Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
- Key string - Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
- calls Integer - Used to specify how many calls are allowed for a key within the renewal_period.
- renewalPeriod String - Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
- key String - Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
- calls number - Used to specify how many calls are allowed for a key within the renewal_period.
- renewalPeriod string - Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
- key string - Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
- calls int - Used to specify how many calls are allowed for a key within the renewal_period.
- renewal_period str - Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
- key str - Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
- calls Number - Used to specify how many calls are allowed for a key within the renewal_period.
- renewalPeriod String - Renewal period field for a serving endpoint rate limit. Currently, only minute is supported.
- key String - Key field for a serving endpoint rate limit. Currently, only user and endpoint are supported, with endpoint being the default if not specified.
ModelServingTag, ModelServingTagArgs
Import
The model serving resource can be imported using the name of the endpoint.
bash
$ pulumi import databricks:index/modelServing:ModelServing this <model-serving-endpoint-name>
To learn more about importing existing cloud resources, see Importing resources.
Package Details
- Repository
- databricks pulumi/pulumi-databricks
- License
- Apache-2.0
- Notes
- This Pulumi package is based on the
databricks
Terraform Provider.