1. Packages
  2. AWS
  3. API Docs
  4. sagemaker
  5. EndpointConfiguration
AWS v6.60.0 published on Tuesday, Nov 19, 2024 by Pulumi

aws.sagemaker.EndpointConfiguration

Explore with Pulumi AI

aws logo
AWS v6.60.0 published on Tuesday, Nov 19, 2024 by Pulumi

    Provides a SageMaker endpoint configuration resource.

    Example Usage

    Basic usage (the examples below assume `m` is a SageMaker model resource defined elsewhere in the program):

    import * as pulumi from "@pulumi/pulumi";
    import * as aws from "@pulumi/aws";
    
    const ec = new aws.sagemaker.EndpointConfiguration("ec", {
        name: "my-endpoint-config",
        productionVariants: [{
            variantName: "variant-1",
            modelName: m.name,
            initialInstanceCount: 1,
            instanceType: "ml.t2.medium",
        }],
        tags: {
            Name: "foo",
        },
    });
    
    import pulumi
    import pulumi_aws as aws
    
    ec = aws.sagemaker.EndpointConfiguration("ec",
        name="my-endpoint-config",
        production_variants=[{
            "variant_name": "variant-1",
            "model_name": m["name"],
            "initial_instance_count": 1,
            "instance_type": "ml.t2.medium",
        }],
        tags={
            "Name": "foo",
        })
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/sagemaker"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_, err := sagemaker.NewEndpointConfiguration(ctx, "ec", &sagemaker.EndpointConfigurationArgs{
    			Name: pulumi.String("my-endpoint-config"),
    			ProductionVariants: sagemaker.EndpointConfigurationProductionVariantArray{
    				&sagemaker.EndpointConfigurationProductionVariantArgs{
    					VariantName:          pulumi.String("variant-1"),
    					ModelName:            pulumi.Any(m.Name),
    					InitialInstanceCount: pulumi.Int(1),
    					InstanceType:         pulumi.String("ml.t2.medium"),
    				},
    			},
    			Tags: pulumi.StringMap{
    				"Name": pulumi.String("foo"),
    			},
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Aws = Pulumi.Aws;
    
    return await Deployment.RunAsync(() => 
    {
        var ec = new Aws.Sagemaker.EndpointConfiguration("ec", new()
        {
            Name = "my-endpoint-config",
            ProductionVariants = new[]
            {
                new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantArgs
                {
                    VariantName = "variant-1",
                    ModelName = m.Name,
                    InitialInstanceCount = 1,
                    InstanceType = "ml.t2.medium",
                },
            },
            Tags = 
            {
                { "Name", "foo" },
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.aws.sagemaker.EndpointConfiguration;
    import com.pulumi.aws.sagemaker.EndpointConfigurationArgs;
    import com.pulumi.aws.sagemaker.inputs.EndpointConfigurationProductionVariantArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var ec = new EndpointConfiguration("ec", EndpointConfigurationArgs.builder()
                .name("my-endpoint-config")
                .productionVariants(EndpointConfigurationProductionVariantArgs.builder()
                    .variantName("variant-1")
                    .modelName(m.name())
                    .initialInstanceCount(1)
                    .instanceType("ml.t2.medium")
                    .build())
                .tags(Map.of("Name", "foo"))
                .build());
    
        }
    }
    
    resources:
      ec:
        type: aws:sagemaker:EndpointConfiguration
        properties:
          name: my-endpoint-config
          productionVariants:
            - variantName: variant-1
              modelName: ${m.name}
              initialInstanceCount: 1
              instanceType: ml.t2.medium
          tags:
            Name: foo
    

    Create EndpointConfiguration Resource

    Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.

    Constructor syntax

    new EndpointConfiguration(name: string, args: EndpointConfigurationArgs, opts?: CustomResourceOptions);
    @overload
    def EndpointConfiguration(resource_name: str,
                              args: EndpointConfigurationArgs,
                              opts: Optional[ResourceOptions] = None)
    
    @overload
    def EndpointConfiguration(resource_name: str,
                              opts: Optional[ResourceOptions] = None,
                              production_variants: Optional[Sequence[EndpointConfigurationProductionVariantArgs]] = None,
                              async_inference_config: Optional[EndpointConfigurationAsyncInferenceConfigArgs] = None,
                              data_capture_config: Optional[EndpointConfigurationDataCaptureConfigArgs] = None,
                              kms_key_arn: Optional[str] = None,
                              name: Optional[str] = None,
                              name_prefix: Optional[str] = None,
                              shadow_production_variants: Optional[Sequence[EndpointConfigurationShadowProductionVariantArgs]] = None,
                              tags: Optional[Mapping[str, str]] = None)
    func NewEndpointConfiguration(ctx *Context, name string, args EndpointConfigurationArgs, opts ...ResourceOption) (*EndpointConfiguration, error)
    public EndpointConfiguration(string name, EndpointConfigurationArgs args, CustomResourceOptions? opts = null)
    public EndpointConfiguration(String name, EndpointConfigurationArgs args)
    public EndpointConfiguration(String name, EndpointConfigurationArgs args, CustomResourceOptions options)
    
    type: aws:sagemaker:EndpointConfiguration
    properties: # The arguments to resource properties.
    options: # Bag of options to control resource's behavior.
    
    

    Parameters

    name string
    The unique name of the resource.
    args EndpointConfigurationArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    resource_name str
    The unique name of the resource.
    args EndpointConfigurationArgs
    The arguments to resource properties.
    opts ResourceOptions
    Bag of options to control resource's behavior.
    ctx Context
    Context object for the current deployment.
    name string
    The unique name of the resource.
    args EndpointConfigurationArgs
    The arguments to resource properties.
    opts ResourceOption
    Bag of options to control resource's behavior.
    name string
    The unique name of the resource.
    args EndpointConfigurationArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    name String
    The unique name of the resource.
    args EndpointConfigurationArgs
    The arguments to resource properties.
    options CustomResourceOptions
    Bag of options to control resource's behavior.

    Constructor example

    The following reference example uses placeholder values for all input properties.

    var endpointConfigurationResource = new Aws.Sagemaker.EndpointConfiguration("endpointConfigurationResource", new()
    {
        ProductionVariants = new[]
        {
            new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantArgs
            {
                ModelName = "string",
                InitialVariantWeight = 0,
                ManagedInstanceScaling = new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantManagedInstanceScalingArgs
                {
                    MaxInstanceCount = 0,
                    MinInstanceCount = 0,
                    Status = "string",
                },
                EnableSsmAccess = false,
                InferenceAmiVersion = "string",
                InitialInstanceCount = 0,
                AcceleratorType = "string",
                InstanceType = "string",
                CoreDumpConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantCoreDumpConfigArgs
                {
                    DestinationS3Uri = "string",
                    KmsKeyId = "string",
                },
                ModelDataDownloadTimeoutInSeconds = 0,
                ContainerStartupHealthCheckTimeoutInSeconds = 0,
                RoutingConfigs = new[]
                {
                    new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantRoutingConfigArgs
                    {
                        RoutingStrategy = "string",
                    },
                },
                ServerlessConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantServerlessConfigArgs
                {
                    MaxConcurrency = 0,
                    MemorySizeInMb = 0,
                    ProvisionedConcurrency = 0,
                },
                VariantName = "string",
                VolumeSizeInGb = 0,
            },
        },
        AsyncInferenceConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigArgs
        {
            OutputConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigOutputConfigArgs
            {
                S3OutputPath = "string",
                KmsKeyId = "string",
                NotificationConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs
                {
                    ErrorTopic = "string",
                    IncludeInferenceResponseIns = new[]
                    {
                        "string",
                    },
                    SuccessTopic = "string",
                },
                S3FailurePath = "string",
            },
            ClientConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigClientConfigArgs
            {
                MaxConcurrentInvocationsPerInstance = 0,
            },
        },
        DataCaptureConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationDataCaptureConfigArgs
        {
            CaptureOptions = new[]
            {
                new Aws.Sagemaker.Inputs.EndpointConfigurationDataCaptureConfigCaptureOptionArgs
                {
                    CaptureMode = "string",
                },
            },
            DestinationS3Uri = "string",
            InitialSamplingPercentage = 0,
            CaptureContentTypeHeader = new Aws.Sagemaker.Inputs.EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs
            {
                CsvContentTypes = new[]
                {
                    "string",
                },
                JsonContentTypes = new[]
                {
                    "string",
                },
            },
            EnableCapture = false,
            KmsKeyId = "string",
        },
        KmsKeyArn = "string",
        Name = "string",
        NamePrefix = "string",
        ShadowProductionVariants = new[]
        {
            new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantArgs
            {
                ModelName = "string",
                InitialVariantWeight = 0,
                ManagedInstanceScaling = new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs
                {
                    MaxInstanceCount = 0,
                    MinInstanceCount = 0,
                    Status = "string",
                },
                EnableSsmAccess = false,
                InferenceAmiVersion = "string",
                InitialInstanceCount = 0,
                AcceleratorType = "string",
                InstanceType = "string",
                CoreDumpConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs
                {
                    DestinationS3Uri = "string",
                    KmsKeyId = "string",
                },
                ModelDataDownloadTimeoutInSeconds = 0,
                ContainerStartupHealthCheckTimeoutInSeconds = 0,
                RoutingConfigs = new[]
                {
                    new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantRoutingConfigArgs
                    {
                        RoutingStrategy = "string",
                    },
                },
                ServerlessConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantServerlessConfigArgs
                {
                    MaxConcurrency = 0,
                    MemorySizeInMb = 0,
                    ProvisionedConcurrency = 0,
                },
                VariantName = "string",
                VolumeSizeInGb = 0,
            },
        },
        Tags = 
        {
            { "string", "string" },
        },
    });
    
    example, err := sagemaker.NewEndpointConfiguration(ctx, "endpointConfigurationResource", &sagemaker.EndpointConfigurationArgs{
    	ProductionVariants: sagemaker.EndpointConfigurationProductionVariantArray{
    		&sagemaker.EndpointConfigurationProductionVariantArgs{
    			ModelName:            pulumi.String("string"),
    			InitialVariantWeight: pulumi.Float64(0),
    			ManagedInstanceScaling: &sagemaker.EndpointConfigurationProductionVariantManagedInstanceScalingArgs{
    				MaxInstanceCount: pulumi.Int(0),
    				MinInstanceCount: pulumi.Int(0),
    				Status:           pulumi.String("string"),
    			},
    			EnableSsmAccess:      pulumi.Bool(false),
    			InferenceAmiVersion:  pulumi.String("string"),
    			InitialInstanceCount: pulumi.Int(0),
    			AcceleratorType:      pulumi.String("string"),
    			InstanceType:         pulumi.String("string"),
    			CoreDumpConfig: &sagemaker.EndpointConfigurationProductionVariantCoreDumpConfigArgs{
    				DestinationS3Uri: pulumi.String("string"),
    				KmsKeyId:         pulumi.String("string"),
    			},
    			ModelDataDownloadTimeoutInSeconds:           pulumi.Int(0),
    			ContainerStartupHealthCheckTimeoutInSeconds: pulumi.Int(0),
    			RoutingConfigs: sagemaker.EndpointConfigurationProductionVariantRoutingConfigArray{
    				&sagemaker.EndpointConfigurationProductionVariantRoutingConfigArgs{
    					RoutingStrategy: pulumi.String("string"),
    				},
    			},
    			ServerlessConfig: &sagemaker.EndpointConfigurationProductionVariantServerlessConfigArgs{
    				MaxConcurrency:         pulumi.Int(0),
    				MemorySizeInMb:         pulumi.Int(0),
    				ProvisionedConcurrency: pulumi.Int(0),
    			},
    			VariantName:    pulumi.String("string"),
    			VolumeSizeInGb: pulumi.Int(0),
    		},
    	},
    	AsyncInferenceConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigArgs{
    		OutputConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigOutputConfigArgs{
    			S3OutputPath: pulumi.String("string"),
    			KmsKeyId:     pulumi.String("string"),
    			NotificationConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs{
    				ErrorTopic: pulumi.String("string"),
    				IncludeInferenceResponseIns: pulumi.StringArray{
    					pulumi.String("string"),
    				},
    				SuccessTopic: pulumi.String("string"),
    			},
    			S3FailurePath: pulumi.String("string"),
    		},
    		ClientConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigClientConfigArgs{
    			MaxConcurrentInvocationsPerInstance: pulumi.Int(0),
    		},
    	},
    	DataCaptureConfig: &sagemaker.EndpointConfigurationDataCaptureConfigArgs{
    		CaptureOptions: sagemaker.EndpointConfigurationDataCaptureConfigCaptureOptionArray{
    			&sagemaker.EndpointConfigurationDataCaptureConfigCaptureOptionArgs{
    				CaptureMode: pulumi.String("string"),
    			},
    		},
    		DestinationS3Uri:          pulumi.String("string"),
    		InitialSamplingPercentage: pulumi.Int(0),
    		CaptureContentTypeHeader: &sagemaker.EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs{
    			CsvContentTypes: pulumi.StringArray{
    				pulumi.String("string"),
    			},
    			JsonContentTypes: pulumi.StringArray{
    				pulumi.String("string"),
    			},
    		},
    		EnableCapture: pulumi.Bool(false),
    		KmsKeyId:      pulumi.String("string"),
    	},
    	KmsKeyArn:  pulumi.String("string"),
    	Name:       pulumi.String("string"),
    	NamePrefix: pulumi.String("string"),
    	ShadowProductionVariants: sagemaker.EndpointConfigurationShadowProductionVariantArray{
    		&sagemaker.EndpointConfigurationShadowProductionVariantArgs{
    			ModelName:            pulumi.String("string"),
    			InitialVariantWeight: pulumi.Float64(0),
    			ManagedInstanceScaling: &sagemaker.EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs{
    				MaxInstanceCount: pulumi.Int(0),
    				MinInstanceCount: pulumi.Int(0),
    				Status:           pulumi.String("string"),
    			},
    			EnableSsmAccess:      pulumi.Bool(false),
    			InferenceAmiVersion:  pulumi.String("string"),
    			InitialInstanceCount: pulumi.Int(0),
    			AcceleratorType:      pulumi.String("string"),
    			InstanceType:         pulumi.String("string"),
    			CoreDumpConfig: &sagemaker.EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs{
    				DestinationS3Uri: pulumi.String("string"),
    				KmsKeyId:         pulumi.String("string"),
    			},
    			ModelDataDownloadTimeoutInSeconds:           pulumi.Int(0),
    			ContainerStartupHealthCheckTimeoutInSeconds: pulumi.Int(0),
    			RoutingConfigs: sagemaker.EndpointConfigurationShadowProductionVariantRoutingConfigArray{
    				&sagemaker.EndpointConfigurationShadowProductionVariantRoutingConfigArgs{
    					RoutingStrategy: pulumi.String("string"),
    				},
    			},
    			ServerlessConfig: &sagemaker.EndpointConfigurationShadowProductionVariantServerlessConfigArgs{
    				MaxConcurrency:         pulumi.Int(0),
    				MemorySizeInMb:         pulumi.Int(0),
    				ProvisionedConcurrency: pulumi.Int(0),
    			},
    			VariantName:    pulumi.String("string"),
    			VolumeSizeInGb: pulumi.Int(0),
    		},
    	},
    	Tags: pulumi.StringMap{
    		"string": pulumi.String("string"),
    	},
    })
    
    var endpointConfigurationResource = new EndpointConfiguration("endpointConfigurationResource", EndpointConfigurationArgs.builder()
        .productionVariants(EndpointConfigurationProductionVariantArgs.builder()
            .modelName("string")
            .initialVariantWeight(0)
            .managedInstanceScaling(EndpointConfigurationProductionVariantManagedInstanceScalingArgs.builder()
                .maxInstanceCount(0)
                .minInstanceCount(0)
                .status("string")
                .build())
            .enableSsmAccess(false)
            .inferenceAmiVersion("string")
            .initialInstanceCount(0)
            .acceleratorType("string")
            .instanceType("string")
            .coreDumpConfig(EndpointConfigurationProductionVariantCoreDumpConfigArgs.builder()
                .destinationS3Uri("string")
                .kmsKeyId("string")
                .build())
            .modelDataDownloadTimeoutInSeconds(0)
            .containerStartupHealthCheckTimeoutInSeconds(0)
            .routingConfigs(EndpointConfigurationProductionVariantRoutingConfigArgs.builder()
                .routingStrategy("string")
                .build())
            .serverlessConfig(EndpointConfigurationProductionVariantServerlessConfigArgs.builder()
                .maxConcurrency(0)
                .memorySizeInMb(0)
                .provisionedConcurrency(0)
                .build())
            .variantName("string")
            .volumeSizeInGb(0)
            .build())
        .asyncInferenceConfig(EndpointConfigurationAsyncInferenceConfigArgs.builder()
            .outputConfig(EndpointConfigurationAsyncInferenceConfigOutputConfigArgs.builder()
                .s3OutputPath("string")
                .kmsKeyId("string")
                .notificationConfig(EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs.builder()
                    .errorTopic("string")
                    .includeInferenceResponseIns("string")
                    .successTopic("string")
                    .build())
                .s3FailurePath("string")
                .build())
            .clientConfig(EndpointConfigurationAsyncInferenceConfigClientConfigArgs.builder()
                .maxConcurrentInvocationsPerInstance(0)
                .build())
            .build())
        .dataCaptureConfig(EndpointConfigurationDataCaptureConfigArgs.builder()
            .captureOptions(EndpointConfigurationDataCaptureConfigCaptureOptionArgs.builder()
                .captureMode("string")
                .build())
            .destinationS3Uri("string")
            .initialSamplingPercentage(0)
            .captureContentTypeHeader(EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs.builder()
                .csvContentTypes("string")
                .jsonContentTypes("string")
                .build())
            .enableCapture(false)
            .kmsKeyId("string")
            .build())
        .kmsKeyArn("string")
        .name("string")
        .namePrefix("string")
        .shadowProductionVariants(EndpointConfigurationShadowProductionVariantArgs.builder()
            .modelName("string")
            .initialVariantWeight(0)
            .managedInstanceScaling(EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs.builder()
                .maxInstanceCount(0)
                .minInstanceCount(0)
                .status("string")
                .build())
            .enableSsmAccess(false)
            .inferenceAmiVersion("string")
            .initialInstanceCount(0)
            .acceleratorType("string")
            .instanceType("string")
            .coreDumpConfig(EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs.builder()
                .destinationS3Uri("string")
                .kmsKeyId("string")
                .build())
            .modelDataDownloadTimeoutInSeconds(0)
            .containerStartupHealthCheckTimeoutInSeconds(0)
            .routingConfigs(EndpointConfigurationShadowProductionVariantRoutingConfigArgs.builder()
                .routingStrategy("string")
                .build())
            .serverlessConfig(EndpointConfigurationShadowProductionVariantServerlessConfigArgs.builder()
                .maxConcurrency(0)
                .memorySizeInMb(0)
                .provisionedConcurrency(0)
                .build())
            .variantName("string")
            .volumeSizeInGb(0)
            .build())
        .tags(Map.of("string", "string"))
        .build());
    
    endpoint_configuration_resource = aws.sagemaker.EndpointConfiguration("endpointConfigurationResource",
        production_variants=[{
            "model_name": "string",
            "initial_variant_weight": 0,
            "managed_instance_scaling": {
                "max_instance_count": 0,
                "min_instance_count": 0,
                "status": "string",
            },
            "enable_ssm_access": False,
            "inference_ami_version": "string",
            "initial_instance_count": 0,
            "accelerator_type": "string",
            "instance_type": "string",
            "core_dump_config": {
                "destination_s3_uri": "string",
                "kms_key_id": "string",
            },
            "model_data_download_timeout_in_seconds": 0,
            "container_startup_health_check_timeout_in_seconds": 0,
            "routing_configs": [{
                "routing_strategy": "string",
            }],
            "serverless_config": {
                "max_concurrency": 0,
                "memory_size_in_mb": 0,
                "provisioned_concurrency": 0,
            },
            "variant_name": "string",
            "volume_size_in_gb": 0,
        }],
        async_inference_config={
            "output_config": {
                "s3_output_path": "string",
                "kms_key_id": "string",
                "notification_config": {
                    "error_topic": "string",
                    "include_inference_response_ins": ["string"],
                    "success_topic": "string",
                },
                "s3_failure_path": "string",
            },
            "client_config": {
                "max_concurrent_invocations_per_instance": 0,
            },
        },
        data_capture_config={
            "capture_options": [{
                "capture_mode": "string",
            }],
            "destination_s3_uri": "string",
            "initial_sampling_percentage": 0,
            "capture_content_type_header": {
                "csv_content_types": ["string"],
                "json_content_types": ["string"],
            },
            "enable_capture": False,
            "kms_key_id": "string",
        },
        kms_key_arn="string",
        name="string",
        name_prefix="string",
        shadow_production_variants=[{
            "model_name": "string",
            "initial_variant_weight": 0,
            "managed_instance_scaling": {
                "max_instance_count": 0,
                "min_instance_count": 0,
                "status": "string",
            },
            "enable_ssm_access": False,
            "inference_ami_version": "string",
            "initial_instance_count": 0,
            "accelerator_type": "string",
            "instance_type": "string",
            "core_dump_config": {
                "destination_s3_uri": "string",
                "kms_key_id": "string",
            },
            "model_data_download_timeout_in_seconds": 0,
            "container_startup_health_check_timeout_in_seconds": 0,
            "routing_configs": [{
                "routing_strategy": "string",
            }],
            "serverless_config": {
                "max_concurrency": 0,
                "memory_size_in_mb": 0,
                "provisioned_concurrency": 0,
            },
            "variant_name": "string",
            "volume_size_in_gb": 0,
        }],
        tags={
            "string": "string",
        })
    
    const endpointConfigurationResource = new aws.sagemaker.EndpointConfiguration("endpointConfigurationResource", {
        productionVariants: [{
            modelName: "string",
            initialVariantWeight: 0,
            managedInstanceScaling: {
                maxInstanceCount: 0,
                minInstanceCount: 0,
                status: "string",
            },
            enableSsmAccess: false,
            inferenceAmiVersion: "string",
            initialInstanceCount: 0,
            acceleratorType: "string",
            instanceType: "string",
            coreDumpConfig: {
                destinationS3Uri: "string",
                kmsKeyId: "string",
            },
            modelDataDownloadTimeoutInSeconds: 0,
            containerStartupHealthCheckTimeoutInSeconds: 0,
            routingConfigs: [{
                routingStrategy: "string",
            }],
            serverlessConfig: {
                maxConcurrency: 0,
                memorySizeInMb: 0,
                provisionedConcurrency: 0,
            },
            variantName: "string",
            volumeSizeInGb: 0,
        }],
        asyncInferenceConfig: {
            outputConfig: {
                s3OutputPath: "string",
                kmsKeyId: "string",
                notificationConfig: {
                    errorTopic: "string",
                    includeInferenceResponseIns: ["string"],
                    successTopic: "string",
                },
                s3FailurePath: "string",
            },
            clientConfig: {
                maxConcurrentInvocationsPerInstance: 0,
            },
        },
        dataCaptureConfig: {
            captureOptions: [{
                captureMode: "string",
            }],
            destinationS3Uri: "string",
            initialSamplingPercentage: 0,
            captureContentTypeHeader: {
                csvContentTypes: ["string"],
                jsonContentTypes: ["string"],
            },
            enableCapture: false,
            kmsKeyId: "string",
        },
        kmsKeyArn: "string",
        name: "string",
        namePrefix: "string",
        shadowProductionVariants: [{
            modelName: "string",
            initialVariantWeight: 0,
            managedInstanceScaling: {
                maxInstanceCount: 0,
                minInstanceCount: 0,
                status: "string",
            },
            enableSsmAccess: false,
            inferenceAmiVersion: "string",
            initialInstanceCount: 0,
            acceleratorType: "string",
            instanceType: "string",
            coreDumpConfig: {
                destinationS3Uri: "string",
                kmsKeyId: "string",
            },
            modelDataDownloadTimeoutInSeconds: 0,
            containerStartupHealthCheckTimeoutInSeconds: 0,
            routingConfigs: [{
                routingStrategy: "string",
            }],
            serverlessConfig: {
                maxConcurrency: 0,
                memorySizeInMb: 0,
                provisionedConcurrency: 0,
            },
            variantName: "string",
            volumeSizeInGb: 0,
        }],
        tags: {
            string: "string",
        },
    });
    
    type: aws:sagemaker:EndpointConfiguration
    properties:
        asyncInferenceConfig:
            clientConfig:
                maxConcurrentInvocationsPerInstance: 0
            outputConfig:
                kmsKeyId: string
                notificationConfig:
                    errorTopic: string
                    includeInferenceResponseIns:
                        - string
                    successTopic: string
                s3FailurePath: string
                s3OutputPath: string
        dataCaptureConfig:
            captureContentTypeHeader:
                csvContentTypes:
                    - string
                jsonContentTypes:
                    - string
            captureOptions:
                - captureMode: string
            destinationS3Uri: string
            enableCapture: false
            initialSamplingPercentage: 0
            kmsKeyId: string
        kmsKeyArn: string
        name: string
        namePrefix: string
        productionVariants:
            - acceleratorType: string
              containerStartupHealthCheckTimeoutInSeconds: 0
              coreDumpConfig:
                destinationS3Uri: string
                kmsKeyId: string
              enableSsmAccess: false
              inferenceAmiVersion: string
              initialInstanceCount: 0
              initialVariantWeight: 0
              instanceType: string
              managedInstanceScaling:
                maxInstanceCount: 0
                minInstanceCount: 0
                status: string
              modelDataDownloadTimeoutInSeconds: 0
              modelName: string
              routingConfigs:
                - routingStrategy: string
              serverlessConfig:
                maxConcurrency: 0
                memorySizeInMb: 0
                provisionedConcurrency: 0
              variantName: string
              volumeSizeInGb: 0
        shadowProductionVariants:
            - acceleratorType: string
              containerStartupHealthCheckTimeoutInSeconds: 0
              coreDumpConfig:
                destinationS3Uri: string
                kmsKeyId: string
              enableSsmAccess: false
              inferenceAmiVersion: string
              initialInstanceCount: 0
              initialVariantWeight: 0
              instanceType: string
              managedInstanceScaling:
                maxInstanceCount: 0
                minInstanceCount: 0
                status: string
              modelDataDownloadTimeoutInSeconds: 0
              modelName: string
              routingConfigs:
                - routingStrategy: string
              serverlessConfig:
                maxConcurrency: 0
                memorySizeInMb: 0
                provisionedConcurrency: 0
              variantName: string
              volumeSizeInGb: 0
        tags:
            string: string
    

    EndpointConfiguration Resource Properties

    To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.

    Inputs

    In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.

    The EndpointConfiguration resource accepts the following input properties:

    ProductionVariants List<EndpointConfigurationProductionVariant>
    A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
    AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfig
    Specifies configuration for how an endpoint performs asynchronous inference.
    DataCaptureConfig EndpointConfigurationDataCaptureConfig
    Specifies the parameters to capture input/output of SageMaker model endpoints. Fields are documented below.
    KmsKeyArn string
    Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    Name string
    The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
    NamePrefix string
    Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    ShadowProductionVariants List<EndpointConfigurationShadowProductionVariant>
    Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
    Tags Dictionary<string, string>
    A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    ProductionVariants []EndpointConfigurationProductionVariantArgs
    A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
    AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfigArgs
    Specifies configuration for how an endpoint performs asynchronous inference.
    DataCaptureConfig EndpointConfigurationDataCaptureConfigArgs
    Specifies the parameters to capture input/output of SageMaker model endpoints. Fields are documented below.
    KmsKeyArn string
    Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    Name string
    The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
    NamePrefix string
    Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    ShadowProductionVariants []EndpointConfigurationShadowProductionVariantArgs
    Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
    Tags map[string]string
    A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    productionVariants List<EndpointConfigurationProductionVariant>
    A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
    asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig
    Specifies configuration for how an endpoint performs asynchronous inference.
    dataCaptureConfig EndpointConfigurationDataCaptureConfig
    Specifies the parameters to capture input/output of SageMaker model endpoints. Fields are documented below.
    kmsKeyArn String
    Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    name String
    The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
    namePrefix String
    Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    shadowProductionVariants List<EndpointConfigurationShadowProductionVariant>
    Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
    tags Map<String,String>
    A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    productionVariants EndpointConfigurationProductionVariant[]
    A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
    asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig
    Specifies configuration for how an endpoint performs asynchronous inference.
    dataCaptureConfig EndpointConfigurationDataCaptureConfig
    Specifies the parameters to capture input/output of SageMaker model endpoints. Fields are documented below.
    kmsKeyArn string
    Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    name string
    The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
    namePrefix string
    Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    shadowProductionVariants EndpointConfigurationShadowProductionVariant[]
    Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
    tags {[key: string]: string}
    A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    production_variants Sequence[EndpointConfigurationProductionVariantArgs]
    A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
    async_inference_config EndpointConfigurationAsyncInferenceConfigArgs
    Specifies configuration for how an endpoint performs asynchronous inference.
    data_capture_config EndpointConfigurationDataCaptureConfigArgs
    Specifies the parameters to capture input/output of SageMaker model endpoints. Fields are documented below.
    kms_key_arn str
    Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    name str
    The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
    name_prefix str
    Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    shadow_production_variants Sequence[EndpointConfigurationShadowProductionVariantArgs]
    Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
    tags Mapping[str, str]
    A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    productionVariants List<Property Map>
    A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
    asyncInferenceConfig Property Map
    Specifies configuration for how an endpoint performs asynchronous inference.
    dataCaptureConfig Property Map
    Specifies the parameters to capture input/output of SageMaker model endpoints. Fields are documented below.
    kmsKeyArn String
    Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    name String
    The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
    namePrefix String
    Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    shadowProductionVariants List<Property Map>
    Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
    tags Map<String>
    A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

    Outputs

    All input properties are implicitly available as output properties. Additionally, the EndpointConfiguration resource produces the following output properties:

    Arn string
    The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
    Id string
    The provider-assigned unique ID for this managed resource.
    TagsAll Dictionary<string, string>
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    Arn string
    The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
    Id string
    The provider-assigned unique ID for this managed resource.
    TagsAll map[string]string
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    arn String
    The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
    id String
    The provider-assigned unique ID for this managed resource.
    tagsAll Map<String,String>
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    arn string
    The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
    id string
    The provider-assigned unique ID for this managed resource.
    tagsAll {[key: string]: string}
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    arn str
    The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
    id str
    The provider-assigned unique ID for this managed resource.
    tags_all Mapping[str, str]
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    arn String
    The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
    id String
    The provider-assigned unique ID for this managed resource.
    tagsAll Map<String>
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    Look up Existing EndpointConfiguration Resource

    Get an existing EndpointConfiguration resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

    public static get(name: string, id: Input<ID>, state?: EndpointConfigurationState, opts?: CustomResourceOptions): EndpointConfiguration
    @staticmethod
    def get(resource_name: str,
            id: str,
            opts: Optional[ResourceOptions] = None,
            arn: Optional[str] = None,
            async_inference_config: Optional[EndpointConfigurationAsyncInferenceConfigArgs] = None,
            data_capture_config: Optional[EndpointConfigurationDataCaptureConfigArgs] = None,
            kms_key_arn: Optional[str] = None,
            name: Optional[str] = None,
            name_prefix: Optional[str] = None,
            production_variants: Optional[Sequence[EndpointConfigurationProductionVariantArgs]] = None,
            shadow_production_variants: Optional[Sequence[EndpointConfigurationShadowProductionVariantArgs]] = None,
            tags: Optional[Mapping[str, str]] = None,
            tags_all: Optional[Mapping[str, str]] = None) -> EndpointConfiguration
    func GetEndpointConfiguration(ctx *Context, name string, id IDInput, state *EndpointConfigurationState, opts ...ResourceOption) (*EndpointConfiguration, error)
    public static EndpointConfiguration Get(string name, Input<string> id, EndpointConfigurationState? state, CustomResourceOptions? opts = null)
    public static EndpointConfiguration get(String name, Output<String> id, EndpointConfigurationState state, CustomResourceOptions options)
    Resource lookup is not supported in YAML
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to look up.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    resource_name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to look up.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to look up.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to look up.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to look up.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    The following state arguments are supported:
    Arn string
    The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
    AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfig
    Specifies configuration for how an endpoint performs asynchronous inference.
    DataCaptureConfig EndpointConfigurationDataCaptureConfig
    Specifies the parameters to capture input/output of SageMaker model endpoints. Fields are documented below.
    KmsKeyArn string
    Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    Name string
    The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
    NamePrefix string
    Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    ProductionVariants List<EndpointConfigurationProductionVariant>
    A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
    ShadowProductionVariants List<EndpointConfigurationShadowProductionVariant>
    Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
    Tags Dictionary<string, string>
    A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    TagsAll Dictionary<string, string>
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    Arn string
    The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
    AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfigArgs
    Specifies configuration for how an endpoint performs asynchronous inference.
    DataCaptureConfig EndpointConfigurationDataCaptureConfigArgs
    Specifies the parameters to capture input/output of SageMaker model endpoints. Fields are documented below.
    KmsKeyArn string
    Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    Name string
    The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
    NamePrefix string
    Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    ProductionVariants []EndpointConfigurationProductionVariantArgs
    A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
    ShadowProductionVariants []EndpointConfigurationShadowProductionVariantArgs
    Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
    Tags map[string]string
    A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    TagsAll map[string]string
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    arn String
    The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
    asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig
    Specifies configuration for how an endpoint performs asynchronous inference.
    dataCaptureConfig EndpointConfigurationDataCaptureConfig
    Specifies the parameters to capture input/output of SageMaker model endpoints. Fields are documented below.
    kmsKeyArn String
    Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    name String
    The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
    namePrefix String
    Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    productionVariants List<EndpointConfigurationProductionVariant>
    A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
    shadowProductionVariants List<EndpointConfigurationShadowProductionVariant>
    Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
    tags Map<String,String>
    A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    tagsAll Map<String,String>
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    arn string
    The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
    asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig
    Specifies configuration for how an endpoint performs asynchronous inference.
    dataCaptureConfig EndpointConfigurationDataCaptureConfig
    Specifies the parameters to capture input/output of SageMaker model endpoints. Fields are documented below.
    kmsKeyArn string
    Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    name string
    The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
    namePrefix string
    Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    productionVariants EndpointConfigurationProductionVariant[]
    A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
    shadowProductionVariants EndpointConfigurationShadowProductionVariant[]
    Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
    tags {[key: string]: string}
    A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    tagsAll {[key: string]: string}
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    arn str
    The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
    async_inference_config EndpointConfigurationAsyncInferenceConfigArgs
    Specifies configuration for how an endpoint performs asynchronous inference.
    data_capture_config EndpointConfigurationDataCaptureConfigArgs
    Specifies the parameters to capture input/output of SageMaker model endpoints. Fields are documented below.
    kms_key_arn str
    Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    name str
    The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
    name_prefix str
    Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    production_variants Sequence[EndpointConfigurationProductionVariantArgs]
    A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
    shadow_production_variants Sequence[EndpointConfigurationShadowProductionVariantArgs]
    Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
    tags Mapping[str, str]
    A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    tags_all Mapping[str, str]
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    arn String
    The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
    asyncInferenceConfig Property Map
    Specifies configuration for how an endpoint performs asynchronous inference.
    dataCaptureConfig Property Map
    Specifies the parameters to capture input/output of SageMaker model endpoints. Fields are documented below.
    kmsKeyArn String
    Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
    name String
    The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
    namePrefix String
    Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
    productionVariants List<Property Map>
    A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
    shadowProductionVariants List<Property Map>
    Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
    tags Map<String>
    A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    tagsAll Map<String>
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    Supporting Types

    EndpointConfigurationAsyncInferenceConfig, EndpointConfigurationAsyncInferenceConfigArgs

    OutputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig
    Specifies the configuration for asynchronous inference invocation outputs.
    ClientConfig EndpointConfigurationAsyncInferenceConfigClientConfig
    Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.
    OutputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig
    Specifies the configuration for asynchronous inference invocation outputs.
    ClientConfig EndpointConfigurationAsyncInferenceConfigClientConfig
    Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.
    outputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig
    Specifies the configuration for asynchronous inference invocation outputs.
    clientConfig EndpointConfigurationAsyncInferenceConfigClientConfig
    Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.
    outputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig
    Specifies the configuration for asynchronous inference invocation outputs.
    clientConfig EndpointConfigurationAsyncInferenceConfigClientConfig
    Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.
    output_config EndpointConfigurationAsyncInferenceConfigOutputConfig
    Specifies the configuration for asynchronous inference invocation outputs.
    client_config EndpointConfigurationAsyncInferenceConfigClientConfig
    Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.
    outputConfig Property Map
    Specifies the configuration for asynchronous inference invocation outputs.
    clientConfig Property Map
    Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.

    EndpointConfigurationAsyncInferenceConfigClientConfig, EndpointConfigurationAsyncInferenceConfigClientConfigArgs

    MaxConcurrentInvocationsPerInstance int
    The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.
    MaxConcurrentInvocationsPerInstance int
    The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.
    maxConcurrentInvocationsPerInstance Integer
    The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.
    maxConcurrentInvocationsPerInstance number
    The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.
    max_concurrent_invocations_per_instance int
    The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.
    maxConcurrentInvocationsPerInstance Number
    The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.

    EndpointConfigurationAsyncInferenceConfigOutputConfig, EndpointConfigurationAsyncInferenceConfigOutputConfigArgs

    S3OutputPath string
    The Amazon S3 location to upload inference responses to.
    KmsKeyId string
    The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.
    NotificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig
    Specifies the configuration for notifications of inference results for asynchronous inference.
    S3FailurePath string
    The Amazon S3 location to upload failure inference responses to.
    S3OutputPath string
    The Amazon S3 location to upload inference responses to.
    KmsKeyId string
    The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.
    NotificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig
    Specifies the configuration for notifications of inference results for asynchronous inference.
    S3FailurePath string
    The Amazon S3 location to upload failure inference responses to.
    s3OutputPath String
    The Amazon S3 location to upload inference responses to.
    kmsKeyId String
    The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.
    notificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig
    Specifies the configuration for notifications of inference results for asynchronous inference.
    s3FailurePath String
    The Amazon S3 location to upload failure inference responses to.
    s3OutputPath string
    The Amazon S3 location to upload inference responses to.
    kmsKeyId string
    The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.
    notificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig
    Specifies the configuration for notifications of inference results for asynchronous inference.
    s3FailurePath string
    The Amazon S3 location to upload failure inference responses to.
    s3_output_path str
    The Amazon S3 location to upload inference responses to.
    kms_key_id str
    The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.
    notification_config EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig
    Specifies the configuration for notifications of inference results for asynchronous inference.
    s3_failure_path str
    The Amazon S3 location to upload failure inference responses to.
    s3OutputPath String
    The Amazon S3 location to upload inference responses to.
    kmsKeyId String
    The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.
    notificationConfig Property Map
    Specifies the configuration for notifications of inference results for asynchronous inference.
    s3FailurePath String
    The Amazon S3 location to upload failure inference responses to.

    EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig, EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs

    ErrorTopic string
    Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
    IncludeInferenceResponseIns List<string>
    The Amazon SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
    SuccessTopic string
    Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
    ErrorTopic string
    Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
    IncludeInferenceResponseIns []string
    The Amazon SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
    SuccessTopic string
    Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
    errorTopic String
    Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
    includeInferenceResponseIns List<String>
    The Amazon SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
    successTopic String
    Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
    errorTopic string
    Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
    includeInferenceResponseIns string[]
    The Amazon SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
    successTopic string
    Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
    error_topic str
    Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
    include_inference_response_ins Sequence[str]
    The Amazon SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
    success_topic str
    Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
    errorTopic String
    Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
    includeInferenceResponseIns List<String>
    The Amazon SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
    successTopic String
    Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.

    EndpointConfigurationDataCaptureConfig, EndpointConfigurationDataCaptureConfigArgs

    CaptureOptions List<EndpointConfigurationDataCaptureConfigCaptureOption>
    Specifies what data to capture. Fields are documented below.
    DestinationS3Uri string
    The URL for S3 location where the captured data is stored.
    InitialSamplingPercentage int
    Portion of data to capture. Should be between 0 and 100.
    CaptureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader
    The content type headers to capture. Fields are documented below.
    EnableCapture bool
    Flag to enable data capture. Defaults to false.
    KmsKeyId string
    Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt the captured data on Amazon S3.
    CaptureOptions []EndpointConfigurationDataCaptureConfigCaptureOption
    Specifies what data to capture. Fields are documented below.
    DestinationS3Uri string
    The URL for S3 location where the captured data is stored.
    InitialSamplingPercentage int
    Portion of data to capture. Should be between 0 and 100.
    CaptureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader
    The content type headers to capture. Fields are documented below.
    EnableCapture bool
    Flag to enable data capture. Defaults to false.
    KmsKeyId string
    Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt the captured data on Amazon S3.
    captureOptions List<EndpointConfigurationDataCaptureConfigCaptureOption>
    Specifies what data to capture. Fields are documented below.
    destinationS3Uri String
    The URL for S3 location where the captured data is stored.
    initialSamplingPercentage Integer
    Portion of data to capture. Should be between 0 and 100.
    captureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader
    The content type headers to capture. Fields are documented below.
    enableCapture Boolean
    Flag to enable data capture. Defaults to false.
    kmsKeyId String
    Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt the captured data on Amazon S3.
    captureOptions EndpointConfigurationDataCaptureConfigCaptureOption[]
    Specifies what data to capture. Fields are documented below.
    destinationS3Uri string
    The URL for S3 location where the captured data is stored.
    initialSamplingPercentage number
    Portion of data to capture. Should be between 0 and 100.
    captureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader
    The content type headers to capture. Fields are documented below.
    enableCapture boolean
    Flag to enable data capture. Defaults to false.
    kmsKeyId string
    Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt the captured data on Amazon S3.
    capture_options Sequence[EndpointConfigurationDataCaptureConfigCaptureOption]
    Specifies what data to capture. Fields are documented below.
    destination_s3_uri str
    The URL for S3 location where the captured data is stored.
    initial_sampling_percentage int
    Portion of data to capture. Should be between 0 and 100.
    capture_content_type_header EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader
    The content type headers to capture. Fields are documented below.
    enable_capture bool
    Flag to enable data capture. Defaults to false.
    kms_key_id str
    Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt the captured data on Amazon S3.
    captureOptions List<Property Map>
    Specifies what data to capture. Fields are documented below.
    destinationS3Uri String
    The URL for S3 location where the captured data is stored.
    initialSamplingPercentage Number
    Portion of data to capture. Should be between 0 and 100.
    captureContentTypeHeader Property Map
    The content type headers to capture. Fields are documented below.
    enableCapture Boolean
    Flag to enable data capture. Defaults to false.
    kmsKeyId String
    Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt the captured data on Amazon S3.

    EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader, EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs

    CsvContentTypes List<string>
    The CSV content type headers to capture.
    JsonContentTypes List<string>
    The JSON content type headers to capture.
    CsvContentTypes []string
    The CSV content type headers to capture.
    JsonContentTypes []string
    The JSON content type headers to capture.
    csvContentTypes List<String>
    The CSV content type headers to capture.
    jsonContentTypes List<String>
    The JSON content type headers to capture.
    csvContentTypes string[]
    The CSV content type headers to capture.
    jsonContentTypes string[]
    The JSON content type headers to capture.
    csv_content_types Sequence[str]
    The CSV content type headers to capture.
    json_content_types Sequence[str]
    The JSON content type headers to capture.
    csvContentTypes List<String>
    The CSV content type headers to capture.
    jsonContentTypes List<String>
    The JSON content type headers to capture.

    EndpointConfigurationDataCaptureConfigCaptureOption, EndpointConfigurationDataCaptureConfigCaptureOptionArgs

    CaptureMode string
    Specifies the data to be captured. Should be one of Input, Output or InputAndOutput.
    CaptureMode string
    Specifies the data to be captured. Should be one of Input, Output or InputAndOutput.
    captureMode String
    Specifies the data to be captured. Should be one of Input, Output or InputAndOutput.
    captureMode string
    Specifies the data to be captured. Should be one of Input, Output or InputAndOutput.
    capture_mode str
    Specifies the data to be captured. Should be one of Input, Output or InputAndOutput.
    captureMode String
    Specifies the data to be captured. Should be one of Input, Output or InputAndOutput.

    EndpointConfigurationProductionVariant, EndpointConfigurationProductionVariantArgs

    ModelName string
    The name of the model to use.
    AcceleratorType string
    The size of the Elastic Inference (EI) instance to use for the production variant.
    ContainerStartupHealthCheckTimeoutInSeconds int
    The timeout value, in seconds, for your inference container to pass health check by SageMaker Hosting. For more information about health check, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
    CoreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig
    Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
    EnableSsmAccess bool
    You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
    InferenceAmiVersion string
    Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
    InitialInstanceCount int
    Initial number of instances used for auto-scaling.
    InitialVariantWeight double
    Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
    InstanceType string
    The type of instance to start.
    ManagedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling
    Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    ModelDataDownloadTimeoutInSeconds int
    The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
    RoutingConfigs List<EndpointConfigurationProductionVariantRoutingConfig>
    Sets how the endpoint routes incoming traffic. See routing_config below.
    ServerlessConfig EndpointConfigurationProductionVariantServerlessConfig
    Specifies configuration for how an endpoint performs serverless inference.
    VariantName string
    The name of the variant. If omitted, this provider will assign a random, unique name.
    VolumeSizeInGb int
    The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between 1 and 512.
    ModelName string
    The name of the model to use.
    AcceleratorType string
    The size of the Elastic Inference (EI) instance to use for the production variant.
    ContainerStartupHealthCheckTimeoutInSeconds int
    The timeout value, in seconds, for your inference container to pass health check by SageMaker Hosting. For more information about health check, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
    CoreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig
    Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
    EnableSsmAccess bool
    You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
    InferenceAmiVersion string
    Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
    InitialInstanceCount int
    Initial number of instances used for auto-scaling.
    InitialVariantWeight float64
    Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
    InstanceType string
    The type of instance to start.
    ManagedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling
    Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    ModelDataDownloadTimeoutInSeconds int
    The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
    RoutingConfigs []EndpointConfigurationProductionVariantRoutingConfig
    Sets how the endpoint routes incoming traffic. See routing_config below.
    ServerlessConfig EndpointConfigurationProductionVariantServerlessConfig
    Specifies configuration for how an endpoint performs serverless inference.
    VariantName string
    The name of the variant. If omitted, this provider will assign a random, unique name.
    VolumeSizeInGb int
    The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between 1 and 512.
    modelName String
    The name of the model to use.
    acceleratorType String
    The size of the Elastic Inference (EI) instance to use for the production variant.
    containerStartupHealthCheckTimeoutInSeconds Integer
    The timeout value, in seconds, for your inference container to pass health check by SageMaker Hosting. For more information about health check, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
    coreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig
    Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
    enableSsmAccess Boolean
    You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
    inferenceAmiVersion String
    Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
    initialInstanceCount Integer
    Initial number of instances used for auto-scaling.
    initialVariantWeight Double
    Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
    instanceType String
    The type of instance to start.
    managedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling
    Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    modelDataDownloadTimeoutInSeconds Integer
    The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
    routingConfigs List<EndpointConfigurationProductionVariantRoutingConfig>
    Sets how the endpoint routes incoming traffic. See routing_config below.
    serverlessConfig EndpointConfigurationProductionVariantServerlessConfig
    Specifies configuration for how an endpoint performs serverless inference.
    variantName String
    The name of the variant. If omitted, this provider will assign a random, unique name.
    volumeSizeInGb Integer
    The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between 1 and 512.
    modelName string
    The name of the model to use.
    acceleratorType string
    The size of the Elastic Inference (EI) instance to use for the production variant.
    containerStartupHealthCheckTimeoutInSeconds number
    The timeout value, in seconds, for your inference container to pass health check by SageMaker Hosting. For more information about health check, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
    coreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig
    Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
    enableSsmAccess boolean
    You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
    inferenceAmiVersion string
    Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
    initialInstanceCount number
    Initial number of instances used for auto-scaling.
    initialVariantWeight number
    Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
    instanceType string
    The type of instance to start.
    managedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling
    Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    modelDataDownloadTimeoutInSeconds number
    The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
    routingConfigs EndpointConfigurationProductionVariantRoutingConfig[]
    Sets how the endpoint routes incoming traffic. See routing_config below.
    serverlessConfig EndpointConfigurationProductionVariantServerlessConfig
    Specifies configuration for how an endpoint performs serverless inference.
    variantName string
    The name of the variant. If omitted, this provider will assign a random, unique name.
    volumeSizeInGb number
    The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between 1 and 512.
    model_name str
    The name of the model to use.
    accelerator_type str
    The size of the Elastic Inference (EI) instance to use for the production variant.
    container_startup_health_check_timeout_in_seconds int
    The timeout value, in seconds, for your inference container to pass health check by SageMaker Hosting. For more information about health check, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
    core_dump_config EndpointConfigurationProductionVariantCoreDumpConfig
    Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
    enable_ssm_access bool
    You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
    inference_ami_version str
    Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
    initial_instance_count int
    Initial number of instances used for auto-scaling.
    initial_variant_weight float
    Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
    instance_type str
    The type of instance to start.
    managed_instance_scaling EndpointConfigurationProductionVariantManagedInstanceScaling
    Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    model_data_download_timeout_in_seconds int
    The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
    routing_configs Sequence[EndpointConfigurationProductionVariantRoutingConfig]
    Sets how the endpoint routes incoming traffic. See routing_config below.
    serverless_config EndpointConfigurationProductionVariantServerlessConfig
    Specifies configuration for how an endpoint performs serverless inference.
    variant_name str
    The name of the variant. If omitted, this provider will assign a random, unique name.
    volume_size_in_gb int
    The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between 1 and 512.
    modelName String
    The name of the model to use.
    acceleratorType String
    The size of the Elastic Inference (EI) instance to use for the production variant.
    containerStartupHealthCheckTimeoutInSeconds Number
    The timeout value, in seconds, for your inference container to pass health check by SageMaker Hosting. For more information about health check, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
    coreDumpConfig Property Map
    Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
    enableSsmAccess Boolean
    You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
    inferenceAmiVersion String
    Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
    initialInstanceCount Number
    Initial number of instances used for auto-scaling.
    initialVariantWeight Number
    Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
    instanceType String
    The type of instance to start.
    managedInstanceScaling Property Map
    Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    modelDataDownloadTimeoutInSeconds Number
    The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
    routingConfigs List<Property Map>
    Sets how the endpoint routes incoming traffic. See routing_config below.
    serverlessConfig Property Map
    Specifies configuration for how an endpoint performs serverless inference.
    variantName String
    The name of the variant. If omitted, this provider will assign a random, unique name.
    volumeSizeInGb Number
    The size, in GB, of the ML storage volume attached to individual inference instance associated with the production variant. Valid values between 1 and 512.

    EndpointConfigurationProductionVariantCoreDumpConfig, EndpointConfigurationProductionVariantCoreDumpConfigArgs

    DestinationS3Uri string
    The Amazon S3 bucket to send the core dump to.
    KmsKeyId string
    The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
    DestinationS3Uri string
    The Amazon S3 bucket to send the core dump to.
    KmsKeyId string
    The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
    destinationS3Uri String
    The Amazon S3 bucket to send the core dump to.
    kmsKeyId String
    The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
    destinationS3Uri string
    The Amazon S3 bucket to send the core dump to.
    kmsKeyId string
    The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
    destination_s3_uri str
    The Amazon S3 bucket to send the core dump to.
    kms_key_id str
    The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
    destinationS3Uri String
    The Amazon S3 bucket to send the core dump to.
    kmsKeyId String
    The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.

    EndpointConfigurationProductionVariantManagedInstanceScaling, EndpointConfigurationProductionVariantManagedInstanceScalingArgs

    MaxInstanceCount int
    The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    MinInstanceCount int
    The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    Status string
    Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    MaxInstanceCount int
    The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    MinInstanceCount int
    The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    Status string
    Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    maxInstanceCount Integer
    The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    minInstanceCount Integer
    The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    status String
    Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    maxInstanceCount number
    The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    minInstanceCount number
    The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    status string
    Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    max_instance_count int
    The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    min_instance_count int
    The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    status str
    Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    maxInstanceCount Number
    The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    minInstanceCount Number
    The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    status String
    Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.

    EndpointConfigurationProductionVariantRoutingConfig, EndpointConfigurationProductionVariantRoutingConfigArgs

    RoutingStrategy string
    Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    RoutingStrategy string
    Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    routingStrategy String
    Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    routingStrategy string
    Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    routing_strategy str
    Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    routingStrategy String
    Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.

    EndpointConfigurationProductionVariantServerlessConfig, EndpointConfigurationProductionVariantServerlessConfigArgs

    MaxConcurrency int
    The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    MemorySizeInMb int
    The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    ProvisionedConcurrency int
    The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    MaxConcurrency int
    The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    MemorySizeInMb int
    The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    ProvisionedConcurrency int
    The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    maxConcurrency Integer
    The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    memorySizeInMb Integer
    The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    provisionedConcurrency Integer
    The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    maxConcurrency number
    The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    memorySizeInMb number
    The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    provisionedConcurrency number
    The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    max_concurrency int
    The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    memory_size_in_mb int
    The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    provisioned_concurrency int
    The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    maxConcurrency Number
    The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    memorySizeInMb Number
    The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    provisionedConcurrency Number
    The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.

    EndpointConfigurationShadowProductionVariant, EndpointConfigurationShadowProductionVariantArgs

    ModelName string
    The name of the model to use.
    AcceleratorType string
    The size of the Elastic Inference (EI) instance to use for the production variant.
    ContainerStartupHealthCheckTimeoutInSeconds int
    The timeout value, in seconds, for your inference container to pass health check by SageMaker Hosting. For more information about health check, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
    CoreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig
    Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
    EnableSsmAccess bool
    You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
    InferenceAmiVersion string
    Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
    InitialInstanceCount int
    Initial number of instances used for auto-scaling.
    InitialVariantWeight double
    Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
    InstanceType string
    The type of instance to start.
    ManagedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling
    Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    ModelDataDownloadTimeoutInSeconds int
    The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
    RoutingConfigs List<EndpointConfigurationShadowProductionVariantRoutingConfig>
    Sets how the endpoint routes incoming traffic. See routing_config below.
    ServerlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig
    Specifies configuration for how an endpoint performs serverless inference.
    VariantName string
    The name of the variant. If omitted, this provider will assign a random, unique name.
    VolumeSizeInGb int
    The size, in GB, of the ML storage volume attached to each individual inference instance associated with the production variant. Valid values between 1 and 512.
    ModelName string
    The name of the model to use.
    AcceleratorType string
    The size of the Elastic Inference (EI) instance to use for the production variant.
    ContainerStartupHealthCheckTimeoutInSeconds int
    The timeout value, in seconds, for your inference container to pass health check by SageMaker Hosting. For more information about health check, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
    CoreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig
    Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
    EnableSsmAccess bool
    You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
    InferenceAmiVersion string
    Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
    InitialInstanceCount int
    Initial number of instances used for auto-scaling.
    InitialVariantWeight float64
    Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
    InstanceType string
    The type of instance to start.
    ManagedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling
    Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    ModelDataDownloadTimeoutInSeconds int
    The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
    RoutingConfigs []EndpointConfigurationShadowProductionVariantRoutingConfig
    Sets how the endpoint routes incoming traffic. See routing_config below.
    ServerlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig
    Specifies configuration for how an endpoint performs serverless inference.
    VariantName string
    The name of the variant. If omitted, this provider will assign a random, unique name.
    VolumeSizeInGb int
    The size, in GB, of the ML storage volume attached to each individual inference instance associated with the production variant. Valid values between 1 and 512.
    modelName String
    The name of the model to use.
    acceleratorType String
    The size of the Elastic Inference (EI) instance to use for the production variant.
    containerStartupHealthCheckTimeoutInSeconds Integer
    The timeout value, in seconds, for your inference container to pass health check by SageMaker Hosting. For more information about health check, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
    coreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig
    Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
    enableSsmAccess Boolean
    You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
    inferenceAmiVersion String
    Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
    initialInstanceCount Integer
    Initial number of instances used for auto-scaling.
    initialVariantWeight Double
    Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
    instanceType String
    The type of instance to start.
    managedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling
    Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    modelDataDownloadTimeoutInSeconds Integer
    The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
    routingConfigs List<EndpointConfigurationShadowProductionVariantRoutingConfig>
    Sets how the endpoint routes incoming traffic. See routing_config below.
    serverlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig
    Specifies configuration for how an endpoint performs serverless inference.
    variantName String
    The name of the variant. If omitted, this provider will assign a random, unique name.
    volumeSizeInGb Integer
    The size, in GB, of the ML storage volume attached to each individual inference instance associated with the production variant. Valid values between 1 and 512.
    modelName string
    The name of the model to use.
    acceleratorType string
    The size of the Elastic Inference (EI) instance to use for the production variant.
    containerStartupHealthCheckTimeoutInSeconds number
    The timeout value, in seconds, for your inference container to pass health check by SageMaker Hosting. For more information about health check, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
    coreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig
    Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
    enableSsmAccess boolean
    You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
    inferenceAmiVersion string
    Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
    initialInstanceCount number
    Initial number of instances used for auto-scaling.
    initialVariantWeight number
    Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
    instanceType string
    The type of instance to start.
    managedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling
    Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    modelDataDownloadTimeoutInSeconds number
    The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
    routingConfigs EndpointConfigurationShadowProductionVariantRoutingConfig[]
    Sets how the endpoint routes incoming traffic. See routing_config below.
    serverlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig
    Specifies configuration for how an endpoint performs serverless inference.
    variantName string
    The name of the variant. If omitted, this provider will assign a random, unique name.
    volumeSizeInGb number
    The size, in GB, of the ML storage volume attached to each individual inference instance associated with the production variant. Valid values between 1 and 512.
    model_name str
    The name of the model to use.
    accelerator_type str
    The size of the Elastic Inference (EI) instance to use for the production variant.
    container_startup_health_check_timeout_in_seconds int
    The timeout value, in seconds, for your inference container to pass health check by SageMaker Hosting. For more information about health check, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
    core_dump_config EndpointConfigurationShadowProductionVariantCoreDumpConfig
    Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
    enable_ssm_access bool
    You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
    inference_ami_version str
    Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
    initial_instance_count int
    Initial number of instances used for auto-scaling.
    initial_variant_weight float
    Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
    instance_type str
    The type of instance to start.
    managed_instance_scaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling
    Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    model_data_download_timeout_in_seconds int
    The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
    routing_configs Sequence[EndpointConfigurationShadowProductionVariantRoutingConfig]
    Sets how the endpoint routes incoming traffic. See routing_config below.
    serverless_config EndpointConfigurationShadowProductionVariantServerlessConfig
    Specifies configuration for how an endpoint performs serverless inference.
    variant_name str
    The name of the variant. If omitted, this provider will assign a random, unique name.
    volume_size_in_gb int
    The size, in GB, of the ML storage volume attached to each individual inference instance associated with the production variant. Valid values between 1 and 512.
    modelName String
    The name of the model to use.
    acceleratorType String
    The size of the Elastic Inference (EI) instance to use for the production variant.
    containerStartupHealthCheckTimeoutInSeconds Number
    The timeout value, in seconds, for your inference container to pass health check by SageMaker Hosting. For more information about health check, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values between 60 and 3600.
    coreDumpConfig Property Map
    Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
    enableSsmAccess Boolean
    You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
    inferenceAmiVersion String
    Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
    initialInstanceCount Number
    Initial number of instances used for auto-scaling.
    initialVariantWeight Number
    Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
    instanceType String
    The type of instance to start.
    managedInstanceScaling Property Map
    Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
    modelDataDownloadTimeoutInSeconds Number
    The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values between 60 and 3600.
    routingConfigs List<Property Map>
    Sets how the endpoint routes incoming traffic. See routing_config below.
    serverlessConfig Property Map
    Specifies configuration for how an endpoint performs serverless inference.
    variantName String
    The name of the variant. If omitted, this provider will assign a random, unique name.
    volumeSizeInGb Number
    The size, in GB, of the ML storage volume attached to each individual inference instance associated with the production variant. Valid values between 1 and 512.

    EndpointConfigurationShadowProductionVariantCoreDumpConfig, EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs

    DestinationS3Uri string
    The Amazon S3 bucket to send the core dump to.
    KmsKeyId string
    The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
    DestinationS3Uri string
    The Amazon S3 bucket to send the core dump to.
    KmsKeyId string
    The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
    destinationS3Uri String
    The Amazon S3 bucket to send the core dump to.
    kmsKeyId String
    The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
    destinationS3Uri string
    The Amazon S3 bucket to send the core dump to.
    kmsKeyId string
    The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
    destination_s3_uri str
    The Amazon S3 bucket to send the core dump to.
    kms_key_id str
    The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
    destinationS3Uri String
    The Amazon S3 bucket to send the core dump to.
    kmsKeyId String
    The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.

    EndpointConfigurationShadowProductionVariantManagedInstanceScaling, EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs

    MaxInstanceCount int
    The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    MinInstanceCount int
    The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    Status string
    Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    MaxInstanceCount int
    The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    MinInstanceCount int
    The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    Status string
    Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    maxInstanceCount Integer
    The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    minInstanceCount Integer
    The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    status String
    Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    maxInstanceCount number
    The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    minInstanceCount number
    The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    status string
    Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    max_instance_count int
    The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    min_instance_count int
    The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    status str
    Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
    maxInstanceCount Number
    The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
    minInstanceCount Number
    The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
    status String
    Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.

    EndpointConfigurationShadowProductionVariantRoutingConfig, EndpointConfigurationShadowProductionVariantRoutingConfigArgs

    RoutingStrategy string
    Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    RoutingStrategy string
    Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    routingStrategy String
    Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    routingStrategy string
    Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    routing_strategy str
    Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
    routingStrategy String
    Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.

    EndpointConfigurationShadowProductionVariantServerlessConfig, EndpointConfigurationShadowProductionVariantServerlessConfigArgs

    MaxConcurrency int
    The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    MemorySizeInMb int
    The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    ProvisionedConcurrency int
    The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    MaxConcurrency int
    The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    MemorySizeInMb int
    The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    ProvisionedConcurrency int
    The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    maxConcurrency Integer
    The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    memorySizeInMb Integer
    The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    provisionedConcurrency Integer
    The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    maxConcurrency number
    The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    memorySizeInMb number
    The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    provisionedConcurrency number
    The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    max_concurrency int
    The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    memory_size_in_mb int
    The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    provisioned_concurrency int
    The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
    maxConcurrency Number
    The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
    memorySizeInMb Number
    The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
    provisionedConcurrency Number
    The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.

    Import

    Using pulumi import, import endpoint configurations using the name. For example:

    $ pulumi import aws:sagemaker/endpointConfiguration:EndpointConfiguration test_endpoint_config endpoint-config-foo
    

    To learn more about importing existing cloud resources, see Importing resources.

    Package Details

    Repository
    AWS Classic pulumi/pulumi-aws
    License
    Apache-2.0
    Notes
    This Pulumi package is based on the aws Terraform Provider.
    aws logo
    AWS v6.60.0 published on Tuesday, Nov 19, 2024 by Pulumi