aws.sagemaker.EndpointConfiguration
Provides a SageMaker endpoint configuration resource.
Example Usage
Basic usage. The examples below reference an existing SageMaker model resource named m; a minimal model definition is sketched after the examples.
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const ec = new aws.sagemaker.EndpointConfiguration("ec", {
name: "my-endpoint-config",
productionVariants: [{
variantName: "variant-1",
modelName: m.name,
initialInstanceCount: 1,
instanceType: "ml.t2.medium",
}],
tags: {
Name: "foo",
},
});
import pulumi
import pulumi_aws as aws
ec = aws.sagemaker.EndpointConfiguration("ec",
name="my-endpoint-config",
production_variants=[{
"variant_name": "variant-1",
"model_name": m["name"],
"initial_instance_count": 1,
"instance_type": "ml.t2.medium",
}],
tags={
"Name": "foo",
})
package main
import (
"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/sagemaker"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := sagemaker.NewEndpointConfiguration(ctx, "ec", &sagemaker.EndpointConfigurationArgs{
Name: pulumi.String("my-endpoint-config"),
ProductionVariants: sagemaker.EndpointConfigurationProductionVariantArray{
&sagemaker.EndpointConfigurationProductionVariantArgs{
VariantName: pulumi.String("variant-1"),
ModelName: pulumi.Any(m.Name),
InitialInstanceCount: pulumi.Int(1),
InstanceType: pulumi.String("ml.t2.medium"),
},
},
Tags: pulumi.StringMap{
"Name": pulumi.String("foo"),
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() =>
{
var ec = new Aws.Sagemaker.EndpointConfiguration("ec", new()
{
Name = "my-endpoint-config",
ProductionVariants = new[]
{
new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantArgs
{
VariantName = "variant-1",
ModelName = m.Name,
InitialInstanceCount = 1,
InstanceType = "ml.t2.medium",
},
},
Tags =
{
{ "Name", "foo" },
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.sagemaker.EndpointConfiguration;
import com.pulumi.aws.sagemaker.EndpointConfigurationArgs;
import com.pulumi.aws.sagemaker.inputs.EndpointConfigurationProductionVariantArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var ec = new EndpointConfiguration("ec", EndpointConfigurationArgs.builder()
.name("my-endpoint-config")
.productionVariants(EndpointConfigurationProductionVariantArgs.builder()
.variantName("variant-1")
.modelName(m.name())
.initialInstanceCount(1)
.instanceType("ml.t2.medium")
.build())
.tags(Map.of("Name", "foo"))
.build());
}
}
resources:
ec:
type: aws:sagemaker:EndpointConfiguration
properties:
name: my-endpoint-config
productionVariants:
- variantName: variant-1
modelName: ${m.name}
initialInstanceCount: 1
instanceType: ml.t2.medium
tags:
Name: foo
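All of the examples above assume a SageMaker model resource referenced as m. A minimal Python sketch of such a model is shown below; the execution role ARN and container image are placeholder values, not values taken from this page.
import pulumi
import pulumi_aws as aws

# Hypothetical model referenced as `m` by the endpoint configuration examples.
# The role ARN and ECR image below are placeholders.
m = aws.sagemaker.Model("m",
    execution_role_arn="arn:aws:iam::123456789012:role/sagemaker-execution-role",
    primary_container={
        "image": "123456789012.dkr.ecr.us-east-1.amazonaws.com/my-inference-image:latest",
    })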
Create EndpointConfiguration Resource
Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.
Constructor syntax
new EndpointConfiguration(name: string, args: EndpointConfigurationArgs, opts?: CustomResourceOptions);
@overload
def EndpointConfiguration(resource_name: str,
args: EndpointConfigurationArgs,
opts: Optional[ResourceOptions] = None)
@overload
def EndpointConfiguration(resource_name: str,
opts: Optional[ResourceOptions] = None,
production_variants: Optional[Sequence[EndpointConfigurationProductionVariantArgs]] = None,
async_inference_config: Optional[EndpointConfigurationAsyncInferenceConfigArgs] = None,
data_capture_config: Optional[EndpointConfigurationDataCaptureConfigArgs] = None,
kms_key_arn: Optional[str] = None,
name: Optional[str] = None,
name_prefix: Optional[str] = None,
shadow_production_variants: Optional[Sequence[EndpointConfigurationShadowProductionVariantArgs]] = None,
tags: Optional[Mapping[str, str]] = None)
func NewEndpointConfiguration(ctx *Context, name string, args EndpointConfigurationArgs, opts ...ResourceOption) (*EndpointConfiguration, error)
public EndpointConfiguration(string name, EndpointConfigurationArgs args, CustomResourceOptions? opts = null)
public EndpointConfiguration(String name, EndpointConfigurationArgs args)
public EndpointConfiguration(String name, EndpointConfigurationArgs args, CustomResourceOptions options)
type: aws:sagemaker:EndpointConfiguration
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
Parameters
- name string
- The unique name of the resource.
- args EndpointConfigurationArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args EndpointConfigurationArgs
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args EndpointConfigurationArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args EndpointConfigurationArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args EndpointConfigurationArgs
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
Constructor example
The following reference example uses placeholder values for all input properties.
var endpointConfigurationResource = new Aws.Sagemaker.EndpointConfiguration("endpointConfigurationResource", new()
{
ProductionVariants = new[]
{
new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantArgs
{
ModelName = "string",
InitialVariantWeight = 0,
ManagedInstanceScaling = new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantManagedInstanceScalingArgs
{
MaxInstanceCount = 0,
MinInstanceCount = 0,
Status = "string",
},
EnableSsmAccess = false,
InferenceAmiVersion = "string",
InitialInstanceCount = 0,
AcceleratorType = "string",
InstanceType = "string",
CoreDumpConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantCoreDumpConfigArgs
{
DestinationS3Uri = "string",
KmsKeyId = "string",
},
ModelDataDownloadTimeoutInSeconds = 0,
ContainerStartupHealthCheckTimeoutInSeconds = 0,
RoutingConfigs = new[]
{
new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantRoutingConfigArgs
{
RoutingStrategy = "string",
},
},
ServerlessConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationProductionVariantServerlessConfigArgs
{
MaxConcurrency = 0,
MemorySizeInMb = 0,
ProvisionedConcurrency = 0,
},
VariantName = "string",
VolumeSizeInGb = 0,
},
},
AsyncInferenceConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigArgs
{
OutputConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigOutputConfigArgs
{
S3OutputPath = "string",
KmsKeyId = "string",
NotificationConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs
{
ErrorTopic = "string",
IncludeInferenceResponseIns = new[]
{
"string",
},
SuccessTopic = "string",
},
S3FailurePath = "string",
},
ClientConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationAsyncInferenceConfigClientConfigArgs
{
MaxConcurrentInvocationsPerInstance = 0,
},
},
DataCaptureConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationDataCaptureConfigArgs
{
CaptureOptions = new[]
{
new Aws.Sagemaker.Inputs.EndpointConfigurationDataCaptureConfigCaptureOptionArgs
{
CaptureMode = "string",
},
},
DestinationS3Uri = "string",
InitialSamplingPercentage = 0,
CaptureContentTypeHeader = new Aws.Sagemaker.Inputs.EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs
{
CsvContentTypes = new[]
{
"string",
},
JsonContentTypes = new[]
{
"string",
},
},
EnableCapture = false,
KmsKeyId = "string",
},
KmsKeyArn = "string",
Name = "string",
NamePrefix = "string",
ShadowProductionVariants = new[]
{
new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantArgs
{
ModelName = "string",
InitialVariantWeight = 0,
ManagedInstanceScaling = new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs
{
MaxInstanceCount = 0,
MinInstanceCount = 0,
Status = "string",
},
EnableSsmAccess = false,
InferenceAmiVersion = "string",
InitialInstanceCount = 0,
AcceleratorType = "string",
InstanceType = "string",
CoreDumpConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs
{
DestinationS3Uri = "string",
KmsKeyId = "string",
},
ModelDataDownloadTimeoutInSeconds = 0,
ContainerStartupHealthCheckTimeoutInSeconds = 0,
RoutingConfigs = new[]
{
new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantRoutingConfigArgs
{
RoutingStrategy = "string",
},
},
ServerlessConfig = new Aws.Sagemaker.Inputs.EndpointConfigurationShadowProductionVariantServerlessConfigArgs
{
MaxConcurrency = 0,
MemorySizeInMb = 0,
ProvisionedConcurrency = 0,
},
VariantName = "string",
VolumeSizeInGb = 0,
},
},
Tags =
{
{ "string", "string" },
},
});
example, err := sagemaker.NewEndpointConfiguration(ctx, "endpointConfigurationResource", &sagemaker.EndpointConfigurationArgs{
ProductionVariants: sagemaker.EndpointConfigurationProductionVariantArray{
&sagemaker.EndpointConfigurationProductionVariantArgs{
ModelName: pulumi.String("string"),
InitialVariantWeight: pulumi.Float64(0),
ManagedInstanceScaling: &sagemaker.EndpointConfigurationProductionVariantManagedInstanceScalingArgs{
MaxInstanceCount: pulumi.Int(0),
MinInstanceCount: pulumi.Int(0),
Status: pulumi.String("string"),
},
EnableSsmAccess: pulumi.Bool(false),
InferenceAmiVersion: pulumi.String("string"),
InitialInstanceCount: pulumi.Int(0),
AcceleratorType: pulumi.String("string"),
InstanceType: pulumi.String("string"),
CoreDumpConfig: &sagemaker.EndpointConfigurationProductionVariantCoreDumpConfigArgs{
DestinationS3Uri: pulumi.String("string"),
KmsKeyId: pulumi.String("string"),
},
ModelDataDownloadTimeoutInSeconds: pulumi.Int(0),
ContainerStartupHealthCheckTimeoutInSeconds: pulumi.Int(0),
RoutingConfigs: sagemaker.EndpointConfigurationProductionVariantRoutingConfigArray{
&sagemaker.EndpointConfigurationProductionVariantRoutingConfigArgs{
RoutingStrategy: pulumi.String("string"),
},
},
ServerlessConfig: &sagemaker.EndpointConfigurationProductionVariantServerlessConfigArgs{
MaxConcurrency: pulumi.Int(0),
MemorySizeInMb: pulumi.Int(0),
ProvisionedConcurrency: pulumi.Int(0),
},
VariantName: pulumi.String("string"),
VolumeSizeInGb: pulumi.Int(0),
},
},
AsyncInferenceConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigArgs{
OutputConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigOutputConfigArgs{
S3OutputPath: pulumi.String("string"),
KmsKeyId: pulumi.String("string"),
NotificationConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs{
ErrorTopic: pulumi.String("string"),
IncludeInferenceResponseIns: pulumi.StringArray{
pulumi.String("string"),
},
SuccessTopic: pulumi.String("string"),
},
S3FailurePath: pulumi.String("string"),
},
ClientConfig: &sagemaker.EndpointConfigurationAsyncInferenceConfigClientConfigArgs{
MaxConcurrentInvocationsPerInstance: pulumi.Int(0),
},
},
DataCaptureConfig: &sagemaker.EndpointConfigurationDataCaptureConfigArgs{
CaptureOptions: sagemaker.EndpointConfigurationDataCaptureConfigCaptureOptionArray{
&sagemaker.EndpointConfigurationDataCaptureConfigCaptureOptionArgs{
CaptureMode: pulumi.String("string"),
},
},
DestinationS3Uri: pulumi.String("string"),
InitialSamplingPercentage: pulumi.Int(0),
CaptureContentTypeHeader: &sagemaker.EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs{
CsvContentTypes: pulumi.StringArray{
pulumi.String("string"),
},
JsonContentTypes: pulumi.StringArray{
pulumi.String("string"),
},
},
EnableCapture: pulumi.Bool(false),
KmsKeyId: pulumi.String("string"),
},
KmsKeyArn: pulumi.String("string"),
Name: pulumi.String("string"),
NamePrefix: pulumi.String("string"),
ShadowProductionVariants: sagemaker.EndpointConfigurationShadowProductionVariantArray{
&sagemaker.EndpointConfigurationShadowProductionVariantArgs{
ModelName: pulumi.String("string"),
InitialVariantWeight: pulumi.Float64(0),
ManagedInstanceScaling: &sagemaker.EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs{
MaxInstanceCount: pulumi.Int(0),
MinInstanceCount: pulumi.Int(0),
Status: pulumi.String("string"),
},
EnableSsmAccess: pulumi.Bool(false),
InferenceAmiVersion: pulumi.String("string"),
InitialInstanceCount: pulumi.Int(0),
AcceleratorType: pulumi.String("string"),
InstanceType: pulumi.String("string"),
CoreDumpConfig: &sagemaker.EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs{
DestinationS3Uri: pulumi.String("string"),
KmsKeyId: pulumi.String("string"),
},
ModelDataDownloadTimeoutInSeconds: pulumi.Int(0),
ContainerStartupHealthCheckTimeoutInSeconds: pulumi.Int(0),
RoutingConfigs: sagemaker.EndpointConfigurationShadowProductionVariantRoutingConfigArray{
&sagemaker.EndpointConfigurationShadowProductionVariantRoutingConfigArgs{
RoutingStrategy: pulumi.String("string"),
},
},
ServerlessConfig: &sagemaker.EndpointConfigurationShadowProductionVariantServerlessConfigArgs{
MaxConcurrency: pulumi.Int(0),
MemorySizeInMb: pulumi.Int(0),
ProvisionedConcurrency: pulumi.Int(0),
},
VariantName: pulumi.String("string"),
VolumeSizeInGb: pulumi.Int(0),
},
},
Tags: pulumi.StringMap{
"string": pulumi.String("string"),
},
})
var endpointConfigurationResource = new EndpointConfiguration("endpointConfigurationResource", EndpointConfigurationArgs.builder()
.productionVariants(EndpointConfigurationProductionVariantArgs.builder()
.modelName("string")
.initialVariantWeight(0)
.managedInstanceScaling(EndpointConfigurationProductionVariantManagedInstanceScalingArgs.builder()
.maxInstanceCount(0)
.minInstanceCount(0)
.status("string")
.build())
.enableSsmAccess(false)
.inferenceAmiVersion("string")
.initialInstanceCount(0)
.acceleratorType("string")
.instanceType("string")
.coreDumpConfig(EndpointConfigurationProductionVariantCoreDumpConfigArgs.builder()
.destinationS3Uri("string")
.kmsKeyId("string")
.build())
.modelDataDownloadTimeoutInSeconds(0)
.containerStartupHealthCheckTimeoutInSeconds(0)
.routingConfigs(EndpointConfigurationProductionVariantRoutingConfigArgs.builder()
.routingStrategy("string")
.build())
.serverlessConfig(EndpointConfigurationProductionVariantServerlessConfigArgs.builder()
.maxConcurrency(0)
.memorySizeInMb(0)
.provisionedConcurrency(0)
.build())
.variantName("string")
.volumeSizeInGb(0)
.build())
.asyncInferenceConfig(EndpointConfigurationAsyncInferenceConfigArgs.builder()
.outputConfig(EndpointConfigurationAsyncInferenceConfigOutputConfigArgs.builder()
.s3OutputPath("string")
.kmsKeyId("string")
.notificationConfig(EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs.builder()
.errorTopic("string")
.includeInferenceResponseIns("string")
.successTopic("string")
.build())
.s3FailurePath("string")
.build())
.clientConfig(EndpointConfigurationAsyncInferenceConfigClientConfigArgs.builder()
.maxConcurrentInvocationsPerInstance(0)
.build())
.build())
.dataCaptureConfig(EndpointConfigurationDataCaptureConfigArgs.builder()
.captureOptions(EndpointConfigurationDataCaptureConfigCaptureOptionArgs.builder()
.captureMode("string")
.build())
.destinationS3Uri("string")
.initialSamplingPercentage(0)
.captureContentTypeHeader(EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs.builder()
.csvContentTypes("string")
.jsonContentTypes("string")
.build())
.enableCapture(false)
.kmsKeyId("string")
.build())
.kmsKeyArn("string")
.name("string")
.namePrefix("string")
.shadowProductionVariants(EndpointConfigurationShadowProductionVariantArgs.builder()
.modelName("string")
.initialVariantWeight(0)
.managedInstanceScaling(EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs.builder()
.maxInstanceCount(0)
.minInstanceCount(0)
.status("string")
.build())
.enableSsmAccess(false)
.inferenceAmiVersion("string")
.initialInstanceCount(0)
.acceleratorType("string")
.instanceType("string")
.coreDumpConfig(EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs.builder()
.destinationS3Uri("string")
.kmsKeyId("string")
.build())
.modelDataDownloadTimeoutInSeconds(0)
.containerStartupHealthCheckTimeoutInSeconds(0)
.routingConfigs(EndpointConfigurationShadowProductionVariantRoutingConfigArgs.builder()
.routingStrategy("string")
.build())
.serverlessConfig(EndpointConfigurationShadowProductionVariantServerlessConfigArgs.builder()
.maxConcurrency(0)
.memorySizeInMb(0)
.provisionedConcurrency(0)
.build())
.variantName("string")
.volumeSizeInGb(0)
.build())
.tags(Map.of("string", "string"))
.build());
endpoint_configuration_resource = aws.sagemaker.EndpointConfiguration("endpointConfigurationResource",
production_variants=[{
"model_name": "string",
"initial_variant_weight": 0,
"managed_instance_scaling": {
"max_instance_count": 0,
"min_instance_count": 0,
"status": "string",
},
"enable_ssm_access": False,
"inference_ami_version": "string",
"initial_instance_count": 0,
"accelerator_type": "string",
"instance_type": "string",
"core_dump_config": {
"destination_s3_uri": "string",
"kms_key_id": "string",
},
"model_data_download_timeout_in_seconds": 0,
"container_startup_health_check_timeout_in_seconds": 0,
"routing_configs": [{
"routing_strategy": "string",
}],
"serverless_config": {
"max_concurrency": 0,
"memory_size_in_mb": 0,
"provisioned_concurrency": 0,
},
"variant_name": "string",
"volume_size_in_gb": 0,
}],
async_inference_config={
"output_config": {
"s3_output_path": "string",
"kms_key_id": "string",
"notification_config": {
"error_topic": "string",
"include_inference_response_ins": ["string"],
"success_topic": "string",
},
"s3_failure_path": "string",
},
"client_config": {
"max_concurrent_invocations_per_instance": 0,
},
},
data_capture_config={
"capture_options": [{
"capture_mode": "string",
}],
"destination_s3_uri": "string",
"initial_sampling_percentage": 0,
"capture_content_type_header": {
"csv_content_types": ["string"],
"json_content_types": ["string"],
},
"enable_capture": False,
"kms_key_id": "string",
},
kms_key_arn="string",
name="string",
name_prefix="string",
shadow_production_variants=[{
"model_name": "string",
"initial_variant_weight": 0,
"managed_instance_scaling": {
"max_instance_count": 0,
"min_instance_count": 0,
"status": "string",
},
"enable_ssm_access": False,
"inference_ami_version": "string",
"initial_instance_count": 0,
"accelerator_type": "string",
"instance_type": "string",
"core_dump_config": {
"destination_s3_uri": "string",
"kms_key_id": "string",
},
"model_data_download_timeout_in_seconds": 0,
"container_startup_health_check_timeout_in_seconds": 0,
"routing_configs": [{
"routing_strategy": "string",
}],
"serverless_config": {
"max_concurrency": 0,
"memory_size_in_mb": 0,
"provisioned_concurrency": 0,
},
"variant_name": "string",
"volume_size_in_gb": 0,
}],
tags={
"string": "string",
})
const endpointConfigurationResource = new aws.sagemaker.EndpointConfiguration("endpointConfigurationResource", {
productionVariants: [{
modelName: "string",
initialVariantWeight: 0,
managedInstanceScaling: {
maxInstanceCount: 0,
minInstanceCount: 0,
status: "string",
},
enableSsmAccess: false,
inferenceAmiVersion: "string",
initialInstanceCount: 0,
acceleratorType: "string",
instanceType: "string",
coreDumpConfig: {
destinationS3Uri: "string",
kmsKeyId: "string",
},
modelDataDownloadTimeoutInSeconds: 0,
containerStartupHealthCheckTimeoutInSeconds: 0,
routingConfigs: [{
routingStrategy: "string",
}],
serverlessConfig: {
maxConcurrency: 0,
memorySizeInMb: 0,
provisionedConcurrency: 0,
},
variantName: "string",
volumeSizeInGb: 0,
}],
asyncInferenceConfig: {
outputConfig: {
s3OutputPath: "string",
kmsKeyId: "string",
notificationConfig: {
errorTopic: "string",
includeInferenceResponseIns: ["string"],
successTopic: "string",
},
s3FailurePath: "string",
},
clientConfig: {
maxConcurrentInvocationsPerInstance: 0,
},
},
dataCaptureConfig: {
captureOptions: [{
captureMode: "string",
}],
destinationS3Uri: "string",
initialSamplingPercentage: 0,
captureContentTypeHeader: {
csvContentTypes: ["string"],
jsonContentTypes: ["string"],
},
enableCapture: false,
kmsKeyId: "string",
},
kmsKeyArn: "string",
name: "string",
namePrefix: "string",
shadowProductionVariants: [{
modelName: "string",
initialVariantWeight: 0,
managedInstanceScaling: {
maxInstanceCount: 0,
minInstanceCount: 0,
status: "string",
},
enableSsmAccess: false,
inferenceAmiVersion: "string",
initialInstanceCount: 0,
acceleratorType: "string",
instanceType: "string",
coreDumpConfig: {
destinationS3Uri: "string",
kmsKeyId: "string",
},
modelDataDownloadTimeoutInSeconds: 0,
containerStartupHealthCheckTimeoutInSeconds: 0,
routingConfigs: [{
routingStrategy: "string",
}],
serverlessConfig: {
maxConcurrency: 0,
memorySizeInMb: 0,
provisionedConcurrency: 0,
},
variantName: "string",
volumeSizeInGb: 0,
}],
tags: {
string: "string",
},
});
type: aws:sagemaker:EndpointConfiguration
properties:
asyncInferenceConfig:
clientConfig:
maxConcurrentInvocationsPerInstance: 0
outputConfig:
kmsKeyId: string
notificationConfig:
errorTopic: string
includeInferenceResponseIns:
- string
successTopic: string
s3FailurePath: string
s3OutputPath: string
dataCaptureConfig:
captureContentTypeHeader:
csvContentTypes:
- string
jsonContentTypes:
- string
captureOptions:
- captureMode: string
destinationS3Uri: string
enableCapture: false
initialSamplingPercentage: 0
kmsKeyId: string
kmsKeyArn: string
name: string
namePrefix: string
productionVariants:
- acceleratorType: string
containerStartupHealthCheckTimeoutInSeconds: 0
coreDumpConfig:
destinationS3Uri: string
kmsKeyId: string
enableSsmAccess: false
inferenceAmiVersion: string
initialInstanceCount: 0
initialVariantWeight: 0
instanceType: string
managedInstanceScaling:
maxInstanceCount: 0
minInstanceCount: 0
status: string
modelDataDownloadTimeoutInSeconds: 0
modelName: string
routingConfigs:
- routingStrategy: string
serverlessConfig:
maxConcurrency: 0
memorySizeInMb: 0
provisionedConcurrency: 0
variantName: string
volumeSizeInGb: 0
shadowProductionVariants:
- acceleratorType: string
containerStartupHealthCheckTimeoutInSeconds: 0
coreDumpConfig:
destinationS3Uri: string
kmsKeyId: string
enableSsmAccess: false
inferenceAmiVersion: string
initialInstanceCount: 0
initialVariantWeight: 0
instanceType: string
managedInstanceScaling:
maxInstanceCount: 0
minInstanceCount: 0
status: string
modelDataDownloadTimeoutInSeconds: 0
modelName: string
routingConfigs:
- routingStrategy: string
serverlessConfig:
maxConcurrency: 0
memorySizeInMb: 0
provisionedConcurrency: 0
variantName: string
volumeSizeInGb: 0
tags:
string: string
EndpointConfiguration Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.
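For example, the production variant from the basic usage example can be passed in either form. This sketch assumes the model resource m from the earlier example.
import pulumi_aws as aws

# Typed args class form.
ec_typed = aws.sagemaker.EndpointConfiguration("ec-typed",
    production_variants=[aws.sagemaker.EndpointConfigurationProductionVariantArgs(
        variant_name="variant-1",
        model_name=m.name,
        initial_instance_count=1,
        instance_type="ml.t2.medium",
    )])

# Equivalent dictionary-literal form with snake_case keys.
ec_dict = aws.sagemaker.EndpointConfiguration("ec-dict",
    production_variants=[{
        "variant_name": "variant-1",
        "model_name": m.name,
        "initial_instance_count": 1,
        "instance_type": "ml.t2.medium",
    }])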
The EndpointConfiguration resource accepts the following input properties:
C#
- ProductionVariants List<EndpointConfigurationProductionVariant> - A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfig - Specifies configuration for how an endpoint performs asynchronous inference.
- DataCaptureConfig EndpointConfigurationDataCaptureConfig - Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- KmsKeyArn string - Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- Name string - The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- NamePrefix string - Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- ShadowProductionVariants List<EndpointConfigurationShadowProductionVariant> - Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- Tags Dictionary<string, string> - A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
Go
- ProductionVariants []EndpointConfigurationProductionVariantArgs - A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfigArgs - Specifies configuration for how an endpoint performs asynchronous inference.
- DataCaptureConfig EndpointConfigurationDataCaptureConfigArgs - Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- KmsKeyArn string - Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- Name string - The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- NamePrefix string - Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- ShadowProductionVariants []EndpointConfigurationShadowProductionVariantArgs - Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- Tags map[string]string - A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
Java
- productionVariants List<EndpointConfigurationProductionVariant> - A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig - Specifies configuration for how an endpoint performs asynchronous inference.
- dataCaptureConfig EndpointConfigurationDataCaptureConfig - Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- kmsKeyArn String - Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- name String - The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- namePrefix String - Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- shadowProductionVariants List<EndpointConfigurationShadowProductionVariant> - Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- tags Map<String,String> - A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
TypeScript
- productionVariants EndpointConfigurationProductionVariant[] - A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig - Specifies configuration for how an endpoint performs asynchronous inference.
- dataCaptureConfig EndpointConfigurationDataCaptureConfig - Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- kmsKeyArn string - Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- name string - The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- namePrefix string - Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- shadowProductionVariants EndpointConfigurationShadowProductionVariant[] - Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- tags {[key: string]: string} - A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
Python
- production_variants Sequence[EndpointConfigurationProductionVariantArgs] - A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- async_inference_config EndpointConfigurationAsyncInferenceConfigArgs - Specifies configuration for how an endpoint performs asynchronous inference.
- data_capture_config EndpointConfigurationDataCaptureConfigArgs - Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- kms_key_arn str - Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- name str - The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- name_prefix str - Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- shadow_production_variants Sequence[EndpointConfigurationShadowProductionVariantArgs] - Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- tags Mapping[str, str] - A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
YAML
- productionVariants List<Property Map> - A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- asyncInferenceConfig Property Map - Specifies configuration for how an endpoint performs asynchronous inference.
- dataCaptureConfig Property Map - Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- kmsKeyArn String - Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- name String - The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- namePrefix String - Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- shadowProductionVariants List<Property Map> - Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- tags Map<String> - A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
Outputs
All input properties are implicitly available as output properties. Additionally, the EndpointConfiguration resource produces the following output properties:
- Arn string - The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
- Id string - The provider-assigned unique ID for this managed resource.
- TagsAll Dictionary<string, string> - A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
Look up Existing EndpointConfiguration Resource
Get an existing EndpointConfiguration resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: EndpointConfigurationState, opts?: CustomResourceOptions): EndpointConfiguration
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
arn: Optional[str] = None,
async_inference_config: Optional[EndpointConfigurationAsyncInferenceConfigArgs] = None,
data_capture_config: Optional[EndpointConfigurationDataCaptureConfigArgs] = None,
kms_key_arn: Optional[str] = None,
name: Optional[str] = None,
name_prefix: Optional[str] = None,
production_variants: Optional[Sequence[EndpointConfigurationProductionVariantArgs]] = None,
shadow_production_variants: Optional[Sequence[EndpointConfigurationShadowProductionVariantArgs]] = None,
tags: Optional[Mapping[str, str]] = None,
tags_all: Optional[Mapping[str, str]] = None) -> EndpointConfiguration
func GetEndpointConfiguration(ctx *Context, name string, id IDInput, state *EndpointConfigurationState, opts ...ResourceOption) (*EndpointConfiguration, error)
public static EndpointConfiguration Get(string name, Input<string> id, EndpointConfigurationState? state, CustomResourceOptions? opts = null)
public static EndpointConfiguration get(String name, Output<String> id, EndpointConfigurationState state, CustomResourceOptions options)
Resource lookup is not supported in YAML
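As a sketch, looking up an existing endpoint configuration in Python and exporting its ARN might look like the following; "my-endpoint-config" is a placeholder ID (for this resource the ID is the endpoint configuration name).
import pulumi
import pulumi_aws as aws

# Look up an existing endpoint configuration by ID (placeholder value here)
# and export one of its output properties.
existing = aws.sagemaker.EndpointConfiguration.get("existing-ec", "my-endpoint-config")
pulumi.export("existingEndpointConfigArn", existing.arn)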
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
C#
- Arn string - The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
- AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfig - Specifies configuration for how an endpoint performs asynchronous inference.
- DataCaptureConfig EndpointConfigurationDataCaptureConfig - Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- KmsKeyArn string - Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- Name string - The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- NamePrefix string - Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- ProductionVariants List<EndpointConfigurationProductionVariant> - A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- ShadowProductionVariants List<EndpointConfigurationShadowProductionVariant> - Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- Tags Dictionary<string, string> - A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- TagsAll Dictionary<string, string> - A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
Go
- Arn string - The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
- AsyncInferenceConfig EndpointConfigurationAsyncInferenceConfigArgs - Specifies configuration for how an endpoint performs asynchronous inference.
- DataCaptureConfig EndpointConfigurationDataCaptureConfigArgs - Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- KmsKeyArn string - Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- Name string - The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- NamePrefix string - Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- ProductionVariants []EndpointConfigurationProductionVariantArgs - A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- ShadowProductionVariants []EndpointConfigurationShadowProductionVariantArgs - Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- Tags map[string]string - A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- TagsAll map[string]string - A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
Java
- arn String - The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
- asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig - Specifies configuration for how an endpoint performs asynchronous inference.
- dataCaptureConfig EndpointConfigurationDataCaptureConfig - Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- kmsKeyArn String - Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- name String - The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- namePrefix String - Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- productionVariants List<EndpointConfigurationProductionVariant> - A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- shadowProductionVariants List<EndpointConfigurationShadowProductionVariant> - Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- tags Map<String,String> - A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- tagsAll Map<String,String> - A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
TypeScript
- arn string - The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
- asyncInferenceConfig EndpointConfigurationAsyncInferenceConfig - Specifies configuration for how an endpoint performs asynchronous inference.
- dataCaptureConfig EndpointConfigurationDataCaptureConfig - Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- kmsKeyArn string - Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- name string - The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- namePrefix string - Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- productionVariants EndpointConfigurationProductionVariant[] - A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- shadowProductionVariants EndpointConfigurationShadowProductionVariant[] - Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- tags {[key: string]: string} - A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- tagsAll {[key: string]: string} - A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
Python
- arn str - The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
- async_inference_config EndpointConfigurationAsyncInferenceConfigArgs - Specifies configuration for how an endpoint performs asynchronous inference.
- data_capture_config EndpointConfigurationDataCaptureConfigArgs - Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- kms_key_arn str - Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- name str - The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- name_prefix str - Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- production_variants Sequence[EndpointConfigurationProductionVariantArgs] - A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- shadow_production_variants Sequence[EndpointConfigurationShadowProductionVariantArgs] - Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- tags Mapping[str, str] - A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- tags_all Mapping[str, str] - A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
YAML
- arn String - The Amazon Resource Name (ARN) assigned by AWS to this endpoint configuration.
- asyncInferenceConfig Property Map - Specifies configuration for how an endpoint performs asynchronous inference.
- dataCaptureConfig Property Map - Specifies the parameters to capture input/output of SageMaker models endpoints. Fields are documented below.
- kmsKeyArn String - Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt data on the storage volume attached to the ML compute instance that hosts the endpoint.
- name String - The name of the endpoint configuration. If omitted, this provider will assign a random, unique name. Conflicts with name_prefix.
- namePrefix String - Creates a unique endpoint configuration name beginning with the specified prefix. Conflicts with name.
- productionVariants List<Property Map> - A list of ProductionVariant objects, one for each model that you want to host at this endpoint. Fields are documented below.
- shadowProductionVariants List<Property Map> - Array of ProductionVariant objects. There is one for each model that you want to host at this endpoint in shadow mode with production traffic replicated from the model specified on ProductionVariants. If you use this field, you can only specify one variant for ProductionVariants and one variant for ShadowProductionVariants. Fields are documented below.
- tags Map<String> - A mapping of tags to assign to the resource. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- tagsAll Map<String> - A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
Supporting Types
EndpointConfigurationAsyncInferenceConfig, EndpointConfigurationAsyncInferenceConfigArgs
C#
- OutputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig - Specifies the configuration for asynchronous inference invocation outputs.
- ClientConfig EndpointConfigurationAsyncInferenceConfigClientConfig - Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.
Go
- OutputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig - Specifies the configuration for asynchronous inference invocation outputs.
- ClientConfig EndpointConfigurationAsyncInferenceConfigClientConfig - Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.
Java
- outputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig - Specifies the configuration for asynchronous inference invocation outputs.
- clientConfig EndpointConfigurationAsyncInferenceConfigClientConfig - Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.
TypeScript
- outputConfig EndpointConfigurationAsyncInferenceConfigOutputConfig - Specifies the configuration for asynchronous inference invocation outputs.
- clientConfig EndpointConfigurationAsyncInferenceConfigClientConfig - Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.
Python
- output_config EndpointConfigurationAsyncInferenceConfigOutputConfig - Specifies the configuration for asynchronous inference invocation outputs.
- client_config EndpointConfigurationAsyncInferenceConfigClientConfig - Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.
YAML
- outputConfig Property Map - Specifies the configuration for asynchronous inference invocation outputs.
- clientConfig Property Map - Configures the behavior of the client used by Amazon SageMaker to interact with the model container during asynchronous inference.
EndpointConfigurationAsyncInferenceConfigClientConfig, EndpointConfigurationAsyncInferenceConfigClientConfigArgs
C#
- MaxConcurrentInvocationsPerInstance int - The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.
Go
- MaxConcurrentInvocationsPerInstance int - The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.
Java
- maxConcurrentInvocationsPerInstance Integer - The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.
TypeScript
- maxConcurrentInvocationsPerInstance number - The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.
Python
- max_concurrent_invocations_per_instance int - The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.
YAML
- maxConcurrentInvocationsPerInstance Number - The maximum number of concurrent requests sent by the SageMaker client to the model container. If no value is provided, Amazon SageMaker will choose an optimal value for you.
EndpointConfigurationAsyncInferenceConfigOutputConfig, EndpointConfigurationAsyncInferenceConfigOutputConfigArgs
C#
- S3OutputPath string - The Amazon S3 location to upload inference responses to.
- KmsKeyId string - The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.
- NotificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig - Specifies the configuration for notifications of inference results for asynchronous inference.
- S3FailurePath string - The Amazon S3 location to upload failure inference responses to.
Go
- S3OutputPath string - The Amazon S3 location to upload inference responses to.
- KmsKeyId string - The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.
- NotificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig - Specifies the configuration for notifications of inference results for asynchronous inference.
- S3FailurePath string - The Amazon S3 location to upload failure inference responses to.
Java
- s3OutputPath String - The Amazon S3 location to upload inference responses to.
- kmsKeyId String - The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.
- notificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig - Specifies the configuration for notifications of inference results for asynchronous inference.
- s3FailurePath String - The Amazon S3 location to upload failure inference responses to.
TypeScript
- s3OutputPath string - The Amazon S3 location to upload inference responses to.
- kmsKeyId string - The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.
- notificationConfig EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig - Specifies the configuration for notifications of inference results for asynchronous inference.
- s3FailurePath string - The Amazon S3 location to upload failure inference responses to.
Python
- s3_output_path str - The Amazon S3 location to upload inference responses to.
- kms_key_id str - The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.
- notification_config EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig - Specifies the configuration for notifications of inference results for asynchronous inference.
- s3_failure_path str - The Amazon S3 location to upload failure inference responses to.
YAML
- s3OutputPath String - The Amazon S3 location to upload inference responses to.
- kmsKeyId String - The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that Amazon SageMaker uses to encrypt the asynchronous inference output in Amazon S3.
- notificationConfig Property Map - Specifies the configuration for notifications of inference results for asynchronous inference.
- s3FailurePath String - The Amazon S3 location to upload failure inference responses to.
EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfig, EndpointConfigurationAsyncInferenceConfigOutputConfigNotificationConfigArgs
- ErrorTopic string - Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
- IncludeInferenceResponseIns List<string> - The Amazon SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
- SuccessTopic string - Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
- ErrorTopic string - Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
- IncludeInferenceResponseIns []string - The Amazon SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
- SuccessTopic string - Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
- errorTopic String - Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
- includeInferenceResponseIns List<String> - The Amazon SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
- successTopic String - Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
- errorTopic string - Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
- includeInferenceResponseIns string[] - The Amazon SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
- successTopic string - Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
- error_topic str - Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
- include_inference_response_ins Sequence[str] - The Amazon SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
- success_topic str - Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
- errorTopic String - Amazon SNS topic to post a notification to when inference fails. If no topic is provided, no notification is sent on failure.
- includeInferenceResponseIns List<String> - The Amazon SNS topics where you want the inference response to be included. Valid values are SUCCESS_NOTIFICATION_TOPIC and ERROR_NOTIFICATION_TOPIC.
- successTopic String - Amazon SNS topic to post a notification to when inference completes successfully. If no topic is provided, no notification is sent on success.
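As an illustration of how these notification fields fit together, the following TypeScript sketch wires an asynchronous inference output configuration to SNS topics. The bucket and topic resources are hypothetical, and m refers to an existing aws.sagemaker.Model as in the basic example at the top of this page.
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";

// Hypothetical supporting resources for this sketch.
const outputBucket = new aws.s3.Bucket("async-output");
const successTopic = new aws.sns.Topic("inference-success");
const errorTopic = new aws.sns.Topic("inference-error");

const asyncEc = new aws.sagemaker.EndpointConfiguration("async-ec", {
    productionVariants: [{
        variantName: "variant-1",
        modelName: m.name, // existing aws.sagemaker.Model, as in the basic example
        initialInstanceCount: 1,
        instanceType: "ml.t2.medium",
    }],
    asyncInferenceConfig: {
        outputConfig: {
            s3OutputPath: pulumi.interpolate`s3://${outputBucket.bucket}/responses/`,
            s3FailurePath: pulumi.interpolate`s3://${outputBucket.bucket}/failures/`,
            notificationConfig: {
                successTopic: successTopic.arn,
                errorTopic: errorTopic.arn,
                includeInferenceResponseIns: ["SUCCESS_NOTIFICATION_TOPIC"],
            },
        },
    },
});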
EndpointConfigurationDataCaptureConfig, EndpointConfigurationDataCaptureConfigArgs
- CaptureOptions List<EndpointConfigurationDataCaptureConfigCaptureOption> - Specifies what data to capture. Fields are documented below.
- DestinationS3Uri string - The URL for the S3 location where the captured data is stored.
- InitialSamplingPercentage int - Portion of data to capture. Should be between 0 and 100.
- CaptureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader - The content type headers to capture. Fields are documented below.
- EnableCapture bool - Flag to enable data capture. Defaults to false.
- KmsKeyId string - Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt the captured data on Amazon S3.
- CaptureOptions []EndpointConfigurationDataCaptureConfigCaptureOption - Specifies what data to capture. Fields are documented below.
- DestinationS3Uri string - The URL for the S3 location where the captured data is stored.
- InitialSamplingPercentage int - Portion of data to capture. Should be between 0 and 100.
- CaptureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader - The content type headers to capture. Fields are documented below.
- EnableCapture bool - Flag to enable data capture. Defaults to false.
- KmsKeyId string - Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt the captured data on Amazon S3.
- captureOptions List<EndpointConfigurationDataCaptureConfigCaptureOption> - Specifies what data to capture. Fields are documented below.
- destinationS3Uri String - The URL for the S3 location where the captured data is stored.
- initialSamplingPercentage Integer - Portion of data to capture. Should be between 0 and 100.
- captureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader - The content type headers to capture. Fields are documented below.
- enableCapture Boolean - Flag to enable data capture. Defaults to false.
- kmsKeyId String - Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt the captured data on Amazon S3.
- captureOptions EndpointConfigurationDataCaptureConfigCaptureOption[] - Specifies what data to capture. Fields are documented below.
- destinationS3Uri string - The URL for the S3 location where the captured data is stored.
- initialSamplingPercentage number - Portion of data to capture. Should be between 0 and 100.
- captureContentTypeHeader EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader - The content type headers to capture. Fields are documented below.
- enableCapture boolean - Flag to enable data capture. Defaults to false.
- kmsKeyId string - Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt the captured data on Amazon S3.
- capture_options Sequence[EndpointConfigurationDataCaptureConfigCaptureOption] - Specifies what data to capture. Fields are documented below.
- destination_s3_uri str - The URL for the S3 location where the captured data is stored.
- initial_sampling_percentage int - Portion of data to capture. Should be between 0 and 100.
- capture_content_type_header EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader - The content type headers to capture. Fields are documented below.
- enable_capture bool - Flag to enable data capture. Defaults to false.
- kms_key_id str - Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt the captured data on Amazon S3.
- captureOptions List<Property Map> - Specifies what data to capture. Fields are documented below.
- destinationS3Uri String - The URL for the S3 location where the captured data is stored.
- initialSamplingPercentage Number - Portion of data to capture. Should be between 0 and 100.
- captureContentTypeHeader Property Map - The content type headers to capture. Fields are documented below.
- enableCapture Boolean - Flag to enable data capture. Defaults to false.
- kmsKeyId String - Amazon Resource Name (ARN) of an AWS Key Management Service key that Amazon SageMaker uses to encrypt the captured data on Amazon S3.
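A minimal TypeScript sketch of a data capture configuration follows; the destination bucket URI is illustrative, m refers to an existing aws.sagemaker.Model, and the captureOptions and captureContentTypeHeader fields are documented below.
import * as aws from "@pulumi/aws";

const captureEc = new aws.sagemaker.EndpointConfiguration("capture-ec", {
    productionVariants: [{
        variantName: "variant-1",
        modelName: m.name, // existing aws.sagemaker.Model
        initialInstanceCount: 1,
        instanceType: "ml.t2.medium",
    }],
    dataCaptureConfig: {
        enableCapture: true,
        initialSamplingPercentage: 50,
        destinationS3Uri: "s3://my-data-capture-bucket/capture/", // illustrative bucket
        captureOptions: [
            { captureMode: "Input" },
            { captureMode: "Output" },
        ],
        captureContentTypeHeader: {
            jsonContentTypes: ["application/json"],
        },
    },
});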
EndpointConfigurationDataCaptureConfigCaptureContentTypeHeader, EndpointConfigurationDataCaptureConfigCaptureContentTypeHeaderArgs
- CsvContentTypes List<string> - The CSV content type headers to capture.
- JsonContentTypes List<string> - The JSON content type headers to capture.
- CsvContentTypes []string - The CSV content type headers to capture.
- JsonContentTypes []string - The JSON content type headers to capture.
- csvContentTypes List<String> - The CSV content type headers to capture.
- jsonContentTypes List<String> - The JSON content type headers to capture.
- csvContentTypes string[] - The CSV content type headers to capture.
- jsonContentTypes string[] - The JSON content type headers to capture.
- csv_content_types Sequence[str] - The CSV content type headers to capture.
- json_content_types Sequence[str] - The JSON content type headers to capture.
- csvContentTypes List<String> - The CSV content type headers to capture.
- jsonContentTypes List<String> - The JSON content type headers to capture.
EndpointConfigurationDataCaptureConfigCaptureOption, EndpointConfigurationDataCaptureConfigCaptureOptionArgs
- CaptureMode string - Specifies the data to be captured. Should be one of Input, Output or InputAndOutput.
- CaptureMode string - Specifies the data to be captured. Should be one of Input, Output or InputAndOutput.
- captureMode String - Specifies the data to be captured. Should be one of Input, Output or InputAndOutput.
- captureMode string - Specifies the data to be captured. Should be one of Input, Output or InputAndOutput.
- capture_mode str - Specifies the data to be captured. Should be one of Input, Output or InputAndOutput.
- captureMode String - Specifies the data to be captured. Should be one of Input, Output or InputAndOutput.
EndpointConfigurationProductionVariant, EndpointConfigurationProductionVariantArgs
- ModelName string - The name of the model to use.
- AcceleratorType string - The size of the Elastic Inference (EI) instance to use for the production variant.
- ContainerStartupHealthCheckTimeoutInSeconds int - The timeout value, in seconds, for your inference container to pass a health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- CoreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig - Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- EnableSsmAccess bool - You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- InferenceAmiVersion string - Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- InitialInstanceCount int - Initial number of instances used for auto-scaling.
- InitialVariantWeight double - Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- InstanceType string - The type of instance to start.
- ManagedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling - Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- ModelDataDownloadTimeoutInSeconds int - The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- RoutingConfigs List<EndpointConfigurationProductionVariantRoutingConfig> - Sets how the endpoint routes incoming traffic. See routing_config below.
- ServerlessConfig EndpointConfigurationProductionVariantServerlessConfig - Specifies configuration for how an endpoint performs asynchronous inference.
- VariantName string - The name of the variant. If omitted, this provider will assign a random, unique name.
- VolumeSizeInGb int - The size, in GB, of the ML storage volume attached to the individual inference instance associated with the production variant. Valid values are between 1 and 512.
- ModelName string - The name of the model to use.
- AcceleratorType string - The size of the Elastic Inference (EI) instance to use for the production variant.
- ContainerStartupHealthCheckTimeoutInSeconds int - The timeout value, in seconds, for your inference container to pass a health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- CoreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig - Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- EnableSsmAccess bool - You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- InferenceAmiVersion string - Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- InitialInstanceCount int - Initial number of instances used for auto-scaling.
- InitialVariantWeight float64 - Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- InstanceType string - The type of instance to start.
- ManagedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling - Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- ModelDataDownloadTimeoutInSeconds int - The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- RoutingConfigs []EndpointConfigurationProductionVariantRoutingConfig - Sets how the endpoint routes incoming traffic. See routing_config below.
- ServerlessConfig EndpointConfigurationProductionVariantServerlessConfig - Specifies configuration for how an endpoint performs asynchronous inference.
- VariantName string - The name of the variant. If omitted, this provider will assign a random, unique name.
- VolumeSizeInGb int - The size, in GB, of the ML storage volume attached to the individual inference instance associated with the production variant. Valid values are between 1 and 512.
- modelName String - The name of the model to use.
- acceleratorType String - The size of the Elastic Inference (EI) instance to use for the production variant.
- containerStartupHealthCheckTimeoutInSeconds Integer - The timeout value, in seconds, for your inference container to pass a health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- coreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig - Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- enableSsmAccess Boolean - You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- inferenceAmiVersion String - Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- initialInstanceCount Integer - Initial number of instances used for auto-scaling.
- initialVariantWeight Double - Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- instanceType String - The type of instance to start.
- managedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling - Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- modelDataDownloadTimeoutInSeconds Integer - The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- routingConfigs List<EndpointConfigurationProductionVariantRoutingConfig> - Sets how the endpoint routes incoming traffic. See routing_config below.
- serverlessConfig EndpointConfigurationProductionVariantServerlessConfig - Specifies configuration for how an endpoint performs asynchronous inference.
- variantName String - The name of the variant. If omitted, this provider will assign a random, unique name.
- volumeSizeInGb Integer - The size, in GB, of the ML storage volume attached to the individual inference instance associated with the production variant. Valid values are between 1 and 512.
- modelName string - The name of the model to use.
- acceleratorType string - The size of the Elastic Inference (EI) instance to use for the production variant.
- containerStartupHealthCheckTimeoutInSeconds number - The timeout value, in seconds, for your inference container to pass a health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- coreDumpConfig EndpointConfigurationProductionVariantCoreDumpConfig - Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- enableSsmAccess boolean - You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- inferenceAmiVersion string - Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- initialInstanceCount number - Initial number of instances used for auto-scaling.
- initialVariantWeight number - Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- instanceType string - The type of instance to start.
- managedInstanceScaling EndpointConfigurationProductionVariantManagedInstanceScaling - Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- modelDataDownloadTimeoutInSeconds number - The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- routingConfigs EndpointConfigurationProductionVariantRoutingConfig[] - Sets how the endpoint routes incoming traffic. See routing_config below.
- serverlessConfig EndpointConfigurationProductionVariantServerlessConfig - Specifies configuration for how an endpoint performs asynchronous inference.
- variantName string - The name of the variant. If omitted, this provider will assign a random, unique name.
- volumeSizeInGb number - The size, in GB, of the ML storage volume attached to the individual inference instance associated with the production variant. Valid values are between 1 and 512.
- model_name str - The name of the model to use.
- accelerator_type str - The size of the Elastic Inference (EI) instance to use for the production variant.
- container_startup_health_check_timeout_in_seconds int - The timeout value, in seconds, for your inference container to pass a health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- core_dump_config EndpointConfigurationProductionVariantCoreDumpConfig - Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- enable_ssm_access bool - You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- inference_ami_version str - Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- initial_instance_count int - Initial number of instances used for auto-scaling.
- initial_variant_weight float - Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- instance_type str - The type of instance to start.
- managed_instance_scaling EndpointConfigurationProductionVariantManagedInstanceScaling - Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- model_data_download_timeout_in_seconds int - The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- routing_configs Sequence[EndpointConfigurationProductionVariantRoutingConfig] - Sets how the endpoint routes incoming traffic. See routing_config below.
- serverless_config EndpointConfigurationProductionVariantServerlessConfig - Specifies configuration for how an endpoint performs asynchronous inference.
- variant_name str - The name of the variant. If omitted, this provider will assign a random, unique name.
- volume_size_in_gb int - The size, in GB, of the ML storage volume attached to the individual inference instance associated with the production variant. Valid values are between 1 and 512.
- modelName String - The name of the model to use.
- acceleratorType String - The size of the Elastic Inference (EI) instance to use for the production variant.
- containerStartupHealthCheckTimeoutInSeconds Number - The timeout value, in seconds, for your inference container to pass a health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- coreDumpConfig Property Map - Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- enableSsmAccess Boolean - You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- inferenceAmiVersion String - Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- initialInstanceCount Number - Initial number of instances used for auto-scaling.
- initialVariantWeight Number - Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- instanceType String - The type of instance to start.
- managedInstanceScaling Property Map - Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- modelDataDownloadTimeoutInSeconds Number - The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- routingConfigs List<Property Map> - Sets how the endpoint routes incoming traffic. See routing_config below.
- serverlessConfig Property Map - Specifies configuration for how an endpoint performs asynchronous inference.
- variantName String - The name of the variant. If omitted, this provider will assign a random, unique name.
- volumeSizeInGb Number - The size, in GB, of the ML storage volume attached to the individual inference instance associated with the production variant. Valid values are between 1 and 512.
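The TypeScript sketch below shows one way these per-variant settings might be combined on an instance-based variant; the instance type, counts, and timeout values are illustrative choices within the documented ranges, and m refers to an existing aws.sagemaker.Model.
import * as aws from "@pulumi/aws";

const scaledEc = new aws.sagemaker.EndpointConfiguration("scaled-ec", {
    productionVariants: [{
        variantName: "variant-1",
        modelName: m.name, // existing aws.sagemaker.Model
        instanceType: "ml.m5.large",
        initialInstanceCount: 1,
        initialVariantWeight: 1.0,
        volumeSizeInGb: 32,
        modelDataDownloadTimeoutInSeconds: 600,
        containerStartupHealthCheckTimeoutInSeconds: 300,
        managedInstanceScaling: {
            status: "ENABLED",
            minInstanceCount: 1,
            maxInstanceCount: 4,
        },
        routingConfigs: [{
            routingStrategy: "LEAST_OUTSTANDING_REQUESTS",
        }],
    }],
});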
EndpointConfigurationProductionVariantCoreDumpConfig, EndpointConfigurationProductionVariantCoreDumpConfigArgs
- DestinationS3Uri string - The Amazon S3 bucket to send the core dump to.
- KmsKeyId string - The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- DestinationS3Uri string - The Amazon S3 bucket to send the core dump to.
- KmsKeyId string - The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- destinationS3Uri String - The Amazon S3 bucket to send the core dump to.
- kmsKeyId String - The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- destinationS3Uri string - The Amazon S3 bucket to send the core dump to.
- kmsKeyId string - The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- destination_s3_uri str - The Amazon S3 bucket to send the core dump to.
- kms_key_id str - The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- destinationS3Uri String - The Amazon S3 bucket to send the core dump to.
- kmsKeyId String - The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
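As a brief TypeScript sketch, a core dump destination might be attached to a variant as shown here; the bucket URI is illustrative, the KMS key is created alongside it, and m refers to an existing aws.sagemaker.Model.
import * as aws from "@pulumi/aws";

// Illustrative KMS key used to encrypt core dump data at rest.
const coreDumpKey = new aws.kms.Key("core-dump-key", {
    description: "Encrypts SageMaker model container core dumps",
});

const crashDiagEc = new aws.sagemaker.EndpointConfiguration("crash-diag-ec", {
    productionVariants: [{
        variantName: "variant-1",
        modelName: m.name, // existing aws.sagemaker.Model
        initialInstanceCount: 1,
        instanceType: "ml.m5.large",
        coreDumpConfig: {
            destinationS3Uri: "s3://my-core-dump-bucket/dumps/", // illustrative bucket
            kmsKeyId: coreDumpKey.arn,
        },
    }],
});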
EndpointConfigurationProductionVariantManagedInstanceScaling, EndpointConfigurationProductionVariantManagedInstanceScalingArgs
- MaxInstanceCount int - The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- MinInstanceCount int - The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- Status string - Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- MaxInstanceCount int - The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- MinInstanceCount int - The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- Status string - Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- maxInstanceCount Integer - The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- minInstanceCount Integer - The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- status String - Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- maxInstanceCount number - The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- minInstanceCount number - The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- status string - Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- max_instance_count int - The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- min_instance_count int - The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- status str - Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- maxInstanceCount Number - The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- minInstanceCount Number - The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- status String - Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
EndpointConfigurationProductionVariantRoutingConfig, EndpointConfigurationProductionVariantRoutingConfigArgs
- RoutingStrategy string - Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- RoutingStrategy string - Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- routingStrategy String - Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- routingStrategy string - Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- routing_strategy str - Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- routingStrategy String - Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
EndpointConfigurationProductionVariantServerlessConfig, EndpointConfigurationProductionVariantServerlessConfigArgs
- MaxConcurrency int - The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- MemorySizeInMb int - The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- ProvisionedConcurrency int - The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- MaxConcurrency int - The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- MemorySizeInMb int - The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- ProvisionedConcurrency int - The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- maxConcurrency Integer - The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- memorySizeInMb Integer - The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- provisionedConcurrency Integer - The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- maxConcurrency number - The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- memorySizeInMb number - The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- provisionedConcurrency number - The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- max_concurrency int - The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- memory_size_in_mb int - The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- provisioned_concurrency int - The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- maxConcurrency Number - The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- memorySizeInMb Number - The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- provisionedConcurrency Number - The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
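A serverless variant omits instanceType and initialInstanceCount and sets serverlessConfig instead. The following TypeScript sketch uses illustrative values within the documented ranges, with m again referring to an existing aws.sagemaker.Model.
import * as aws from "@pulumi/aws";

const serverlessEc = new aws.sagemaker.EndpointConfiguration("serverless-ec", {
    productionVariants: [{
        variantName: "variant-1",
        modelName: m.name, // existing aws.sagemaker.Model
        serverlessConfig: {
            maxConcurrency: 20,
            memorySizeInMb: 4096,
            provisionedConcurrency: 5,
        },
    }],
});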
EndpointConfigurationShadowProductionVariant, EndpointConfigurationShadowProductionVariantArgs
- ModelName string - The name of the model to use.
- AcceleratorType string - The size of the Elastic Inference (EI) instance to use for the production variant.
- ContainerStartupHealthCheckTimeoutInSeconds int - The timeout value, in seconds, for your inference container to pass a health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- CoreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig - Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- EnableSsmAccess bool - You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- InferenceAmiVersion string - Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- InitialInstanceCount int - Initial number of instances used for auto-scaling.
- InitialVariantWeight double - Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- InstanceType string - The type of instance to start.
- ManagedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling - Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- ModelDataDownloadTimeoutInSeconds int - The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- RoutingConfigs List<EndpointConfigurationShadowProductionVariantRoutingConfig> - Sets how the endpoint routes incoming traffic. See routing_config below.
- ServerlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig - Specifies configuration for how an endpoint performs asynchronous inference.
- VariantName string - The name of the variant. If omitted, this provider will assign a random, unique name.
- VolumeSizeInGb int - The size, in GB, of the ML storage volume attached to the individual inference instance associated with the production variant. Valid values are between 1 and 512.
- ModelName string - The name of the model to use.
- AcceleratorType string - The size of the Elastic Inference (EI) instance to use for the production variant.
- ContainerStartupHealthCheckTimeoutInSeconds int - The timeout value, in seconds, for your inference container to pass a health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- CoreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig - Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- EnableSsmAccess bool - You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- InferenceAmiVersion string - Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- InitialInstanceCount int - Initial number of instances used for auto-scaling.
- InitialVariantWeight float64 - Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- InstanceType string - The type of instance to start.
- ManagedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling - Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- ModelDataDownloadTimeoutInSeconds int - The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- RoutingConfigs []EndpointConfigurationShadowProductionVariantRoutingConfig - Sets how the endpoint routes incoming traffic. See routing_config below.
- ServerlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig - Specifies configuration for how an endpoint performs asynchronous inference.
- VariantName string - The name of the variant. If omitted, this provider will assign a random, unique name.
- VolumeSizeInGb int - The size, in GB, of the ML storage volume attached to the individual inference instance associated with the production variant. Valid values are between 1 and 512.
- modelName String - The name of the model to use.
- acceleratorType String - The size of the Elastic Inference (EI) instance to use for the production variant.
- containerStartupHealthCheckTimeoutInSeconds Integer - The timeout value, in seconds, for your inference container to pass a health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- coreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig - Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- enableSsmAccess Boolean - You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- inferenceAmiVersion String - Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- initialInstanceCount Integer - Initial number of instances used for auto-scaling.
- initialVariantWeight Double - Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- instanceType String - The type of instance to start.
- managedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling - Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- modelDataDownloadTimeoutInSeconds Integer - The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- routingConfigs List<EndpointConfigurationShadowProductionVariantRoutingConfig> - Sets how the endpoint routes incoming traffic. See routing_config below.
- serverlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig - Specifies configuration for how an endpoint performs asynchronous inference.
- variantName String - The name of the variant. If omitted, this provider will assign a random, unique name.
- volumeSizeInGb Integer - The size, in GB, of the ML storage volume attached to the individual inference instance associated with the production variant. Valid values are between 1 and 512.
- modelName string - The name of the model to use.
- acceleratorType string - The size of the Elastic Inference (EI) instance to use for the production variant.
- containerStartupHealthCheckTimeoutInSeconds number - The timeout value, in seconds, for your inference container to pass a health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- coreDumpConfig EndpointConfigurationShadowProductionVariantCoreDumpConfig - Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- enableSsmAccess boolean - You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- inferenceAmiVersion string - Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- initialInstanceCount number - Initial number of instances used for auto-scaling.
- initialVariantWeight number - Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- instanceType string - The type of instance to start.
- managedInstanceScaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling - Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- modelDataDownloadTimeoutInSeconds number - The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- routingConfigs EndpointConfigurationShadowProductionVariantRoutingConfig[] - Sets how the endpoint routes incoming traffic. See routing_config below.
- serverlessConfig EndpointConfigurationShadowProductionVariantServerlessConfig - Specifies configuration for how an endpoint performs asynchronous inference.
- variantName string - The name of the variant. If omitted, this provider will assign a random, unique name.
- volumeSizeInGb number - The size, in GB, of the ML storage volume attached to the individual inference instance associated with the production variant. Valid values are between 1 and 512.
- model_name str - The name of the model to use.
- accelerator_type str - The size of the Elastic Inference (EI) instance to use for the production variant.
- container_startup_health_check_timeout_in_seconds int - The timeout value, in seconds, for your inference container to pass a health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- core_dump_config EndpointConfigurationShadowProductionVariantCoreDumpConfig - Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- enable_ssm_access bool - You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- inference_ami_version str - Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- initial_instance_count int - Initial number of instances used for auto-scaling.
- initial_variant_weight float - Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- instance_type str - The type of instance to start.
- managed_instance_scaling EndpointConfigurationShadowProductionVariantManagedInstanceScaling - Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- model_data_download_timeout_in_seconds int - The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- routing_configs Sequence[EndpointConfigurationShadowProductionVariantRoutingConfig] - Sets how the endpoint routes incoming traffic. See routing_config below.
- serverless_config EndpointConfigurationShadowProductionVariantServerlessConfig - Specifies configuration for how an endpoint performs asynchronous inference.
- variant_name str - The name of the variant. If omitted, this provider will assign a random, unique name.
- volume_size_in_gb int - The size, in GB, of the ML storage volume attached to the individual inference instance associated with the production variant. Valid values are between 1 and 512.
- modelName String - The name of the model to use.
- acceleratorType String - The size of the Elastic Inference (EI) instance to use for the production variant.
- containerStartupHealthCheckTimeoutInSeconds Number - The timeout value, in seconds, for your inference container to pass a health check by SageMaker Hosting. For more information about health checks, see How Your Container Should Respond to Health Check (Ping) Requests. Valid values are between 60 and 3600.
- coreDumpConfig Property Map - Specifies configuration for a core dump from the model container when the process crashes. Fields are documented below.
- enableSsmAccess Boolean - You can use this parameter to turn on native Amazon Web Services Systems Manager (SSM) access for a production variant behind an endpoint. By default, SSM access is disabled for all production variants behind an endpoint.
- inferenceAmiVersion String - Specifies an option from a collection of preconfigured Amazon Machine Image (AMI) images. Each image is configured by Amazon Web Services with a set of software and driver versions. Amazon Web Services optimizes these configurations for different machine learning workloads.
- initialInstanceCount Number - Initial number of instances used for auto-scaling.
- initialVariantWeight Number - Determines initial traffic distribution among all of the models that you specify in the endpoint configuration. If unspecified, it defaults to 1.0.
- instanceType String - The type of instance to start.
- managedInstanceScaling Property Map - Settings that control the range in the number of instances that the endpoint provisions as it scales up or down to accommodate traffic.
- modelDataDownloadTimeoutInSeconds Number - The timeout value, in seconds, to download and extract the model that you want to host from Amazon S3 to the individual inference instance associated with this production variant. Valid values are between 60 and 3600.
- routingConfigs List<Property Map> - Sets how the endpoint routes incoming traffic. See routing_config below.
- serverlessConfig Property Map - Specifies configuration for how an endpoint performs asynchronous inference.
- variantName String - The name of the variant. If omitted, this provider will assign a random, unique name.
- volumeSizeInGb Number - The size, in GB, of the ML storage volume attached to the individual inference instance associated with the production variant. Valid values are between 1 and 512.
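Shadow production variants receive a copy of the traffic sent to the production variants, which can be used to evaluate a new model against live requests. A minimal TypeScript sketch, assuming two existing aws.sagemaker.Model resources named prodModel and shadowModel (illustrative names):
import * as aws from "@pulumi/aws";

const shadowEc = new aws.sagemaker.EndpointConfiguration("shadow-ec", {
    productionVariants: [{
        variantName: "production",
        modelName: prodModel.name, // assumed existing aws.sagemaker.Model
        initialInstanceCount: 1,
        instanceType: "ml.m5.large",
    }],
    shadowProductionVariants: [{
        variantName: "shadow",
        modelName: shadowModel.name, // assumed existing aws.sagemaker.Model
        initialInstanceCount: 1,
        instanceType: "ml.m5.large",
        initialVariantWeight: 0.5,
    }],
});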
EndpointConfigurationShadowProductionVariantCoreDumpConfig, EndpointConfigurationShadowProductionVariantCoreDumpConfigArgs
- DestinationS3Uri string - The Amazon S3 bucket to send the core dump to.
- KmsKeyId string - The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- DestinationS3Uri string - The Amazon S3 bucket to send the core dump to.
- KmsKeyId string - The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- destinationS3Uri String - The Amazon S3 bucket to send the core dump to.
- kmsKeyId String - The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- destinationS3Uri string - The Amazon S3 bucket to send the core dump to.
- kmsKeyId string - The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- destination_s3_uri str - The Amazon S3 bucket to send the core dump to.
- kms_key_id str - The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
- destinationS3Uri String - The Amazon S3 bucket to send the core dump to.
- kmsKeyId String - The Amazon Web Services Key Management Service (Amazon Web Services KMS) key that SageMaker uses to encrypt the core dump data at rest using Amazon S3 server-side encryption.
EndpointConfigurationShadowProductionVariantManagedInstanceScaling, EndpointConfigurationShadowProductionVariantManagedInstanceScalingArgs
- MaxInstanceCount int - The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- MinInstanceCount int - The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- Status string - Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- MaxInstanceCount int - The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- MinInstanceCount int - The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- Status string - Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- maxInstanceCount Integer - The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- minInstanceCount Integer - The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- status String - Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- maxInstanceCount number - The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- minInstanceCount number - The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- status string - Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- max_instance_count int - The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- min_instance_count int - The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- status str - Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
- maxInstanceCount Number - The maximum number of instances that the endpoint can provision when it scales up to accommodate an increase in traffic.
- minInstanceCount Number - The minimum number of instances that the endpoint must retain when it scales down to accommodate a decrease in traffic.
- status String - Indicates whether managed instance scaling is enabled. Valid values are ENABLED and DISABLED.
EndpointConfigurationShadowProductionVariantRoutingConfig, EndpointConfigurationShadowProductionVariantRoutingConfigArgs
- RoutingStrategy string - Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- RoutingStrategy string - Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- routingStrategy String - Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- routingStrategy string - Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- routing_strategy str - Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
- routingStrategy String - Sets how the endpoint routes incoming traffic. Valid values are LEAST_OUTSTANDING_REQUESTS and RANDOM. LEAST_OUTSTANDING_REQUESTS routes requests to the specific instances that have more capacity to process them. RANDOM routes each request to a randomly chosen instance.
EndpointConfigurationShadowProductionVariantServerlessConfig, EndpointConfigurationShadowProductionVariantServerlessConfigArgs
- MaxConcurrency int - The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- MemorySizeInMb int - The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- ProvisionedConcurrency int - The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- MaxConcurrency int - The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- MemorySizeInMb int - The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- ProvisionedConcurrency int - The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- maxConcurrency Integer - The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- memorySizeInMb Integer - The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- provisionedConcurrency Integer - The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- maxConcurrency number - The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- memorySizeInMb number - The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- provisionedConcurrency number - The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- max_concurrency int - The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- memory_size_in_mb int - The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- provisioned_concurrency int - The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
- maxConcurrency Number - The maximum number of concurrent invocations your serverless endpoint can process. Valid values are between 1 and 200.
- memorySizeInMb Number - The memory size of your serverless endpoint. Valid values are in 1 GB increments: 1024 MB, 2048 MB, 3072 MB, 4096 MB, 5120 MB, or 6144 MB.
- provisionedConcurrency Number - The amount of provisioned concurrency to allocate for the serverless endpoint. Should be less than or equal to max_concurrency. Valid values are between 1 and 200.
Import
Using pulumi import, import endpoint configurations using the name. For example:
$ pulumi import aws:sagemaker/endpointConfiguration:EndpointConfiguration test_endpoint_config endpoint-config-foo
To learn more about importing existing cloud resources, see Importing resources.
Package Details
- Repository
- AWS Classic pulumi/pulumi-aws
- License
- Apache-2.0
- Notes
- This Pulumi package is based on the aws Terraform Provider.