1. Packages
  2. Yandex
  3. API Docs
  4. DataprocCluster
Yandex v0.13.0 published on Tuesday, Feb 22, 2022 by Pulumi

yandex.DataprocCluster

Explore with Pulumi AI

yandex logo
Yandex v0.13.0 published on Tuesday, Feb 22, 2022 by Pulumi

    Manages a Data Proc cluster. For more information, see the official documentation.

    Example Usage

    using System.IO;
    using Pulumi;
    using Yandex = Pulumi.Yandex;
    
    /// <summary>
    /// Example stack that provisions a Yandex Data Proc cluster together with the
    /// network, service account, folder IAM bindings and storage bucket it uses.
    /// </summary>
    class MyStack : Stack
    {
        public MyStack()
        {
            // Network and subnet that every subcluster is attached to.
            var fooVpcNetwork = new Yandex.VpcNetwork("fooVpcNetwork", new Yandex.VpcNetworkArgs
            {
            });
            var fooVpcSubnet = new Yandex.VpcSubnet("fooVpcSubnet", new Yandex.VpcSubnetArgs
            {
                Zone = "ru-central1-b",
                NetworkId = fooVpcNetwork.Id,
                V4CidrBlocks = 
                {
                    "10.1.0.0/24",
                },
            });

            var dataprocIamServiceAccount = new Yandex.IamServiceAccount("dataprocIamServiceAccount", new Yandex.IamServiceAccountArgs
            {
                Description = "service account to manage Dataproc Cluster",
            });
            var fooResourcemanagerFolder = Output.Create(Yandex.GetResourcemanagerFolder.InvokeAsync(new Yandex.GetResourcemanagerFolderArgs
            {
                FolderId = "some_folder_id",
            }));

            // Both IAM bindings target the same folder and the same member, so build
            // those values once. The lambda parameter is deliberately not named after
            // the outer local: shadowing it is rejected by compilers older than C# 8.
            var folderId = fooResourcemanagerFolder.Apply(folder => folder.Id);
            var serviceAccountMember = dataprocIamServiceAccount.Id.Apply(id => $"serviceAccount:{id}");

            var dataprocResourcemanagerFolderIamBinding = new Yandex.ResourcemanagerFolderIamBinding("dataprocResourcemanagerFolderIamBinding", new Yandex.ResourcemanagerFolderIamBindingArgs
            {
                FolderId = folderId,
                Role = "mdb.dataproc.agent",
                Members = 
                {
                    serviceAccountMember,
                },
            });
            // required in order to create bucket
            var bucketCreator = new Yandex.ResourcemanagerFolderIamBinding("bucket-creator", new Yandex.ResourcemanagerFolderIamBindingArgs
            {
                FolderId = folderId,
                Role = "editor",
                Members = 
                {
                    serviceAccountMember,
                },
            });
            var fooIamServiceAccountStaticAccessKey = new Yandex.IamServiceAccountStaticAccessKey("fooIamServiceAccountStaticAccessKey", new Yandex.IamServiceAccountStaticAccessKeyArgs
            {
                ServiceAccountId = dataprocIamServiceAccount.Id,
            });
            var fooStorageBucket = new Yandex.StorageBucket("fooStorageBucket", new Yandex.StorageBucketArgs
            {
                Bucket = "foo",
                AccessKey = fooIamServiceAccountStaticAccessKey.AccessKey,
                SecretKey = fooIamServiceAccountStaticAccessKey.SecretKey,
            }, new CustomResourceOptions
            {
                // The bucket is created with the service account's static key, so the
                // "editor" binding must exist first.
                DependsOn = 
                {
                    bucketCreator,
                },
            });

            // .NET does not expand "~", so resolve the user's home directory explicitly;
            // File.ReadAllText("~/.ssh/id_rsa.pub") would look for a directory literally
            // named "~" under the current working directory.
            var sshPublicKeyPath = Path.Combine(
                System.Environment.GetFolderPath(System.Environment.SpecialFolder.UserProfile),
                ".ssh",
                "id_rsa.pub");

            var fooDataprocCluster = new Yandex.DataprocCluster("fooDataprocCluster", new Yandex.DataprocClusterArgs
            {
                Bucket = fooStorageBucket.Bucket,
                Description = "Dataproc Cluster created by Terraform",
                Labels = 
                {
                    { "created_by", "terraform" },
                },
                ServiceAccountId = dataprocIamServiceAccount.Id,
                ZoneId = "ru-central1-b",
                ClusterConfig = new Yandex.Inputs.DataprocClusterClusterConfigArgs
                {
                    Hadoop = new Yandex.Inputs.DataprocClusterClusterConfigHadoopArgs
                    {
                        Services = 
                        {
                            "HDFS",
                            "YARN",
                            "SPARK",
                            "TEZ",
                            "MAPREDUCE",
                            "HIVE",
                        },
                        Properties = 
                        {
                            { "yarn:yarn.resourcemanager.am.max-attempts", "5" },
                        },
                        SshPublicKeys = 
                        {
                            File.ReadAllText(sshPublicKeyPath),
                        },
                    },
                    SubclusterSpecs = 
                    {
                        new Yandex.Inputs.DataprocClusterClusterConfigSubclusterSpecArgs
                        {
                            Name = "main",
                            Role = "MASTERNODE",
                            Resources = CreateNodeResources(),
                            SubnetId = fooVpcSubnet.Id,
                            HostsCount = 1,
                        },
                        new Yandex.Inputs.DataprocClusterClusterConfigSubclusterSpecArgs
                        {
                            Name = "data",
                            Role = "DATANODE",
                            Resources = CreateNodeResources(),
                            SubnetId = fooVpcSubnet.Id,
                            HostsCount = 2,
                        },
                        new Yandex.Inputs.DataprocClusterClusterConfigSubclusterSpecArgs
                        {
                            Name = "compute",
                            Role = "COMPUTENODE",
                            Resources = CreateNodeResources(),
                            SubnetId = fooVpcSubnet.Id,
                            HostsCount = 2,
                        },
                        new Yandex.Inputs.DataprocClusterClusterConfigSubclusterSpecArgs
                        {
                            Name = "compute_autoscaling",
                            Role = "COMPUTENODE",
                            Resources = CreateNodeResources(),
                            SubnetId = fooVpcSubnet.Id,
                            HostsCount = 2,
                            AutoscalingConfig = new Yandex.Inputs.DataprocClusterClusterConfigSubclusterSpecAutoscalingConfigArgs
                            {
                                MaxHostsCount = 10,
                                MeasurementDuration = 60,
                                WarmupDuration = 60,
                                StabilizationDuration = 120,
                                Preemptible = false,
                                DecommissionTimeout = 60,
                            },
                        },
                    },
                },
            }, new CustomResourceOptions
            {
                // The cluster is created only after the "mdb.dataproc.agent" binding exists.
                DependsOn = 
                {
                    dataprocResourcemanagerFolderIamBinding,
                },
            });
        }

        /// <summary>
        /// Builds the resources block shared by every subcluster in this example
        /// (s2.small preset, network-hdd disk, disk size 20). A fresh instance is
        /// returned on each call so the subcluster specs do not share one object.
        /// </summary>
        private static Yandex.Inputs.DataprocClusterClusterConfigSubclusterSpecResourcesArgs CreateNodeResources() =>
            new Yandex.Inputs.DataprocClusterClusterConfigSubclusterSpecResourcesArgs
            {
                ResourcePresetId = "s2.small",
                DiskTypeId = "network-hdd",
                DiskSize = 20,
            };
    
    }
    
    package main
    
    import (
    	"fmt"
    	"io/ioutil"
    
    	"github.com/pulumi/pulumi-yandex/sdk/go/yandex"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    // readFileOrPanic reads the file at path and returns its contents wrapped as a
    // pulumi.String. If the file cannot be read it panics with the error's message.
    // The path is passed to ioutil.ReadFile unchanged.
    func readFileOrPanic(path string) pulumi.StringPtrInput {
    	contents, readErr := ioutil.ReadFile(path)
    	if readErr == nil {
    		return pulumi.String(string(contents))
    	}
    	panic(readErr.Error())
    }
    
    // main provisions a Yandex Data Proc cluster together with the network,
    // service account, folder IAM bindings and storage bucket it uses.
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		// Network and subnet that every subcluster is attached to.
    		fooVpcNetwork, err := yandex.NewVpcNetwork(ctx, "fooVpcNetwork", nil)
    		if err != nil {
    			return err
    		}
    		fooVpcSubnet, err := yandex.NewVpcSubnet(ctx, "fooVpcSubnet", &yandex.VpcSubnetArgs{
    			Zone:      pulumi.String("ru-central1-b"),
    			NetworkId: fooVpcNetwork.ID(),
    			V4CidrBlocks: pulumi.StringArray{
    				pulumi.String("10.1.0.0/24"),
    			},
    		})
    		if err != nil {
    			return err
    		}
    		dataprocIamServiceAccount, err := yandex.NewIamServiceAccount(ctx, "dataprocIamServiceAccount", &yandex.IamServiceAccountArgs{
    			Description: pulumi.String("service account to manage Dataproc Cluster"),
    		})
    		if err != nil {
    			return err
    		}
    		// The args type must be package-qualified and carries the same Lookup
    		// prefix as the function it is passed to.
    		folderID := "some_folder_id"
    		fooResourcemanagerFolder, err := yandex.LookupResourcemanagerFolder(ctx, &yandex.LookupResourcemanagerFolderArgs{
    			FolderId: &folderID,
    		}, nil)
    		if err != nil {
    			return err
    		}
    		// Both IAM bindings grant a role to the same member, so build it once.
    		serviceAccountMember := dataprocIamServiceAccount.ID().ApplyT(func(id string) (string, error) {
    			return fmt.Sprintf("%v%v", "serviceAccount:", id), nil
    		}).(pulumi.StringOutput)
    		dataprocResourcemanagerFolderIamBinding, err := yandex.NewResourcemanagerFolderIamBinding(ctx, "dataprocResourcemanagerFolderIamBinding", &yandex.ResourcemanagerFolderIamBindingArgs{
    			FolderId: pulumi.String(fooResourcemanagerFolder.Id),
    			Role:     pulumi.String("mdb.dataproc.agent"),
    			Members: pulumi.StringArray{
    				serviceAccountMember,
    			},
    		})
    		if err != nil {
    			return err
    		}
    		// required in order to create bucket
    		// The resource is kept in a variable because the bucket below depends on it.
    		bucketCreator, err := yandex.NewResourcemanagerFolderIamBinding(ctx, "bucket-creator", &yandex.ResourcemanagerFolderIamBindingArgs{
    			FolderId: pulumi.String(fooResourcemanagerFolder.Id),
    			Role:     pulumi.String("editor"),
    			Members: pulumi.StringArray{
    				serviceAccountMember,
    			},
    		})
    		if err != nil {
    			return err
    		}
    		fooIamServiceAccountStaticAccessKey, err := yandex.NewIamServiceAccountStaticAccessKey(ctx, "fooIamServiceAccountStaticAccessKey", &yandex.IamServiceAccountStaticAccessKeyArgs{
    			ServiceAccountId: dataprocIamServiceAccount.ID(),
    		})
    		if err != nil {
    			return err
    		}
    		fooStorageBucket, err := yandex.NewStorageBucket(ctx, "fooStorageBucket", &yandex.StorageBucketArgs{
    			Bucket:    pulumi.String("foo"),
    			AccessKey: fooIamServiceAccountStaticAccessKey.AccessKey,
    			SecretKey: fooIamServiceAccountStaticAccessKey.SecretKey,
    		}, pulumi.DependsOn([]pulumi.Resource{
    			bucketCreator,
    		}))
    		if err != nil {
    			return err
    		}
    		_, err = yandex.NewDataprocCluster(ctx, "fooDataprocCluster", &yandex.DataprocClusterArgs{
    			Bucket:      fooStorageBucket.Bucket,
    			Description: pulumi.String("Dataproc Cluster created by Terraform"),
    			Labels: pulumi.StringMap{
    				"created_by": pulumi.String("terraform"),
    			},
    			ServiceAccountId: dataprocIamServiceAccount.ID(),
    			ZoneId:           pulumi.String("ru-central1-b"),
    			ClusterConfig: &yandex.DataprocClusterClusterConfigArgs{
    				Hadoop: &yandex.DataprocClusterClusterConfigHadoopArgs{
    					Services: pulumi.StringArray{
    						pulumi.String("HDFS"),
    						pulumi.String("YARN"),
    						pulumi.String("SPARK"),
    						pulumi.String("TEZ"),
    						pulumi.String("MAPREDUCE"),
    						pulumi.String("HIVE"),
    					},
    					Properties: pulumi.StringMap{
    						"yarn:yarn.resourcemanager.am.max-attempts": pulumi.String("5"),
    					},
    					SshPublicKeys: pulumi.StringArray{
    						// readFileOrPanic returns a StringPtrInput, but array elements
    						// must be StringInput; Elem() yields the StringOutput needed.
    						// NOTE(review): Go does not expand "~"; this path is resolved
    						// relative to the working directory unless it is replaced
    						// with an absolute path.
    						readFileOrPanic("~/.ssh/id_rsa.pub").ToStringPtrOutput().Elem(),
    					},
    				},
    				SubclusterSpecs: yandex.DataprocClusterClusterConfigSubclusterSpecArray{
    					&yandex.DataprocClusterClusterConfigSubclusterSpecArgs{
    						Name: pulumi.String("main"),
    						Role: pulumi.String("MASTERNODE"),
    						Resources: &yandex.DataprocClusterClusterConfigSubclusterSpecResourcesArgs{
    							ResourcePresetId: pulumi.String("s2.small"),
    							DiskTypeId:       pulumi.String("network-hdd"),
    							DiskSize:         pulumi.Int(20),
    						},
    						SubnetId:   fooVpcSubnet.ID(),
    						HostsCount: pulumi.Int(1),
    					},
    					&yandex.DataprocClusterClusterConfigSubclusterSpecArgs{
    						Name: pulumi.String("data"),
    						Role: pulumi.String("DATANODE"),
    						Resources: &yandex.DataprocClusterClusterConfigSubclusterSpecResourcesArgs{
    							ResourcePresetId: pulumi.String("s2.small"),
    							DiskTypeId:       pulumi.String("network-hdd"),
    							DiskSize:         pulumi.Int(20),
    						},
    						SubnetId:   fooVpcSubnet.ID(),
    						HostsCount: pulumi.Int(2),
    					},
    					&yandex.DataprocClusterClusterConfigSubclusterSpecArgs{
    						Name: pulumi.String("compute"),
    						Role: pulumi.String("COMPUTENODE"),
    						Resources: &yandex.DataprocClusterClusterConfigSubclusterSpecResourcesArgs{
    							ResourcePresetId: pulumi.String("s2.small"),
    							DiskTypeId:       pulumi.String("network-hdd"),
    							DiskSize:         pulumi.Int(20),
    						},
    						SubnetId:   fooVpcSubnet.ID(),
    						HostsCount: pulumi.Int(2),
    					},
    					&yandex.DataprocClusterClusterConfigSubclusterSpecArgs{
    						Name: pulumi.String("compute_autoscaling"),
    						Role: pulumi.String("COMPUTENODE"),
    						Resources: &yandex.DataprocClusterClusterConfigSubclusterSpecResourcesArgs{
    							ResourcePresetId: pulumi.String("s2.small"),
    							DiskTypeId:       pulumi.String("network-hdd"),
    							DiskSize:         pulumi.Int(20),
    						},
    						SubnetId:   fooVpcSubnet.ID(),
    						HostsCount: pulumi.Int(2),
    						AutoscalingConfig: &yandex.DataprocClusterClusterConfigSubclusterSpecAutoscalingConfigArgs{
    							MaxHostsCount:         pulumi.Int(10),
    							MeasurementDuration:   pulumi.Int(60),
    							WarmupDuration:        pulumi.Int(60),
    							StabilizationDuration: pulumi.Int(120),
    							Preemptible:           pulumi.Bool(false),
    							DecommissionTimeout:   pulumi.Int(60),
    						},
    					},
    				},
    			},
    		}, pulumi.DependsOn([]pulumi.Resource{
    			// The cluster is created only after the "mdb.dataproc.agent" binding exists.
    			dataprocResourcemanagerFolderIamBinding,
    		}))
    		return err
    	})
    }
    

    Coming soon!

    import pulumi
    import pulumi_yandex as yandex
    
    import os

    # Read the SSH public key up front. open() does not expand "~", so the home
    # directory is resolved explicitly, and the with-block closes the file handle.
    with open(os.path.expanduser("~/.ssh/id_rsa.pub")) as ssh_public_key_file:
        ssh_public_key = ssh_public_key_file.read()

    # Network and subnet that every subcluster is attached to.
    foo_vpc_network = yandex.VpcNetwork("fooVpcNetwork")
    foo_vpc_subnet = yandex.VpcSubnet("fooVpcSubnet",
        zone="ru-central1-b",
        network_id=foo_vpc_network.id,
        v4_cidr_blocks=["10.1.0.0/24"])
    dataproc_iam_service_account = yandex.IamServiceAccount("dataprocIamServiceAccount", description="service account to manage Dataproc Cluster")
    foo_resourcemanager_folder = yandex.get_resourcemanager_folder(folder_id="some_folder_id")
    dataproc_resourcemanager_folder_iam_binding = yandex.ResourcemanagerFolderIamBinding("dataprocResourcemanagerFolderIamBinding",
        folder_id=foo_resourcemanager_folder.id,
        role="mdb.dataproc.agent",
        members=[dataproc_iam_service_account.id.apply(lambda id: f"serviceAccount:{id}")])
    # required in order to create bucket
    bucket_creator = yandex.ResourcemanagerFolderIamBinding("bucket-creator",
        folder_id=foo_resourcemanager_folder.id,
        role="editor",
        members=[dataproc_iam_service_account.id.apply(lambda id: f"serviceAccount:{id}")])
    foo_iam_service_account_static_access_key = yandex.IamServiceAccountStaticAccessKey("fooIamServiceAccountStaticAccessKey", service_account_id=dataproc_iam_service_account.id)
    # The bucket is created with the service account's static key, so it waits
    # for the "editor" binding above.
    foo_storage_bucket = yandex.StorageBucket("fooStorageBucket",
        bucket="foo",
        access_key=foo_iam_service_account_static_access_key.access_key,
        secret_key=foo_iam_service_account_static_access_key.secret_key,
        opts=pulumi.ResourceOptions(depends_on=[bucket_creator]))
    # The cluster is created only after the "mdb.dataproc.agent" binding exists.
    foo_dataproc_cluster = yandex.DataprocCluster("fooDataprocCluster",
        bucket=foo_storage_bucket.bucket,
        description="Dataproc Cluster created by Terraform",
        labels={
            "created_by": "terraform",
        },
        service_account_id=dataproc_iam_service_account.id,
        zone_id="ru-central1-b",
        cluster_config=yandex.DataprocClusterClusterConfigArgs(
            hadoop=yandex.DataprocClusterClusterConfigHadoopArgs(
                services=[
                    "HDFS",
                    "YARN",
                    "SPARK",
                    "TEZ",
                    "MAPREDUCE",
                    "HIVE",
                ],
                properties={
                    "yarn:yarn.resourcemanager.am.max-attempts": "5",
                },
                ssh_public_keys=[ssh_public_key],
            ),
            subcluster_specs=[
                yandex.DataprocClusterClusterConfigSubclusterSpecArgs(
                    name="main",
                    role="MASTERNODE",
                    resources=yandex.DataprocClusterClusterConfigSubclusterSpecResourcesArgs(
                        resource_preset_id="s2.small",
                        disk_type_id="network-hdd",
                        disk_size=20,
                    ),
                    subnet_id=foo_vpc_subnet.id,
                    hosts_count=1,
                ),
                yandex.DataprocClusterClusterConfigSubclusterSpecArgs(
                    name="data",
                    role="DATANODE",
                    resources=yandex.DataprocClusterClusterConfigSubclusterSpecResourcesArgs(
                        resource_preset_id="s2.small",
                        disk_type_id="network-hdd",
                        disk_size=20,
                    ),
                    subnet_id=foo_vpc_subnet.id,
                    hosts_count=2,
                ),
                yandex.DataprocClusterClusterConfigSubclusterSpecArgs(
                    name="compute",
                    role="COMPUTENODE",
                    resources=yandex.DataprocClusterClusterConfigSubclusterSpecResourcesArgs(
                        resource_preset_id="s2.small",
                        disk_type_id="network-hdd",
                        disk_size=20,
                    ),
                    subnet_id=foo_vpc_subnet.id,
                    hosts_count=2,
                ),
                yandex.DataprocClusterClusterConfigSubclusterSpecArgs(
                    name="compute_autoscaling",
                    role="COMPUTENODE",
                    resources=yandex.DataprocClusterClusterConfigSubclusterSpecResourcesArgs(
                        resource_preset_id="s2.small",
                        disk_type_id="network-hdd",
                        disk_size=20,
                    ),
                    subnet_id=foo_vpc_subnet.id,
                    hosts_count=2,
                    autoscaling_config=yandex.DataprocClusterClusterConfigSubclusterSpecAutoscalingConfigArgs(
                        max_hosts_count=10,
                        measurement_duration=60,
                        warmup_duration=60,
                        stabilization_duration=120,
                        preemptible=False,
                        decommission_timeout=60,
                    ),
                ),
            ],
        ),
        opts=pulumi.ResourceOptions(depends_on=[dataproc_resourcemanager_folder_iam_binding]))
    
    import * as pulumi from "@pulumi/pulumi";
    import * as yandex from "@pulumi/yandex";
    import * as fs from "fs";
    import * as os from "os";
    import * as path from "path";
    
    // Node does not expand "~", so resolve the home directory explicitly. Passing
    // an encoding makes readFileSync return a string instead of a Buffer.
    const sshPublicKey = fs.readFileSync(path.join(os.homedir(), ".ssh", "id_rsa.pub"), "utf8");
    
    // Network and subnet that every subcluster is attached to.
    const fooVpcNetwork = new yandex.VpcNetwork("fooVpcNetwork", {});
    const fooVpcSubnet = new yandex.VpcSubnet("fooVpcSubnet", {
        zone: "ru-central1-b",
        networkId: fooVpcNetwork.id,
        v4CidrBlocks: ["10.1.0.0/24"],
    });
    const dataprocIamServiceAccount = new yandex.IamServiceAccount("dataprocIamServiceAccount", {description: "service account to manage Dataproc Cluster"});
    const fooResourcemanagerFolder = yandex.getResourcemanagerFolder({
        folderId: "some_folder_id",
    });
    const dataprocResourcemanagerFolderIamBinding = new yandex.ResourcemanagerFolderIamBinding("dataprocResourcemanagerFolderIamBinding", {
        folderId: fooResourcemanagerFolder.then(folder => folder.id),
        role: "mdb.dataproc.agent",
        members: [pulumi.interpolate`serviceAccount:${dataprocIamServiceAccount.id}`],
    });
    // required in order to create bucket
    const bucketCreator = new yandex.ResourcemanagerFolderIamBinding("bucket-creator", {
        folderId: fooResourcemanagerFolder.then(folder => folder.id),
        role: "editor",
        members: [pulumi.interpolate`serviceAccount:${dataprocIamServiceAccount.id}`],
    });
    const fooIamServiceAccountStaticAccessKey = new yandex.IamServiceAccountStaticAccessKey("fooIamServiceAccountStaticAccessKey", {serviceAccountId: dataprocIamServiceAccount.id});
    // The bucket is created with the service account's static key, so it waits
    // for the "editor" binding above.
    const fooStorageBucket = new yandex.StorageBucket("fooStorageBucket", {
        bucket: "foo",
        accessKey: fooIamServiceAccountStaticAccessKey.accessKey,
        secretKey: fooIamServiceAccountStaticAccessKey.secretKey,
    }, {
        dependsOn: [bucketCreator],
    });
    // The cluster is created only after the "mdb.dataproc.agent" binding exists.
    const fooDataprocCluster = new yandex.DataprocCluster("fooDataprocCluster", {
        bucket: fooStorageBucket.bucket,
        description: "Dataproc Cluster created by Terraform",
        labels: {
            created_by: "terraform",
        },
        serviceAccountId: dataprocIamServiceAccount.id,
        zoneId: "ru-central1-b",
        clusterConfig: {
            hadoop: {
                services: [
                    "HDFS",
                    "YARN",
                    "SPARK",
                    "TEZ",
                    "MAPREDUCE",
                    "HIVE",
                ],
                properties: {
                    // Property values are strings, matching the other language examples.
                    "yarn:yarn.resourcemanager.am.max-attempts": "5",
                },
                sshPublicKeys: [sshPublicKey],
            },
            subclusterSpecs: [
                {
                    name: "main",
                    role: "MASTERNODE",
                    resources: {
                        resourcePresetId: "s2.small",
                        diskTypeId: "network-hdd",
                        diskSize: 20,
                    },
                    subnetId: fooVpcSubnet.id,
                    hostsCount: 1,
                },
                {
                    name: "data",
                    role: "DATANODE",
                    resources: {
                        resourcePresetId: "s2.small",
                        diskTypeId: "network-hdd",
                        diskSize: 20,
                    },
                    subnetId: fooVpcSubnet.id,
                    hostsCount: 2,
                },
                {
                    name: "compute",
                    role: "COMPUTENODE",
                    resources: {
                        resourcePresetId: "s2.small",
                        diskTypeId: "network-hdd",
                        diskSize: 20,
                    },
                    subnetId: fooVpcSubnet.id,
                    hostsCount: 2,
                },
                {
                    name: "compute_autoscaling",
                    role: "COMPUTENODE",
                    resources: {
                        resourcePresetId: "s2.small",
                        diskTypeId: "network-hdd",
                        diskSize: 20,
                    },
                    subnetId: fooVpcSubnet.id,
                    hostsCount: 2,
                    autoscalingConfig: {
                        maxHostsCount: 10,
                        measurementDuration: 60,
                        warmupDuration: 60,
                        stabilizationDuration: 120,
                        preemptible: false,
                        decommissionTimeout: 60,
                    },
                },
            ],
        },
    }, {
        dependsOn: [dataprocResourcemanagerFolderIamBinding],
    });
    

    Coming soon!

    Create DataprocCluster Resource

    Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.

    Constructor syntax

    new DataprocCluster(name: string, args: DataprocClusterArgs, opts?: CustomResourceOptions);
    @overload
    def DataprocCluster(resource_name: str,
                        args: DataprocClusterArgs,
                        opts: Optional[ResourceOptions] = None)
    
    @overload
    def DataprocCluster(resource_name: str,
                        opts: Optional[ResourceOptions] = None,
                        cluster_config: Optional[DataprocClusterClusterConfigArgs] = None,
                        service_account_id: Optional[str] = None,
                        bucket: Optional[str] = None,
                        deletion_protection: Optional[bool] = None,
                        description: Optional[str] = None,
                        folder_id: Optional[str] = None,
                        host_group_ids: Optional[Sequence[str]] = None,
                        labels: Optional[Mapping[str, str]] = None,
                        name: Optional[str] = None,
                        security_group_ids: Optional[Sequence[str]] = None,
                        ui_proxy: Optional[bool] = None,
                        zone_id: Optional[str] = None)
    func NewDataprocCluster(ctx *Context, name string, args DataprocClusterArgs, opts ...ResourceOption) (*DataprocCluster, error)
    public DataprocCluster(string name, DataprocClusterArgs args, CustomResourceOptions? opts = null)
    public DataprocCluster(String name, DataprocClusterArgs args)
    public DataprocCluster(String name, DataprocClusterArgs args, CustomResourceOptions options)
    
    type: yandex:DataprocCluster
    properties: # The arguments to resource properties.
    options: # Bag of options to control resource's behavior.
    
    

    Parameters

    name string
    The unique name of the resource.
    args DataprocClusterArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    resource_name str
    The unique name of the resource.
    args DataprocClusterArgs
    The arguments to resource properties.
    opts ResourceOptions
    Bag of options to control resource's behavior.
    ctx Context
    Context object for the current deployment.
    name string
    The unique name of the resource.
    args DataprocClusterArgs
    The arguments to resource properties.
    opts ResourceOption
    Bag of options to control resource's behavior.
    name string
    The unique name of the resource.
    args DataprocClusterArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    name String
    The unique name of the resource.
    args DataprocClusterArgs
    The arguments to resource properties.
    options CustomResourceOptions
    Bag of options to control resource's behavior.

    Constructor example

    The following reference example uses placeholder values for all input properties.

    // Reference example: every input property is populated with a placeholder value.

    // Autoscaling settings for the subcluster spec below.
    var autoscalingConfig = new Yandex.Inputs.DataprocClusterClusterConfigSubclusterSpecAutoscalingConfigArgs
    {
        MaxHostsCount = 0,
        CpuUtilizationTarget = 0,
        DecommissionTimeout = 0,
        MeasurementDuration = 0,
        Preemptible = false,
        StabilizationDuration = 0,
        WarmupDuration = 0,
    };

    // Hadoop settings: properties map, service list and SSH public keys.
    var hadoopConfig = new Yandex.Inputs.DataprocClusterClusterConfigHadoopArgs
    {
        Properties = 
        {
            { "string", "string" },
        },
        Services = 
        {
            "string",
        },
        SshPublicKeys = 
        {
            "string",
        },
    };

    var dataprocClusterResource = new Yandex.DataprocCluster("dataprocClusterResource", new Yandex.DataprocClusterArgs
    {
        ClusterConfig = new Yandex.Inputs.DataprocClusterClusterConfigArgs
        {
            SubclusterSpecs = 
            {
                new Yandex.Inputs.DataprocClusterClusterConfigSubclusterSpecArgs
                {
                    HostsCount = 0,
                    Name = "string",
                    Resources = new Yandex.Inputs.DataprocClusterClusterConfigSubclusterSpecResourcesArgs
                    {
                        DiskSize = 0,
                        ResourcePresetId = "string",
                        DiskTypeId = "string",
                    },
                    Role = "string",
                    SubnetId = "string",
                    AutoscalingConfig = autoscalingConfig,
                    Id = "string",
                },
            },
            Hadoop = hadoopConfig,
            VersionId = "string",
        },
        ServiceAccountId = "string",
        Bucket = "string",
        DeletionProtection = false,
        Description = "string",
        FolderId = "string",
        HostGroupIds = 
        {
            "string",
        },
        Labels = 
        {
            { "string", "string" },
        },
        Name = "string",
        SecurityGroupIds = 
        {
            "string",
        },
        UiProxy = false,
        ZoneId = "string",
    });
    
    example, err := yandex.NewDataprocCluster(ctx, "dataprocClusterResource", &yandex.DataprocClusterArgs{
    	ClusterConfig: &yandex.DataprocClusterClusterConfigArgs{
    		SubclusterSpecs: yandex.DataprocClusterClusterConfigSubclusterSpecArray{
    			&yandex.DataprocClusterClusterConfigSubclusterSpecArgs{
    				HostsCount: pulumi.Int(0),
    				Name:       pulumi.String("string"),
    				Resources: &yandex.DataprocClusterClusterConfigSubclusterSpecResourcesArgs{
    					DiskSize:         pulumi.Int(0),
    					ResourcePresetId: pulumi.String("string"),
    					DiskTypeId:       pulumi.String("string"),
    				},
    				Role:     pulumi.String("string"),
    				SubnetId: pulumi.String("string"),
    				AutoscalingConfig: &yandex.DataprocClusterClusterConfigSubclusterSpecAutoscalingConfigArgs{
    					MaxHostsCount:         pulumi.Int(0),
    					CpuUtilizationTarget:  pulumi.Float64(0),
    					DecommissionTimeout:   pulumi.Int(0),
    					MeasurementDuration:   pulumi.Int(0),
    					Preemptible:           pulumi.Bool(false),
    					StabilizationDuration: pulumi.Int(0),
    					WarmupDuration:        pulumi.Int(0),
    				},
    				Id: pulumi.String("string"),
    			},
    		},
    		Hadoop: &yandex.DataprocClusterClusterConfigHadoopArgs{
    			Properties: pulumi.StringMap{
    				"string": pulumi.String("string"),
    			},
    			Services: pulumi.StringArray{
    				pulumi.String("string"),
    			},
    			SshPublicKeys: pulumi.StringArray{
    				pulumi.String("string"),
    			},
    		},
    		VersionId: pulumi.String("string"),
    	},
    	ServiceAccountId:   pulumi.String("string"),
    	Bucket:             pulumi.String("string"),
    	DeletionProtection: pulumi.Bool(false),
    	Description:        pulumi.String("string"),
    	FolderId:           pulumi.String("string"),
    	HostGroupIds: pulumi.StringArray{
    		pulumi.String("string"),
    	},
    	Labels: pulumi.StringMap{
    		"string": pulumi.String("string"),
    	},
    	Name: pulumi.String("string"),
    	SecurityGroupIds: pulumi.StringArray{
    		pulumi.String("string"),
    	},
    	UiProxy: pulumi.Bool(false),
    	ZoneId:  pulumi.String("string"),
    })
    
    var dataprocClusterResource = new DataprocCluster("dataprocClusterResource", DataprocClusterArgs.builder()
        .clusterConfig(DataprocClusterClusterConfigArgs.builder()
            .subclusterSpecs(DataprocClusterClusterConfigSubclusterSpecArgs.builder()
                .hostsCount(0)
                .name("string")
                .resources(DataprocClusterClusterConfigSubclusterSpecResourcesArgs.builder()
                    .diskSize(0)
                    .resourcePresetId("string")
                    .diskTypeId("string")
                    .build())
                .role("string")
                .subnetId("string")
                .autoscalingConfig(DataprocClusterClusterConfigSubclusterSpecAutoscalingConfigArgs.builder()
                    .maxHostsCount(0)
                    .cpuUtilizationTarget(0)
                    .decommissionTimeout(0)
                    .measurementDuration(0)
                    .preemptible(false)
                    .stabilizationDuration(0)
                    .warmupDuration(0)
                    .build())
                .id("string")
                .build())
            .hadoop(DataprocClusterClusterConfigHadoopArgs.builder()
                .properties(Map.of("string", "string"))
                .services("string")
                .sshPublicKeys("string")
                .build())
            .versionId("string")
            .build())
        .serviceAccountId("string")
        .bucket("string")
        .deletionProtection(false)
        .description("string")
        .folderId("string")
        .hostGroupIds("string")
        .labels(Map.of("string", "string"))
        .name("string")
        .securityGroupIds("string")
        .uiProxy(false)
        .zoneId("string")
        .build());
    
    dataproc_cluster_resource = yandex.DataprocCluster("dataprocClusterResource",
        cluster_config={
            "subcluster_specs": [{
                "hosts_count": 0,
                "name": "string",
                "resources": {
                    "disk_size": 0,
                    "resource_preset_id": "string",
                    "disk_type_id": "string",
                },
                "role": "string",
                "subnet_id": "string",
                "autoscaling_config": {
                    "max_hosts_count": 0,
                    "cpu_utilization_target": 0,
                    "decommission_timeout": 0,
                    "measurement_duration": 0,
                    "preemptible": False,
                    "stabilization_duration": 0,
                    "warmup_duration": 0,
                },
                "id": "string",
            }],
            "hadoop": {
                "properties": {
                    "string": "string",
                },
                "services": ["string"],
                "ssh_public_keys": ["string"],
            },
            "version_id": "string",
        },
        service_account_id="string",
        bucket="string",
        deletion_protection=False,
        description="string",
        folder_id="string",
        host_group_ids=["string"],
        labels={
            "string": "string",
        },
        name="string",
        security_group_ids=["string"],
        ui_proxy=False,
        zone_id="string")
    
    const dataprocClusterResource = new yandex.DataprocCluster("dataprocClusterResource", {
        clusterConfig: {
            subclusterSpecs: [{
                hostsCount: 0,
                name: "string",
                resources: {
                    diskSize: 0,
                    resourcePresetId: "string",
                    diskTypeId: "string",
                },
                role: "string",
                subnetId: "string",
                autoscalingConfig: {
                    maxHostsCount: 0,
                    cpuUtilizationTarget: 0,
                    decommissionTimeout: 0,
                    measurementDuration: 0,
                    preemptible: false,
                    stabilizationDuration: 0,
                    warmupDuration: 0,
                },
                id: "string",
            }],
            hadoop: {
                properties: {
                    string: "string",
                },
                services: ["string"],
                sshPublicKeys: ["string"],
            },
            versionId: "string",
        },
        serviceAccountId: "string",
        bucket: "string",
        deletionProtection: false,
        description: "string",
        folderId: "string",
        hostGroupIds: ["string"],
        labels: {
            string: "string",
        },
        name: "string",
        securityGroupIds: ["string"],
        uiProxy: false,
        zoneId: "string",
    });
    
    type: yandex:DataprocCluster
    properties:
        bucket: string
        clusterConfig:
            hadoop:
                properties:
                    string: string
                services:
                    - string
                sshPublicKeys:
                    - string
            subclusterSpecs:
                - autoscalingConfig:
                    cpuUtilizationTarget: 0
                    decommissionTimeout: 0
                    maxHostsCount: 0
                    measurementDuration: 0
                    preemptible: false
                    stabilizationDuration: 0
                    warmupDuration: 0
                  hostsCount: 0
                  id: string
                  name: string
                  resources:
                    diskSize: 0
                    diskTypeId: string
                    resourcePresetId: string
                  role: string
                  subnetId: string
            versionId: string
        deletionProtection: false
        description: string
        folderId: string
        hostGroupIds:
            - string
        labels:
            string: string
        name: string
        securityGroupIds:
            - string
        serviceAccountId: string
        uiProxy: false
        zoneId: string
    

    DataprocCluster Resource Properties

    To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.

    Inputs

    In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.

    The DataprocCluster resource accepts the following input properties:

    ClusterConfig DataprocClusterClusterConfig
    Configuration and resources for hosts that should be created with the cluster. The structure is documented below.
    ServiceAccountId string
    The service account used by the Data Proc agent to access Yandex.Cloud resources. The selected service account should have the mdb.dataproc.agent role on the folder where the Data Proc cluster will be located.
    Bucket string
    Name of the Object Storage bucket to use for Data Proc jobs. The Data Proc agent saves the output of the job driver's process to the specified bucket. For this to work, the service account (specified by the service_account_id argument) should be given permission to create objects within this bucket.
    DeletionProtection bool
    Inhibits deletion of the cluster. Can be either true or false.
    Description string
    Description of the Data Proc cluster.
    FolderId string
    ID of the folder to create a cluster in. If it is not provided, the default provider folder is used.
    HostGroupIds List<string>
    A list of host group IDs to place VMs of the cluster on.
    Labels Dictionary<string, string>
    A set of key/value label pairs to assign to the Data Proc cluster.
    Name string
    Name of the Data Proc cluster.
    SecurityGroupIds List<string>
    A list of security group IDs that the cluster belongs to.
    UiProxy bool
    Whether to enable UI Proxy feature.
    ZoneId string
    ID of the availability zone to create cluster in. If it is not provided, the default provider zone is used.
    ClusterConfig DataprocClusterClusterConfigArgs
    Configuration and resources for hosts that should be created with the cluster. The structure is documented below.
    ServiceAccountId string
    The service account used by the Data Proc agent to access Yandex.Cloud resources. The selected service account should have the mdb.dataproc.agent role on the folder where the Data Proc cluster will be located.
    Bucket string
    Name of the Object Storage bucket to use for Data Proc jobs. The Data Proc agent saves the output of the job driver's process to the specified bucket. For this to work, the service account (specified by the service_account_id argument) should be given permission to create objects within this bucket.
    DeletionProtection bool
    Inhibits deletion of the cluster. Can be either true or false.
    Description string
    Description of the Data Proc cluster.
    FolderId string
    ID of the folder to create a cluster in. If it is not provided, the default provider folder is used.
    HostGroupIds []string
    A list of host group IDs to place VMs of the cluster on.
    Labels map[string]string
    A set of key/value label pairs to assign to the Data Proc cluster.
    Name string
    Name of the Data Proc cluster.
    SecurityGroupIds []string
    A list of security group IDs that the cluster belongs to.
    UiProxy bool
    Whether to enable UI Proxy feature.
    ZoneId string
    ID of the availability zone to create cluster in. If it is not provided, the default provider zone is used.
    clusterConfig DataprocClusterClusterConfig
    Configuration and resources for hosts that should be created with the cluster. The structure is documented below.
    serviceAccountId String
    The service account used by the Data Proc agent to access Yandex.Cloud resources. The selected service account should have the mdb.dataproc.agent role on the folder where the Data Proc cluster will be located.
    bucket String
    Name of the Object Storage bucket to use for Data Proc jobs. The Data Proc agent saves the output of the job driver's process to the specified bucket. For this to work, the service account (specified by the service_account_id argument) should be given permission to create objects within this bucket.
    deletionProtection Boolean
    Inhibits deletion of the cluster. Can be either true or false.
    description String
    Description of the Data Proc cluster.
    folderId String
    ID of the folder to create a cluster in. If it is not provided, the default provider folder is used.
    hostGroupIds List<String>
    A list of host group IDs to place VMs of the cluster on.
    labels Map<String,String>
    A set of key/value label pairs to assign to the Data Proc cluster.
    name String
    Name of the Data Proc cluster.
    securityGroupIds List<String>
    A list of security group IDs that the cluster belongs to.
    uiProxy Boolean
    Whether to enable UI Proxy feature.
    zoneId String
    ID of the availability zone to create cluster in. If it is not provided, the default provider zone is used.
    clusterConfig DataprocClusterClusterConfig
    Configuration and resources for hosts that should be created with the cluster. The structure is documented below.
    serviceAccountId string
    The service account used by the Data Proc agent to access Yandex.Cloud resources. The selected service account should have the mdb.dataproc.agent role on the folder where the Data Proc cluster will be located.
    bucket string
    Name of the Object Storage bucket to use for Data Proc jobs. The Data Proc agent saves the output of the job driver's process to the specified bucket. For this to work, the service account (specified by the service_account_id argument) should be given permission to create objects within this bucket.
    deletionProtection boolean
    Inhibits deletion of the cluster. Can be either true or false.
    description string
    Description of the Data Proc cluster.
    folderId string
    ID of the folder to create a cluster in. If it is not provided, the default provider folder is used.
    hostGroupIds string[]
    A list of host group IDs to place VMs of the cluster on.
    labels {[key: string]: string}
    A set of key/value label pairs to assign to the Data Proc cluster.
    name string
    Name of the Data Proc cluster.
    securityGroupIds string[]
    A list of security group IDs that the cluster belongs to.
    uiProxy boolean
    Whether to enable UI Proxy feature.
    zoneId string
    ID of the availability zone to create cluster in. If it is not provided, the default provider zone is used.
    cluster_config DataprocClusterClusterConfigArgs
    Configuration and resources for hosts that should be created with the cluster. The structure is documented below.
    service_account_id str
    The service account used by the Data Proc agent to access Yandex.Cloud resources. The selected service account should have the mdb.dataproc.agent role on the folder where the Data Proc cluster will be located.
    bucket str
    Name of the Object Storage bucket to use for Data Proc jobs. The Data Proc agent saves the output of the job driver's process to the specified bucket. For this to work, the service account (specified by the service_account_id argument) should be given permission to create objects within this bucket.
    deletion_protection bool
    Inhibits deletion of the cluster. Can be either true or false.
    description str
    Description of the Data Proc cluster.
    folder_id str
    ID of the folder to create a cluster in. If it is not provided, the default provider folder is used.
    host_group_ids Sequence[str]
    A list of host group IDs to place VMs of the cluster on.
    labels Mapping[str, str]
    A set of key/value label pairs to assign to the Data Proc cluster.
    name str
    Name of the Data Proc cluster.
    security_group_ids Sequence[str]
    A list of security group IDs that the cluster belongs to.
    ui_proxy bool
    Whether to enable UI Proxy feature.
    zone_id str
    ID of the availability zone to create cluster in. If it is not provided, the default provider zone is used.
    clusterConfig Property Map
    Configuration and resources for hosts that should be created with the cluster. The structure is documented below.
    serviceAccountId String
    The service account used by the Data Proc agent to access Yandex.Cloud resources. The selected service account should have the mdb.dataproc.agent role on the folder where the Data Proc cluster will be located.
    bucket String
    Name of the Object Storage bucket to use for Data Proc jobs. The Data Proc agent saves the output of the job driver's process to the specified bucket. For this to work, the service account (specified by the service_account_id argument) should be given permission to create objects within this bucket.
    deletionProtection Boolean
    Inhibits deletion of the cluster. Can be either true or false.
    description String
    Description of the Data Proc cluster.
    folderId String
    ID of the folder to create a cluster in. If it is not provided, the default provider folder is used.
    hostGroupIds List<String>
    A list of host group IDs to place VMs of the cluster on.
    labels Map<String>
    A set of key/value label pairs to assign to the Data Proc cluster.
    name String
    Name of the Data Proc cluster.
    securityGroupIds List<String>
    A list of security group IDs that the cluster belongs to.
    uiProxy Boolean
    Whether to enable UI Proxy feature.
    zoneId String
    ID of the availability zone to create cluster in. If it is not provided, the default provider zone is used.

    Outputs

    All input properties are implicitly available as output properties. Additionally, the DataprocCluster resource produces the following output properties:

    CreatedAt string
    (Computed) The Data Proc cluster creation timestamp.

    • cluster_config.0.subcluster_spec.X.id - (Computed) ID of the subcluster.
    Id string
    The provider-assigned unique ID for this managed resource.
    CreatedAt string
    (Computed) The Data Proc cluster creation timestamp.

    • cluster_config.0.subcluster_spec.X.id - (Computed) ID of the subcluster.
    Id string
    The provider-assigned unique ID for this managed resource.
    createdAt String
    (Computed) The Data Proc cluster creation timestamp.

    • cluster_config.0.subcluster_spec.X.id - (Computed) ID of the subcluster.
    id String
    The provider-assigned unique ID for this managed resource.
    createdAt string
    (Computed) The Data Proc cluster creation timestamp.

    • cluster_config.0.subcluster_spec.X.id - (Computed) ID of the subcluster.
    id string
    The provider-assigned unique ID for this managed resource.
    created_at str
    (Computed) The Data Proc cluster creation timestamp.

    • cluster_config.0.subcluster_spec.X.id - (Computed) ID of the subcluster.
    id str
    The provider-assigned unique ID for this managed resource.
    createdAt String
    (Computed) The Data Proc cluster creation timestamp.

    • cluster_config.0.subcluster_spec.X.id - (Computed) ID of the subcluster.
    id String
    The provider-assigned unique ID for this managed resource.

    Look up Existing DataprocCluster Resource

    Get an existing DataprocCluster resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

    public static get(name: string, id: Input<ID>, state?: DataprocClusterState, opts?: CustomResourceOptions): DataprocCluster
    @staticmethod
    def get(resource_name: str,
            id: str,
            opts: Optional[ResourceOptions] = None,
            bucket: Optional[str] = None,
            cluster_config: Optional[DataprocClusterClusterConfigArgs] = None,
            created_at: Optional[str] = None,
            deletion_protection: Optional[bool] = None,
            description: Optional[str] = None,
            folder_id: Optional[str] = None,
            host_group_ids: Optional[Sequence[str]] = None,
            labels: Optional[Mapping[str, str]] = None,
            name: Optional[str] = None,
            security_group_ids: Optional[Sequence[str]] = None,
            service_account_id: Optional[str] = None,
            ui_proxy: Optional[bool] = None,
            zone_id: Optional[str] = None) -> DataprocCluster
    func GetDataprocCluster(ctx *Context, name string, id IDInput, state *DataprocClusterState, opts ...ResourceOption) (*DataprocCluster, error)
    public static DataprocCluster Get(string name, Input<string> id, DataprocClusterState? state, CustomResourceOptions? opts = null)
    public static DataprocCluster get(String name, Output<String> id, DataprocClusterState state, CustomResourceOptions options)
    Resource lookup is not supported in YAML
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    resource_name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    The following state arguments are supported:
    Bucket string
    Name of the Object Storage bucket to use for Data Proc jobs. The Data Proc agent saves the output of the job driver's process to the specified bucket. For this to work, the service account (specified by the service_account_id argument) should be given permission to create objects within this bucket.
    ClusterConfig DataprocClusterClusterConfig
    Configuration and resources for hosts that should be created with the cluster. The structure is documented below.
    CreatedAt string
    (Computed) The Data Proc cluster creation timestamp.

    • cluster_config.0.subcluster_spec.X.id - (Computed) ID of the subcluster.
    DeletionProtection bool
    Inhibits deletion of the cluster. Can be either true or false.
    Description string
    Description of the Data Proc cluster.
    FolderId string
    ID of the folder to create a cluster in. If it is not provided, the default provider folder is used.
    HostGroupIds List<string>
    A list of host group IDs to place VMs of the cluster on.
    Labels Dictionary<string, string>
    A set of key/value label pairs to assign to the Data Proc cluster.
    Name string
    Name of the Data Proc cluster.
    SecurityGroupIds List<string>
    A list of security group IDs that the cluster belongs to.
    ServiceAccountId string
    The service account used by the Data Proc agent to access Yandex.Cloud resources. The selected service account should have the mdb.dataproc.agent role on the folder where the Data Proc cluster will be located.
    UiProxy bool
    Whether to enable UI Proxy feature.
    ZoneId string
    ID of the availability zone to create cluster in. If it is not provided, the default provider zone is used.
    Bucket string
    Name of the Object Storage bucket to use for Data Proc jobs. The Data Proc agent saves the output of the job driver's process to the specified bucket. For this to work, the service account (specified by the service_account_id argument) should be given permission to create objects within this bucket.
    ClusterConfig DataprocClusterClusterConfigArgs
    Configuration and resources for hosts that should be created with the cluster. The structure is documented below.
    CreatedAt string
    (Computed) The Data Proc cluster creation timestamp.

    • cluster_config.0.subcluster_spec.X.id - (Computed) ID of the subcluster.
    DeletionProtection bool
    Inhibits deletion of the cluster. Can be either true or false.
    Description string
    Description of the Data Proc cluster.
    FolderId string
    ID of the folder to create a cluster in. If it is not provided, the default provider folder is used.
    HostGroupIds []string
    A list of host group IDs to place VMs of the cluster on.
    Labels map[string]string
    A set of key/value label pairs to assign to the Data Proc cluster.
    Name string
    Name of the Data Proc cluster.
    SecurityGroupIds []string
    A list of security group IDs that the cluster belongs to.
    ServiceAccountId string
    The service account used by the Data Proc agent to access Yandex.Cloud resources. The selected service account should have the mdb.dataproc.agent role on the folder where the Data Proc cluster will be located.
    UiProxy bool
    Whether to enable UI Proxy feature.
    ZoneId string
    ID of the availability zone to create cluster in. If it is not provided, the default provider zone is used.
    bucket String
    Name of the Object Storage bucket to use for Data Proc jobs. The Data Proc agent saves the output of the job driver's process to the specified bucket. For this to work, the service account (specified by the service_account_id argument) should be given permission to create objects within this bucket.
    clusterConfig DataprocClusterClusterConfig
    Configuration and resources for hosts that should be created with the cluster. The structure is documented below.
    createdAt String
    (Computed) The Data Proc cluster creation timestamp.

    • cluster_config.0.subcluster_spec.X.id - (Computed) ID of the subcluster.
    deletionProtection Boolean
    Inhibits deletion of the cluster. Can be either true or false.
    description String
    Description of the Data Proc cluster.
    folderId String
    ID of the folder to create a cluster in. If it is not provided, the default provider folder is used.
    hostGroupIds List<String>
    A list of host group IDs to place VMs of the cluster on.
    labels Map<String,String>
    A set of key/value label pairs to assign to the Data Proc cluster.
    name String
    Name of the Data Proc cluster.
    securityGroupIds List<String>
    A list of security group IDs that the cluster belongs to.
    serviceAccountId String
    The service account used by the Data Proc agent to access Yandex.Cloud resources. The selected service account should have the mdb.dataproc.agent role on the folder where the Data Proc cluster will be located.
    uiProxy Boolean
    Whether to enable UI Proxy feature.
    zoneId String
    ID of the availability zone to create cluster in. If it is not provided, the default provider zone is used.
    bucket string
    Name of the Object Storage bucket to use for Data Proc jobs. The Data Proc agent saves the output of the job driver's process to the specified bucket. For this to work, the service account (specified by the service_account_id argument) should be given permission to create objects within this bucket.
    clusterConfig DataprocClusterClusterConfig
    Configuration and resources for hosts that should be created with the cluster. The structure is documented below.
    createdAt string
    (Computed) The Data Proc cluster creation timestamp.

    • cluster_config.0.subcluster_spec.X.id - (Computed) ID of the subcluster.
    deletionProtection boolean
    Inhibits deletion of the cluster. Can be either true or false.
    description string
    Description of the Data Proc cluster.
    folderId string
    ID of the folder to create a cluster in. If it is not provided, the default provider folder is used.
    hostGroupIds string[]
    A list of host group IDs to place VMs of the cluster on.
    labels {[key: string]: string}
    A set of key/value label pairs to assign to the Data Proc cluster.
    name string
    Name of the Data Proc cluster.
    securityGroupIds string[]
    A list of security group IDs that the cluster belongs to.
    serviceAccountId string
    The service account used by the Data Proc agent to access Yandex.Cloud resources. The selected service account should have the mdb.dataproc.agent role on the folder where the Data Proc cluster will be located.
    uiProxy boolean
    Whether to enable UI Proxy feature.
    zoneId string
    ID of the availability zone to create cluster in. If it is not provided, the default provider zone is used.
    bucket str
    Name of the Object Storage bucket to use for Data Proc jobs. The Data Proc agent saves the output of the job driver's process to the specified bucket. For this to work, the service account (specified by the service_account_id argument) should be given permission to create objects within this bucket.
    cluster_config DataprocClusterClusterConfigArgs
    Configuration and resources for hosts that should be created with the cluster. The structure is documented below.
    created_at str
    (Computed) The Data Proc cluster creation timestamp.

    • cluster_config.0.subcluster_spec.X.id - (Computed) ID of the subcluster.
    deletion_protection bool
    Inhibits deletion of the cluster. Can be either true or false.
    description str
    Description of the Data Proc cluster.
    folder_id str
    ID of the folder to create a cluster in. If it is not provided, the default provider folder is used.
    host_group_ids Sequence[str]
    A list of host group IDs to place VMs of the cluster on.
    labels Mapping[str, str]
    A set of key/value label pairs to assign to the Data Proc cluster.
    name str
    Name of the Data Proc cluster.
    security_group_ids Sequence[str]
    A list of security group IDs that the cluster belongs to.
    service_account_id str
    The service account used by the Data Proc agent to access Yandex.Cloud resources. The selected service account should have the mdb.dataproc.agent role on the folder where the Data Proc cluster will be located.
    ui_proxy bool
    Whether to enable UI Proxy feature.
    zone_id str
    ID of the availability zone to create cluster in. If it is not provided, the default provider zone is used.
    bucket String
    Name of the Object Storage bucket to use for Data Proc jobs. The Data Proc agent saves the output of the job driver's process to the specified bucket. For this to work, the service account (specified by the service_account_id argument) should be given permission to create objects within this bucket.
    clusterConfig Property Map
    Configuration and resources for hosts that should be created with the cluster. The structure is documented below.
    createdAt String
    (Computed) The Data Proc cluster creation timestamp.

    • cluster_config.0.subcluster_spec.X.id - (Computed) ID of the subcluster.
    deletionProtection Boolean
    Inhibits deletion of the cluster. Can be either true or false.
    description String
    Description of the Data Proc cluster.
    folderId String
    ID of the folder to create a cluster in. If it is not provided, the default provider folder is used.
    hostGroupIds List<String>
    A list of host group IDs to place VMs of the cluster on.
    labels Map<String>
    A set of key/value label pairs to assign to the Data Proc cluster.
    name String
    Name of the Data Proc cluster.
    securityGroupIds List<String>
    A list of security group IDs that the cluster belongs to.
    serviceAccountId String
    The service account used by the Data Proc agent to access Yandex.Cloud resources. The selected service account should have the mdb.dataproc.agent role on the folder where the Data Proc cluster will be located.
    uiProxy Boolean
    Whether to enable UI Proxy feature.
    zoneId String
    ID of the availability zone to create cluster in. If it is not provided, the default provider zone is used.

    Supporting Types

    DataprocClusterClusterConfig, DataprocClusterClusterConfigArgs

    SubclusterSpecs List<DataprocClusterClusterConfigSubclusterSpec>
    Configuration of the Data Proc subcluster. The structure is documented below.
    Hadoop DataprocClusterClusterConfigHadoop
    Data Proc specific options. The structure is documented below.
    VersionId string
    Version of Data Proc image.
    SubclusterSpecs []DataprocClusterClusterConfigSubclusterSpec
    Configuration of the Data Proc subcluster. The structure is documented below.
    Hadoop DataprocClusterClusterConfigHadoop
    Data Proc specific options. The structure is documented below.
    VersionId string
    Version of Data Proc image.
    subclusterSpecs List<DataprocClusterClusterConfigSubclusterSpec>
    Configuration of the Data Proc subcluster. The structure is documented below.
    hadoop DataprocClusterClusterConfigHadoop
    Data Proc specific options. The structure is documented below.
    versionId String
    Version of Data Proc image.
    subclusterSpecs DataprocClusterClusterConfigSubclusterSpec[]
    Configuration of the Data Proc subcluster. The structure is documented below.
    hadoop DataprocClusterClusterConfigHadoop
    Data Proc specific options. The structure is documented below.
    versionId string
    Version of Data Proc image.
    subcluster_specs Sequence[DataprocClusterClusterConfigSubclusterSpec]
    Configuration of the Data Proc subcluster. The structure is documented below.
    hadoop DataprocClusterClusterConfigHadoop
    Data Proc specific options. The structure is documented below.
    version_id str
    Version of Data Proc image.
    subclusterSpecs List<Property Map>
    Configuration of the Data Proc subcluster. The structure is documented below.
    hadoop Property Map
    Data Proc specific options. The structure is documented below.
    versionId String
    Version of Data Proc image.

    DataprocClusterClusterConfigHadoop, DataprocClusterClusterConfigHadoopArgs

    Properties Dictionary<string, string>
    A set of key/value pairs that are used to configure cluster services.
    Services List<string>
    List of services to run on Data Proc cluster.
    SshPublicKeys List<string>
    List of SSH public keys to place on the hosts of the cluster. For information on how to connect to the cluster, see the official documentation.
    Properties map[string]string
    A set of key/value pairs that are used to configure cluster services.
    Services []string
    List of services to run on Data Proc cluster.
    SshPublicKeys []string
    List of SSH public keys to place on the hosts of the cluster. For information on how to connect to the cluster, see the official documentation.
    properties Map<String,String>
    A set of key/value pairs that are used to configure cluster services.
    services List<String>
    List of services to run on Data Proc cluster.
    sshPublicKeys List<String>
    List of SSH public keys to place on the hosts of the cluster. For information on how to connect to the cluster, see the official documentation.
    properties {[key: string]: string}
    A set of key/value pairs that are used to configure cluster services.
    services string[]
    List of services to run on Data Proc cluster.
    sshPublicKeys string[]
    List of SSH public keys to place on the hosts of the cluster. For information on how to connect to the cluster, see the official documentation.
    properties Mapping[str, str]
    A set of key/value pairs that are used to configure cluster services.
    services Sequence[str]
    List of services to run on Data Proc cluster.
    ssh_public_keys Sequence[str]
    List of SSH public keys to place on the hosts of the cluster. For information on how to connect to the cluster, see the official documentation.
    properties Map<String>
    A set of key/value pairs that are used to configure cluster services.
    services List<String>
    List of services to run on Data Proc cluster.
    sshPublicKeys List<String>
    List of SSH public keys to place on the hosts of the cluster. For information on how to connect to the cluster, see the official documentation.

    DataprocClusterClusterConfigSubclusterSpec, DataprocClusterClusterConfigSubclusterSpecArgs

    HostsCount int
    Number of hosts within Data Proc subcluster.
    Name string
    Name of the Data Proc subcluster.
    Resources DataprocClusterClusterConfigSubclusterSpecResources
    Resources allocated to each host of the Data Proc subcluster. The structure is documented below.
    Role string
    Role of the subcluster in the Data Proc cluster.
    SubnetId string
    The ID of the subnet to which hosts of the subcluster belong. Subnets of all the subclusters must belong to the same VPC network.
    AutoscalingConfig DataprocClusterClusterConfigSubclusterSpecAutoscalingConfig
    Autoscaling configuration for compute subclusters.
    Id string
    (Computed) ID of the Data Proc subcluster.
    HostsCount int
    Number of hosts within Data Proc subcluster.
    Name string
    Name of the Data Proc subcluster.
    Resources DataprocClusterClusterConfigSubclusterSpecResources
    Resources allocated to each host of the Data Proc subcluster. The structure is documented below.
    Role string
    Role of the subcluster in the Data Proc cluster.
    SubnetId string
    The ID of the subnet to which hosts of the subcluster belong. Subnets of all the subclusters must belong to the same VPC network.
    AutoscalingConfig DataprocClusterClusterConfigSubclusterSpecAutoscalingConfig
    Autoscaling configuration for compute subclusters.
    Id string
    (Computed) ID of the Data Proc subcluster.
    hostsCount Integer
    Number of hosts within Data Proc subcluster.
    name String
    Name of the Data Proc subcluster.
    resources DataprocClusterClusterConfigSubclusterSpecResources
    Resources allocated to each host of the Data Proc subcluster. The structure is documented below.
    role String
    Role of the subcluster in the Data Proc cluster.
    subnetId String
    The ID of the subnet to which hosts of the subcluster belong. Subnets of all the subclusters must belong to the same VPC network.
    autoscalingConfig DataprocClusterClusterConfigSubclusterSpecAutoscalingConfig
    Autoscaling configuration for compute subclusters.
    id String
    (Computed) ID of the Data Proc subcluster.
    hostsCount number
    Number of hosts within Data Proc subcluster.
    name string
    Name of the Data Proc subcluster.
    resources DataprocClusterClusterConfigSubclusterSpecResources
    Resources allocated to each host of the Data Proc subcluster. The structure is documented below.
    role string
    Role of the subcluster in the Data Proc cluster.
    subnetId string
    The ID of the subnet to which hosts of the subcluster belong. Subnets of all the subclusters must belong to the same VPC network.
    autoscalingConfig DataprocClusterClusterConfigSubclusterSpecAutoscalingConfig
    Autoscaling configuration for compute subclusters.
    id string
    (Computed) ID of the Data Proc subcluster.
    hosts_count int
    Number of hosts within Data Proc subcluster.
    name str
    Name of the Data Proc subcluster.
    resources DataprocClusterClusterConfigSubclusterSpecResources
    Resources allocated to each host of the Data Proc subcluster. The structure is documented below.
    role str
    Role of the subcluster in the Data Proc cluster.
    subnet_id str
    The ID of the subnet to which hosts of the subcluster belong. Subnets of all the subclusters must belong to the same VPC network.
    autoscaling_config DataprocClusterClusterConfigSubclusterSpecAutoscalingConfig
    Autoscaling configuration for compute subclusters.
    id str
    (Computed) ID of the Data Proc subcluster.
    hostsCount Number
    Number of hosts within Data Proc subcluster.
    name String
    Name of the Data Proc subcluster.
    resources Property Map
    Resources allocated to each host of the Data Proc subcluster. The structure is documented below.
    role String
    Role of the subcluster in the Data Proc cluster.
    subnetId String
    The ID of the subnet to which hosts of the subcluster belong. Subnets of all the subclusters must belong to the same VPC network.
    autoscalingConfig Property Map
    Autoscaling configuration for compute subclusters.
    id String
    (Computed) ID of the Data Proc subcluster.

    DataprocClusterClusterConfigSubclusterSpecAutoscalingConfig, DataprocClusterClusterConfigSubclusterSpecAutoscalingConfigArgs

    MaxHostsCount int
    Maximum number of nodes in autoscaling subclusters.
    CpuUtilizationTarget double
    Defines an autoscaling rule based on the average CPU utilization of the instance group. If not set, the default autoscaling metric will be used.
    DecommissionTimeout int
    Timeout to gracefully decommission nodes during downscaling. In seconds.
    MeasurementDuration int
    Time in seconds allotted for averaging metrics.
    Preemptible bool
    Bool flag -- whether to use preemptible compute instances. Preemptible instances are stopped at least once every 24 hours, and can be stopped at any time if their resources are needed by Compute. For more information, see Preemptible Virtual Machines.
    StabilizationDuration int
    Minimum amount of time in seconds allotted for monitoring before Instance Groups can reduce the number of instances in the group. During this time, the group size doesn't decrease, even if the new metric values indicate that it should.
    WarmupDuration int
    The warmup time of the instance in seconds. During this time, traffic is sent to the instance, but instance metrics are not collected.
    MaxHostsCount int
    Maximum number of nodes in autoscaling subclusters.
    CpuUtilizationTarget float64
    Defines an autoscaling rule based on the average CPU utilization of the instance group. If not set, the default autoscaling metric will be used.
    DecommissionTimeout int
    Timeout to gracefully decommission nodes during downscaling. In seconds.
    MeasurementDuration int
    Time in seconds allotted for averaging metrics.
    Preemptible bool
    Bool flag -- whether to use preemptible compute instances. Preemptible instances are stopped at least once every 24 hours, and can be stopped at any time if their resources are needed by Compute. For more information, see Preemptible Virtual Machines.
    StabilizationDuration int
    Minimum amount of time in seconds allotted for monitoring before Instance Groups can reduce the number of instances in the group. During this time, the group size doesn't decrease, even if the new metric values indicate that it should.
    WarmupDuration int
    The warmup time of the instance in seconds. During this time, traffic is sent to the instance, but instance metrics are not collected.
    maxHostsCount Integer
    Maximum number of nodes in autoscaling subclusters.
    cpuUtilizationTarget Double
    Defines an autoscaling rule based on the average CPU utilization of the instance group. If not set, the default autoscaling metric will be used.
    decommissionTimeout Integer
    Timeout to gracefully decommission nodes during downscaling. In seconds.
    measurementDuration Integer
    Time in seconds allotted for averaging metrics.
    preemptible Boolean
    Bool flag -- whether to use preemptible compute instances. Preemptible instances are stopped at least once every 24 hours, and can be stopped at any time if their resources are needed by Compute. For more information, see Preemptible Virtual Machines.
    stabilizationDuration Integer
    Minimum amount of time in seconds allotted for monitoring before Instance Groups can reduce the number of instances in the group. During this time, the group size doesn't decrease, even if the new metric values indicate that it should.
    warmupDuration Integer
    The warmup time of the instance in seconds. During this time, traffic is sent to the instance, but instance metrics are not collected.
    maxHostsCount number
    Maximum number of nodes in autoscaling subclusters.
    cpuUtilizationTarget number
    Defines an autoscaling rule based on the average CPU utilization of the instance group. If not set, the default autoscaling metric will be used.
    decommissionTimeout number
    Timeout to gracefully decommission nodes during downscaling. In seconds.
    measurementDuration number
    Time in seconds allotted for averaging metrics.
    preemptible boolean
    Bool flag -- whether to use preemptible compute instances. Preemptible instances are stopped at least once every 24 hours, and can be stopped at any time if their resources are needed by Compute. For more information, see Preemptible Virtual Machines.
    stabilizationDuration number
    Minimum amount of time in seconds allotted for monitoring before Instance Groups can reduce the number of instances in the group. During this time, the group size doesn't decrease, even if the new metric values indicate that it should.
    warmupDuration number
    The warmup time of the instance in seconds. During this time, traffic is sent to the instance, but instance metrics are not collected.
    max_hosts_count int
    Maximum number of nodes in autoscaling subclusters.
    cpu_utilization_target float
    Defines an autoscaling rule based on the average CPU utilization of the instance group. If not set, the default autoscaling metric will be used.
    decommission_timeout int
    Timeout to gracefully decommission nodes during downscaling. In seconds.
    measurement_duration int
    Time in seconds allotted for averaging metrics.
    preemptible bool
    Bool flag -- whether to use preemptible compute instances. Preemptible instances are stopped at least once every 24 hours, and can be stopped at any time if their resources are needed by Compute. For more information, see Preemptible Virtual Machines.
    stabilization_duration int
    Minimum amount of time in seconds allotted for monitoring before Instance Groups can reduce the number of instances in the group. During this time, the group size doesn't decrease, even if the new metric values indicate that it should.
    warmup_duration int
    The warmup time of the instance in seconds. During this time, traffic is sent to the instance, but instance metrics are not collected.
    maxHostsCount Number
    Maximum number of nodes in autoscaling subclusters.
    cpuUtilizationTarget Number
    Defines an autoscaling rule based on the average CPU utilization of the instance group. If not set, the default autoscaling metric will be used.
    decommissionTimeout Number
    Timeout to gracefully decommission nodes during downscaling. In seconds.
    measurementDuration Number
    Time in seconds allotted for averaging metrics.
    preemptible Boolean
    Bool flag -- whether to use preemptible compute instances. Preemptible instances are stopped at least once every 24 hours, and can be stopped at any time if their resources are needed by Compute. For more information, see Preemptible Virtual Machines.
    stabilizationDuration Number
    Minimum amount of time in seconds allotted for monitoring before Instance Groups can reduce the number of instances in the group. During this time, the group size doesn't decrease, even if the new metric values indicate that it should.
    warmupDuration Number
    The warmup time of the instance in seconds. During this time, traffic is sent to the instance, but instance metrics are not collected.

    DataprocClusterClusterConfigSubclusterSpecResources, DataprocClusterClusterConfigSubclusterSpecResourcesArgs

    DiskSize int
    Volume of the storage available to a host, in gigabytes.
    ResourcePresetId string
    The ID of the preset for computational resources available to a host. All available presets are listed in the documentation.
    DiskTypeId string
    Type of the storage of a host. One of network-hdd (default) or network-ssd.
    DiskSize int
    Volume of the storage available to a host, in gigabytes.
    ResourcePresetId string
    The ID of the preset for computational resources available to a host. All available presets are listed in the documentation.
    DiskTypeId string
    Type of the storage of a host. One of network-hdd (default) or network-ssd.
    diskSize Integer
    Volume of the storage available to a host, in gigabytes.
    resourcePresetId String
    The ID of the preset for computational resources available to a host. All available presets are listed in the documentation.
    diskTypeId String
    Type of the storage of a host. One of network-hdd (default) or network-ssd.
    diskSize number
    Volume of the storage available to a host, in gigabytes.
    resourcePresetId string
    The ID of the preset for computational resources available to a host. All available presets are listed in the documentation.
    diskTypeId string
    Type of the storage of a host. One of network-hdd (default) or network-ssd.
    disk_size int
    Volume of the storage available to a host, in gigabytes.
    resource_preset_id str
    The ID of the preset for computational resources available to a host. All available presets are listed in the documentation.
    disk_type_id str
    Type of the storage of a host. One of network-hdd (default) or network-ssd.
    diskSize Number
    Volume of the storage available to a host, in gigabytes.
    resourcePresetId String
    The ID of the preset for computational resources available to a host. All available presets are listed in the documentation.
    diskTypeId String
    Type of the storage of a host. One of network-hdd (default) or network-ssd.

    Import

    A cluster can be imported using the id of the resource, e.g.

     $ pulumi import yandex:index/dataprocCluster:DataprocCluster foo cluster_id
    

    To learn more about importing existing cloud resources, see Importing resources.

    Package Details

    Repository
    Yandex pulumi/pulumi-yandex
    License
    Apache-2.0
    Notes
    This Pulumi package is based on the yandex Terraform Provider.
    yandex logo
    Yandex v0.13.0 published on Tuesday, Feb 22, 2022 by Pulumi