databricks.Cluster
Import
The cluster resource can be imported using its cluster ID:
bash
$ pulumi import databricks:index/cluster:Cluster this <cluster-id>
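After importing, declare a matching Cluster resource in your program so that subsequent deployments manage the imported cluster. The following is a minimal, hypothetical Python sketch; the Spark version, node type, and other values are placeholders to be replaced with the imported cluster's actual settings.
import pulumi
import pulumi_databricks as databricks

# Hypothetical sketch: declare the imported cluster in code so `pulumi up`
# manages it after `pulumi import`. All values are placeholders; copy the
# real ones from the code that `pulumi import` prints.
this = databricks.Cluster("this",
    cluster_name="imported-cluster",
    spark_version="14.3.x-scala2.12",
    node_type_id="i3.xlarge",
    autotermination_minutes=20,
    num_workers=1,
    opts=pulumi.ResourceOptions(protect=True))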
Create Cluster Resource
Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.
Constructor syntax
new Cluster(name: string, args: ClusterArgs, opts?: CustomResourceOptions);
@overload
def Cluster(resource_name: str,
args: ClusterArgs,
opts: Optional[ResourceOptions] = None)
@overload
def Cluster(resource_name: str,
opts: Optional[ResourceOptions] = None,
spark_version: Optional[str] = None,
idempotency_token: Optional[str] = None,
workload_type: Optional[ClusterWorkloadTypeArgs] = None,
gcp_attributes: Optional[ClusterGcpAttributesArgs] = None,
azure_attributes: Optional[ClusterAzureAttributesArgs] = None,
cluster_log_conf: Optional[ClusterClusterLogConfArgs] = None,
cluster_mount_infos: Optional[Sequence[ClusterClusterMountInfoArgs]] = None,
cluster_name: Optional[str] = None,
custom_tags: Optional[Mapping[str, str]] = None,
data_security_mode: Optional[str] = None,
docker_image: Optional[ClusterDockerImageArgs] = None,
driver_instance_pool_id: Optional[str] = None,
driver_node_type_id: Optional[str] = None,
enable_elastic_disk: Optional[bool] = None,
enable_local_disk_encryption: Optional[bool] = None,
aws_attributes: Optional[ClusterAwsAttributesArgs] = None,
init_scripts: Optional[Sequence[ClusterInitScriptArgs]] = None,
autotermination_minutes: Optional[int] = None,
instance_pool_id: Optional[str] = None,
is_pinned: Optional[bool] = None,
libraries: Optional[Sequence[ClusterLibraryArgs]] = None,
no_wait: Optional[bool] = None,
node_type_id: Optional[str] = None,
num_workers: Optional[int] = None,
policy_id: Optional[str] = None,
runtime_engine: Optional[str] = None,
single_user_name: Optional[str] = None,
spark_conf: Optional[Mapping[str, str]] = None,
spark_env_vars: Optional[Mapping[str, str]] = None,
autoscale: Optional[ClusterAutoscaleArgs] = None,
ssh_public_keys: Optional[Sequence[str]] = None,
apply_policy_default_values: Optional[bool] = None)
func NewCluster(ctx *Context, name string, args ClusterArgs, opts ...ResourceOption) (*Cluster, error)
public Cluster(string name, ClusterArgs args, CustomResourceOptions? opts = null)
public Cluster(String name, ClusterArgs args)
public Cluster(String name, ClusterArgs args, CustomResourceOptions options)
type: databricks:Cluster
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
Parameters
- name string
- The unique name of the resource.
- args ClusterArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args ClusterArgs
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args ClusterArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args ClusterArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args ClusterArgs
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
Constructor example
The following reference example uses placeholder values for all input properties.
var clusterResource = new Databricks.Cluster("clusterResource", new()
{
SparkVersion = "string",
IdempotencyToken = "string",
WorkloadType = new Databricks.Inputs.ClusterWorkloadTypeArgs
{
Clients = new Databricks.Inputs.ClusterWorkloadTypeClientsArgs
{
Jobs = false,
Notebooks = false,
},
},
GcpAttributes = new Databricks.Inputs.ClusterGcpAttributesArgs
{
Availability = "string",
BootDiskSize = 0,
GoogleServiceAccount = "string",
LocalSsdCount = 0,
UsePreemptibleExecutors = false,
ZoneId = "string",
},
AzureAttributes = new Databricks.Inputs.ClusterAzureAttributesArgs
{
Availability = "string",
FirstOnDemand = 0,
LogAnalyticsInfo = new Databricks.Inputs.ClusterAzureAttributesLogAnalyticsInfoArgs
{
LogAnalyticsPrimaryKey = "string",
LogAnalyticsWorkspaceId = "string",
},
SpotBidMaxPrice = 0,
},
ClusterLogConf = new Databricks.Inputs.ClusterClusterLogConfArgs
{
Dbfs = new Databricks.Inputs.ClusterClusterLogConfDbfsArgs
{
Destination = "string",
},
S3 = new Databricks.Inputs.ClusterClusterLogConfS3Args
{
Destination = "string",
CannedAcl = "string",
EnableEncryption = false,
EncryptionType = "string",
Endpoint = "string",
KmsKey = "string",
Region = "string",
},
},
ClusterMountInfos = new[]
{
new Databricks.Inputs.ClusterClusterMountInfoArgs
{
LocalMountDirPath = "string",
NetworkFilesystemInfo = new Databricks.Inputs.ClusterClusterMountInfoNetworkFilesystemInfoArgs
{
ServerAddress = "string",
MountOptions = "string",
},
RemoteMountDirPath = "string",
},
},
ClusterName = "string",
CustomTags =
{
{ "string", "string" },
},
DataSecurityMode = "string",
DockerImage = new Databricks.Inputs.ClusterDockerImageArgs
{
Url = "string",
BasicAuth = new Databricks.Inputs.ClusterDockerImageBasicAuthArgs
{
Password = "string",
Username = "string",
},
},
DriverInstancePoolId = "string",
DriverNodeTypeId = "string",
EnableElasticDisk = false,
EnableLocalDiskEncryption = false,
AwsAttributes = new Databricks.Inputs.ClusterAwsAttributesArgs
{
Availability = "string",
EbsVolumeCount = 0,
EbsVolumeIops = 0,
EbsVolumeSize = 0,
EbsVolumeThroughput = 0,
EbsVolumeType = "string",
FirstOnDemand = 0,
InstanceProfileArn = "string",
SpotBidPricePercent = 0,
ZoneId = "string",
},
InitScripts = new[]
{
new Databricks.Inputs.ClusterInitScriptArgs
{
Abfss = new Databricks.Inputs.ClusterInitScriptAbfssArgs
{
Destination = "string",
},
File = new Databricks.Inputs.ClusterInitScriptFileArgs
{
Destination = "string",
},
Gcs = new Databricks.Inputs.ClusterInitScriptGcsArgs
{
Destination = "string",
},
S3 = new Databricks.Inputs.ClusterInitScriptS3Args
{
Destination = "string",
CannedAcl = "string",
EnableEncryption = false,
EncryptionType = "string",
Endpoint = "string",
KmsKey = "string",
Region = "string",
},
Volumes = new Databricks.Inputs.ClusterInitScriptVolumesArgs
{
Destination = "string",
},
Workspace = new Databricks.Inputs.ClusterInitScriptWorkspaceArgs
{
Destination = "string",
},
},
},
AutoterminationMinutes = 0,
InstancePoolId = "string",
IsPinned = false,
Libraries = new[]
{
new Databricks.Inputs.ClusterLibraryArgs
{
Cran = new Databricks.Inputs.ClusterLibraryCranArgs
{
Package = "string",
Repo = "string",
},
Egg = "string",
Jar = "string",
Maven = new Databricks.Inputs.ClusterLibraryMavenArgs
{
Coordinates = "string",
Exclusions = new[]
{
"string",
},
Repo = "string",
},
Pypi = new Databricks.Inputs.ClusterLibraryPypiArgs
{
Package = "string",
Repo = "string",
},
Requirements = "string",
Whl = "string",
},
},
NoWait = false,
NodeTypeId = "string",
NumWorkers = 0,
PolicyId = "string",
RuntimeEngine = "string",
SingleUserName = "string",
SparkConf =
{
{ "string", "string" },
},
SparkEnvVars =
{
{ "string", "string" },
},
Autoscale = new Databricks.Inputs.ClusterAutoscaleArgs
{
MaxWorkers = 0,
MinWorkers = 0,
},
SshPublicKeys = new[]
{
"string",
},
ApplyPolicyDefaultValues = false,
});
example, err := databricks.NewCluster(ctx, "clusterResource", &databricks.ClusterArgs{
SparkVersion: pulumi.String("string"),
IdempotencyToken: pulumi.String("string"),
WorkloadType: &databricks.ClusterWorkloadTypeArgs{
Clients: &databricks.ClusterWorkloadTypeClientsArgs{
Jobs: pulumi.Bool(false),
Notebooks: pulumi.Bool(false),
},
},
GcpAttributes: &databricks.ClusterGcpAttributesArgs{
Availability: pulumi.String("string"),
BootDiskSize: pulumi.Int(0),
GoogleServiceAccount: pulumi.String("string"),
LocalSsdCount: pulumi.Int(0),
UsePreemptibleExecutors: pulumi.Bool(false),
ZoneId: pulumi.String("string"),
},
AzureAttributes: &databricks.ClusterAzureAttributesArgs{
Availability: pulumi.String("string"),
FirstOnDemand: pulumi.Int(0),
LogAnalyticsInfo: &databricks.ClusterAzureAttributesLogAnalyticsInfoArgs{
LogAnalyticsPrimaryKey: pulumi.String("string"),
LogAnalyticsWorkspaceId: pulumi.String("string"),
},
SpotBidMaxPrice: pulumi.Float64(0),
},
ClusterLogConf: &databricks.ClusterClusterLogConfArgs{
Dbfs: &databricks.ClusterClusterLogConfDbfsArgs{
Destination: pulumi.String("string"),
},
S3: &databricks.ClusterClusterLogConfS3Args{
Destination: pulumi.String("string"),
CannedAcl: pulumi.String("string"),
EnableEncryption: pulumi.Bool(false),
EncryptionType: pulumi.String("string"),
Endpoint: pulumi.String("string"),
KmsKey: pulumi.String("string"),
Region: pulumi.String("string"),
},
},
ClusterMountInfos: databricks.ClusterClusterMountInfoArray{
&databricks.ClusterClusterMountInfoArgs{
LocalMountDirPath: pulumi.String("string"),
NetworkFilesystemInfo: &databricks.ClusterClusterMountInfoNetworkFilesystemInfoArgs{
ServerAddress: pulumi.String("string"),
MountOptions: pulumi.String("string"),
},
RemoteMountDirPath: pulumi.String("string"),
},
},
ClusterName: pulumi.String("string"),
CustomTags: pulumi.StringMap{
"string": pulumi.String("string"),
},
DataSecurityMode: pulumi.String("string"),
DockerImage: &databricks.ClusterDockerImageArgs{
Url: pulumi.String("string"),
BasicAuth: &databricks.ClusterDockerImageBasicAuthArgs{
Password: pulumi.String("string"),
Username: pulumi.String("string"),
},
},
DriverInstancePoolId: pulumi.String("string"),
DriverNodeTypeId: pulumi.String("string"),
EnableElasticDisk: pulumi.Bool(false),
EnableLocalDiskEncryption: pulumi.Bool(false),
AwsAttributes: &databricks.ClusterAwsAttributesArgs{
Availability: pulumi.String("string"),
EbsVolumeCount: pulumi.Int(0),
EbsVolumeIops: pulumi.Int(0),
EbsVolumeSize: pulumi.Int(0),
EbsVolumeThroughput: pulumi.Int(0),
EbsVolumeType: pulumi.String("string"),
FirstOnDemand: pulumi.Int(0),
InstanceProfileArn: pulumi.String("string"),
SpotBidPricePercent: pulumi.Int(0),
ZoneId: pulumi.String("string"),
},
InitScripts: databricks.ClusterInitScriptArray{
&databricks.ClusterInitScriptArgs{
Abfss: &databricks.ClusterInitScriptAbfssArgs{
Destination: pulumi.String("string"),
},
File: &databricks.ClusterInitScriptFileArgs{
Destination: pulumi.String("string"),
},
Gcs: &databricks.ClusterInitScriptGcsArgs{
Destination: pulumi.String("string"),
},
S3: &databricks.ClusterInitScriptS3Args{
Destination: pulumi.String("string"),
CannedAcl: pulumi.String("string"),
EnableEncryption: pulumi.Bool(false),
EncryptionType: pulumi.String("string"),
Endpoint: pulumi.String("string"),
KmsKey: pulumi.String("string"),
Region: pulumi.String("string"),
},
Volumes: &databricks.ClusterInitScriptVolumesArgs{
Destination: pulumi.String("string"),
},
Workspace: &databricks.ClusterInitScriptWorkspaceArgs{
Destination: pulumi.String("string"),
},
},
},
AutoterminationMinutes: pulumi.Int(0),
InstancePoolId: pulumi.String("string"),
IsPinned: pulumi.Bool(false),
Libraries: databricks.ClusterLibraryArray{
&databricks.ClusterLibraryArgs{
Cran: &databricks.ClusterLibraryCranArgs{
Package: pulumi.String("string"),
Repo: pulumi.String("string"),
},
Egg: pulumi.String("string"),
Jar: pulumi.String("string"),
Maven: &databricks.ClusterLibraryMavenArgs{
Coordinates: pulumi.String("string"),
Exclusions: pulumi.StringArray{
pulumi.String("string"),
},
Repo: pulumi.String("string"),
},
Pypi: &databricks.ClusterLibraryPypiArgs{
Package: pulumi.String("string"),
Repo: pulumi.String("string"),
},
Requirements: pulumi.String("string"),
Whl: pulumi.String("string"),
},
},
NoWait: pulumi.Bool(false),
NodeTypeId: pulumi.String("string"),
NumWorkers: pulumi.Int(0),
PolicyId: pulumi.String("string"),
RuntimeEngine: pulumi.String("string"),
SingleUserName: pulumi.String("string"),
SparkConf: pulumi.StringMap{
"string": pulumi.String("string"),
},
SparkEnvVars: pulumi.StringMap{
"string": pulumi.String("string"),
},
Autoscale: &databricks.ClusterAutoscaleArgs{
MaxWorkers: pulumi.Int(0),
MinWorkers: pulumi.Int(0),
},
SshPublicKeys: pulumi.StringArray{
pulumi.String("string"),
},
ApplyPolicyDefaultValues: pulumi.Bool(false),
})
var clusterResource = new Cluster("clusterResource", ClusterArgs.builder()
.sparkVersion("string")
.idempotencyToken("string")
.workloadType(ClusterWorkloadTypeArgs.builder()
.clients(ClusterWorkloadTypeClientsArgs.builder()
.jobs(false)
.notebooks(false)
.build())
.build())
.gcpAttributes(ClusterGcpAttributesArgs.builder()
.availability("string")
.bootDiskSize(0)
.googleServiceAccount("string")
.localSsdCount(0)
.usePreemptibleExecutors(false)
.zoneId("string")
.build())
.azureAttributes(ClusterAzureAttributesArgs.builder()
.availability("string")
.firstOnDemand(0)
.logAnalyticsInfo(ClusterAzureAttributesLogAnalyticsInfoArgs.builder()
.logAnalyticsPrimaryKey("string")
.logAnalyticsWorkspaceId("string")
.build())
.spotBidMaxPrice(0)
.build())
.clusterLogConf(ClusterClusterLogConfArgs.builder()
.dbfs(ClusterClusterLogConfDbfsArgs.builder()
.destination("string")
.build())
.s3(ClusterClusterLogConfS3Args.builder()
.destination("string")
.cannedAcl("string")
.enableEncryption(false)
.encryptionType("string")
.endpoint("string")
.kmsKey("string")
.region("string")
.build())
.build())
.clusterMountInfos(ClusterClusterMountInfoArgs.builder()
.localMountDirPath("string")
.networkFilesystemInfo(ClusterClusterMountInfoNetworkFilesystemInfoArgs.builder()
.serverAddress("string")
.mountOptions("string")
.build())
.remoteMountDirPath("string")
.build())
.clusterName("string")
.customTags(Map.of("string", "string"))
.dataSecurityMode("string")
.dockerImage(ClusterDockerImageArgs.builder()
.url("string")
.basicAuth(ClusterDockerImageBasicAuthArgs.builder()
.password("string")
.username("string")
.build())
.build())
.driverInstancePoolId("string")
.driverNodeTypeId("string")
.enableElasticDisk(false)
.enableLocalDiskEncryption(false)
.awsAttributes(ClusterAwsAttributesArgs.builder()
.availability("string")
.ebsVolumeCount(0)
.ebsVolumeIops(0)
.ebsVolumeSize(0)
.ebsVolumeThroughput(0)
.ebsVolumeType("string")
.firstOnDemand(0)
.instanceProfileArn("string")
.spotBidPricePercent(0)
.zoneId("string")
.build())
.initScripts(ClusterInitScriptArgs.builder()
.abfss(ClusterInitScriptAbfssArgs.builder()
.destination("string")
.build())
.file(ClusterInitScriptFileArgs.builder()
.destination("string")
.build())
.gcs(ClusterInitScriptGcsArgs.builder()
.destination("string")
.build())
.s3(ClusterInitScriptS3Args.builder()
.destination("string")
.cannedAcl("string")
.enableEncryption(false)
.encryptionType("string")
.endpoint("string")
.kmsKey("string")
.region("string")
.build())
.volumes(ClusterInitScriptVolumesArgs.builder()
.destination("string")
.build())
.workspace(ClusterInitScriptWorkspaceArgs.builder()
.destination("string")
.build())
.build())
.autoterminationMinutes(0)
.instancePoolId("string")
.isPinned(false)
.libraries(ClusterLibraryArgs.builder()
.cran(ClusterLibraryCranArgs.builder()
.package_("string")
.repo("string")
.build())
.egg("string")
.jar("string")
.maven(ClusterLibraryMavenArgs.builder()
.coordinates("string")
.exclusions("string")
.repo("string")
.build())
.pypi(ClusterLibraryPypiArgs.builder()
.package_("string")
.repo("string")
.build())
.requirements("string")
.whl("string")
.build())
.noWait(false)
.nodeTypeId("string")
.numWorkers(0)
.policyId("string")
.runtimeEngine("string")
.singleUserName("string")
.sparkConf(Map.of("string", "string"))
.sparkEnvVars(Map.of("string", "string"))
.autoscale(ClusterAutoscaleArgs.builder()
.maxWorkers(0)
.minWorkers(0)
.build())
.sshPublicKeys("string")
.applyPolicyDefaultValues(false)
.build());
cluster_resource = databricks.Cluster("clusterResource",
spark_version="string",
idempotency_token="string",
workload_type={
"clients": {
"jobs": False,
"notebooks": False,
},
},
gcp_attributes={
"availability": "string",
"boot_disk_size": 0,
"google_service_account": "string",
"local_ssd_count": 0,
"use_preemptible_executors": False,
"zone_id": "string",
},
azure_attributes={
"availability": "string",
"first_on_demand": 0,
"log_analytics_info": {
"log_analytics_primary_key": "string",
"log_analytics_workspace_id": "string",
},
"spot_bid_max_price": 0,
},
cluster_log_conf={
"dbfs": {
"destination": "string",
},
"s3": {
"destination": "string",
"canned_acl": "string",
"enable_encryption": False,
"encryption_type": "string",
"endpoint": "string",
"kms_key": "string",
"region": "string",
},
},
cluster_mount_infos=[{
"local_mount_dir_path": "string",
"network_filesystem_info": {
"server_address": "string",
"mount_options": "string",
},
"remote_mount_dir_path": "string",
}],
cluster_name="string",
custom_tags={
"string": "string",
},
data_security_mode="string",
docker_image={
"url": "string",
"basic_auth": {
"password": "string",
"username": "string",
},
},
driver_instance_pool_id="string",
driver_node_type_id="string",
enable_elastic_disk=False,
enable_local_disk_encryption=False,
aws_attributes={
"availability": "string",
"ebs_volume_count": 0,
"ebs_volume_iops": 0,
"ebs_volume_size": 0,
"ebs_volume_throughput": 0,
"ebs_volume_type": "string",
"first_on_demand": 0,
"instance_profile_arn": "string",
"spot_bid_price_percent": 0,
"zone_id": "string",
},
init_scripts=[{
"abfss": {
"destination": "string",
},
"file": {
"destination": "string",
},
"gcs": {
"destination": "string",
},
"s3": {
"destination": "string",
"canned_acl": "string",
"enable_encryption": False,
"encryption_type": "string",
"endpoint": "string",
"kms_key": "string",
"region": "string",
},
"volumes": {
"destination": "string",
},
"workspace": {
"destination": "string",
},
}],
autotermination_minutes=0,
instance_pool_id="string",
is_pinned=False,
libraries=[{
"cran": {
"package": "string",
"repo": "string",
},
"egg": "string",
"jar": "string",
"maven": {
"coordinates": "string",
"exclusions": ["string"],
"repo": "string",
},
"pypi": {
"package": "string",
"repo": "string",
},
"requirements": "string",
"whl": "string",
}],
no_wait=False,
node_type_id="string",
num_workers=0,
policy_id="string",
runtime_engine="string",
single_user_name="string",
spark_conf={
"string": "string",
},
spark_env_vars={
"string": "string",
},
autoscale={
"max_workers": 0,
"min_workers": 0,
},
ssh_public_keys=["string"],
apply_policy_default_values=False)
const clusterResource = new databricks.Cluster("clusterResource", {
sparkVersion: "string",
idempotencyToken: "string",
workloadType: {
clients: {
jobs: false,
notebooks: false,
},
},
gcpAttributes: {
availability: "string",
bootDiskSize: 0,
googleServiceAccount: "string",
localSsdCount: 0,
usePreemptibleExecutors: false,
zoneId: "string",
},
azureAttributes: {
availability: "string",
firstOnDemand: 0,
logAnalyticsInfo: {
logAnalyticsPrimaryKey: "string",
logAnalyticsWorkspaceId: "string",
},
spotBidMaxPrice: 0,
},
clusterLogConf: {
dbfs: {
destination: "string",
},
s3: {
destination: "string",
cannedAcl: "string",
enableEncryption: false,
encryptionType: "string",
endpoint: "string",
kmsKey: "string",
region: "string",
},
},
clusterMountInfos: [{
localMountDirPath: "string",
networkFilesystemInfo: {
serverAddress: "string",
mountOptions: "string",
},
remoteMountDirPath: "string",
}],
clusterName: "string",
customTags: {
string: "string",
},
dataSecurityMode: "string",
dockerImage: {
url: "string",
basicAuth: {
password: "string",
username: "string",
},
},
driverInstancePoolId: "string",
driverNodeTypeId: "string",
enableElasticDisk: false,
enableLocalDiskEncryption: false,
awsAttributes: {
availability: "string",
ebsVolumeCount: 0,
ebsVolumeIops: 0,
ebsVolumeSize: 0,
ebsVolumeThroughput: 0,
ebsVolumeType: "string",
firstOnDemand: 0,
instanceProfileArn: "string",
spotBidPricePercent: 0,
zoneId: "string",
},
initScripts: [{
abfss: {
destination: "string",
},
file: {
destination: "string",
},
gcs: {
destination: "string",
},
s3: {
destination: "string",
cannedAcl: "string",
enableEncryption: false,
encryptionType: "string",
endpoint: "string",
kmsKey: "string",
region: "string",
},
volumes: {
destination: "string",
},
workspace: {
destination: "string",
},
}],
autoterminationMinutes: 0,
instancePoolId: "string",
isPinned: false,
libraries: [{
cran: {
"package": "string",
repo: "string",
},
egg: "string",
jar: "string",
maven: {
coordinates: "string",
exclusions: ["string"],
repo: "string",
},
pypi: {
"package": "string",
repo: "string",
},
requirements: "string",
whl: "string",
}],
noWait: false,
nodeTypeId: "string",
numWorkers: 0,
policyId: "string",
runtimeEngine: "string",
singleUserName: "string",
sparkConf: {
string: "string",
},
sparkEnvVars: {
string: "string",
},
autoscale: {
maxWorkers: 0,
minWorkers: 0,
},
sshPublicKeys: ["string"],
applyPolicyDefaultValues: false,
});
type: databricks:Cluster
properties:
applyPolicyDefaultValues: false
autoscale:
maxWorkers: 0
minWorkers: 0
autoterminationMinutes: 0
awsAttributes:
availability: string
ebsVolumeCount: 0
ebsVolumeIops: 0
ebsVolumeSize: 0
ebsVolumeThroughput: 0
ebsVolumeType: string
firstOnDemand: 0
instanceProfileArn: string
spotBidPricePercent: 0
zoneId: string
azureAttributes:
availability: string
firstOnDemand: 0
logAnalyticsInfo:
logAnalyticsPrimaryKey: string
logAnalyticsWorkspaceId: string
spotBidMaxPrice: 0
clusterLogConf:
dbfs:
destination: string
s3:
cannedAcl: string
destination: string
enableEncryption: false
encryptionType: string
endpoint: string
kmsKey: string
region: string
clusterMountInfos:
- localMountDirPath: string
networkFilesystemInfo:
mountOptions: string
serverAddress: string
remoteMountDirPath: string
clusterName: string
customTags:
string: string
dataSecurityMode: string
dockerImage:
basicAuth:
password: string
username: string
url: string
driverInstancePoolId: string
driverNodeTypeId: string
enableElasticDisk: false
enableLocalDiskEncryption: false
gcpAttributes:
availability: string
bootDiskSize: 0
googleServiceAccount: string
localSsdCount: 0
usePreemptibleExecutors: false
zoneId: string
idempotencyToken: string
initScripts:
- abfss:
destination: string
file:
destination: string
gcs:
destination: string
s3:
cannedAcl: string
destination: string
enableEncryption: false
encryptionType: string
endpoint: string
kmsKey: string
region: string
volumes:
destination: string
workspace:
destination: string
instancePoolId: string
isPinned: false
libraries:
- cran:
package: string
repo: string
egg: string
jar: string
maven:
coordinates: string
exclusions:
- string
repo: string
pypi:
package: string
repo: string
requirements: string
whl: string
noWait: false
nodeTypeId: string
numWorkers: 0
policyId: string
runtimeEngine: string
singleUserName: string
sparkConf:
string: string
sparkEnvVars:
string: string
sparkVersion: string
sshPublicKeys:
- string
workloadType:
clients:
jobs: false
notebooks: false
Cluster Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.
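For instance, the following minimal sketch (placeholder Spark version and node type) passes the autoscale input once as a typed args class and once as an equivalent dictionary literal.
import pulumi_databricks as databricks

# Illustrative sketch: the same `autoscale` input as a typed args class and
# as a dictionary literal. Spark version and node type are placeholders.
with_args_class = databricks.Cluster("with-args-class",
    spark_version="14.3.x-scala2.12",
    node_type_id="i3.xlarge",
    autoscale=databricks.ClusterAutoscaleArgs(min_workers=1, max_workers=4))

with_dict_literal = databricks.Cluster("with-dict-literal",
    spark_version="14.3.x-scala2.12",
    node_type_id="i3.xlarge",
    autoscale={"min_workers": 1, "max_workers": 4})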
The Cluster resource accepts the following input properties:
- SparkVersion string - Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
- ApplyPolicyDefaultValues bool - Whether to use policy default values for missing cluster attributes.
- Autoscale ClusterAutoscale
- AutoterminationMinutes int - Automatically terminate the cluster after being inactive for this time in minutes. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. Defaults to 60. We highly recommend having this setting present for Interactive/BI clusters.
- AwsAttributes ClusterAwsAttributes
- AzureAttributes ClusterAzureAttributes
- ClusterLogConf ClusterClusterLogConf
- ClusterMountInfos List<ClusterClusterMountInfo>
- ClusterName string - Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
- CustomTags Dictionary<string, string> - should have tag ResourceClass set to value Serverless. For example:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const clusterWithTableAccessControl = new databricks.Cluster("cluster_with_table_access_control", { clusterName: "Shared High-Concurrency", sparkVersion: latestLts.id, nodeTypeId: smallest.id, autoterminationMinutes: 20, sparkConf: { "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, customTags: { ResourceClass: "Serverless", }, });
import pulumi import pulumi_databricks as databricks cluster_with_table_access_control = databricks.Cluster("cluster_with_table_access_control", cluster_name="Shared High-Concurrency", spark_version=latest_lts["id"], node_type_id=smallest["id"], autotermination_minutes=20, spark_conf={ "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, custom_tags={ "ResourceClass": "Serverless", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var clusterWithTableAccessControl = new Databricks.Cluster("cluster_with_table_access_control", new() { ClusterName = "Shared High-Concurrency", SparkVersion = latestLts.Id, NodeTypeId = smallest.Id, AutoterminationMinutes = 20, SparkConf = { { "spark.databricks.repl.allowedLanguages", "python,sql" }, { "spark.databricks.cluster.profile", "serverless" }, }, CustomTags = { { "ResourceClass", "Serverless" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { _, err := databricks.NewCluster(ctx, "cluster_with_table_access_control", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared High-Concurrency"), SparkVersion: pulumi.Any(latestLts.Id), NodeTypeId: pulumi.Any(smallest.Id), AutoterminationMinutes: pulumi.Int(20), SparkConf: pulumi.StringMap{ "spark.databricks.repl.allowedLanguages": pulumi.String("python,sql"), "spark.databricks.cluster.profile": pulumi.String("serverless"), }, CustomTags: pulumi.StringMap{ "ResourceClass": pulumi.String("Serverless"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var clusterWithTableAccessControl = new Cluster("clusterWithTableAccessControl", ClusterArgs.builder() .clusterName("Shared High-Concurrency") .sparkVersion(latestLts.id()) .nodeTypeId(smallest.id()) .autoterminationMinutes(20) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.repl.allowedLanguages", "python,sql"), Map.entry("spark.databricks.cluster.profile", "serverless") )) .customTags(Map.of("ResourceClass", "Serverless")) .build()); } }
resources: clusterWithTableAccessControl: type: databricks:Cluster name: cluster_with_table_access_control properties: clusterName: Shared High-Concurrency sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 sparkConf: spark.databricks.repl.allowedLanguages: python,sql spark.databricks.cluster.profile: serverless customTags: ResourceClass: Serverless
- DataSecurityMode string - Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. Use LEGACY_PASSTHROUGH for a passthrough cluster and LEGACY_TABLE_ACL for a Table ACL cluster. If omitted, default security features are enabled. To disable security features, use NONE or the legacy mode NO_ISOLATION. In the Databricks UI, this has recently been renamed Access Mode and USER_ISOLATION has been renamed Shared, but use these terms here. (A minimal single-user sketch follows this list.)
- DockerImage ClusterDockerImage
- DriverInstancePoolId string - similar to instance_pool_id, but for the driver node. If omitted and instance_pool_id is specified, then the driver will be allocated from that pool.
- DriverNodeTypeId string - The node type of the Spark driver. This field is optional; if unset, the API will set the driver node type to the same value as node_type_id defined above.
- EnableElasticDisk bool - If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have the autotermination_minutes and autoscale attributes set. More documentation is available on the cluster configuration page.
- EnableLocalDiskEncryption bool - Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node and is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
- GcpAttributes ClusterGcpAttributes
- IdempotencyToken string - An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but it will return the existing running cluster's ID instead. If you specify the idempotency token, upon failure you can retry until the request succeeds; the Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
- InitScripts List<ClusterInitScript>
- InstancePoolId string - To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to TERMINATED, the instances it used are returned to the pool and reused by a different cluster.
- IsPinned bool - Boolean value specifying whether the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The number of pinned clusters is limited to 100, so apply may fail if you have more than that (this number may change over time, so check the Databricks documentation for the current limit).
- Libraries List<ClusterLibrary>
- NoWait bool - If true, the provider will not wait for the cluster to reach RUNNING state when creating the cluster, allowing cluster creation and library installation to continue asynchronously. Defaults to false (the provider will wait for cluster creation and library installation to succeed). The following example demonstrates how to create an autoscaling cluster with Delta Cache enabled:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const smallest = databricks.getNodeType({ localDisk: true, }); const latestLts = databricks.getSparkVersion({ longTermSupport: true, }); const sharedAutoscaling = new databricks.Cluster("shared_autoscaling", { clusterName: "Shared Autoscaling", sparkVersion: latestLts.then(latestLts => latestLts.id), nodeTypeId: smallest.then(smallest => smallest.id), autoterminationMinutes: 20, autoscale: { minWorkers: 1, maxWorkers: 50, }, sparkConf: { "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", }, });
import pulumi import pulumi_databricks as databricks smallest = databricks.get_node_type(local_disk=True) latest_lts = databricks.get_spark_version(long_term_support=True) shared_autoscaling = databricks.Cluster("shared_autoscaling", cluster_name="Shared Autoscaling", spark_version=latest_lts.id, node_type_id=smallest.id, autotermination_minutes=20, autoscale={ "min_workers": 1, "max_workers": 50, }, spark_conf={ "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var smallest = Databricks.GetNodeType.Invoke(new() { LocalDisk = true, }); var latestLts = Databricks.GetSparkVersion.Invoke(new() { LongTermSupport = true, }); var sharedAutoscaling = new Databricks.Cluster("shared_autoscaling", new() { ClusterName = "Shared Autoscaling", SparkVersion = latestLts.Apply(getSparkVersionResult => getSparkVersionResult.Id), NodeTypeId = smallest.Apply(getNodeTypeResult => getNodeTypeResult.Id), AutoterminationMinutes = 20, Autoscale = new Databricks.Inputs.ClusterAutoscaleArgs { MinWorkers = 1, MaxWorkers = 50, }, SparkConf = { { "spark.databricks.io.cache.enabled", "true" }, { "spark.databricks.io.cache.maxDiskUsage", "50g" }, { "spark.databricks.io.cache.maxMetaDataCache", "1g" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { smallest, err := databricks.GetNodeType(ctx, &databricks.GetNodeTypeArgs{ LocalDisk: pulumi.BoolRef(true), }, nil) if err != nil { return err } latestLts, err := databricks.GetSparkVersion(ctx, &databricks.GetSparkVersionArgs{ LongTermSupport: pulumi.BoolRef(true), }, nil) if err != nil { return err } _, err = databricks.NewCluster(ctx, "shared_autoscaling", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared Autoscaling"), SparkVersion: pulumi.String(latestLts.Id), NodeTypeId: pulumi.String(smallest.Id), AutoterminationMinutes: pulumi.Int(20), Autoscale: &databricks.ClusterAutoscaleArgs{ MinWorkers: pulumi.Int(1), MaxWorkers: pulumi.Int(50), }, SparkConf: pulumi.StringMap{ "spark.databricks.io.cache.enabled": pulumi.String("true"), "spark.databricks.io.cache.maxDiskUsage": pulumi.String("50g"), "spark.databricks.io.cache.maxMetaDataCache": pulumi.String("1g"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.DatabricksFunctions; import com.pulumi.databricks.inputs.GetNodeTypeArgs; import com.pulumi.databricks.inputs.GetSparkVersionArgs; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterAutoscaleArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { final var smallest = DatabricksFunctions.getNodeType(GetNodeTypeArgs.builder() .localDisk(true) .build()); final var latestLts = DatabricksFunctions.getSparkVersion(GetSparkVersionArgs.builder() .longTermSupport(true) .build()); var sharedAutoscaling = new Cluster("sharedAutoscaling", ClusterArgs.builder() .clusterName("Shared Autoscaling") .sparkVersion(latestLts.applyValue(getSparkVersionResult -> getSparkVersionResult.id())) .nodeTypeId(smallest.applyValue(getNodeTypeResult -> getNodeTypeResult.id())) .autoterminationMinutes(20) .autoscale(ClusterAutoscaleArgs.builder() .minWorkers(1) .maxWorkers(50) .build()) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.io.cache.enabled", true), Map.entry("spark.databricks.io.cache.maxDiskUsage", "50g"), Map.entry("spark.databricks.io.cache.maxMetaDataCache", "1g") )) .build()); } }
resources: sharedAutoscaling: type: databricks:Cluster name: shared_autoscaling properties: clusterName: Shared Autoscaling sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 autoscale: minWorkers: 1 maxWorkers: 50 sparkConf: spark.databricks.io.cache.enabled: true spark.databricks.io.cache.maxDiskUsage: 50g spark.databricks.io.cache.maxMetaDataCache: 1g variables: smallest: fn::invoke: Function: databricks:getNodeType Arguments: localDisk: true latestLts: fn::invoke: Function: databricks:getSparkVersion Arguments: longTermSupport: true
- NodeTypeId string - Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
- NumWorkers int - Number of worker nodes that this cluster should have. A cluster has one Spark driver and num_workers executors, for a total of num_workers + 1 Spark nodes.
- PolicyId string - Identifier of a Cluster Policy to validate the cluster against and to preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters. For example, when you specify the policy_id of an external metastore policy, you still have to fill in the relevant keys for spark_conf. If the relevant fields aren't filled in, configuration drift will be detected on each plan/apply, and Pulumi will try to apply the detected changes.
- RuntimeEngine string - The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the spark_version value. Allowed values include: PHOTON, STANDARD.
- SingleUserName string - The optional user name of the user to assign to an interactive cluster. This field is required when data_security_mode is set to SINGLE_USER or when using AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
- SparkConf Dictionary<string, string> - should have the following items: spark.databricks.repl.allowedLanguages set to a list of supported languages, for example python,sql or python,sql,r (Scala is not supported!), and spark.databricks.cluster.profile set to serverless.
- SparkEnvVars Dictionary<string, string> - Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
- SshPublicKeys List<string> - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
- WorkloadType ClusterWorkloadType
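As referenced in the DataSecurityMode description above, the following minimal, hypothetical Python sketch shows a single-user Unity Catalog cluster; the Spark version, node type, and user name are placeholder assumptions.
import pulumi_databricks as databricks

# Hypothetical sketch: data_security_mode="SINGLE_USER" requires
# single_user_name. Spark version, node type, and the user name are
# placeholders.
single_user = databricks.Cluster("single-user",
    cluster_name="Single User UC",
    spark_version="14.3.x-scala2.12",
    node_type_id="i3.xlarge",
    autotermination_minutes=20,
    num_workers=1,
    data_security_mode="SINGLE_USER",
    single_user_name="someone@example.com")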
- SparkVersion string - Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
- ApplyPolicyDefaultValues bool - Whether to use policy default values for missing cluster attributes.
- Autoscale ClusterAutoscaleArgs
- AutoterminationMinutes int - Automatically terminate the cluster after being inactive for this time in minutes. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. Defaults to 60. We highly recommend having this setting present for Interactive/BI clusters.
- AwsAttributes ClusterAwsAttributesArgs
- AzureAttributes ClusterAzureAttributesArgs
- ClusterLogConf ClusterClusterLogConfArgs
- ClusterMountInfos []ClusterClusterMountInfoArgs
- ClusterName string - Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
- CustomTags map[string]string - should have tag ResourceClass set to value Serverless. For example:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const clusterWithTableAccessControl = new databricks.Cluster("cluster_with_table_access_control", { clusterName: "Shared High-Concurrency", sparkVersion: latestLts.id, nodeTypeId: smallest.id, autoterminationMinutes: 20, sparkConf: { "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, customTags: { ResourceClass: "Serverless", }, });
import pulumi import pulumi_databricks as databricks cluster_with_table_access_control = databricks.Cluster("cluster_with_table_access_control", cluster_name="Shared High-Concurrency", spark_version=latest_lts["id"], node_type_id=smallest["id"], autotermination_minutes=20, spark_conf={ "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, custom_tags={ "ResourceClass": "Serverless", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var clusterWithTableAccessControl = new Databricks.Cluster("cluster_with_table_access_control", new() { ClusterName = "Shared High-Concurrency", SparkVersion = latestLts.Id, NodeTypeId = smallest.Id, AutoterminationMinutes = 20, SparkConf = { { "spark.databricks.repl.allowedLanguages", "python,sql" }, { "spark.databricks.cluster.profile", "serverless" }, }, CustomTags = { { "ResourceClass", "Serverless" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { _, err := databricks.NewCluster(ctx, "cluster_with_table_access_control", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared High-Concurrency"), SparkVersion: pulumi.Any(latestLts.Id), NodeTypeId: pulumi.Any(smallest.Id), AutoterminationMinutes: pulumi.Int(20), SparkConf: pulumi.StringMap{ "spark.databricks.repl.allowedLanguages": pulumi.String("python,sql"), "spark.databricks.cluster.profile": pulumi.String("serverless"), }, CustomTags: pulumi.StringMap{ "ResourceClass": pulumi.String("Serverless"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var clusterWithTableAccessControl = new Cluster("clusterWithTableAccessControl", ClusterArgs.builder() .clusterName("Shared High-Concurrency") .sparkVersion(latestLts.id()) .nodeTypeId(smallest.id()) .autoterminationMinutes(20) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.repl.allowedLanguages", "python,sql"), Map.entry("spark.databricks.cluster.profile", "serverless") )) .customTags(Map.of("ResourceClass", "Serverless")) .build()); } }
resources: clusterWithTableAccessControl: type: databricks:Cluster name: cluster_with_table_access_control properties: clusterName: Shared High-Concurrency sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 sparkConf: spark.databricks.repl.allowedLanguages: python,sql spark.databricks.cluster.profile: serverless customTags: ResourceClass: Serverless
- DataSecurityMode string - Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. Use LEGACY_PASSTHROUGH for a passthrough cluster and LEGACY_TABLE_ACL for a Table ACL cluster. If omitted, default security features are enabled. To disable security features, use NONE or the legacy mode NO_ISOLATION. In the Databricks UI, this has recently been renamed Access Mode and USER_ISOLATION has been renamed Shared, but use these terms here.
- DockerImage ClusterDockerImageArgs
- DriverInstancePoolId string - similar to instance_pool_id, but for the driver node. If omitted and instance_pool_id is specified, then the driver will be allocated from that pool.
- DriverNodeTypeId string - The node type of the Spark driver. This field is optional; if unset, the API will set the driver node type to the same value as node_type_id defined above.
- EnableElasticDisk bool - If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have the autotermination_minutes and autoscale attributes set. More documentation is available on the cluster configuration page.
- EnableLocalDiskEncryption bool - Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node and is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
- GcpAttributes ClusterGcpAttributesArgs
- IdempotencyToken string - An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but it will return the existing running cluster's ID instead. If you specify the idempotency token, upon failure you can retry until the request succeeds; the Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
- InitScripts []ClusterInitScriptArgs (a combined log delivery and init-script sketch follows this list)
- InstancePoolId string - To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to TERMINATED, the instances it used are returned to the pool and reused by a different cluster.
- IsPinned bool - Boolean value specifying whether the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The number of pinned clusters is limited to 100, so apply may fail if you have more than that (this number may change over time, so check the Databricks documentation for the current limit).
- Libraries []ClusterLibraryArgs
- NoWait bool - If true, the provider will not wait for the cluster to reach RUNNING state when creating the cluster, allowing cluster creation and library installation to continue asynchronously. Defaults to false (the provider will wait for cluster creation and library installation to succeed). The following example demonstrates how to create an autoscaling cluster with Delta Cache enabled:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const smallest = databricks.getNodeType({ localDisk: true, }); const latestLts = databricks.getSparkVersion({ longTermSupport: true, }); const sharedAutoscaling = new databricks.Cluster("shared_autoscaling", { clusterName: "Shared Autoscaling", sparkVersion: latestLts.then(latestLts => latestLts.id), nodeTypeId: smallest.then(smallest => smallest.id), autoterminationMinutes: 20, autoscale: { minWorkers: 1, maxWorkers: 50, }, sparkConf: { "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", }, });
import pulumi import pulumi_databricks as databricks smallest = databricks.get_node_type(local_disk=True) latest_lts = databricks.get_spark_version(long_term_support=True) shared_autoscaling = databricks.Cluster("shared_autoscaling", cluster_name="Shared Autoscaling", spark_version=latest_lts.id, node_type_id=smallest.id, autotermination_minutes=20, autoscale={ "min_workers": 1, "max_workers": 50, }, spark_conf={ "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var smallest = Databricks.GetNodeType.Invoke(new() { LocalDisk = true, }); var latestLts = Databricks.GetSparkVersion.Invoke(new() { LongTermSupport = true, }); var sharedAutoscaling = new Databricks.Cluster("shared_autoscaling", new() { ClusterName = "Shared Autoscaling", SparkVersion = latestLts.Apply(getSparkVersionResult => getSparkVersionResult.Id), NodeTypeId = smallest.Apply(getNodeTypeResult => getNodeTypeResult.Id), AutoterminationMinutes = 20, Autoscale = new Databricks.Inputs.ClusterAutoscaleArgs { MinWorkers = 1, MaxWorkers = 50, }, SparkConf = { { "spark.databricks.io.cache.enabled", "true" }, { "spark.databricks.io.cache.maxDiskUsage", "50g" }, { "spark.databricks.io.cache.maxMetaDataCache", "1g" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { smallest, err := databricks.GetNodeType(ctx, &databricks.GetNodeTypeArgs{ LocalDisk: pulumi.BoolRef(true), }, nil) if err != nil { return err } latestLts, err := databricks.GetSparkVersion(ctx, &databricks.GetSparkVersionArgs{ LongTermSupport: pulumi.BoolRef(true), }, nil) if err != nil { return err } _, err = databricks.NewCluster(ctx, "shared_autoscaling", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared Autoscaling"), SparkVersion: pulumi.String(latestLts.Id), NodeTypeId: pulumi.String(smallest.Id), AutoterminationMinutes: pulumi.Int(20), Autoscale: &databricks.ClusterAutoscaleArgs{ MinWorkers: pulumi.Int(1), MaxWorkers: pulumi.Int(50), }, SparkConf: pulumi.StringMap{ "spark.databricks.io.cache.enabled": pulumi.String("true"), "spark.databricks.io.cache.maxDiskUsage": pulumi.String("50g"), "spark.databricks.io.cache.maxMetaDataCache": pulumi.String("1g"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.DatabricksFunctions; import com.pulumi.databricks.inputs.GetNodeTypeArgs; import com.pulumi.databricks.inputs.GetSparkVersionArgs; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterAutoscaleArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { final var smallest = DatabricksFunctions.getNodeType(GetNodeTypeArgs.builder() .localDisk(true) .build()); final var latestLts = DatabricksFunctions.getSparkVersion(GetSparkVersionArgs.builder() .longTermSupport(true) .build()); var sharedAutoscaling = new Cluster("sharedAutoscaling", ClusterArgs.builder() .clusterName("Shared Autoscaling") .sparkVersion(latestLts.applyValue(getSparkVersionResult -> getSparkVersionResult.id())) .nodeTypeId(smallest.applyValue(getNodeTypeResult -> getNodeTypeResult.id())) .autoterminationMinutes(20) .autoscale(ClusterAutoscaleArgs.builder() .minWorkers(1) .maxWorkers(50) .build()) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.io.cache.enabled", true), Map.entry("spark.databricks.io.cache.maxDiskUsage", "50g"), Map.entry("spark.databricks.io.cache.maxMetaDataCache", "1g") )) .build()); } }
resources: sharedAutoscaling: type: databricks:Cluster name: shared_autoscaling properties: clusterName: Shared Autoscaling sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 autoscale: minWorkers: 1 maxWorkers: 50 sparkConf: spark.databricks.io.cache.enabled: true spark.databricks.io.cache.maxDiskUsage: 50g spark.databricks.io.cache.maxMetaDataCache: 1g variables: smallest: fn::invoke: Function: databricks:getNodeType Arguments: localDisk: true latestLts: fn::invoke: Function: databricks:getSparkVersion Arguments: longTermSupport: true
- NodeTypeId string - Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
- NumWorkers int - Number of worker nodes that this cluster should have. A cluster has one Spark driver and num_workers executors, for a total of num_workers + 1 Spark nodes.
- PolicyId string - Identifier of a Cluster Policy to validate the cluster against and to preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters. For example, when you specify the policy_id of an external metastore policy, you still have to fill in the relevant keys for spark_conf. If the relevant fields aren't filled in, configuration drift will be detected on each plan/apply, and Pulumi will try to apply the detected changes.
- RuntimeEngine string - The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the spark_version value. Allowed values include: PHOTON, STANDARD.
- SingleUserName string - The optional user name of the user to assign to an interactive cluster. This field is required when data_security_mode is set to SINGLE_USER or when using AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
- SparkConf map[string]string - should have the following items: spark.databricks.repl.allowedLanguages set to a list of supported languages, for example python,sql or python,sql,r (Scala is not supported!), and spark.databricks.cluster.profile set to serverless.
- SparkEnvVars map[string]string - Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
- SshPublicKeys []string - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
- WorkloadType ClusterWorkloadTypeArgs
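As noted in the InitScripts entry above, the following hypothetical Python sketch combines cluster_log_conf and init_scripts: logs are delivered to a DBFS path and a workspace file runs as an init script on each node. All paths, the Spark version, and the node type are placeholders.
import pulumi_databricks as databricks

# Hypothetical sketch: deliver driver/worker logs to DBFS and run a
# workspace file as an init script on every node. Paths, Spark version,
# and node type are placeholders.
logged = databricks.Cluster("logged",
    cluster_name="Logged Cluster",
    spark_version="14.3.x-scala2.12",
    node_type_id="i3.xlarge",
    autotermination_minutes=20,
    num_workers=1,
    cluster_log_conf={
        "dbfs": {
            "destination": "dbfs:/cluster-logs",
        },
    },
    init_scripts=[{
        "workspace": {
            "destination": "/Shared/init/setup.sh",
        },
    }])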
- sparkVersion String - Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
- applyPolicyDefaultValues Boolean - Whether to use policy default values for missing cluster attributes.
- autoscale ClusterAutoscale
- autoterminationMinutes Integer - Automatically terminate the cluster after being inactive for this time in minutes. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. Defaults to 60. We highly recommend having this setting present for Interactive/BI clusters.
- awsAttributes ClusterAwsAttributes
- azureAttributes ClusterAzureAttributes
- clusterLogConf ClusterClusterLogConf
- clusterMountInfos List<ClusterClusterMountInfo>
- clusterName String - Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
- customTags Map<String,String> - should have tag ResourceClass set to value Serverless. For example:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const clusterWithTableAccessControl = new databricks.Cluster("cluster_with_table_access_control", { clusterName: "Shared High-Concurrency", sparkVersion: latestLts.id, nodeTypeId: smallest.id, autoterminationMinutes: 20, sparkConf: { "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, customTags: { ResourceClass: "Serverless", }, });
import pulumi import pulumi_databricks as databricks cluster_with_table_access_control = databricks.Cluster("cluster_with_table_access_control", cluster_name="Shared High-Concurrency", spark_version=latest_lts["id"], node_type_id=smallest["id"], autotermination_minutes=20, spark_conf={ "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, custom_tags={ "ResourceClass": "Serverless", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var clusterWithTableAccessControl = new Databricks.Cluster("cluster_with_table_access_control", new() { ClusterName = "Shared High-Concurrency", SparkVersion = latestLts.Id, NodeTypeId = smallest.Id, AutoterminationMinutes = 20, SparkConf = { { "spark.databricks.repl.allowedLanguages", "python,sql" }, { "spark.databricks.cluster.profile", "serverless" }, }, CustomTags = { { "ResourceClass", "Serverless" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { _, err := databricks.NewCluster(ctx, "cluster_with_table_access_control", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared High-Concurrency"), SparkVersion: pulumi.Any(latestLts.Id), NodeTypeId: pulumi.Any(smallest.Id), AutoterminationMinutes: pulumi.Int(20), SparkConf: pulumi.StringMap{ "spark.databricks.repl.allowedLanguages": pulumi.String("python,sql"), "spark.databricks.cluster.profile": pulumi.String("serverless"), }, CustomTags: pulumi.StringMap{ "ResourceClass": pulumi.String("Serverless"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var clusterWithTableAccessControl = new Cluster("clusterWithTableAccessControl", ClusterArgs.builder() .clusterName("Shared High-Concurrency") .sparkVersion(latestLts.id()) .nodeTypeId(smallest.id()) .autoterminationMinutes(20) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.repl.allowedLanguages", "python,sql"), Map.entry("spark.databricks.cluster.profile", "serverless") )) .customTags(Map.of("ResourceClass", "Serverless")) .build()); } }
resources: clusterWithTableAccessControl: type: databricks:Cluster name: cluster_with_table_access_control properties: clusterName: Shared High-Concurrency sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 sparkConf: spark.databricks.repl.allowedLanguages: python,sql spark.databricks.cluster.profile: serverless customTags: ResourceClass: Serverless
- dataSecurityMode String - Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. LEGACY_PASSTHROUGH is for passthrough clusters and LEGACY_TABLE_ACL for Table ACL clusters. If omitted, default security features are enabled. To disable security features, use NONE or the legacy mode NO_ISOLATION. In the Databricks UI, this has recently been renamed Access Mode and USER_ISOLATION has been renamed Shared, but use these terms here.
- dockerImage ClusterDockerImage
- driverInstancePoolId String - Similar to instance_pool_id, but for the driver node. If omitted and instance_pool_id is specified, then the driver will be allocated from that pool.
- driverNodeTypeId String - The node type of the Spark driver. This field is optional; if unset, the API will set the driver node type to the same value as node_type_id defined above.
- enableElasticDisk Boolean - If you don't want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster's Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance's local storage). To scale down EBS usage, make sure you have the autotermination_minutes and autoscale attributes set. More documentation is available on the cluster configuration page.
- enableLocalDiskEncryption Boolean - Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster's local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally that is unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node, and it is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
- gcpAttributes ClusterGcpAttributes
- idempotencyToken String - An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster but will return the ID of the existing running cluster instead. If you specify the idempotency token, upon failure you can retry until the request succeeds; the Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
- initScripts List<ClusterInitScript>
- instancePoolId String - To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster's request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to TERMINATED, the instances it used are returned to the pool and can be reused by a different cluster. A minimal sketch combining an instance pool with an idempotency token appears after this list.
- isPinned Boolean - Boolean value specifying whether the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The maximum number of pinned clusters is limited to 100, so apply may fail if you have more than that (this number may change over time, so check the Databricks documentation for the actual number).
- libraries List<ClusterLibrary>
- noWait Boolean - If true, the provider will not wait for the cluster to reach the RUNNING state when creating the cluster, allowing cluster creation and library installation to continue asynchronously. Defaults to false (the provider will wait for cluster creation and library installation to succeed).

The following example demonstrates how to create an autoscaling cluster with Delta Cache enabled:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const smallest = databricks.getNodeType({ localDisk: true, }); const latestLts = databricks.getSparkVersion({ longTermSupport: true, }); const sharedAutoscaling = new databricks.Cluster("shared_autoscaling", { clusterName: "Shared Autoscaling", sparkVersion: latestLts.then(latestLts => latestLts.id), nodeTypeId: smallest.then(smallest => smallest.id), autoterminationMinutes: 20, autoscale: { minWorkers: 1, maxWorkers: 50, }, sparkConf: { "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", }, });
import pulumi import pulumi_databricks as databricks smallest = databricks.get_node_type(local_disk=True) latest_lts = databricks.get_spark_version(long_term_support=True) shared_autoscaling = databricks.Cluster("shared_autoscaling", cluster_name="Shared Autoscaling", spark_version=latest_lts.id, node_type_id=smallest.id, autotermination_minutes=20, autoscale={ "min_workers": 1, "max_workers": 50, }, spark_conf={ "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var smallest = Databricks.GetNodeType.Invoke(new() { LocalDisk = true, }); var latestLts = Databricks.GetSparkVersion.Invoke(new() { LongTermSupport = true, }); var sharedAutoscaling = new Databricks.Cluster("shared_autoscaling", new() { ClusterName = "Shared Autoscaling", SparkVersion = latestLts.Apply(getSparkVersionResult => getSparkVersionResult.Id), NodeTypeId = smallest.Apply(getNodeTypeResult => getNodeTypeResult.Id), AutoterminationMinutes = 20, Autoscale = new Databricks.Inputs.ClusterAutoscaleArgs { MinWorkers = 1, MaxWorkers = 50, }, SparkConf = { { "spark.databricks.io.cache.enabled", "true" }, { "spark.databricks.io.cache.maxDiskUsage", "50g" }, { "spark.databricks.io.cache.maxMetaDataCache", "1g" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { smallest, err := databricks.GetNodeType(ctx, &databricks.GetNodeTypeArgs{ LocalDisk: pulumi.BoolRef(true), }, nil) if err != nil { return err } latestLts, err := databricks.GetSparkVersion(ctx, &databricks.GetSparkVersionArgs{ LongTermSupport: pulumi.BoolRef(true), }, nil) if err != nil { return err } _, err = databricks.NewCluster(ctx, "shared_autoscaling", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared Autoscaling"), SparkVersion: pulumi.String(latestLts.Id), NodeTypeId: pulumi.String(smallest.Id), AutoterminationMinutes: pulumi.Int(20), Autoscale: &databricks.ClusterAutoscaleArgs{ MinWorkers: pulumi.Int(1), MaxWorkers: pulumi.Int(50), }, SparkConf: pulumi.StringMap{ "spark.databricks.io.cache.enabled": pulumi.String("true"), "spark.databricks.io.cache.maxDiskUsage": pulumi.String("50g"), "spark.databricks.io.cache.maxMetaDataCache": pulumi.String("1g"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.DatabricksFunctions; import com.pulumi.databricks.inputs.GetNodeTypeArgs; import com.pulumi.databricks.inputs.GetSparkVersionArgs; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterAutoscaleArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { final var smallest = DatabricksFunctions.getNodeType(GetNodeTypeArgs.builder() .localDisk(true) .build()); final var latestLts = DatabricksFunctions.getSparkVersion(GetSparkVersionArgs.builder() .longTermSupport(true) .build()); var sharedAutoscaling = new Cluster("sharedAutoscaling", ClusterArgs.builder() .clusterName("Shared Autoscaling") .sparkVersion(latestLts.applyValue(getSparkVersionResult -> getSparkVersionResult.id())) .nodeTypeId(smallest.applyValue(getNodeTypeResult -> getNodeTypeResult.id())) .autoterminationMinutes(20) .autoscale(ClusterAutoscaleArgs.builder() .minWorkers(1) .maxWorkers(50) .build()) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.io.cache.enabled", "true"), Map.entry("spark.databricks.io.cache.maxDiskUsage", "50g"), Map.entry("spark.databricks.io.cache.maxMetaDataCache", "1g") )) .build()); } }
resources: sharedAutoscaling: type: databricks:Cluster name: shared_autoscaling properties: clusterName: Shared Autoscaling sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 autoscale: minWorkers: 1 maxWorkers: 50 sparkConf: spark.databricks.io.cache.enabled: true spark.databricks.io.cache.maxDiskUsage: 50g spark.databricks.io.cache.maxMetaDataCache: 1g variables: smallest: fn::invoke: Function: databricks:getNodeType Arguments: localDisk: true latestLts: fn::invoke: Function: databricks:getSparkVersion Arguments: longTermSupport: true
- nodeTypeId String - Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
- numWorkers Integer - Number of worker nodes that this cluster should have. A cluster has one Spark driver and num_workers executors, for a total of num_workers + 1 Spark nodes.
- policyId String - Identifier of a Cluster Policy used to validate the cluster and preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters. For example, when you specify the policy_id of an external metastore policy, you still have to fill in the relevant keys for spark_conf. If the relevant fields aren't filled in, configuration drift will be detected on each plan/apply, and Pulumi will try to apply the detected changes.
- runtimeEngine String - The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the spark_version value. Allowed values include: PHOTON, STANDARD.
- singleUserName String - The optional user name of the user to assign to an interactive cluster. This field is required when data_security_mode is set to SINGLE_USER or when using AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
- sparkConf Map<String,String> - Should have the following items: spark.databricks.repl.allowedLanguages set to a list of supported languages, for example python,sql or python,sql,r (Scala is not supported!), and spark.databricks.cluster.profile set to serverless.
- sparkEnvVars Map<String,String> - Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
- sshPublicKeys List<String> - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
- workloadType ClusterWorkloadType
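As a companion to the instancePoolId and idempotencyToken descriptions above, here is a minimal TypeScript sketch. It assumes the ID of an existing instance pool is supplied via stack configuration, and the token value is an arbitrary placeholder chosen by the caller.

import * as pulumi from "@pulumi/pulumi";
import * as databricks from "@pulumi/databricks";

const config = new pulumi.Config();
// ID of a pre-existing instance pool, supplied via stack config (placeholder key name).
const poolId = config.require("instancePoolId");

const latestLts = databricks.getSparkVersion({ longTermSupport: true });

// Driver and workers are both drawn from the pool, so nodeTypeId is not needed.
const pooled = new databricks.Cluster("pooled", {
    clusterName: "Pooled Cluster",
    sparkVersion: latestLts.then(v => v.id),
    instancePoolId: poolId,
    driverInstancePoolId: poolId,
    autoscale: { minWorkers: 1, maxWorkers: 10 },
    autoterminationMinutes: 20,
    // A retried create request carrying the same token returns the existing cluster.
    idempotencyToken: "pooled-cluster-example",
});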
- sparkVersion string - Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
- applyPolicyDefaultValues boolean - Whether to use policy default values for missing cluster attributes.
- autoscale ClusterAutoscale
- autoterminationMinutes number - Automatically terminate the cluster after it has been inactive for this many minutes. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. Defaults to 60. We highly recommend having this setting present for Interactive/BI clusters.
- awsAttributes ClusterAwsAttributes
- azureAttributes ClusterAzureAttributes
- clusterLogConf ClusterClusterLogConf
- clusterMountInfos ClusterClusterMountInfo[]
- clusterName string - Cluster name, which doesn't have to be unique. If not specified at creation, the cluster name will be an empty string.
- customTags {[key: string]: string} - Should have the tag ResourceClass set to value Serverless. For example:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const clusterWithTableAccessControl = new databricks.Cluster("cluster_with_table_access_control", { clusterName: "Shared High-Concurrency", sparkVersion: latestLts.id, nodeTypeId: smallest.id, autoterminationMinutes: 20, sparkConf: { "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, customTags: { ResourceClass: "Serverless", }, });
import pulumi import pulumi_databricks as databricks cluster_with_table_access_control = databricks.Cluster("cluster_with_table_access_control", cluster_name="Shared High-Concurrency", spark_version=latest_lts["id"], node_type_id=smallest["id"], autotermination_minutes=20, spark_conf={ "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, custom_tags={ "ResourceClass": "Serverless", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var clusterWithTableAccessControl = new Databricks.Cluster("cluster_with_table_access_control", new() { ClusterName = "Shared High-Concurrency", SparkVersion = latestLts.Id, NodeTypeId = smallest.Id, AutoterminationMinutes = 20, SparkConf = { { "spark.databricks.repl.allowedLanguages", "python,sql" }, { "spark.databricks.cluster.profile", "serverless" }, }, CustomTags = { { "ResourceClass", "Serverless" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { _, err := databricks.NewCluster(ctx, "cluster_with_table_access_control", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared High-Concurrency"), SparkVersion: pulumi.Any(latestLts.Id), NodeTypeId: pulumi.Any(smallest.Id), AutoterminationMinutes: pulumi.Int(20), SparkConf: pulumi.StringMap{ "spark.databricks.repl.allowedLanguages": pulumi.String("python,sql"), "spark.databricks.cluster.profile": pulumi.String("serverless"), }, CustomTags: pulumi.StringMap{ "ResourceClass": pulumi.String("Serverless"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var clusterWithTableAccessControl = new Cluster("clusterWithTableAccessControl", ClusterArgs.builder() .clusterName("Shared High-Concurrency") .sparkVersion(latestLts.id()) .nodeTypeId(smallest.id()) .autoterminationMinutes(20) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.repl.allowedLanguages", "python,sql"), Map.entry("spark.databricks.cluster.profile", "serverless") )) .customTags(Map.of("ResourceClass", "Serverless")) .build()); } }
resources: clusterWithTableAccessControl: type: databricks:Cluster name: cluster_with_table_access_control properties: clusterName: Shared High-Concurrency sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 sparkConf: spark.databricks.repl.allowedLanguages: python,sql spark.databricks.cluster.profile: serverless customTags: ResourceClass: Serverless
- dataSecurityMode string - Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. LEGACY_PASSTHROUGH is for passthrough clusters and LEGACY_TABLE_ACL for Table ACL clusters. If omitted, default security features are enabled. To disable security features, use NONE or the legacy mode NO_ISOLATION. In the Databricks UI, this has recently been renamed Access Mode and USER_ISOLATION has been renamed Shared, but use these terms here.
- dockerImage ClusterDockerImage
- driverInstancePoolId string - Similar to instance_pool_id, but for the driver node. If omitted and instance_pool_id is specified, then the driver will be allocated from that pool.
- driverNodeTypeId string - The node type of the Spark driver. This field is optional; if unset, the API will set the driver node type to the same value as node_type_id defined above.
- enableElasticDisk boolean - If you don't want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster's Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance's local storage). To scale down EBS usage, make sure you have the autotermination_minutes and autoscale attributes set. More documentation is available on the cluster configuration page.
- enableLocalDiskEncryption boolean - Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster's local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally that is unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node, and it is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access. A minimal sketch combining local disk encryption with autoscaling local storage follows the autoscaling example below.
- gcpAttributes ClusterGcpAttributes
- idempotencyToken string - An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster but will return the ID of the existing running cluster instead. If you specify the idempotency token, upon failure you can retry until the request succeeds; the Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
- initScripts ClusterInitScript[]
- instancePoolId string - To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster's request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to TERMINATED, the instances it used are returned to the pool and can be reused by a different cluster.
- isPinned boolean - Boolean value specifying whether the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The maximum number of pinned clusters is limited to 100, so apply may fail if you have more than that (this number may change over time, so check the Databricks documentation for the actual number).
- libraries ClusterLibrary[]
- noWait boolean - If true, the provider will not wait for the cluster to reach the RUNNING state when creating the cluster, allowing cluster creation and library installation to continue asynchronously. Defaults to false (the provider will wait for cluster creation and library installation to succeed).

The following example demonstrates how to create an autoscaling cluster with Delta Cache enabled:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const smallest = databricks.getNodeType({ localDisk: true, }); const latestLts = databricks.getSparkVersion({ longTermSupport: true, }); const sharedAutoscaling = new databricks.Cluster("shared_autoscaling", { clusterName: "Shared Autoscaling", sparkVersion: latestLts.then(latestLts => latestLts.id), nodeTypeId: smallest.then(smallest => smallest.id), autoterminationMinutes: 20, autoscale: { minWorkers: 1, maxWorkers: 50, }, sparkConf: { "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", }, });
import pulumi import pulumi_databricks as databricks smallest = databricks.get_node_type(local_disk=True) latest_lts = databricks.get_spark_version(long_term_support=True) shared_autoscaling = databricks.Cluster("shared_autoscaling", cluster_name="Shared Autoscaling", spark_version=latest_lts.id, node_type_id=smallest.id, autotermination_minutes=20, autoscale={ "min_workers": 1, "max_workers": 50, }, spark_conf={ "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var smallest = Databricks.GetNodeType.Invoke(new() { LocalDisk = true, }); var latestLts = Databricks.GetSparkVersion.Invoke(new() { LongTermSupport = true, }); var sharedAutoscaling = new Databricks.Cluster("shared_autoscaling", new() { ClusterName = "Shared Autoscaling", SparkVersion = latestLts.Apply(getSparkVersionResult => getSparkVersionResult.Id), NodeTypeId = smallest.Apply(getNodeTypeResult => getNodeTypeResult.Id), AutoterminationMinutes = 20, Autoscale = new Databricks.Inputs.ClusterAutoscaleArgs { MinWorkers = 1, MaxWorkers = 50, }, SparkConf = { { "spark.databricks.io.cache.enabled", "true" }, { "spark.databricks.io.cache.maxDiskUsage", "50g" }, { "spark.databricks.io.cache.maxMetaDataCache", "1g" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { smallest, err := databricks.GetNodeType(ctx, &databricks.GetNodeTypeArgs{ LocalDisk: pulumi.BoolRef(true), }, nil) if err != nil { return err } latestLts, err := databricks.GetSparkVersion(ctx, &databricks.GetSparkVersionArgs{ LongTermSupport: pulumi.BoolRef(true), }, nil) if err != nil { return err } _, err = databricks.NewCluster(ctx, "shared_autoscaling", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared Autoscaling"), SparkVersion: pulumi.String(latestLts.Id), NodeTypeId: pulumi.String(smallest.Id), AutoterminationMinutes: pulumi.Int(20), Autoscale: &databricks.ClusterAutoscaleArgs{ MinWorkers: pulumi.Int(1), MaxWorkers: pulumi.Int(50), }, SparkConf: pulumi.StringMap{ "spark.databricks.io.cache.enabled": pulumi.String("true"), "spark.databricks.io.cache.maxDiskUsage": pulumi.String("50g"), "spark.databricks.io.cache.maxMetaDataCache": pulumi.String("1g"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.DatabricksFunctions; import com.pulumi.databricks.inputs.GetNodeTypeArgs; import com.pulumi.databricks.inputs.GetSparkVersionArgs; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterAutoscaleArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { final var smallest = DatabricksFunctions.getNodeType(GetNodeTypeArgs.builder() .localDisk(true) .build()); final var latestLts = DatabricksFunctions.getSparkVersion(GetSparkVersionArgs.builder() .longTermSupport(true) .build()); var sharedAutoscaling = new Cluster("sharedAutoscaling", ClusterArgs.builder() .clusterName("Shared Autoscaling") .sparkVersion(latestLts.applyValue(getSparkVersionResult -> getSparkVersionResult.id())) .nodeTypeId(smallest.applyValue(getNodeTypeResult -> getNodeTypeResult.id())) .autoterminationMinutes(20) .autoscale(ClusterAutoscaleArgs.builder() .minWorkers(1) .maxWorkers(50) .build()) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.io.cache.enabled", "true"), Map.entry("spark.databricks.io.cache.maxDiskUsage", "50g"), Map.entry("spark.databricks.io.cache.maxMetaDataCache", "1g") )) .build()); } }
resources: sharedAutoscaling: type: databricks:Cluster name: shared_autoscaling properties: clusterName: Shared Autoscaling sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 autoscale: minWorkers: 1 maxWorkers: 50 sparkConf: spark.databricks.io.cache.enabled: true spark.databricks.io.cache.maxDiskUsage: 50g spark.databricks.io.cache.maxMetaDataCache: 1g variables: smallest: fn::invoke: Function: databricks:getNodeType Arguments: localDisk: true latestLts: fn::invoke: Function: databricks:getSparkVersion Arguments: longTermSupport: true
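The enableElasticDisk and enableLocalDiskEncryption flags described above are plain booleans on the resource. Below is a minimal TypeScript sketch, assuming an AWS workspace where autoscaling local storage is available; the cluster name and sizing are illustrative, not taken from the provider documentation.

import * as databricks from "@pulumi/databricks";

const smallest = databricks.getNodeType({ localDisk: true });
const latestLts = databricks.getSparkVersion({ longTermSupport: true });

// Illustrative cluster with autoscaling local storage and local disk encryption.
const encryptedElastic = new databricks.Cluster("encrypted_elastic", {
    clusterName: "Encrypted Elastic",
    sparkVersion: latestLts.then(v => v.id),
    nodeTypeId: smallest.then(n => n.id),
    autoscale: { minWorkers: 1, maxWorkers: 8 },
    autoterminationMinutes: 20,       // recommended together with autoscale
    enableElasticDisk: true,          // let Databricks attach EBS volumes on demand
    enableLocalDiskEncryption: true,  // encrypt shuffle/temp data on local disks
});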
- nodeTypeId string - Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
- numWorkers number - Number of worker nodes that this cluster should have. A cluster has one Spark driver and num_workers executors, for a total of num_workers + 1 Spark nodes.
- policyId string - Identifier of a Cluster Policy used to validate the cluster and preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters. For example, when you specify the policy_id of an external metastore policy, you still have to fill in the relevant keys for spark_conf. If the relevant fields aren't filled in, configuration drift will be detected on each plan/apply, and Pulumi will try to apply the detected changes.
- runtimeEngine string - The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the spark_version value. Allowed values include: PHOTON, STANDARD.
- singleUserName string - The optional user name of the user to assign to an interactive cluster. This field is required when data_security_mode is set to SINGLE_USER or when using AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters). A minimal single-user sketch appears after this list.
- sparkConf {[key: string]: string} - Should have the following items: spark.databricks.repl.allowedLanguages set to a list of supported languages, for example python,sql or python,sql,r (Scala is not supported!), and spark.databricks.cluster.profile set to serverless.
- sparkEnvVars {[key: string]: string} - Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
- sshPublicKeys string[] - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
- workloadType ClusterWorkloadType
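To tie dataSecurityMode and singleUserName together, here is a minimal TypeScript sketch of a single-user Unity Catalog cluster. The user name is a placeholder, and runtimeEngine is included only to show the PHOTON value mentioned above.

import * as databricks from "@pulumi/databricks";

const smallest = databricks.getNodeType({ localDisk: true });
const latestLts = databricks.getSparkVersion({ longTermSupport: true });

// Illustrative single-user Unity Catalog cluster; the principal is a placeholder.
const singleUser = new databricks.Cluster("single_user", {
    clusterName: "Single User UC",
    sparkVersion: latestLts.then(v => v.id),
    nodeTypeId: smallest.then(n => n.id),
    numWorkers: 1,
    autoterminationMinutes: 20,
    dataSecurityMode: "SINGLE_USER",       // Unity Catalog access mode
    singleUserName: "someone@example.com", // placeholder user to assign
    runtimeEngine: "PHOTON",               // optional; inferred from spark_version if omitted
});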
- spark_version str - Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
- apply_policy_default_values bool - Whether to use policy default values for missing cluster attributes.
- autoscale ClusterAutoscaleArgs
- autotermination_minutes int - Automatically terminate the cluster after it has been inactive for this many minutes. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. Defaults to 60. We highly recommend having this setting present for Interactive/BI clusters.
- aws_attributes ClusterAwsAttributesArgs
- azure_attributes ClusterAzureAttributesArgs
- cluster_log_conf ClusterClusterLogConfArgs
- cluster_mount_infos Sequence[ClusterClusterMountInfoArgs]
- cluster_name str - Cluster name, which doesn't have to be unique. If not specified at creation, the cluster name will be an empty string.
- custom_tags Mapping[str, str] - Should have the tag ResourceClass set to value Serverless. For example:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const clusterWithTableAccessControl = new databricks.Cluster("cluster_with_table_access_control", { clusterName: "Shared High-Concurrency", sparkVersion: latestLts.id, nodeTypeId: smallest.id, autoterminationMinutes: 20, sparkConf: { "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, customTags: { ResourceClass: "Serverless", }, });
import pulumi import pulumi_databricks as databricks cluster_with_table_access_control = databricks.Cluster("cluster_with_table_access_control", cluster_name="Shared High-Concurrency", spark_version=latest_lts["id"], node_type_id=smallest["id"], autotermination_minutes=20, spark_conf={ "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, custom_tags={ "ResourceClass": "Serverless", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var clusterWithTableAccessControl = new Databricks.Cluster("cluster_with_table_access_control", new() { ClusterName = "Shared High-Concurrency", SparkVersion = latestLts.Id, NodeTypeId = smallest.Id, AutoterminationMinutes = 20, SparkConf = { { "spark.databricks.repl.allowedLanguages", "python,sql" }, { "spark.databricks.cluster.profile", "serverless" }, }, CustomTags = { { "ResourceClass", "Serverless" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { _, err := databricks.NewCluster(ctx, "cluster_with_table_access_control", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared High-Concurrency"), SparkVersion: pulumi.Any(latestLts.Id), NodeTypeId: pulumi.Any(smallest.Id), AutoterminationMinutes: pulumi.Int(20), SparkConf: pulumi.StringMap{ "spark.databricks.repl.allowedLanguages": pulumi.String("python,sql"), "spark.databricks.cluster.profile": pulumi.String("serverless"), }, CustomTags: pulumi.StringMap{ "ResourceClass": pulumi.String("Serverless"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var clusterWithTableAccessControl = new Cluster("clusterWithTableAccessControl", ClusterArgs.builder() .clusterName("Shared High-Concurrency") .sparkVersion(latestLts.id()) .nodeTypeId(smallest.id()) .autoterminationMinutes(20) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.repl.allowedLanguages", "python,sql"), Map.entry("spark.databricks.cluster.profile", "serverless") )) .customTags(Map.of("ResourceClass", "Serverless")) .build()); } }
resources: clusterWithTableAccessControl: type: databricks:Cluster name: cluster_with_table_access_control properties: clusterName: Shared High-Concurrency sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 sparkConf: spark.databricks.repl.allowedLanguages: python,sql spark.databricks.cluster.profile: serverless customTags: ResourceClass: Serverless
- data_security_mode str - Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. LEGACY_PASSTHROUGH is for passthrough clusters and LEGACY_TABLE_ACL for Table ACL clusters. If omitted, default security features are enabled. To disable security features, use NONE or the legacy mode NO_ISOLATION. In the Databricks UI, this has recently been renamed Access Mode and USER_ISOLATION has been renamed Shared, but use these terms here.
- docker_image ClusterDockerImageArgs
- driver_instance_pool_id str - Similar to instance_pool_id, but for the driver node. If omitted and instance_pool_id is specified, then the driver will be allocated from that pool.
- driver_node_type_id str - The node type of the Spark driver. This field is optional; if unset, the API will set the driver node type to the same value as node_type_id defined above.
- enable_elastic_disk bool - If you don't want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster's Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance's local storage). To scale down EBS usage, make sure you have the autotermination_minutes and autoscale attributes set. More documentation is available on the cluster configuration page.
- enable_local_disk_encryption bool - Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster's local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally that is unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node, and it is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
- gcp_attributes ClusterGcpAttributesArgs
- idempotency_token str - An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster but will return the ID of the existing running cluster instead. If you specify the idempotency token, upon failure you can retry until the request succeeds; the Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
- init_scripts Sequence[ClusterInitScriptArgs]
- instance_pool_id str - To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster's request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to TERMINATED, the instances it used are returned to the pool and can be reused by a different cluster.
- is_pinned bool - Boolean value specifying whether the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The maximum number of pinned clusters is limited to 100, so apply may fail if you have more than that (this number may change over time, so check the Databricks documentation for the actual number). A minimal sketch combining is_pinned with no_wait follows the autoscaling example below.
- libraries Sequence[ClusterLibraryArgs]
- no_wait bool - If true, the provider will not wait for the cluster to reach the RUNNING state when creating the cluster, allowing cluster creation and library installation to continue asynchronously. Defaults to false (the provider will wait for cluster creation and library installation to succeed).

The following example demonstrates how to create an autoscaling cluster with Delta Cache enabled:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const smallest = databricks.getNodeType({ localDisk: true, }); const latestLts = databricks.getSparkVersion({ longTermSupport: true, }); const sharedAutoscaling = new databricks.Cluster("shared_autoscaling", { clusterName: "Shared Autoscaling", sparkVersion: latestLts.then(latestLts => latestLts.id), nodeTypeId: smallest.then(smallest => smallest.id), autoterminationMinutes: 20, autoscale: { minWorkers: 1, maxWorkers: 50, }, sparkConf: { "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", }, });
import pulumi import pulumi_databricks as databricks smallest = databricks.get_node_type(local_disk=True) latest_lts = databricks.get_spark_version(long_term_support=True) shared_autoscaling = databricks.Cluster("shared_autoscaling", cluster_name="Shared Autoscaling", spark_version=latest_lts.id, node_type_id=smallest.id, autotermination_minutes=20, autoscale={ "min_workers": 1, "max_workers": 50, }, spark_conf={ "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var smallest = Databricks.GetNodeType.Invoke(new() { LocalDisk = true, }); var latestLts = Databricks.GetSparkVersion.Invoke(new() { LongTermSupport = true, }); var sharedAutoscaling = new Databricks.Cluster("shared_autoscaling", new() { ClusterName = "Shared Autoscaling", SparkVersion = latestLts.Apply(getSparkVersionResult => getSparkVersionResult.Id), NodeTypeId = smallest.Apply(getNodeTypeResult => getNodeTypeResult.Id), AutoterminationMinutes = 20, Autoscale = new Databricks.Inputs.ClusterAutoscaleArgs { MinWorkers = 1, MaxWorkers = 50, }, SparkConf = { { "spark.databricks.io.cache.enabled", "true" }, { "spark.databricks.io.cache.maxDiskUsage", "50g" }, { "spark.databricks.io.cache.maxMetaDataCache", "1g" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { smallest, err := databricks.GetNodeType(ctx, &databricks.GetNodeTypeArgs{ LocalDisk: pulumi.BoolRef(true), }, nil) if err != nil { return err } latestLts, err := databricks.GetSparkVersion(ctx, &databricks.GetSparkVersionArgs{ LongTermSupport: pulumi.BoolRef(true), }, nil) if err != nil { return err } _, err = databricks.NewCluster(ctx, "shared_autoscaling", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared Autoscaling"), SparkVersion: pulumi.String(latestLts.Id), NodeTypeId: pulumi.String(smallest.Id), AutoterminationMinutes: pulumi.Int(20), Autoscale: &databricks.ClusterAutoscaleArgs{ MinWorkers: pulumi.Int(1), MaxWorkers: pulumi.Int(50), }, SparkConf: pulumi.StringMap{ "spark.databricks.io.cache.enabled": pulumi.String("true"), "spark.databricks.io.cache.maxDiskUsage": pulumi.String("50g"), "spark.databricks.io.cache.maxMetaDataCache": pulumi.String("1g"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.DatabricksFunctions; import com.pulumi.databricks.inputs.GetNodeTypeArgs; import com.pulumi.databricks.inputs.GetSparkVersionArgs; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterAutoscaleArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { final var smallest = DatabricksFunctions.getNodeType(GetNodeTypeArgs.builder() .localDisk(true) .build()); final var latestLts = DatabricksFunctions.getSparkVersion(GetSparkVersionArgs.builder() .longTermSupport(true) .build()); var sharedAutoscaling = new Cluster("sharedAutoscaling", ClusterArgs.builder() .clusterName("Shared Autoscaling") .sparkVersion(latestLts.applyValue(getSparkVersionResult -> getSparkVersionResult.id())) .nodeTypeId(smallest.applyValue(getNodeTypeResult -> getNodeTypeResult.id())) .autoterminationMinutes(20) .autoscale(ClusterAutoscaleArgs.builder() .minWorkers(1) .maxWorkers(50) .build()) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.io.cache.enabled", "true"), Map.entry("spark.databricks.io.cache.maxDiskUsage", "50g"), Map.entry("spark.databricks.io.cache.maxMetaDataCache", "1g") )) .build()); } }
resources: sharedAutoscaling: type: databricks:Cluster name: shared_autoscaling properties: clusterName: Shared Autoscaling sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 autoscale: minWorkers: 1 maxWorkers: 50 sparkConf: spark.databricks.io.cache.enabled: true spark.databricks.io.cache.maxDiskUsage: 50g spark.databricks.io.cache.maxMetaDataCache: 1g variables: smallest: fn::invoke: Function: databricks:getNodeType Arguments: localDisk: true latestLts: fn::invoke: Function: databricks:getSparkVersion Arguments: longTermSupport: true
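The is_pinned and no_wait flags described above can be combined when you want a pinned cluster whose creation does not block the deployment. Below is a minimal TypeScript sketch (property names appear in the TypeScript SDK's camelCase form; the cluster itself is illustrative).

import * as databricks from "@pulumi/databricks";

const smallest = databricks.getNodeType({ localDisk: true });
const latestLts = databricks.getSparkVersion({ longTermSupport: true });

// Illustrative pinned cluster; creation returns without waiting for RUNNING.
const pinned = new databricks.Cluster("pinned", {
    clusterName: "Pinned Cluster",
    sparkVersion: latestLts.then(v => v.id),
    nodeTypeId: smallest.then(n => n.id),
    numWorkers: 1,
    autoterminationMinutes: 20,
    isPinned: true, // requires Databricks admin; counts toward the pinned-cluster limit
    noWait: true,   // do not block the update waiting for the RUNNING state
});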
- node_type_id str - Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
- num_workers int - Number of worker nodes that this cluster should have. A cluster has one Spark driver and num_workers executors, for a total of num_workers + 1 Spark nodes.
- policy_id str - Identifier of a Cluster Policy used to validate the cluster and preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters. For example, when you specify the policy_id of an external metastore policy, you still have to fill in the relevant keys for spark_conf. If the relevant fields aren't filled in, configuration drift will be detected on each plan/apply, and Pulumi will try to apply the detected changes. A minimal policy-scoped sketch appears after this list.
- runtime_engine str - The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the spark_version value. Allowed values include: PHOTON, STANDARD.
- single_user_name str - The optional user name of the user to assign to an interactive cluster. This field is required when data_security_mode is set to SINGLE_USER or when using AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
- spark_conf Mapping[str, str] - Should have the following items: spark.databricks.repl.allowedLanguages set to a list of supported languages, for example python,sql or python,sql,r (Scala is not supported!), and spark.databricks.cluster.profile set to serverless.
- spark_env_vars Mapping[str, str] - Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
- ssh_public_keys Sequence[str] - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
- workload_type ClusterWorkloadTypeArgs
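As a companion to the policy_id and apply_policy_default_values descriptions, here is a minimal TypeScript sketch of a policy-scoped cluster. It assumes the ID of an existing cluster policy is supplied via stack configuration; the config key name is a placeholder.

import * as pulumi from "@pulumi/pulumi";
import * as databricks from "@pulumi/databricks";

const config = new pulumi.Config();
// ID of an existing cluster policy, supplied via stack config (placeholder key name).
const clusterPolicyId = config.require("clusterPolicyId");

const smallest = databricks.getNodeType({ localDisk: true });
const latestLts = databricks.getSparkVersion({ longTermSupport: true });

// Illustrative policy-scoped cluster; unset attributes fall back to policy defaults.
const policyScoped = new databricks.Cluster("policy_scoped", {
    clusterName: "Policy Scoped",
    sparkVersion: latestLts.then(v => v.id),
    nodeTypeId: smallest.then(n => n.id),
    numWorkers: 1,
    autoterminationMinutes: 20,
    policyId: clusterPolicyId,
    applyPolicyDefaultValues: true, // fill missing attributes from the policy
});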
- sparkVersion String - Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
- applyPolicyDefaultValues Boolean - Whether to use policy default values for missing cluster attributes.
- autoscale Property Map
- autoterminationMinutes Number - Automatically terminate the cluster after it has been inactive for this many minutes. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. Defaults to 60. We highly recommend having this setting present for Interactive/BI clusters.
- awsAttributes Property Map
- azureAttributes Property Map
- clusterLogConf Property Map
- clusterMountInfos List<Property Map>
- clusterName String - Cluster name, which doesn't have to be unique. If not specified at creation, the cluster name will be an empty string.
- customTags Map<String> - Should have the tag ResourceClass set to value Serverless. For example:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const clusterWithTableAccessControl = new databricks.Cluster("cluster_with_table_access_control", { clusterName: "Shared High-Concurrency", sparkVersion: latestLts.id, nodeTypeId: smallest.id, autoterminationMinutes: 20, sparkConf: { "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, customTags: { ResourceClass: "Serverless", }, });
import pulumi import pulumi_databricks as databricks cluster_with_table_access_control = databricks.Cluster("cluster_with_table_access_control", cluster_name="Shared High-Concurrency", spark_version=latest_lts["id"], node_type_id=smallest["id"], autotermination_minutes=20, spark_conf={ "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, custom_tags={ "ResourceClass": "Serverless", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var clusterWithTableAccessControl = new Databricks.Cluster("cluster_with_table_access_control", new() { ClusterName = "Shared High-Concurrency", SparkVersion = latestLts.Id, NodeTypeId = smallest.Id, AutoterminationMinutes = 20, SparkConf = { { "spark.databricks.repl.allowedLanguages", "python,sql" }, { "spark.databricks.cluster.profile", "serverless" }, }, CustomTags = { { "ResourceClass", "Serverless" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { _, err := databricks.NewCluster(ctx, "cluster_with_table_access_control", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared High-Concurrency"), SparkVersion: pulumi.Any(latestLts.Id), NodeTypeId: pulumi.Any(smallest.Id), AutoterminationMinutes: pulumi.Int(20), SparkConf: pulumi.StringMap{ "spark.databricks.repl.allowedLanguages": pulumi.String("python,sql"), "spark.databricks.cluster.profile": pulumi.String("serverless"), }, CustomTags: pulumi.StringMap{ "ResourceClass": pulumi.String("Serverless"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var clusterWithTableAccessControl = new Cluster("clusterWithTableAccessControl", ClusterArgs.builder() .clusterName("Shared High-Concurrency") .sparkVersion(latestLts.id()) .nodeTypeId(smallest.id()) .autoterminationMinutes(20) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.repl.allowedLanguages", "python,sql"), Map.entry("spark.databricks.cluster.profile", "serverless") )) .customTags(Map.of("ResourceClass", "Serverless")) .build()); } }
resources: clusterWithTableAccessControl: type: databricks:Cluster name: cluster_with_table_access_control properties: clusterName: Shared High-Concurrency sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 sparkConf: spark.databricks.repl.allowedLanguages: python,sql spark.databricks.cluster.profile: serverless customTags: ResourceClass: Serverless
- data
Security StringMode - Select the security features of the cluster. Unity Catalog requires
SINGLE_USER
orUSER_ISOLATION
mode.LEGACY_PASSTHROUGH
for passthrough cluster andLEGACY_TABLE_ACL
for Table ACL cluster. If omitted, default security features are enabled. To disable security features useNONE
or legacy modeNO_ISOLATION
. In the Databricks UI, this has been recently been renamed Access Mode andUSER_ISOLATION
has been renamed Shared, but use these terms here. - docker
Image Property Map - driver
Instance StringPool Id - similar to
instance_pool_id
, but for driver node. If omitted, andinstance_pool_id
is specified, then the driver will be allocated from that pool. - driver
Node StringType Id - The node type of the Spark driver. This field is optional; if unset, API will set the driver node type to the same value as
node_type_id
defined above. - enable
Elastic BooleanDisk - If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have
autotermination_minutes
andautoscale
attributes set. More documentation available at cluster configuration page. - enable
Local BooleanDisk Encryption - Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node and is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
- gcp
Attributes Property Map - idempotency
Token String - An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but it will return the existing running cluster's ID instead. If you specify the idempotency token, upon failure, you can retry until the request succeeds. Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
- init
Scripts List<Property Map> - instance
Pool StringId - To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to
TERMINATED
, the instances it used are returned to the pool and reused by a different cluster. - is
Pinned Boolean - boolean value specifying if the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The pinned clusters' maximum number is limited to 100, so
apply
may fail if you have more than that (this number may change over time, so check Databricks documentation for actual number). - libraries List<Property Map>
- no
Wait Boolean - If true, the provider will not wait for the cluster to reach
RUNNING
state when creating the cluster, allowing cluster creation and library installation to continue asynchronously. Defaults to false (the provider will wait for cluster creation and library installation to succeed). The following example demonstrates how to create an autoscaling cluster with Delta Cache enabled:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const smallest = databricks.getNodeType({ localDisk: true, }); const latestLts = databricks.getSparkVersion({ longTermSupport: true, }); const sharedAutoscaling = new databricks.Cluster("shared_autoscaling", { clusterName: "Shared Autoscaling", sparkVersion: latestLts.then(latestLts => latestLts.id), nodeTypeId: smallest.then(smallest => smallest.id), autoterminationMinutes: 20, autoscale: { minWorkers: 1, maxWorkers: 50, }, sparkConf: { "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", }, });
import pulumi import pulumi_databricks as databricks smallest = databricks.get_node_type(local_disk=True) latest_lts = databricks.get_spark_version(long_term_support=True) shared_autoscaling = databricks.Cluster("shared_autoscaling", cluster_name="Shared Autoscaling", spark_version=latest_lts.id, node_type_id=smallest.id, autotermination_minutes=20, autoscale={ "min_workers": 1, "max_workers": 50, }, spark_conf={ "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var smallest = Databricks.GetNodeType.Invoke(new() { LocalDisk = true, }); var latestLts = Databricks.GetSparkVersion.Invoke(new() { LongTermSupport = true, }); var sharedAutoscaling = new Databricks.Cluster("shared_autoscaling", new() { ClusterName = "Shared Autoscaling", SparkVersion = latestLts.Apply(getSparkVersionResult => getSparkVersionResult.Id), NodeTypeId = smallest.Apply(getNodeTypeResult => getNodeTypeResult.Id), AutoterminationMinutes = 20, Autoscale = new Databricks.Inputs.ClusterAutoscaleArgs { MinWorkers = 1, MaxWorkers = 50, }, SparkConf = { { "spark.databricks.io.cache.enabled", "true" }, { "spark.databricks.io.cache.maxDiskUsage", "50g" }, { "spark.databricks.io.cache.maxMetaDataCache", "1g" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { smallest, err := databricks.GetNodeType(ctx, &databricks.GetNodeTypeArgs{ LocalDisk: pulumi.BoolRef(true), }, nil) if err != nil { return err } latestLts, err := databricks.GetSparkVersion(ctx, &databricks.GetSparkVersionArgs{ LongTermSupport: pulumi.BoolRef(true), }, nil) if err != nil { return err } _, err = databricks.NewCluster(ctx, "shared_autoscaling", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared Autoscaling"), SparkVersion: pulumi.String(latestLts.Id), NodeTypeId: pulumi.String(smallest.Id), AutoterminationMinutes: pulumi.Int(20), Autoscale: &databricks.ClusterAutoscaleArgs{ MinWorkers: pulumi.Int(1), MaxWorkers: pulumi.Int(50), }, SparkConf: pulumi.StringMap{ "spark.databricks.io.cache.enabled": pulumi.String("true"), "spark.databricks.io.cache.maxDiskUsage": pulumi.String("50g"), "spark.databricks.io.cache.maxMetaDataCache": pulumi.String("1g"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.DatabricksFunctions; import com.pulumi.databricks.inputs.GetNodeTypeArgs; import com.pulumi.databricks.inputs.GetSparkVersionArgs; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterAutoscaleArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { final var smallest = DatabricksFunctions.getNodeType(GetNodeTypeArgs.builder() .localDisk(true) .build()); final var latestLts = DatabricksFunctions.getSparkVersion(GetSparkVersionArgs.builder() .longTermSupport(true) .build()); var sharedAutoscaling = new Cluster("sharedAutoscaling", ClusterArgs.builder() .clusterName("Shared Autoscaling") .sparkVersion(latestLts.applyValue(getSparkVersionResult -> getSparkVersionResult.id())) .nodeTypeId(smallest.applyValue(getNodeTypeResult -> getNodeTypeResult.id())) .autoterminationMinutes(20) .autoscale(ClusterAutoscaleArgs.builder() .minWorkers(1) .maxWorkers(50) .build()) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.io.cache.enabled", true), Map.entry("spark.databricks.io.cache.maxDiskUsage", "50g"), Map.entry("spark.databricks.io.cache.maxMetaDataCache", "1g") )) .build()); } }
resources: sharedAutoscaling: type: databricks:Cluster name: shared_autoscaling properties: clusterName: Shared Autoscaling sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 autoscale: minWorkers: 1 maxWorkers: 50 sparkConf: spark.databricks.io.cache.enabled: true spark.databricks.io.cache.maxDiskUsage: 50g spark.databricks.io.cache.maxMetaDataCache: 1g variables: smallest: fn::invoke: Function: databricks:getNodeType Arguments: localDisk: true latestLts: fn::invoke: Function: databricks:getSparkVersion Arguments: longTermSupport: true
- node
Type StringId - Any supported databricks.getNodeType id. If
instance_pool_id
is specified, this field is not needed. - num
Workers Number - Number of worker nodes that this cluster should have. A cluster has one Spark driver and
num_workers
executors for a total ofnum_workers
+ 1 Spark nodes. - policy
Id String - Identifier of Cluster Policy to validate cluster and preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via UI rather than sharing configuration for API-created clusters. For example, when you specify
policy_id
of external metastore policy, you still have to fill in relevant keys forspark_conf
. If relevant fields aren't filled in, then it will cause the configuration drift detected on each plan/apply, and Pulumi will try to apply the detected changes. - runtime
Engine String - The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the spark_version value. Allowed values include:
PHOTON
,STANDARD
. - single
User StringName - The optional user name of the user to assign to an interactive cluster. This field is required when using
data_security_mode
set toSINGLE_USER
or AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters). - spark
Conf Map<String> - should have the following items:
spark.databricks.repl.allowedLanguages
set to a list of supported languages, for example:python,sql
, orpython,sql,r
. Scala is not supported!spark.databricks.cluster.profile
set toserverless
- spark
Env Vars Map<String> - Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
- ssh
Public Keys List<String> - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
- workload
Type Property Map
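The sketch below shows how several of the arguments above combine on a single cluster: it attaches the cluster to a pre-existing instance pool, enables Unity Catalog single-user access mode, and sets an idempotency token so retried creations reuse the same cluster. The pool ID and user name are placeholders for illustration only, not values taken from this page.

import * as pulumi from "@pulumi/pulumi";
import * as databricks from "@pulumi/databricks";

// Latest LTS Databricks runtime, as in the examples above.
const latestLts = databricks.getSparkVersion({ longTermSupport: true });

// Placeholder identifiers -- replace with real values from your workspace.
const poolId = "0123-456789-pool00";      // hypothetical instance pool id
const owner = "someone@example.com";      // hypothetical workspace user

const singleUserCluster = new databricks.Cluster("single_user_cluster", {
    clusterName: "Single User UC",
    sparkVersion: latestLts.then(v => v.id),
    instancePoolId: poolId,               // driver and workers are allocated from the pool
    dataSecurityMode: "SINGLE_USER",      // Unity Catalog single-user access mode
    singleUserName: owner,
    autoterminationMinutes: 20,
    idempotencyToken: "single-user-uc",   // retried creations return the existing cluster
});

export const singleUserClusterId = singleUserCluster.clusterId;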
Outputs
All input properties are implicitly available as output properties. Additionally, the Cluster resource produces the following output properties:
- Cluster
Id string - Default Tags Dictionary<string, string>
- (map) Tags that are added by Databricks by default, regardless of any
custom_tags
that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name: , and any workspace and pool tags. - Id string
- The provider-assigned unique ID for this managed resource.
- State string
- (string) State of the cluster.
- Url string
- Cluster
Id string - Default Tags map[string]string
- (map) Tags that are added by Databricks by default, regardless of any
custom_tags
that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name: , and any workspace and pool tags. - Id string
- The provider-assigned unique ID for this managed resource.
- State string
- (string) State of the cluster.
- Url string
- cluster
Id String - default Tags Map<String,String>
- (map) Tags that are added by Databricks by default, regardless of any
custom_tags
that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name: , and any workspace and pool tags. - id String
- The provider-assigned unique ID for this managed resource.
- state String
- (string) State of the cluster.
- url String
- cluster
Id string - default Tags {[key: string]: string}
- (map) Tags that are added by Databricks by default, regardless of any
custom_tags
that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name: , and any workspace and pool tags. - id string
- The provider-assigned unique ID for this managed resource.
- state string
- (string) State of the cluster.
- url string
- cluster_
id str - default_tags Mapping[str, str]
- (map) Tags that are added by Databricks by default, regardless of any
custom_tags
that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name: , and any workspace and pool tags. - id str
- The provider-assigned unique ID for this managed resource.
- state str
- (string) State of the cluster.
- url str
- cluster
Id String - default Tags Map<String>
- (map) Tags that are added by Databricks by default, regardless of any
custom_tags
that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name: , and any workspace and pool tags. - id String
- The provider-assigned unique ID for this managed resource.
- state String
- (string) State of the cluster.
- url String
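The output properties can be referenced and exported like any other Pulumi outputs. A minimal TypeScript sketch, assuming the sharedAutoscaling cluster defined in the earlier example:

// Assuming `sharedAutoscaling` is the databricks.Cluster created above.
export const clusterId = sharedAutoscaling.clusterId;      // Databricks-assigned cluster id
export const clusterUrl = sharedAutoscaling.url;           // link to the cluster in the workspace UI
export const clusterState = sharedAutoscaling.state;       // e.g. RUNNING or TERMINATED
export const defaultTags = sharedAutoscaling.defaultTags;  // tags Databricks adds automatically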
Look up Existing Cluster Resource
Get an existing Cluster resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: ClusterState, opts?: CustomResourceOptions): Cluster
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
apply_policy_default_values: Optional[bool] = None,
autoscale: Optional[ClusterAutoscaleArgs] = None,
autotermination_minutes: Optional[int] = None,
aws_attributes: Optional[ClusterAwsAttributesArgs] = None,
azure_attributes: Optional[ClusterAzureAttributesArgs] = None,
cluster_id: Optional[str] = None,
cluster_log_conf: Optional[ClusterClusterLogConfArgs] = None,
cluster_mount_infos: Optional[Sequence[ClusterClusterMountInfoArgs]] = None,
cluster_name: Optional[str] = None,
custom_tags: Optional[Mapping[str, str]] = None,
data_security_mode: Optional[str] = None,
default_tags: Optional[Mapping[str, str]] = None,
docker_image: Optional[ClusterDockerImageArgs] = None,
driver_instance_pool_id: Optional[str] = None,
driver_node_type_id: Optional[str] = None,
enable_elastic_disk: Optional[bool] = None,
enable_local_disk_encryption: Optional[bool] = None,
gcp_attributes: Optional[ClusterGcpAttributesArgs] = None,
idempotency_token: Optional[str] = None,
init_scripts: Optional[Sequence[ClusterInitScriptArgs]] = None,
instance_pool_id: Optional[str] = None,
is_pinned: Optional[bool] = None,
libraries: Optional[Sequence[ClusterLibraryArgs]] = None,
no_wait: Optional[bool] = None,
node_type_id: Optional[str] = None,
num_workers: Optional[int] = None,
policy_id: Optional[str] = None,
runtime_engine: Optional[str] = None,
single_user_name: Optional[str] = None,
spark_conf: Optional[Mapping[str, str]] = None,
spark_env_vars: Optional[Mapping[str, str]] = None,
spark_version: Optional[str] = None,
ssh_public_keys: Optional[Sequence[str]] = None,
state: Optional[str] = None,
url: Optional[str] = None,
workload_type: Optional[ClusterWorkloadTypeArgs] = None) -> Cluster
func GetCluster(ctx *Context, name string, id IDInput, state *ClusterState, opts ...ResourceOption) (*Cluster, error)
public static Cluster Get(string name, Input<string> id, ClusterState? state, CustomResourceOptions? opts = null)
public static Cluster get(String name, Output<String> id, ClusterState state, CustomResourceOptions options)
Resource lookup is not supported in YAML
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
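For example, an existing cluster can be looked up by its ID and its outputs referenced without recreating it. A minimal TypeScript sketch, assuming a hypothetical cluster id:

import * as databricks from "@pulumi/databricks";

// Hypothetical id of a cluster that already exists in the workspace.
const existing = databricks.Cluster.get("existing-cluster", "0123-456789-abcdefgh");

// The looked-up resource exposes the same output properties as a managed one.
export const existingClusterUrl = existing.url;
export const existingClusterState = existing.state;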
- Apply
Policy Default Values bool - Whether to use policy default values for missing cluster attributes.
- Autoscale
Cluster
Autoscale - Autotermination
Minutes int - Automatically terminate the cluster after being inactive for this time in minutes. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. Defaults to
60
. We highly recommend having this setting present for Interactive/BI clusters. - Aws
Attributes ClusterAws Attributes - Azure
Attributes ClusterAzure Attributes - Cluster
Id string - Cluster
Log ClusterConf Cluster Log Conf - Cluster
Mount List<ClusterInfos Cluster Mount Info> - Cluster
Name string - Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
- Custom Tags Dictionary<string, string>
should have tag
ResourceClass
set to valueServerless
For example:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const clusterWithTableAccessControl = new databricks.Cluster("cluster_with_table_access_control", { clusterName: "Shared High-Concurrency", sparkVersion: latestLts.id, nodeTypeId: smallest.id, autoterminationMinutes: 20, sparkConf: { "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, customTags: { ResourceClass: "Serverless", }, });
import pulumi import pulumi_databricks as databricks cluster_with_table_access_control = databricks.Cluster("cluster_with_table_access_control", cluster_name="Shared High-Concurrency", spark_version=latest_lts["id"], node_type_id=smallest["id"], autotermination_minutes=20, spark_conf={ "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, custom_tags={ "ResourceClass": "Serverless", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var clusterWithTableAccessControl = new Databricks.Cluster("cluster_with_table_access_control", new() { ClusterName = "Shared High-Concurrency", SparkVersion = latestLts.Id, NodeTypeId = smallest.Id, AutoterminationMinutes = 20, SparkConf = { { "spark.databricks.repl.allowedLanguages", "python,sql" }, { "spark.databricks.cluster.profile", "serverless" }, }, CustomTags = { { "ResourceClass", "Serverless" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { _, err := databricks.NewCluster(ctx, "cluster_with_table_access_control", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared High-Concurrency"), SparkVersion: pulumi.Any(latestLts.Id), NodeTypeId: pulumi.Any(smallest.Id), AutoterminationMinutes: pulumi.Int(20), SparkConf: pulumi.StringMap{ "spark.databricks.repl.allowedLanguages": pulumi.String("python,sql"), "spark.databricks.cluster.profile": pulumi.String("serverless"), }, CustomTags: pulumi.StringMap{ "ResourceClass": pulumi.String("Serverless"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var clusterWithTableAccessControl = new Cluster("clusterWithTableAccessControl", ClusterArgs.builder() .clusterName("Shared High-Concurrency") .sparkVersion(latestLts.id()) .nodeTypeId(smallest.id()) .autoterminationMinutes(20) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.repl.allowedLanguages", "python,sql"), Map.entry("spark.databricks.cluster.profile", "serverless") )) .customTags(Map.of("ResourceClass", "Serverless")) .build()); } }
resources: clusterWithTableAccessControl: type: databricks:Cluster name: cluster_with_table_access_control properties: clusterName: Shared High-Concurrency sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 sparkConf: spark.databricks.repl.allowedLanguages: python,sql spark.databricks.cluster.profile: serverless customTags: ResourceClass: Serverless
- Data
Security Mode string - Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. Use LEGACY_PASSTHROUGH for a passthrough cluster and LEGACY_TABLE_ACL for a Table ACL cluster. If omitted, default security features are enabled. To disable security features, use NONE or the legacy mode NO_ISOLATION. In the Databricks UI this has recently been renamed Access Mode, and USER_ISOLATION has been renamed Shared, but use the values given here. - Default Tags Dictionary<string, string>
- (map) Tags that are added by Databricks by default, regardless of any
custom_tags
that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name: , and any workspace and pool tags. - Docker
Image ClusterDocker Image - Driver
Instance Pool Id string - similar to
instance_pool_id
, but for driver node. If omitted, andinstance_pool_id
is specified, then the driver will be allocated from that pool. - Driver
Node Type Id string - The node type of the Spark driver. This field is optional; if unset, the API will set the driver node type to the same value as
node_type_id
defined above. - Enable
Elastic boolDisk - If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have
autotermination_minutes
andautoscale
attributes set. More documentation available at cluster configuration page. - Enable
Local boolDisk Encryption - Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node and is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
- Gcp
Attributes ClusterGcp Attributes - Idempotency
Token string - An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but it will return the existing running cluster's ID instead. If you specify the idempotency token, upon failure, you can retry until the request succeeds. Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
- Init
Scripts List<ClusterInit Script> - Instance
Pool stringId - To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to
TERMINATED
, the instances it used are returned to the pool and reused by a different cluster. - Is
Pinned bool - boolean value specifying if the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The pinned clusters' maximum number is limited to 100, so
apply
may fail if you have more than that (this number may change over time, so check Databricks documentation for actual number). - Libraries
List<Cluster
Library> - No
Wait bool - If true, the provider will not wait for the cluster to reach
RUNNING
state when creating the cluster, allowing cluster creation and library installation to continue asynchronously. Defaults to false (the provider will wait for cluster creation and library installation to succeed). The following example demonstrates how to create an autoscaling cluster with Delta Cache enabled:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const smallest = databricks.getNodeType({ localDisk: true, }); const latestLts = databricks.getSparkVersion({ longTermSupport: true, }); const sharedAutoscaling = new databricks.Cluster("shared_autoscaling", { clusterName: "Shared Autoscaling", sparkVersion: latestLts.then(latestLts => latestLts.id), nodeTypeId: smallest.then(smallest => smallest.id), autoterminationMinutes: 20, autoscale: { minWorkers: 1, maxWorkers: 50, }, sparkConf: { "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", }, });
import pulumi import pulumi_databricks as databricks smallest = databricks.get_node_type(local_disk=True) latest_lts = databricks.get_spark_version(long_term_support=True) shared_autoscaling = databricks.Cluster("shared_autoscaling", cluster_name="Shared Autoscaling", spark_version=latest_lts.id, node_type_id=smallest.id, autotermination_minutes=20, autoscale={ "min_workers": 1, "max_workers": 50, }, spark_conf={ "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var smallest = Databricks.GetNodeType.Invoke(new() { LocalDisk = true, }); var latestLts = Databricks.GetSparkVersion.Invoke(new() { LongTermSupport = true, }); var sharedAutoscaling = new Databricks.Cluster("shared_autoscaling", new() { ClusterName = "Shared Autoscaling", SparkVersion = latestLts.Apply(getSparkVersionResult => getSparkVersionResult.Id), NodeTypeId = smallest.Apply(getNodeTypeResult => getNodeTypeResult.Id), AutoterminationMinutes = 20, Autoscale = new Databricks.Inputs.ClusterAutoscaleArgs { MinWorkers = 1, MaxWorkers = 50, }, SparkConf = { { "spark.databricks.io.cache.enabled", "true" }, { "spark.databricks.io.cache.maxDiskUsage", "50g" }, { "spark.databricks.io.cache.maxMetaDataCache", "1g" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { smallest, err := databricks.GetNodeType(ctx, &databricks.GetNodeTypeArgs{ LocalDisk: pulumi.BoolRef(true), }, nil) if err != nil { return err } latestLts, err := databricks.GetSparkVersion(ctx, &databricks.GetSparkVersionArgs{ LongTermSupport: pulumi.BoolRef(true), }, nil) if err != nil { return err } _, err = databricks.NewCluster(ctx, "shared_autoscaling", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared Autoscaling"), SparkVersion: pulumi.String(latestLts.Id), NodeTypeId: pulumi.String(smallest.Id), AutoterminationMinutes: pulumi.Int(20), Autoscale: &databricks.ClusterAutoscaleArgs{ MinWorkers: pulumi.Int(1), MaxWorkers: pulumi.Int(50), }, SparkConf: pulumi.StringMap{ "spark.databricks.io.cache.enabled": pulumi.String("true"), "spark.databricks.io.cache.maxDiskUsage": pulumi.String("50g"), "spark.databricks.io.cache.maxMetaDataCache": pulumi.String("1g"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.DatabricksFunctions; import com.pulumi.databricks.inputs.GetNodeTypeArgs; import com.pulumi.databricks.inputs.GetSparkVersionArgs; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterAutoscaleArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { final var smallest = DatabricksFunctions.getNodeType(GetNodeTypeArgs.builder() .localDisk(true) .build()); final var latestLts = DatabricksFunctions.getSparkVersion(GetSparkVersionArgs.builder() .longTermSupport(true) .build()); var sharedAutoscaling = new Cluster("sharedAutoscaling", ClusterArgs.builder() .clusterName("Shared Autoscaling") .sparkVersion(latestLts.applyValue(getSparkVersionResult -> getSparkVersionResult.id())) .nodeTypeId(smallest.applyValue(getNodeTypeResult -> getNodeTypeResult.id())) .autoterminationMinutes(20) .autoscale(ClusterAutoscaleArgs.builder() .minWorkers(1) .maxWorkers(50) .build()) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.io.cache.enabled", true), Map.entry("spark.databricks.io.cache.maxDiskUsage", "50g"), Map.entry("spark.databricks.io.cache.maxMetaDataCache", "1g") )) .build()); } }
resources: sharedAutoscaling: type: databricks:Cluster name: shared_autoscaling properties: clusterName: Shared Autoscaling sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 autoscale: minWorkers: 1 maxWorkers: 50 sparkConf: spark.databricks.io.cache.enabled: true spark.databricks.io.cache.maxDiskUsage: 50g spark.databricks.io.cache.maxMetaDataCache: 1g variables: smallest: fn::invoke: Function: databricks:getNodeType Arguments: localDisk: true latestLts: fn::invoke: Function: databricks:getSparkVersion Arguments: longTermSupport: true
- Node
Type stringId - Any supported databricks.getNodeType id. If
instance_pool_id
is specified, this field is not needed. - Num
Workers int - Number of worker nodes that this cluster should have. A cluster has one Spark driver and
num_workers
executors for a total ofnum_workers
+ 1 Spark nodes. - Policy
Id string - Identifier of Cluster Policy to validate cluster and preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via UI rather than sharing configuration for API-created clusters. For example, when you specify
policy_id
of external metastore policy, you still have to fill in relevant keys forspark_conf
. If relevant fields aren't filled in, then it will cause the configuration drift detected on each plan/apply, and Pulumi will try to apply the detected changes. - Runtime
Engine string - The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the spark_version value. Allowed values include:
PHOTON
,STANDARD
. - Single
User stringName - The optional user name of the user to assign to an interactive cluster. This field is required when using
data_security_mode
set toSINGLE_USER
or AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters). - Spark
Conf Dictionary<string, string> - should have the following items:
spark.databricks.repl.allowedLanguages
set to a list of supported languages, for example:python,sql
, orpython,sql,r
. Scala is not supported!spark.databricks.cluster.profile
set toserverless
- Spark
Env Vars Dictionary<string, string> - Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
- Spark
Version string - Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
- Ssh
Public Keys List<string> - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
- State string
- (string) State of the cluster.
- Url string
- Workload
Type ClusterWorkload Type
- Apply
Policy Default Values bool - Whether to use policy default values for missing cluster attributes.
- Autoscale
Cluster
Autoscale Args - Autotermination
Minutes int - Automatically terminate the cluster after being inactive for this time in minutes. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. Defaults to
60
. We highly recommend having this setting present for Interactive/BI clusters. - Aws
Attributes ClusterAws Attributes Args - Azure
Attributes ClusterAzure Attributes Args - Cluster
Id string - Cluster
Log ClusterConf Cluster Log Conf Args - Cluster
Mount []ClusterInfos Cluster Mount Info Args - Cluster
Name string - Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
- Custom Tags map[string]string
should have tag
ResourceClass
set to valueServerless
For example:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const clusterWithTableAccessControl = new databricks.Cluster("cluster_with_table_access_control", { clusterName: "Shared High-Concurrency", sparkVersion: latestLts.id, nodeTypeId: smallest.id, autoterminationMinutes: 20, sparkConf: { "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, customTags: { ResourceClass: "Serverless", }, });
import pulumi import pulumi_databricks as databricks cluster_with_table_access_control = databricks.Cluster("cluster_with_table_access_control", cluster_name="Shared High-Concurrency", spark_version=latest_lts["id"], node_type_id=smallest["id"], autotermination_minutes=20, spark_conf={ "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, custom_tags={ "ResourceClass": "Serverless", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var clusterWithTableAccessControl = new Databricks.Cluster("cluster_with_table_access_control", new() { ClusterName = "Shared High-Concurrency", SparkVersion = latestLts.Id, NodeTypeId = smallest.Id, AutoterminationMinutes = 20, SparkConf = { { "spark.databricks.repl.allowedLanguages", "python,sql" }, { "spark.databricks.cluster.profile", "serverless" }, }, CustomTags = { { "ResourceClass", "Serverless" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { _, err := databricks.NewCluster(ctx, "cluster_with_table_access_control", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared High-Concurrency"), SparkVersion: pulumi.Any(latestLts.Id), NodeTypeId: pulumi.Any(smallest.Id), AutoterminationMinutes: pulumi.Int(20), SparkConf: pulumi.StringMap{ "spark.databricks.repl.allowedLanguages": pulumi.String("python,sql"), "spark.databricks.cluster.profile": pulumi.String("serverless"), }, CustomTags: pulumi.StringMap{ "ResourceClass": pulumi.String("Serverless"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var clusterWithTableAccessControl = new Cluster("clusterWithTableAccessControl", ClusterArgs.builder() .clusterName("Shared High-Concurrency") .sparkVersion(latestLts.id()) .nodeTypeId(smallest.id()) .autoterminationMinutes(20) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.repl.allowedLanguages", "python,sql"), Map.entry("spark.databricks.cluster.profile", "serverless") )) .customTags(Map.of("ResourceClass", "Serverless")) .build()); } }
resources: clusterWithTableAccessControl: type: databricks:Cluster name: cluster_with_table_access_control properties: clusterName: Shared High-Concurrency sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 sparkConf: spark.databricks.repl.allowedLanguages: python,sql spark.databricks.cluster.profile: serverless customTags: ResourceClass: Serverless
- Data
Security Mode string - Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. Use LEGACY_PASSTHROUGH for a passthrough cluster and LEGACY_TABLE_ACL for a Table ACL cluster. If omitted, default security features are enabled. To disable security features, use NONE or the legacy mode NO_ISOLATION. In the Databricks UI this has recently been renamed Access Mode, and USER_ISOLATION has been renamed Shared, but use the values given here. - Default Tags map[string]string
- (map) Tags that are added by Databricks by default, regardless of any
custom_tags
that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name: , and any workspace and pool tags. - Docker
Image ClusterDocker Image Args - Driver
Instance Pool Id string - similar to
instance_pool_id
, but for driver node. If omitted, andinstance_pool_id
is specified, then the driver will be allocated from that pool. - Driver
Node Type Id string - The node type of the Spark driver. This field is optional; if unset, the API will set the driver node type to the same value as
node_type_id
defined above. - Enable
Elastic boolDisk - If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have
autotermination_minutes
andautoscale
attributes set. More documentation available at cluster configuration page. - Enable
Local boolDisk Encryption - Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node and is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
- Gcp
Attributes ClusterGcp Attributes Args - Idempotency
Token string - An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but it will return the existing running cluster's ID instead. If you specify the idempotency token, upon failure, you can retry until the request succeeds. Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
- Init
Scripts []ClusterInit Script Args - Instance
Pool stringId - To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to
TERMINATED
, the instances it used are returned to the pool and reused by a different cluster. - Is
Pinned bool - boolean value specifying if the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The pinned clusters' maximum number is limited to 100, so
apply
may fail if you have more than that (this number may change over time, so check Databricks documentation for actual number). - Libraries
[]Cluster
Library Args - No
Wait bool - If true, the provider will not wait for the cluster to reach
RUNNING
state when creating the cluster, allowing cluster creation and library installation to continue asynchronously. Defaults to false (the provider will wait for cluster creation and library installation to succeed). The following example demonstrates how to create an autoscaling cluster with Delta Cache enabled:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const smallest = databricks.getNodeType({ localDisk: true, }); const latestLts = databricks.getSparkVersion({ longTermSupport: true, }); const sharedAutoscaling = new databricks.Cluster("shared_autoscaling", { clusterName: "Shared Autoscaling", sparkVersion: latestLts.then(latestLts => latestLts.id), nodeTypeId: smallest.then(smallest => smallest.id), autoterminationMinutes: 20, autoscale: { minWorkers: 1, maxWorkers: 50, }, sparkConf: { "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", }, });
import pulumi import pulumi_databricks as databricks smallest = databricks.get_node_type(local_disk=True) latest_lts = databricks.get_spark_version(long_term_support=True) shared_autoscaling = databricks.Cluster("shared_autoscaling", cluster_name="Shared Autoscaling", spark_version=latest_lts.id, node_type_id=smallest.id, autotermination_minutes=20, autoscale={ "min_workers": 1, "max_workers": 50, }, spark_conf={ "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var smallest = Databricks.GetNodeType.Invoke(new() { LocalDisk = true, }); var latestLts = Databricks.GetSparkVersion.Invoke(new() { LongTermSupport = true, }); var sharedAutoscaling = new Databricks.Cluster("shared_autoscaling", new() { ClusterName = "Shared Autoscaling", SparkVersion = latestLts.Apply(getSparkVersionResult => getSparkVersionResult.Id), NodeTypeId = smallest.Apply(getNodeTypeResult => getNodeTypeResult.Id), AutoterminationMinutes = 20, Autoscale = new Databricks.Inputs.ClusterAutoscaleArgs { MinWorkers = 1, MaxWorkers = 50, }, SparkConf = { { "spark.databricks.io.cache.enabled", "true" }, { "spark.databricks.io.cache.maxDiskUsage", "50g" }, { "spark.databricks.io.cache.maxMetaDataCache", "1g" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { smallest, err := databricks.GetNodeType(ctx, &databricks.GetNodeTypeArgs{ LocalDisk: pulumi.BoolRef(true), }, nil) if err != nil { return err } latestLts, err := databricks.GetSparkVersion(ctx, &databricks.GetSparkVersionArgs{ LongTermSupport: pulumi.BoolRef(true), }, nil) if err != nil { return err } _, err = databricks.NewCluster(ctx, "shared_autoscaling", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared Autoscaling"), SparkVersion: pulumi.String(latestLts.Id), NodeTypeId: pulumi.String(smallest.Id), AutoterminationMinutes: pulumi.Int(20), Autoscale: &databricks.ClusterAutoscaleArgs{ MinWorkers: pulumi.Int(1), MaxWorkers: pulumi.Int(50), }, SparkConf: pulumi.StringMap{ "spark.databricks.io.cache.enabled": pulumi.String("true"), "spark.databricks.io.cache.maxDiskUsage": pulumi.String("50g"), "spark.databricks.io.cache.maxMetaDataCache": pulumi.String("1g"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.DatabricksFunctions; import com.pulumi.databricks.inputs.GetNodeTypeArgs; import com.pulumi.databricks.inputs.GetSparkVersionArgs; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterAutoscaleArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { final var smallest = DatabricksFunctions.getNodeType(GetNodeTypeArgs.builder() .localDisk(true) .build()); final var latestLts = DatabricksFunctions.getSparkVersion(GetSparkVersionArgs.builder() .longTermSupport(true) .build()); var sharedAutoscaling = new Cluster("sharedAutoscaling", ClusterArgs.builder() .clusterName("Shared Autoscaling") .sparkVersion(latestLts.applyValue(getSparkVersionResult -> getSparkVersionResult.id())) .nodeTypeId(smallest.applyValue(getNodeTypeResult -> getNodeTypeResult.id())) .autoterminationMinutes(20) .autoscale(ClusterAutoscaleArgs.builder() .minWorkers(1) .maxWorkers(50) .build()) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.io.cache.enabled", true), Map.entry("spark.databricks.io.cache.maxDiskUsage", "50g"), Map.entry("spark.databricks.io.cache.maxMetaDataCache", "1g") )) .build()); } }
resources: sharedAutoscaling: type: databricks:Cluster name: shared_autoscaling properties: clusterName: Shared Autoscaling sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 autoscale: minWorkers: 1 maxWorkers: 50 sparkConf: spark.databricks.io.cache.enabled: true spark.databricks.io.cache.maxDiskUsage: 50g spark.databricks.io.cache.maxMetaDataCache: 1g variables: smallest: fn::invoke: Function: databricks:getNodeType Arguments: localDisk: true latestLts: fn::invoke: Function: databricks:getSparkVersion Arguments: longTermSupport: true
- Node
Type stringId - Any supported databricks.getNodeType id. If
instance_pool_id
is specified, this field is not needed. - Num
Workers int - Number of worker nodes that this cluster should have. A cluster has one Spark driver and
num_workers
executors for a total ofnum_workers
+ 1 Spark nodes. - Policy
Id string - Identifier of Cluster Policy to validate cluster and preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via UI rather than sharing configuration for API-created clusters. For example, when you specify
policy_id
of external metastore policy, you still have to fill in relevant keys forspark_conf
. If relevant fields aren't filled in, then it will cause the configuration drift detected on each plan/apply, and Pulumi will try to apply the detected changes. - Runtime
Engine string - The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the spark_version value. Allowed values include:
PHOTON
,STANDARD
. - Single
User stringName - The optional user name of the user to assign to an interactive cluster. This field is required when using
data_security_mode
set toSINGLE_USER
or AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters). - Spark
Conf map[string]string - should have the following items:
spark.databricks.repl.allowedLanguages
set to a list of supported languages, for example:python,sql
, orpython,sql,r
. Scala is not supported!spark.databricks.cluster.profile
set toserverless
- Spark
Env Vars map[string]string - Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
- Spark
Version string - Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
- Ssh
Public Keys []string - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
- State string
- (string) State of the cluster.
- Url string
- Workload
Type ClusterWorkload Type Args
- apply
Policy Default Values Boolean - Whether to use policy default values for missing cluster attributes.
- autoscale
Cluster
Autoscale - autotermination
Minutes Integer - Automatically terminate the cluster after being inactive for this time in minutes. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. Defaults to
60
. We highly recommend having this setting present for Interactive/BI clusters. - aws
Attributes ClusterAws Attributes - azure
Attributes ClusterAzure Attributes - cluster
Id String - cluster
Log ClusterConf Cluster Log Conf - cluster
Mount List<ClusterInfos Cluster Mount Info> - cluster
Name String - Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
- custom Tags Map<String,String>
should have tag
ResourceClass
set to valueServerless
For example:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const clusterWithTableAccessControl = new databricks.Cluster("cluster_with_table_access_control", { clusterName: "Shared High-Concurrency", sparkVersion: latestLts.id, nodeTypeId: smallest.id, autoterminationMinutes: 20, sparkConf: { "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, customTags: { ResourceClass: "Serverless", }, });
import pulumi import pulumi_databricks as databricks cluster_with_table_access_control = databricks.Cluster("cluster_with_table_access_control", cluster_name="Shared High-Concurrency", spark_version=latest_lts["id"], node_type_id=smallest["id"], autotermination_minutes=20, spark_conf={ "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, custom_tags={ "ResourceClass": "Serverless", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var clusterWithTableAccessControl = new Databricks.Cluster("cluster_with_table_access_control", new() { ClusterName = "Shared High-Concurrency", SparkVersion = latestLts.Id, NodeTypeId = smallest.Id, AutoterminationMinutes = 20, SparkConf = { { "spark.databricks.repl.allowedLanguages", "python,sql" }, { "spark.databricks.cluster.profile", "serverless" }, }, CustomTags = { { "ResourceClass", "Serverless" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { _, err := databricks.NewCluster(ctx, "cluster_with_table_access_control", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared High-Concurrency"), SparkVersion: pulumi.Any(latestLts.Id), NodeTypeId: pulumi.Any(smallest.Id), AutoterminationMinutes: pulumi.Int(20), SparkConf: pulumi.StringMap{ "spark.databricks.repl.allowedLanguages": pulumi.String("python,sql"), "spark.databricks.cluster.profile": pulumi.String("serverless"), }, CustomTags: pulumi.StringMap{ "ResourceClass": pulumi.String("Serverless"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var clusterWithTableAccessControl = new Cluster("clusterWithTableAccessControl", ClusterArgs.builder() .clusterName("Shared High-Concurrency") .sparkVersion(latestLts.id()) .nodeTypeId(smallest.id()) .autoterminationMinutes(20) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.repl.allowedLanguages", "python,sql"), Map.entry("spark.databricks.cluster.profile", "serverless") )) .customTags(Map.of("ResourceClass", "Serverless")) .build()); } }
resources: clusterWithTableAccessControl: type: databricks:Cluster name: cluster_with_table_access_control properties: clusterName: Shared High-Concurrency sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 sparkConf: spark.databricks.repl.allowedLanguages: python,sql spark.databricks.cluster.profile: serverless customTags: ResourceClass: Serverless
- data
Security Mode String - Select the security features of the cluster. Unity Catalog requires SINGLE_USER or USER_ISOLATION mode. Use LEGACY_PASSTHROUGH for a passthrough cluster and LEGACY_TABLE_ACL for a Table ACL cluster. If omitted, default security features are enabled. To disable security features, use NONE or the legacy mode NO_ISOLATION. In the Databricks UI this has recently been renamed Access Mode, and USER_ISOLATION has been renamed Shared, but use the values given here. - default Tags Map<String,String>
- (map) Tags that are added by Databricks by default, regardless of any
custom_tags
that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name: , and any workspace and pool tags. - docker
Image ClusterDocker Image - driver
Instance Pool Id String - similar to
instance_pool_id
, but for driver node. If omitted, andinstance_pool_id
is specified, then the driver will be allocated from that pool. - driver
Node Type Id String - The node type of the Spark driver. This field is optional; if unset, the API will set the driver node type to the same value as
node_type_id
defined above. - enable
Elastic BooleanDisk - If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have
autotermination_minutes
andautoscale
attributes set. More documentation available at cluster configuration page. - enable
Local BooleanDisk Encryption - Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node and is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
- gcp
Attributes ClusterGcp Attributes - idempotency
Token String - An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but it will return the existing running cluster's ID instead. If you specify the idempotency token, upon failure, you can retry until the request succeeds. Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
- init
Scripts List<ClusterInit Script> - instance
Pool StringId - To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to
TERMINATED
, the instances it used are returned to the pool and reused by a different cluster. - is
- isPinned Boolean - boolean value specifying if the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The pinned clusters' maximum number is limited to 100, so apply may fail if you have more than that (this number may change over time, so check the Databricks documentation for the actual number).
- libraries List<ClusterLibrary>
- noWait Boolean - If true, the provider will not wait for the cluster to reach RUNNING state when creating the cluster, allowing cluster creation and library installation to continue asynchronously. Defaults to false (the provider will wait for cluster creation and library installation to succeed). The following example demonstrates how to create an autoscaling cluster with Delta Cache enabled:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const smallest = databricks.getNodeType({ localDisk: true, }); const latestLts = databricks.getSparkVersion({ longTermSupport: true, }); const sharedAutoscaling = new databricks.Cluster("shared_autoscaling", { clusterName: "Shared Autoscaling", sparkVersion: latestLts.then(latestLts => latestLts.id), nodeTypeId: smallest.then(smallest => smallest.id), autoterminationMinutes: 20, autoscale: { minWorkers: 1, maxWorkers: 50, }, sparkConf: { "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", }, });
import pulumi import pulumi_databricks as databricks smallest = databricks.get_node_type(local_disk=True) latest_lts = databricks.get_spark_version(long_term_support=True) shared_autoscaling = databricks.Cluster("shared_autoscaling", cluster_name="Shared Autoscaling", spark_version=latest_lts.id, node_type_id=smallest.id, autotermination_minutes=20, autoscale={ "min_workers": 1, "max_workers": 50, }, spark_conf={ "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var smallest = Databricks.GetNodeType.Invoke(new() { LocalDisk = true, }); var latestLts = Databricks.GetSparkVersion.Invoke(new() { LongTermSupport = true, }); var sharedAutoscaling = new Databricks.Cluster("shared_autoscaling", new() { ClusterName = "Shared Autoscaling", SparkVersion = latestLts.Apply(getSparkVersionResult => getSparkVersionResult.Id), NodeTypeId = smallest.Apply(getNodeTypeResult => getNodeTypeResult.Id), AutoterminationMinutes = 20, Autoscale = new Databricks.Inputs.ClusterAutoscaleArgs { MinWorkers = 1, MaxWorkers = 50, }, SparkConf = { { "spark.databricks.io.cache.enabled", "true" }, { "spark.databricks.io.cache.maxDiskUsage", "50g" }, { "spark.databricks.io.cache.maxMetaDataCache", "1g" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { smallest, err := databricks.GetNodeType(ctx, &databricks.GetNodeTypeArgs{ LocalDisk: pulumi.BoolRef(true), }, nil) if err != nil { return err } latestLts, err := databricks.GetSparkVersion(ctx, &databricks.GetSparkVersionArgs{ LongTermSupport: pulumi.BoolRef(true), }, nil) if err != nil { return err } _, err = databricks.NewCluster(ctx, "shared_autoscaling", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared Autoscaling"), SparkVersion: pulumi.String(latestLts.Id), NodeTypeId: pulumi.String(smallest.Id), AutoterminationMinutes: pulumi.Int(20), Autoscale: &databricks.ClusterAutoscaleArgs{ MinWorkers: pulumi.Int(1), MaxWorkers: pulumi.Int(50), }, SparkConf: pulumi.StringMap{ "spark.databricks.io.cache.enabled": pulumi.String("true"), "spark.databricks.io.cache.maxDiskUsage": pulumi.String("50g"), "spark.databricks.io.cache.maxMetaDataCache": pulumi.String("1g"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.DatabricksFunctions; import com.pulumi.databricks.inputs.GetNodeTypeArgs; import com.pulumi.databricks.inputs.GetSparkVersionArgs; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterAutoscaleArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { final var smallest = DatabricksFunctions.getNodeType(GetNodeTypeArgs.builder() .localDisk(true) .build()); final var latestLts = DatabricksFunctions.getSparkVersion(GetSparkVersionArgs.builder() .longTermSupport(true) .build()); var sharedAutoscaling = new Cluster("sharedAutoscaling", ClusterArgs.builder() .clusterName("Shared Autoscaling") .sparkVersion(latestLts.applyValue(getSparkVersionResult -> getSparkVersionResult.id())) .nodeTypeId(smallest.applyValue(getNodeTypeResult -> getNodeTypeResult.id())) .autoterminationMinutes(20) .autoscale(ClusterAutoscaleArgs.builder() .minWorkers(1) .maxWorkers(50) .build()) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.io.cache.enabled", "true"), Map.entry("spark.databricks.io.cache.maxDiskUsage", "50g"), Map.entry("spark.databricks.io.cache.maxMetaDataCache", "1g") )) .build()); } }
resources: sharedAutoscaling: type: databricks:Cluster name: shared_autoscaling properties: clusterName: Shared Autoscaling sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 autoscale: minWorkers: 1 maxWorkers: 50 sparkConf: spark.databricks.io.cache.enabled: true spark.databricks.io.cache.maxDiskUsage: 50g spark.databricks.io.cache.maxMetaDataCache: 1g variables: smallest: fn::invoke: Function: databricks:getNodeType Arguments: localDisk: true latestLts: fn::invoke: Function: databricks:getSparkVersion Arguments: longTermSupport: true
- nodeTypeId String - Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
- numWorkers Integer - Number of worker nodes that this cluster should have. A cluster has one Spark driver and num_workers executors for a total of num_workers + 1 Spark nodes.
- policyId String - Identifier of Cluster Policy to validate cluster and preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via UI rather than sharing configuration for API-created clusters. For example, when you specify policy_id of an external metastore policy, you still have to fill in relevant keys for spark_conf. If relevant fields aren't filled in, configuration drift will be detected on each plan/apply, and Pulumi will try to apply the detected changes.
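As a rough TypeScript sketch of referencing a policy from a cluster; databricks.ClusterPolicy and its name/definition arguments come from the provider's cluster policy resource and are not described on this page, and the policy definition shown is purely illustrative:

import * as databricks from "@pulumi/databricks";

const smallest = databricks.getNodeType({localDisk: true});
const latestLts = databricks.getSparkVersion({longTermSupport: true});

// A hypothetical policy that pins autotermination to 20 minutes.
const policy = new databricks.ClusterPolicy("minimal_policy", {
    name: "Minimal Cluster Policy",
    definition: JSON.stringify({
        autotermination_minutes: {type: "fixed", value: 20, hidden: true},
    }),
});

// The cluster is validated against the policy and inherits its defaults.
const governed = new databricks.Cluster("governed", {
    clusterName: "Policy Governed",
    sparkVersion: latestLts.then(v => v.id),
    nodeTypeId: smallest.then(n => n.id),
    policyId: policy.id,
    applyPolicyDefaultValues: true,
    numWorkers: 1,
});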
- runtimeEngine String - The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the spark_version value. Allowed values include: PHOTON, STANDARD.
- singleUserName String - The optional user name of the user to assign to an interactive cluster. This field is required when using data_security_mode set to SINGLE_USER or AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
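For example, a Unity Catalog cluster dedicated to a single principal combines data_security_mode and single_user_name; a short TypeScript sketch with a placeholder user name:

import * as databricks from "@pulumi/databricks";

const smallest = databricks.getNodeType({localDisk: true});
const latestLts = databricks.getSparkVersion({longTermSupport: true});

// SINGLE_USER mode requires naming the user (or service principal) the cluster is assigned to.
const singleUser = new databricks.Cluster("single_user", {
    clusterName: "Single User",
    sparkVersion: latestLts.then(v => v.id),
    nodeTypeId: smallest.then(n => n.id),
    autoterminationMinutes: 20,
    numWorkers: 1,
    dataSecurityMode: "SINGLE_USER",
    singleUserName: "someone@example.com", // placeholder user name
});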
- sparkConf Map<String,String> - should have the following items: spark.databricks.repl.allowedLanguages set to a list of supported languages, for example: python,sql or python,sql,r (Scala is not supported!), and spark.databricks.cluster.profile set to serverless.
- sparkEnvVars Map<String,String> - Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
- sparkVersion String - Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
- sshPublicKeys List<String> - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
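A combined TypeScript sketch of spark_env_vars and ssh_public_keys; the environment variable and the key material are placeholders:

import * as databricks from "@pulumi/databricks";

const smallest = databricks.getNodeType({localDisk: true});
const latestLts = databricks.getSparkVersion({longTermSupport: true});

const withEnvAndSsh = new databricks.Cluster("with_env_and_ssh", {
    clusterName: "Env And SSH",
    sparkVersion: latestLts.then(v => v.id),
    nodeTypeId: smallest.then(n => n.id),
    autoterminationMinutes: 20,
    numWorkers: 1,
    // Exported as ENVIRONMENT='staging' on the driver and workers.
    sparkEnvVars: {
        ENVIRONMENT: "staging",
    },
    // Up to 10 keys; the matching private key logs in as ubuntu on port 2200.
    sshPublicKeys: ["ssh-ed25519 AAAA... user@host"], // placeholder public key
});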
- state String - (string) State of the cluster.
- url String
- workloadType ClusterWorkloadType
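workload_type has no further description on this page. Assuming it follows the provider's clients sub-block with jobs/notebooks booleans (an assumption, not confirmed here), restricting a cluster to job workloads might look like this TypeScript sketch:

import * as databricks from "@pulumi/databricks";

const smallest = databricks.getNodeType({localDisk: true});
const latestLts = databricks.getSparkVersion({longTermSupport: true});

// Assumed shape: workloadType.clients toggles which workload kinds may run on the cluster.
const jobsOnly = new databricks.Cluster("jobs_only", {
    clusterName: "Jobs Only",
    sparkVersion: latestLts.then(v => v.id),
    nodeTypeId: smallest.then(n => n.id),
    autoterminationMinutes: 20,
    numWorkers: 1,
    workloadType: {
        clients: {
            jobs: true,
            notebooks: false,
        },
    },
});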
- apply
Policy booleanDefault Values - Whether to use policy default values for missing cluster attributes.
- autoscale
Cluster
Autoscale - autotermination
Minutes number - Automatically terminate the cluster after being inactive for this time in minutes. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. Defaults to
60
. We highly recommend having this setting present for Interactive/BI clusters. - aws
Attributes ClusterAws Attributes - azure
Attributes ClusterAzure Attributes - cluster
Id string - cluster
Log ClusterConf Cluster Log Conf - cluster
Mount ClusterInfos Cluster Mount Info[] - cluster
Name string - Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
- {[key: string]: string}
should have tag
ResourceClass
set to valueServerless
For example:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const clusterWithTableAccessControl = new databricks.Cluster("cluster_with_table_access_control", { clusterName: "Shared High-Concurrency", sparkVersion: latestLts.id, nodeTypeId: smallest.id, autoterminationMinutes: 20, sparkConf: { "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, customTags: { ResourceClass: "Serverless", }, });
import pulumi import pulumi_databricks as databricks cluster_with_table_access_control = databricks.Cluster("cluster_with_table_access_control", cluster_name="Shared High-Concurrency", spark_version=latest_lts["id"], node_type_id=smallest["id"], autotermination_minutes=20, spark_conf={ "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, custom_tags={ "ResourceClass": "Serverless", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var clusterWithTableAccessControl = new Databricks.Cluster("cluster_with_table_access_control", new() { ClusterName = "Shared High-Concurrency", SparkVersion = latestLts.Id, NodeTypeId = smallest.Id, AutoterminationMinutes = 20, SparkConf = { { "spark.databricks.repl.allowedLanguages", "python,sql" }, { "spark.databricks.cluster.profile", "serverless" }, }, CustomTags = { { "ResourceClass", "Serverless" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { _, err := databricks.NewCluster(ctx, "cluster_with_table_access_control", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared High-Concurrency"), SparkVersion: pulumi.Any(latestLts.Id), NodeTypeId: pulumi.Any(smallest.Id), AutoterminationMinutes: pulumi.Int(20), SparkConf: pulumi.StringMap{ "spark.databricks.repl.allowedLanguages": pulumi.String("python,sql"), "spark.databricks.cluster.profile": pulumi.String("serverless"), }, CustomTags: pulumi.StringMap{ "ResourceClass": pulumi.String("Serverless"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var clusterWithTableAccessControl = new Cluster("clusterWithTableAccessControl", ClusterArgs.builder() .clusterName("Shared High-Concurrency") .sparkVersion(latestLts.id()) .nodeTypeId(smallest.id()) .autoterminationMinutes(20) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.repl.allowedLanguages", "python,sql"), Map.entry("spark.databricks.cluster.profile", "serverless") )) .customTags(Map.of("ResourceClass", "Serverless")) .build()); } }
resources: clusterWithTableAccessControl: type: databricks:Cluster name: cluster_with_table_access_control properties: clusterName: Shared High-Concurrency sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 sparkConf: spark.databricks.repl.allowedLanguages: python,sql spark.databricks.cluster.profile: serverless customTags: ResourceClass: Serverless
- data
Security stringMode - Select the security features of the cluster. Unity Catalog requires
SINGLE_USER
orUSER_ISOLATION
mode.LEGACY_PASSTHROUGH
for passthrough cluster andLEGACY_TABLE_ACL
for Table ACL cluster. If omitted, default security features are enabled. To disable security features useNONE
or legacy modeNO_ISOLATION
. In the Databricks UI, this has recently been renamed Access Mode and USER_ISOLATION has been renamed Shared, but use these terms here. - {[key: string]: string}
- (map) Tags that are added by Databricks by default, regardless of any
custom_tags
that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name: , and any workspace and pool tags. - docker
Image ClusterDocker Image - driver
Instance stringPool Id - similar to
instance_pool_id
, but for driver node. If omitted, andinstance_pool_id
is specified, then the driver will be allocated from that pool. - driver
Node stringType Id - The node type of the Spark driver. This field is optional; if unset, API will set the driver node type to the same value as
node_type_id
defined above. - enable
Elastic booleanDisk - If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have
autotermination_minutes
andautoscale
attributes set. More documentation available at cluster configuration page. - enable
Local booleanDisk Encryption - Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node and is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
- gcp
Attributes ClusterGcp Attributes - idempotency
Token string - An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but it will return the existing running cluster's ID instead. If you specify the idempotency token, upon failure, you can retry until the request succeeds. Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
- init
Scripts ClusterInit Script[] - instance
Pool stringId - To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to
TERMINATED
, the instances it used are returned to the pool and reused by a different cluster. - is
Pinned boolean - boolean value specifying if the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The pinned clusters' maximum number is limited to 100, so
apply
may fail if you have more than that (this number may change over time, so check Databricks documentation for actual number). - libraries
Cluster
Library[] - no
Wait boolean If true, the provider will not wait for the cluster to reach
RUNNING
state when creating the cluster, allowing cluster creation and library installation to continue asynchronously. Defaults to false (the provider will wait for cluster creation and library installation to succeed).The following example demonstrates how to create an autoscaling cluster with Delta Cache enabled:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const smallest = databricks.getNodeType({ localDisk: true, }); const latestLts = databricks.getSparkVersion({ longTermSupport: true, }); const sharedAutoscaling = new databricks.Cluster("shared_autoscaling", { clusterName: "Shared Autoscaling", sparkVersion: latestLts.then(latestLts => latestLts.id), nodeTypeId: smallest.then(smallest => smallest.id), autoterminationMinutes: 20, autoscale: { minWorkers: 1, maxWorkers: 50, }, sparkConf: { "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", }, });
import pulumi import pulumi_databricks as databricks smallest = databricks.get_node_type(local_disk=True) latest_lts = databricks.get_spark_version(long_term_support=True) shared_autoscaling = databricks.Cluster("shared_autoscaling", cluster_name="Shared Autoscaling", spark_version=latest_lts.id, node_type_id=smallest.id, autotermination_minutes=20, autoscale={ "min_workers": 1, "max_workers": 50, }, spark_conf={ "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var smallest = Databricks.GetNodeType.Invoke(new() { LocalDisk = true, }); var latestLts = Databricks.GetSparkVersion.Invoke(new() { LongTermSupport = true, }); var sharedAutoscaling = new Databricks.Cluster("shared_autoscaling", new() { ClusterName = "Shared Autoscaling", SparkVersion = latestLts.Apply(getSparkVersionResult => getSparkVersionResult.Id), NodeTypeId = smallest.Apply(getNodeTypeResult => getNodeTypeResult.Id), AutoterminationMinutes = 20, Autoscale = new Databricks.Inputs.ClusterAutoscaleArgs { MinWorkers = 1, MaxWorkers = 50, }, SparkConf = { { "spark.databricks.io.cache.enabled", "true" }, { "spark.databricks.io.cache.maxDiskUsage", "50g" }, { "spark.databricks.io.cache.maxMetaDataCache", "1g" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { smallest, err := databricks.GetNodeType(ctx, &databricks.GetNodeTypeArgs{ LocalDisk: pulumi.BoolRef(true), }, nil) if err != nil { return err } latestLts, err := databricks.GetSparkVersion(ctx, &databricks.GetSparkVersionArgs{ LongTermSupport: pulumi.BoolRef(true), }, nil) if err != nil { return err } _, err = databricks.NewCluster(ctx, "shared_autoscaling", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared Autoscaling"), SparkVersion: pulumi.String(latestLts.Id), NodeTypeId: pulumi.String(smallest.Id), AutoterminationMinutes: pulumi.Int(20), Autoscale: &databricks.ClusterAutoscaleArgs{ MinWorkers: pulumi.Int(1), MaxWorkers: pulumi.Int(50), }, SparkConf: pulumi.StringMap{ "spark.databricks.io.cache.enabled": pulumi.String("true"), "spark.databricks.io.cache.maxDiskUsage": pulumi.String("50g"), "spark.databricks.io.cache.maxMetaDataCache": pulumi.String("1g"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.DatabricksFunctions; import com.pulumi.databricks.inputs.GetNodeTypeArgs; import com.pulumi.databricks.inputs.GetSparkVersionArgs; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterAutoscaleArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { final var smallest = DatabricksFunctions.getNodeType(GetNodeTypeArgs.builder() .localDisk(true) .build()); final var latestLts = DatabricksFunctions.getSparkVersion(GetSparkVersionArgs.builder() .longTermSupport(true) .build()); var sharedAutoscaling = new Cluster("sharedAutoscaling", ClusterArgs.builder() .clusterName("Shared Autoscaling") .sparkVersion(latestLts.applyValue(getSparkVersionResult -> getSparkVersionResult.id())) .nodeTypeId(smallest.applyValue(getNodeTypeResult -> getNodeTypeResult.id())) .autoterminationMinutes(20) .autoscale(ClusterAutoscaleArgs.builder() .minWorkers(1) .maxWorkers(50) .build()) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.io.cache.enabled", "true"), Map.entry("spark.databricks.io.cache.maxDiskUsage", "50g"), Map.entry("spark.databricks.io.cache.maxMetaDataCache", "1g") )) .build()); } }
resources: sharedAutoscaling: type: databricks:Cluster name: shared_autoscaling properties: clusterName: Shared Autoscaling sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 autoscale: minWorkers: 1 maxWorkers: 50 sparkConf: spark.databricks.io.cache.enabled: true spark.databricks.io.cache.maxDiskUsage: 50g spark.databricks.io.cache.maxMetaDataCache: 1g variables: smallest: fn::invoke: Function: databricks:getNodeType Arguments: localDisk: true latestLts: fn::invoke: Function: databricks:getSparkVersion Arguments: longTermSupport: true
- node
Type stringId - Any supported databricks.getNodeType id. If
instance_pool_id
is specified, this field is not needed. - num
Workers number - Number of worker nodes that this cluster should have. A cluster has one Spark driver and
num_workers
executors for a total ofnum_workers
+ 1 Spark nodes. - policy
Id string - Identifier of Cluster Policy to validate cluster and preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via UI rather than sharing configuration for API-created clusters. For example, when you specify
policy_id
of external metastore policy, you still have to fill in relevant keys forspark_conf
. If relevant fields aren't filled in, then it will cause the configuration drift detected on each plan/apply, and Pulumi will try to apply the detected changes. - runtime
Engine string - The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the spark_version value. Allowed values include:
PHOTON
,STANDARD
. - single
User stringName - The optional user name of the user to assign to an interactive cluster. This field is required when using
data_security_mode
set toSINGLE_USER
or AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters). - spark
Conf {[key: string]: string} - should have the following items:
spark.databricks.repl.allowedLanguages
set to a list of supported languages, for example:python,sql
, orpython,sql,r
. Scala is not supported!spark.databricks.cluster.profile
set toserverless
- spark
Env {[key: string]: string}Vars - Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
- spark
Version string - Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
- ssh
Public string[]Keys - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name ubuntu on port 2200. You can specify up to 10 keys.
- state string
- (string) State of the cluster.
- url string
- workload
Type ClusterWorkload Type
- apply_
policy_ booldefault_ values - Whether to use policy default values for missing cluster attributes.
- autoscale
Cluster
Autoscale Args - autotermination_
minutes int - Automatically terminate the cluster after being inactive for this time in minutes. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. Defaults to
60
. We highly recommend having this setting present for Interactive/BI clusters. - aws_
attributes ClusterAws Attributes Args - azure_
attributes ClusterAzure Attributes Args - cluster_
id str - cluster_
log_ Clusterconf Cluster Log Conf Args - cluster_
mount_ Sequence[Clusterinfos Cluster Mount Info Args] - cluster_
name str - Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
- Mapping[str, str]
should have tag
ResourceClass
set to valueServerless
For example:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const clusterWithTableAccessControl = new databricks.Cluster("cluster_with_table_access_control", { clusterName: "Shared High-Concurrency", sparkVersion: latestLts.id, nodeTypeId: smallest.id, autoterminationMinutes: 20, sparkConf: { "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, customTags: { ResourceClass: "Serverless", }, });
import pulumi import pulumi_databricks as databricks cluster_with_table_access_control = databricks.Cluster("cluster_with_table_access_control", cluster_name="Shared High-Concurrency", spark_version=latest_lts["id"], node_type_id=smallest["id"], autotermination_minutes=20, spark_conf={ "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, custom_tags={ "ResourceClass": "Serverless", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var clusterWithTableAccessControl = new Databricks.Cluster("cluster_with_table_access_control", new() { ClusterName = "Shared High-Concurrency", SparkVersion = latestLts.Id, NodeTypeId = smallest.Id, AutoterminationMinutes = 20, SparkConf = { { "spark.databricks.repl.allowedLanguages", "python,sql" }, { "spark.databricks.cluster.profile", "serverless" }, }, CustomTags = { { "ResourceClass", "Serverless" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { _, err := databricks.NewCluster(ctx, "cluster_with_table_access_control", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared High-Concurrency"), SparkVersion: pulumi.Any(latestLts.Id), NodeTypeId: pulumi.Any(smallest.Id), AutoterminationMinutes: pulumi.Int(20), SparkConf: pulumi.StringMap{ "spark.databricks.repl.allowedLanguages": pulumi.String("python,sql"), "spark.databricks.cluster.profile": pulumi.String("serverless"), }, CustomTags: pulumi.StringMap{ "ResourceClass": pulumi.String("Serverless"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var clusterWithTableAccessControl = new Cluster("clusterWithTableAccessControl", ClusterArgs.builder() .clusterName("Shared High-Concurrency") .sparkVersion(latestLts.id()) .nodeTypeId(smallest.id()) .autoterminationMinutes(20) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.repl.allowedLanguages", "python,sql"), Map.entry("spark.databricks.cluster.profile", "serverless") )) .customTags(Map.of("ResourceClass", "Serverless")) .build()); } }
resources: clusterWithTableAccessControl: type: databricks:Cluster name: cluster_with_table_access_control properties: clusterName: Shared High-Concurrency sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 sparkConf: spark.databricks.repl.allowedLanguages: python,sql spark.databricks.cluster.profile: serverless customTags: ResourceClass: Serverless
- data_
security_ strmode - Select the security features of the cluster. Unity Catalog requires
SINGLE_USER
orUSER_ISOLATION
mode.LEGACY_PASSTHROUGH
for passthrough cluster andLEGACY_TABLE_ACL
for Table ACL cluster. If omitted, default security features are enabled. To disable security features useNONE
or legacy modeNO_ISOLATION
. In the Databricks UI, this has recently been renamed Access Mode and USER_ISOLATION has been renamed Shared, but use these terms here. - Mapping[str, str]
- (map) Tags that are added by Databricks by default, regardless of any
custom_tags
that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name: , and any workspace and pool tags. - docker_
image ClusterDocker Image Args - driver_
instance_ strpool_ id - similar to
instance_pool_id
, but for driver node. If omitted, andinstance_pool_id
is specified, then the driver will be allocated from that pool. - driver_
node_ strtype_ id - The node type of the Spark driver. This field is optional; if unset, API will set the driver node type to the same value as
node_type_id
defined above. - enable_
elastic_ booldisk - If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have
autotermination_minutes
andautoscale
attributes set. More documentation available at cluster configuration page. - enable_
local_ booldisk_ encryption - Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node and is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
- gcp_
attributes ClusterGcp Attributes Args - idempotency_
token str - An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but it will return the existing running cluster's ID instead. If you specify the idempotency token, upon failure, you can retry until the request succeeds. Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
- init_
scripts Sequence[ClusterInit Script Args] - instance_
pool_ strid - To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to
TERMINATED
, the instances it used are returned to the pool and reused by a different cluster. - is_
pinned bool - boolean value specifying if the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The pinned clusters' maximum number is limited to 100, so
apply
may fail if you have more than that (this number may change over time, so check Databricks documentation for actual number). - libraries
Sequence[Cluster
Library Args] - no_
wait bool If true, the provider will not wait for the cluster to reach
RUNNING
state when creating the cluster, allowing cluster creation and library installation to continue asynchronously. Defaults to false (the provider will wait for cluster creation and library installation to succeed).The following example demonstrates how to create an autoscaling cluster with Delta Cache enabled:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const smallest = databricks.getNodeType({ localDisk: true, }); const latestLts = databricks.getSparkVersion({ longTermSupport: true, }); const sharedAutoscaling = new databricks.Cluster("shared_autoscaling", { clusterName: "Shared Autoscaling", sparkVersion: latestLts.then(latestLts => latestLts.id), nodeTypeId: smallest.then(smallest => smallest.id), autoterminationMinutes: 20, autoscale: { minWorkers: 1, maxWorkers: 50, }, sparkConf: { "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", }, });
import pulumi import pulumi_databricks as databricks smallest = databricks.get_node_type(local_disk=True) latest_lts = databricks.get_spark_version(long_term_support=True) shared_autoscaling = databricks.Cluster("shared_autoscaling", cluster_name="Shared Autoscaling", spark_version=latest_lts.id, node_type_id=smallest.id, autotermination_minutes=20, autoscale={ "min_workers": 1, "max_workers": 50, }, spark_conf={ "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var smallest = Databricks.GetNodeType.Invoke(new() { LocalDisk = true, }); var latestLts = Databricks.GetSparkVersion.Invoke(new() { LongTermSupport = true, }); var sharedAutoscaling = new Databricks.Cluster("shared_autoscaling", new() { ClusterName = "Shared Autoscaling", SparkVersion = latestLts.Apply(getSparkVersionResult => getSparkVersionResult.Id), NodeTypeId = smallest.Apply(getNodeTypeResult => getNodeTypeResult.Id), AutoterminationMinutes = 20, Autoscale = new Databricks.Inputs.ClusterAutoscaleArgs { MinWorkers = 1, MaxWorkers = 50, }, SparkConf = { { "spark.databricks.io.cache.enabled", "true" }, { "spark.databricks.io.cache.maxDiskUsage", "50g" }, { "spark.databricks.io.cache.maxMetaDataCache", "1g" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { smallest, err := databricks.GetNodeType(ctx, &databricks.GetNodeTypeArgs{ LocalDisk: pulumi.BoolRef(true), }, nil) if err != nil { return err } latestLts, err := databricks.GetSparkVersion(ctx, &databricks.GetSparkVersionArgs{ LongTermSupport: pulumi.BoolRef(true), }, nil) if err != nil { return err } _, err = databricks.NewCluster(ctx, "shared_autoscaling", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared Autoscaling"), SparkVersion: pulumi.String(latestLts.Id), NodeTypeId: pulumi.String(smallest.Id), AutoterminationMinutes: pulumi.Int(20), Autoscale: &databricks.ClusterAutoscaleArgs{ MinWorkers: pulumi.Int(1), MaxWorkers: pulumi.Int(50), }, SparkConf: pulumi.StringMap{ "spark.databricks.io.cache.enabled": pulumi.String("true"), "spark.databricks.io.cache.maxDiskUsage": pulumi.String("50g"), "spark.databricks.io.cache.maxMetaDataCache": pulumi.String("1g"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.DatabricksFunctions; import com.pulumi.databricks.inputs.GetNodeTypeArgs; import com.pulumi.databricks.inputs.GetSparkVersionArgs; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterAutoscaleArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { final var smallest = DatabricksFunctions.getNodeType(GetNodeTypeArgs.builder() .localDisk(true) .build()); final var latestLts = DatabricksFunctions.getSparkVersion(GetSparkVersionArgs.builder() .longTermSupport(true) .build()); var sharedAutoscaling = new Cluster("sharedAutoscaling", ClusterArgs.builder() .clusterName("Shared Autoscaling") .sparkVersion(latestLts.applyValue(getSparkVersionResult -> getSparkVersionResult.id())) .nodeTypeId(smallest.applyValue(getNodeTypeResult -> getNodeTypeResult.id())) .autoterminationMinutes(20) .autoscale(ClusterAutoscaleArgs.builder() .minWorkers(1) .maxWorkers(50) .build()) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.io.cache.enabled", "true"), Map.entry("spark.databricks.io.cache.maxDiskUsage", "50g"), Map.entry("spark.databricks.io.cache.maxMetaDataCache", "1g") )) .build()); } }
resources: sharedAutoscaling: type: databricks:Cluster name: shared_autoscaling properties: clusterName: Shared Autoscaling sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 autoscale: minWorkers: 1 maxWorkers: 50 sparkConf: spark.databricks.io.cache.enabled: true spark.databricks.io.cache.maxDiskUsage: 50g spark.databricks.io.cache.maxMetaDataCache: 1g variables: smallest: fn::invoke: Function: databricks:getNodeType Arguments: localDisk: true latestLts: fn::invoke: Function: databricks:getSparkVersion Arguments: longTermSupport: true
- node_
type_ strid - Any supported databricks.getNodeType id. If
instance_pool_id
is specified, this field is not needed. - num_
workers int - Number of worker nodes that this cluster should have. A cluster has one Spark driver and
num_workers
executors for a total ofnum_workers
+ 1 Spark nodes. - policy_
id str - Identifier of Cluster Policy to validate cluster and preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via UI rather than sharing configuration for API-created clusters. For example, when you specify
policy_id
of external metastore policy, you still have to fill in relevant keys forspark_conf
. If relevant fields aren't filled in, then it will cause the configuration drift detected on each plan/apply, and Pulumi will try to apply the detected changes. - runtime_
engine str - The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the spark_version value. Allowed values include:
PHOTON
,STANDARD
. - single_
user_ strname - The optional user name of the user to assign to an interactive cluster. This field is required when using
data_security_mode
set toSINGLE_USER
or AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters). - spark_
conf Mapping[str, str] - should have the following items:
spark.databricks.repl.allowedLanguages
set to a list of supported languages, for example:python,sql
, orpython,sql,r
. Scala is not supported!spark.databricks.cluster.profile
set toserverless
- spark_
env_ Mapping[str, str]vars - Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
- spark_
version str - Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
- ssh_
public_ Sequence[str]keys - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to login with the user name ubuntu on port 2200. You can specify up to 10 keys.
- state str
- (string) State of the cluster.
- url str
- workload_
type ClusterWorkload Type Args
- apply
Policy BooleanDefault Values - Whether to use policy default values for missing cluster attributes.
- autoscale Property Map
- autotermination
Minutes Number - Automatically terminate the cluster after being inactive for this time in minutes. If specified, the threshold must be between 10 and 10000 minutes. You can also set this value to 0 to explicitly disable automatic termination. Defaults to
60
. We highly recommend having this setting present for Interactive/BI clusters. - aws
Attributes Property Map - azure
Attributes Property Map - cluster
Id String - cluster
Log Property MapConf - cluster
Mount List<Property Map>Infos - cluster
Name String - Cluster name, which doesn’t have to be unique. If not specified at creation, the cluster name will be an empty string.
- Map<String>
should have tag
ResourceClass
set to valueServerless
For example:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const clusterWithTableAccessControl = new databricks.Cluster("cluster_with_table_access_control", { clusterName: "Shared High-Concurrency", sparkVersion: latestLts.id, nodeTypeId: smallest.id, autoterminationMinutes: 20, sparkConf: { "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, customTags: { ResourceClass: "Serverless", }, });
import pulumi import pulumi_databricks as databricks cluster_with_table_access_control = databricks.Cluster("cluster_with_table_access_control", cluster_name="Shared High-Concurrency", spark_version=latest_lts["id"], node_type_id=smallest["id"], autotermination_minutes=20, spark_conf={ "spark.databricks.repl.allowedLanguages": "python,sql", "spark.databricks.cluster.profile": "serverless", }, custom_tags={ "ResourceClass": "Serverless", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var clusterWithTableAccessControl = new Databricks.Cluster("cluster_with_table_access_control", new() { ClusterName = "Shared High-Concurrency", SparkVersion = latestLts.Id, NodeTypeId = smallest.Id, AutoterminationMinutes = 20, SparkConf = { { "spark.databricks.repl.allowedLanguages", "python,sql" }, { "spark.databricks.cluster.profile", "serverless" }, }, CustomTags = { { "ResourceClass", "Serverless" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { _, err := databricks.NewCluster(ctx, "cluster_with_table_access_control", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared High-Concurrency"), SparkVersion: pulumi.Any(latestLts.Id), NodeTypeId: pulumi.Any(smallest.Id), AutoterminationMinutes: pulumi.Int(20), SparkConf: pulumi.StringMap{ "spark.databricks.repl.allowedLanguages": pulumi.String("python,sql"), "spark.databricks.cluster.profile": pulumi.String("serverless"), }, CustomTags: pulumi.StringMap{ "ResourceClass": pulumi.String("Serverless"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var clusterWithTableAccessControl = new Cluster("clusterWithTableAccessControl", ClusterArgs.builder() .clusterName("Shared High-Concurrency") .sparkVersion(latestLts.id()) .nodeTypeId(smallest.id()) .autoterminationMinutes(20) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.repl.allowedLanguages", "python,sql"), Map.entry("spark.databricks.cluster.profile", "serverless") )) .customTags(Map.of("ResourceClass", "Serverless")) .build()); } }
resources: clusterWithTableAccessControl: type: databricks:Cluster name: cluster_with_table_access_control properties: clusterName: Shared High-Concurrency sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 sparkConf: spark.databricks.repl.allowedLanguages: python,sql spark.databricks.cluster.profile: serverless customTags: ResourceClass: Serverless
- data
Security StringMode - Select the security features of the cluster. Unity Catalog requires
SINGLE_USER
orUSER_ISOLATION
mode.LEGACY_PASSTHROUGH
for passthrough cluster andLEGACY_TABLE_ACL
for Table ACL cluster. If omitted, default security features are enabled. To disable security features useNONE
or legacy modeNO_ISOLATION
. In the Databricks UI, this has recently been renamed Access Mode and USER_ISOLATION has been renamed Shared, but use these terms here. - Map<String>
- (map) Tags that are added by Databricks by default, regardless of any
custom_tags
that may have been added. These include: Vendor: Databricks, Creator: <username_of_creator>, ClusterName: <name_of_cluster>, ClusterId: <id_of_cluster>, Name: , and any workspace and pool tags. - docker
Image Property Map - driver
Instance StringPool Id - similar to
instance_pool_id
, but for driver node. If omitted, andinstance_pool_id
is specified, then the driver will be allocated from that pool. - driver
Node StringType Id - The node type of the Spark driver. This field is optional; if unset, API will set the driver node type to the same value as
node_type_id
defined above. - enable
Elastic BooleanDisk - If you don’t want to allocate a fixed number of EBS volumes at cluster creation time, use autoscaling local storage. With autoscaling local storage, Databricks monitors the amount of free disk space available on your cluster’s Spark workers. If a worker begins to run too low on disk, Databricks automatically attaches a new EBS volume to the worker before it runs out of disk space. EBS volumes are attached up to a limit of 5 TB of total disk space per instance (including the instance’s local storage). To scale down EBS usage, make sure you have
autotermination_minutes
andautoscale
attributes set. More documentation available at cluster configuration page. - enable
Local BooleanDisk Encryption - Some instance types you use to run clusters may have locally attached disks. Databricks may store shuffle data or temporary data on these locally attached disks. To ensure that all data at rest is encrypted for all storage types, including shuffle data stored temporarily on your cluster’s local disks, you can enable local disk encryption. When local disk encryption is enabled, Databricks generates an encryption key locally unique to each cluster node and uses it to encrypt all data stored on local disks. The scope of the key is local to each cluster node and is destroyed along with the cluster node itself. During its lifetime, the key resides in memory for encryption and decryption and is stored encrypted on the disk. Your workloads may run more slowly because of the performance impact of reading and writing encrypted data to and from local volumes. This feature is not available for all Azure Databricks subscriptions. Contact your Microsoft or Databricks account representative to request access.
- gcp
Attributes Property Map - idempotency
Token String - An optional token to guarantee the idempotency of cluster creation requests. If an active cluster with the provided token already exists, the request will not create a new cluster, but it will return the existing running cluster's ID instead. If you specify the idempotency token, upon failure, you can retry until the request succeeds. Databricks platform guarantees to launch exactly one cluster with that idempotency token. This token should have at most 64 characters.
- init
Scripts List<Property Map> - instance
Pool StringId - To reduce cluster start time, you can attach a cluster to a predefined pool of idle instances. When attached to a pool, a cluster allocates its driver and worker nodes from the pool. If the pool does not have sufficient idle resources to accommodate the cluster’s request, it expands by allocating new instances from the instance provider. When an attached cluster changes its state to
TERMINATED
, the instances it used are returned to the pool and reused by a different cluster. - is
Pinned Boolean - boolean value specifying if the cluster is pinned (not pinned by default). You must be a Databricks administrator to use this. The pinned clusters' maximum number is limited to 100, so
apply
may fail if you have more than that (this number may change over time, so check Databricks documentation for actual number). - libraries List<Property Map>
- no
Wait Boolean If true, the provider will not wait for the cluster to reach
RUNNING
state when creating the cluster, allowing cluster creation and library installation to continue asynchronously. Defaults to false (the provider will wait for cluster creation and library installation to succeed).The following example demonstrates how to create an autoscaling cluster with Delta Cache enabled:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const smallest = databricks.getNodeType({ localDisk: true, }); const latestLts = databricks.getSparkVersion({ longTermSupport: true, }); const sharedAutoscaling = new databricks.Cluster("shared_autoscaling", { clusterName: "Shared Autoscaling", sparkVersion: latestLts.then(latestLts => latestLts.id), nodeTypeId: smallest.then(smallest => smallest.id), autoterminationMinutes: 20, autoscale: { minWorkers: 1, maxWorkers: 50, }, sparkConf: { "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", }, });
import pulumi import pulumi_databricks as databricks smallest = databricks.get_node_type(local_disk=True) latest_lts = databricks.get_spark_version(long_term_support=True) shared_autoscaling = databricks.Cluster("shared_autoscaling", cluster_name="Shared Autoscaling", spark_version=latest_lts.id, node_type_id=smallest.id, autotermination_minutes=20, autoscale={ "min_workers": 1, "max_workers": 50, }, spark_conf={ "spark.databricks.io.cache.enabled": "true", "spark.databricks.io.cache.maxDiskUsage": "50g", "spark.databricks.io.cache.maxMetaDataCache": "1g", })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var smallest = Databricks.GetNodeType.Invoke(new() { LocalDisk = true, }); var latestLts = Databricks.GetSparkVersion.Invoke(new() { LongTermSupport = true, }); var sharedAutoscaling = new Databricks.Cluster("shared_autoscaling", new() { ClusterName = "Shared Autoscaling", SparkVersion = latestLts.Apply(getSparkVersionResult => getSparkVersionResult.Id), NodeTypeId = smallest.Apply(getNodeTypeResult => getNodeTypeResult.Id), AutoterminationMinutes = 20, Autoscale = new Databricks.Inputs.ClusterAutoscaleArgs { MinWorkers = 1, MaxWorkers = 50, }, SparkConf = { { "spark.databricks.io.cache.enabled", "true" }, { "spark.databricks.io.cache.maxDiskUsage", "50g" }, { "spark.databricks.io.cache.maxMetaDataCache", "1g" }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { smallest, err := databricks.GetNodeType(ctx, &databricks.GetNodeTypeArgs{ LocalDisk: pulumi.BoolRef(true), }, nil) if err != nil { return err } latestLts, err := databricks.GetSparkVersion(ctx, &databricks.GetSparkVersionArgs{ LongTermSupport: pulumi.BoolRef(true), }, nil) if err != nil { return err } _, err = databricks.NewCluster(ctx, "shared_autoscaling", &databricks.ClusterArgs{ ClusterName: pulumi.String("Shared Autoscaling"), SparkVersion: pulumi.String(latestLts.Id), NodeTypeId: pulumi.String(smallest.Id), AutoterminationMinutes: pulumi.Int(20), Autoscale: &databricks.ClusterAutoscaleArgs{ MinWorkers: pulumi.Int(1), MaxWorkers: pulumi.Int(50), }, SparkConf: pulumi.StringMap{ "spark.databricks.io.cache.enabled": pulumi.String("true"), "spark.databricks.io.cache.maxDiskUsage": pulumi.String("50g"), "spark.databricks.io.cache.maxMetaDataCache": pulumi.String("1g"), }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.DatabricksFunctions; import com.pulumi.databricks.inputs.GetNodeTypeArgs; import com.pulumi.databricks.inputs.GetSparkVersionArgs; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterAutoscaleArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { final var smallest = DatabricksFunctions.getNodeType(GetNodeTypeArgs.builder() .localDisk(true) .build()); final var latestLts = DatabricksFunctions.getSparkVersion(GetSparkVersionArgs.builder() .longTermSupport(true) .build()); var sharedAutoscaling = new Cluster("sharedAutoscaling", ClusterArgs.builder() .clusterName("Shared Autoscaling") .sparkVersion(latestLts.applyValue(getSparkVersionResult -> getSparkVersionResult.id())) .nodeTypeId(smallest.applyValue(getNodeTypeResult -> getNodeTypeResult.id())) .autoterminationMinutes(20) .autoscale(ClusterAutoscaleArgs.builder() .minWorkers(1) .maxWorkers(50) .build()) .sparkConf(Map.ofEntries( Map.entry("spark.databricks.io.cache.enabled", "true"), Map.entry("spark.databricks.io.cache.maxDiskUsage", "50g"), Map.entry("spark.databricks.io.cache.maxMetaDataCache", "1g") )) .build()); } }
resources: sharedAutoscaling: type: databricks:Cluster name: shared_autoscaling properties: clusterName: Shared Autoscaling sparkVersion: ${latestLts.id} nodeTypeId: ${smallest.id} autoterminationMinutes: 20 autoscale: minWorkers: 1 maxWorkers: 50 sparkConf: spark.databricks.io.cache.enabled: true spark.databricks.io.cache.maxDiskUsage: 50g spark.databricks.io.cache.maxMetaDataCache: 1g variables: smallest: fn::invoke: Function: databricks:getNodeType Arguments: localDisk: true latestLts: fn::invoke: Function: databricks:getSparkVersion Arguments: longTermSupport: true
- nodeTypeId String - Any supported databricks.getNodeType id. If instance_pool_id is specified, this field is not needed.
- numWorkers Number - Number of worker nodes that this cluster should have. A cluster has one Spark driver and num_workers executors, for a total of num_workers + 1 Spark nodes.
- policyId String - Identifier of a Cluster Policy used to validate the cluster and preset certain defaults. The primary use for cluster policies is to allow users to create policy-scoped clusters via the UI rather than sharing configuration for API-created clusters. For example, when you specify the policy_id of an external metastore policy, you still have to fill in the relevant keys for spark_conf. If relevant fields aren't filled in, configuration drift will be detected on each plan/apply, and Pulumi will try to apply the detected changes.
- runtimeEngine String - The type of runtime engine to use. If not specified, the runtime engine type is inferred based on the spark_version value. Allowed values include: PHOTON, STANDARD.
- singleUserName String - The optional user name of the user to assign to an interactive cluster. This field is required when data_security_mode is set to SINGLE_USER or when using AAD Passthrough for Azure Data Lake Storage (ADLS) with a single-user cluster (i.e., not high-concurrency clusters).
- sparkConf Map<String> - Should have the following items: spark.databricks.repl.allowedLanguages set to a list of supported languages, for example python,sql or python,sql,r (Scala is not supported!), and spark.databricks.cluster.profile set to serverless (see the sketch after this list).
- sparkEnvVars Map<String> - Map with environment variable key-value pairs to fine-tune Spark clusters. Key-value pairs of the form (X,Y) are exported (i.e., X='Y') while launching the driver and workers.
- sparkVersion String - Runtime version of the cluster. Any supported databricks.getSparkVersion id. We advise using Cluster Policies to restrict the list of versions for simplicity while maintaining enough control.
- sshPublicKeys List<String> - SSH public key contents that will be added to each Spark node in this cluster. The corresponding private keys can be used to log in with the user name ubuntu on port 2200. You can specify up to 10 keys.
- state String - (string) State of the cluster.
- url String
- workloadType Property Map
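The spark_conf and spark_env_vars settings above are easiest to read in context. The following is a minimal TypeScript sketch of a High Concurrency-style cluster, assuming the same node type and Spark version lookups used in the examples above; the cluster name, the autoscale bounds, and the MY_ENV_VAR environment variable are illustrative assumptions, not required values:
import * as pulumi from "@pulumi/pulumi";
import * as databricks from "@pulumi/databricks";

const smallest = databricks.getNodeType({ localDisk: true });
const latestLts = databricks.getSparkVersion({ longTermSupport: true });

// Sketch only: spark_conf carries the two keys described above, and spark_env_vars
// exports one illustrative variable to the driver and workers.
const highConcurrency = new databricks.Cluster("high_concurrency", {
    clusterName: "High Concurrency",
    sparkVersion: latestLts.then(v => v.id),
    nodeTypeId: smallest.then(n => n.id),
    autoterminationMinutes: 20,
    autoscale: {
        minWorkers: 1,
        maxWorkers: 10,
    },
    sparkConf: {
        "spark.databricks.repl.allowedLanguages": "python,sql", // Scala is not supported
        "spark.databricks.cluster.profile": "serverless",
    },
    sparkEnvVars: {
        MY_ENV_VAR: "my-value", // exported as MY_ENV_VAR='my-value'
    },
});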
Supporting Types
ClusterAutoscale, ClusterAutoscaleArgs
- MaxWorkers int - The maximum number of workers to which the cluster can scale up when overloaded. max_workers must be strictly greater than min_workers.
When using a Single Node cluster, num_workers needs to be 0. It can be set to 0 explicitly, or simply not specified, as it defaults to 0. When num_workers is 0, the provider checks for the presence of the required Spark configurations:
- spark.master must have the prefix local, like local[*]
- spark.databricks.cluster.profile must have the value singleNode
and also a custom_tag entry: "ResourceClass" = "SingleNode"
The following example demonstrates how to create a single node cluster:
import * as pulumi from "@pulumi/pulumi";
import * as databricks from "@pulumi/databricks";

const smallest = databricks.getNodeType({
    localDisk: true,
});
const latestLts = databricks.getSparkVersion({
    longTermSupport: true,
});
const singleNode = new databricks.Cluster("single_node", {
    clusterName: "Single Node",
    sparkVersion: latestLts.then(latestLts => latestLts.id),
    nodeTypeId: smallest.then(smallest => smallest.id),
    autoterminationMinutes: 20,
    sparkConf: {
        "spark.databricks.cluster.profile": "singleNode",
        "spark.master": "local[*]",
    },
    customTags: {
        ResourceClass: "SingleNode",
    },
});
import pulumi
import pulumi_databricks as databricks

smallest = databricks.get_node_type(local_disk=True)
latest_lts = databricks.get_spark_version(long_term_support=True)
single_node = databricks.Cluster("single_node",
    cluster_name="Single Node",
    spark_version=latest_lts.id,
    node_type_id=smallest.id,
    autotermination_minutes=20,
    spark_conf={
        "spark.databricks.cluster.profile": "singleNode",
        "spark.master": "local[*]",
    },
    custom_tags={
        "ResourceClass": "SingleNode",
    })
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Databricks = Pulumi.Databricks;

return await Deployment.RunAsync(() =>
{
    var smallest = Databricks.GetNodeType.Invoke(new()
    {
        LocalDisk = true,
    });

    var latestLts = Databricks.GetSparkVersion.Invoke(new()
    {
        LongTermSupport = true,
    });

    var singleNode = new Databricks.Cluster("single_node", new()
    {
        ClusterName = "Single Node",
        SparkVersion = latestLts.Apply(getSparkVersionResult => getSparkVersionResult.Id),
        NodeTypeId = smallest.Apply(getNodeTypeResult => getNodeTypeResult.Id),
        AutoterminationMinutes = 20,
        SparkConf =
        {
            { "spark.databricks.cluster.profile", "singleNode" },
            { "spark.master", "local[*]" },
        },
        CustomTags =
        {
            { "ResourceClass", "SingleNode" },
        },
    });
});
package main

import (
	"github.com/pulumi/pulumi-databricks/sdk/go/databricks"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		smallest, err := databricks.GetNodeType(ctx, &databricks.GetNodeTypeArgs{
			LocalDisk: pulumi.BoolRef(true),
		}, nil)
		if err != nil {
			return err
		}
		latestLts, err := databricks.GetSparkVersion(ctx, &databricks.GetSparkVersionArgs{
			LongTermSupport: pulumi.BoolRef(true),
		}, nil)
		if err != nil {
			return err
		}
		_, err = databricks.NewCluster(ctx, "single_node", &databricks.ClusterArgs{
			ClusterName:            pulumi.String("Single Node"),
			SparkVersion:           pulumi.String(latestLts.Id),
			NodeTypeId:             pulumi.String(smallest.Id),
			AutoterminationMinutes: pulumi.Int(20),
			SparkConf: pulumi.StringMap{
				"spark.databricks.cluster.profile": pulumi.String("singleNode"),
				"spark.master":                     pulumi.String("local[*]"),
			},
			CustomTags: pulumi.StringMap{
				"ResourceClass": pulumi.String("SingleNode"),
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.databricks.DatabricksFunctions;
import com.pulumi.databricks.inputs.GetNodeTypeArgs;
import com.pulumi.databricks.inputs.GetSparkVersionArgs;
import com.pulumi.databricks.Cluster;
import com.pulumi.databricks.ClusterArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        final var smallest = DatabricksFunctions.getNodeType(GetNodeTypeArgs.builder()
            .localDisk(true)
            .build());

        final var latestLts = DatabricksFunctions.getSparkVersion(GetSparkVersionArgs.builder()
            .longTermSupport(true)
            .build());

        var singleNode = new Cluster("singleNode", ClusterArgs.builder()
            .clusterName("Single Node")
            .sparkVersion(latestLts.applyValue(getSparkVersionResult -> getSparkVersionResult.id()))
            .nodeTypeId(smallest.applyValue(getNodeTypeResult -> getNodeTypeResult.id()))
            .autoterminationMinutes(20)
            .sparkConf(Map.ofEntries(
                Map.entry("spark.databricks.cluster.profile", "singleNode"),
                Map.entry("spark.master", "local[*]")
            ))
            .customTags(Map.of("ResourceClass", "SingleNode"))
            .build());
    }
}
resources:
  singleNode:
    type: databricks:Cluster
    name: single_node
    properties:
      clusterName: Single Node
      sparkVersion: ${latestLts.id}
      nodeTypeId: ${smallest.id}
      autoterminationMinutes: 20
      sparkConf:
        spark.databricks.cluster.profile: singleNode
        spark.master: local[*]
      customTags:
        ResourceClass: SingleNode
variables:
  smallest:
    fn::invoke:
      Function: databricks:getNodeType
      Arguments:
        localDisk: true
  latestLts:
    fn::invoke:
      Function: databricks:getSparkVersion
      Arguments:
        longTermSupport: true
- MinWorkers int - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation.
- maxWorkers Integer - The maximum number of workers to which the cluster can scale up when overloaded. max_workers must be strictly greater than min_workers.
When using a Single Node cluster, num_workers needs to be 0. It can be set to 0 explicitly, or simply not specified, as it defaults to 0. When num_workers is 0, the provider checks for the presence of the required Spark configurations:
- spark.master must have the prefix local, like local[*]
- spark.databricks.cluster.profile must have the value singleNode
and also a custom_tag entry: "ResourceClass" = "SingleNode"
(See the single node cluster example above.)
- minWorkers Integer - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation.
- maxWorkers number - The maximum number of workers to which the cluster can scale up when overloaded. max_workers must be strictly greater than min_workers.
When using a Single Node cluster, num_workers needs to be 0. It can be set to 0 explicitly, or simply not specified, as it defaults to 0. When num_workers is 0, the provider checks for the presence of the required Spark configurations:
- spark.master must have the prefix local, like local[*]
- spark.databricks.cluster.profile must have the value singleNode
and also a custom_tag entry: "ResourceClass" = "SingleNode"
(See the single node cluster example above.)
- minWorkers number - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation.
- max_workers int - The maximum number of workers to which the cluster can scale up when overloaded. max_workers must be strictly greater than min_workers.
When using a Single Node cluster, num_workers needs to be 0. It can be set to 0 explicitly, or simply not specified, as it defaults to 0. When num_workers is 0, the provider checks for the presence of the required Spark configurations:
- spark.master must have the prefix local, like local[*]
- spark.databricks.cluster.profile must have the value singleNode
and also a custom_tag entry: "ResourceClass" = "SingleNode"
(See the single node cluster example above.)
- min_workers int - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation.
- maxWorkers Number - The maximum number of workers to which the cluster can scale up when overloaded. max_workers must be strictly greater than min_workers.
When using a Single Node cluster, num_workers needs to be 0. It can be set to 0 explicitly, or simply not specified, as it defaults to 0. When num_workers is 0, the provider checks for the presence of the required Spark configurations:
- spark.master must have the prefix local, like local[*]
- spark.databricks.cluster.profile must have the value singleNode
and also a custom_tag entry: "ResourceClass" = "SingleNode"
(See the single node cluster example above.)
- minWorkers Number - The minimum number of workers to which the cluster can scale down when underutilized. It is also the initial number of workers the cluster will have after creation.
ClusterAwsAttributes, ClusterAwsAttributesArgs
- Availability string - Availability type used for all subsequent nodes past the first_on_demand ones. Valid values are SPOT, SPOT_WITH_FALLBACK and ON_DEMAND. Note: if first_on_demand is zero, this availability type will be used for the entire cluster. The backend default value is SPOT_WITH_FALLBACK and could change in the future. (A usage sketch follows the per-language lists below.)
- EbsVolumeCount int - The number of volumes launched for each instance. You can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at /ebs0, /ebs1, etc. Instance store volumes will be mounted at /local_disk0, /local_disk1, etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage, because heterogeneously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. If EBS volumes are specified, then the Spark configuration spark.local.dir will be overridden.
- EbsVolumeIops int
- EbsVolumeSize int - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. Custom EBS volumes cannot be specified for the legacy node types (memory-optimized and compute-optimized).
- EbsVolumeThroughput int
- EbsVolumeType string - The type of EBS volumes that will be launched with this cluster. Valid values are GENERAL_PURPOSE_SSD or THROUGHPUT_OPTIMIZED_HDD. Use this option only if you're not picking Delta Optimized i3.* node types.
- FirstOnDemand int - The first first_on_demand nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, first_on_demand nodes will be placed on on-demand instances, and the remainder will be placed on availability instances. This value does not affect cluster size and cannot be mutated over the lifetime of a cluster. The backend default value is 1 and could change in the future.
- InstanceProfileArn string - Nodes for this cluster will only be placed on AWS instances with this instance profile. Please see the databricks.InstanceProfile resource documentation for extended examples on adding a valid instance profile using Pulumi.
- SpotBidPricePercent int - The max price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new i3.xlarge spot instance, then the max price is half of the price of on-demand i3.xlarge instances. Similarly, if this field is set to 200, the max price is twice the price of on-demand i3.xlarge instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose max price percentage matches this field will be considered. For safety, we enforce this field to be no more than 10000.
- ZoneId string - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like us-west-2a. The provided availability zone must be in the same region as the Databricks deployment. For example, us-west-2a is not a valid zone ID if the Databricks deployment resides in the us-east-1 region. Enable automatic availability zone selection ("Auto-AZ") by setting the value auto; Databricks selects the AZ based on available IPs in the workspace subnets and retries in other availability zones if AWS returns insufficient capacity errors.
- availability String - Availability type used for all subsequent nodes past the first_on_demand ones. Valid values are SPOT, SPOT_WITH_FALLBACK and ON_DEMAND. Note: if first_on_demand is zero, this availability type will be used for the entire cluster. The backend default value is SPOT_WITH_FALLBACK and could change in the future.
- ebsVolumeCount Integer - The number of volumes launched for each instance. You can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at /ebs0, /ebs1, etc. Instance store volumes will be mounted at /local_disk0, /local_disk1, etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage, because heterogeneously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. If EBS volumes are specified, then the Spark configuration spark.local.dir will be overridden.
- ebsVolumeIops Integer
- ebsVolumeSize Integer - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. Custom EBS volumes cannot be specified for the legacy node types (memory-optimized and compute-optimized).
- ebsVolumeThroughput Integer
- ebsVolumeType String - The type of EBS volumes that will be launched with this cluster. Valid values are GENERAL_PURPOSE_SSD or THROUGHPUT_OPTIMIZED_HDD. Use this option only if you're not picking Delta Optimized i3.* node types.
- firstOnDemand Integer - The first first_on_demand nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, first_on_demand nodes will be placed on on-demand instances, and the remainder will be placed on availability instances. This value does not affect cluster size and cannot be mutated over the lifetime of a cluster. The backend default value is 1 and could change in the future.
- instanceProfileArn String - Nodes for this cluster will only be placed on AWS instances with this instance profile. Please see the databricks.InstanceProfile resource documentation for extended examples on adding a valid instance profile using Pulumi.
- spotBidPricePercent Integer - The max price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new i3.xlarge spot instance, then the max price is half of the price of on-demand i3.xlarge instances. Similarly, if this field is set to 200, the max price is twice the price of on-demand i3.xlarge instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose max price percentage matches this field will be considered. For safety, we enforce this field to be no more than 10000.
- zoneId String - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like us-west-2a. The provided availability zone must be in the same region as the Databricks deployment. For example, us-west-2a is not a valid zone ID if the Databricks deployment resides in the us-east-1 region. Enable automatic availability zone selection ("Auto-AZ") by setting the value auto; Databricks selects the AZ based on available IPs in the workspace subnets and retries in other availability zones if AWS returns insufficient capacity errors.
- availability string - Availability type used for all subsequent nodes past the first_on_demand ones. Valid values are SPOT, SPOT_WITH_FALLBACK and ON_DEMAND. Note: if first_on_demand is zero, this availability type will be used for the entire cluster. The backend default value is SPOT_WITH_FALLBACK and could change in the future.
- ebsVolumeCount number - The number of volumes launched for each instance. You can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at /ebs0, /ebs1, etc. Instance store volumes will be mounted at /local_disk0, /local_disk1, etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage, because heterogeneously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. If EBS volumes are specified, then the Spark configuration spark.local.dir will be overridden.
- ebsVolumeIops number
- ebsVolumeSize number - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. Custom EBS volumes cannot be specified for the legacy node types (memory-optimized and compute-optimized).
- ebsVolumeThroughput number
- ebsVolumeType string - The type of EBS volumes that will be launched with this cluster. Valid values are GENERAL_PURPOSE_SSD or THROUGHPUT_OPTIMIZED_HDD. Use this option only if you're not picking Delta Optimized i3.* node types.
- firstOnDemand number - The first first_on_demand nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, first_on_demand nodes will be placed on on-demand instances, and the remainder will be placed on availability instances. This value does not affect cluster size and cannot be mutated over the lifetime of a cluster. The backend default value is 1 and could change in the future.
- instanceProfileArn string - Nodes for this cluster will only be placed on AWS instances with this instance profile. Please see the databricks.InstanceProfile resource documentation for extended examples on adding a valid instance profile using Pulumi.
- spotBidPricePercent number - The max price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new i3.xlarge spot instance, then the max price is half of the price of on-demand i3.xlarge instances. Similarly, if this field is set to 200, the max price is twice the price of on-demand i3.xlarge instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose max price percentage matches this field will be considered. For safety, we enforce this field to be no more than 10000.
- zoneId string - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like us-west-2a. The provided availability zone must be in the same region as the Databricks deployment. For example, us-west-2a is not a valid zone ID if the Databricks deployment resides in the us-east-1 region. Enable automatic availability zone selection ("Auto-AZ") by setting the value auto; Databricks selects the AZ based on available IPs in the workspace subnets and retries in other availability zones if AWS returns insufficient capacity errors.
- availability str - Availability type used for all subsequent nodes past the first_on_demand ones. Valid values are SPOT, SPOT_WITH_FALLBACK and ON_DEMAND. Note: if first_on_demand is zero, this availability type will be used for the entire cluster. The backend default value is SPOT_WITH_FALLBACK and could change in the future.
- ebs_volume_count int - The number of volumes launched for each instance. You can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at /ebs0, /ebs1, etc. Instance store volumes will be mounted at /local_disk0, /local_disk1, etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage, because heterogeneously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. If EBS volumes are specified, then the Spark configuration spark.local.dir will be overridden.
- ebs_volume_iops int
- ebs_volume_size int - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. Custom EBS volumes cannot be specified for the legacy node types (memory-optimized and compute-optimized).
- ebs_volume_throughput int
- ebs_volume_type str - The type of EBS volumes that will be launched with this cluster. Valid values are GENERAL_PURPOSE_SSD or THROUGHPUT_OPTIMIZED_HDD. Use this option only if you're not picking Delta Optimized i3.* node types.
- first_on_demand int - The first first_on_demand nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, first_on_demand nodes will be placed on on-demand instances, and the remainder will be placed on availability instances. This value does not affect cluster size and cannot be mutated over the lifetime of a cluster. The backend default value is 1 and could change in the future.
- instance_profile_arn str - Nodes for this cluster will only be placed on AWS instances with this instance profile. Please see the databricks.InstanceProfile resource documentation for extended examples on adding a valid instance profile using Pulumi.
- spot_bid_price_percent int - The max price for AWS spot instances, as a percentage of the corresponding instance type's on-demand price. For example, if this field is set to 50, and the cluster needs a new i3.xlarge spot instance, then the max price is half of the price of on-demand i3.xlarge instances. Similarly, if this field is set to 200, the max price is twice the price of on-demand i3.xlarge instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose max price percentage matches this field will be considered. For safety, we enforce this field to be no more than 10000.
- zone_id str - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like us-west-2a. The provided availability zone must be in the same region as the Databricks deployment. For example, us-west-2a is not a valid zone ID if the Databricks deployment resides in the us-east-1 region. Enable automatic availability zone selection ("Auto-AZ") by setting the value auto; Databricks selects the AZ based on available IPs in the workspace subnets and retries in other availability zones if AWS returns insufficient capacity errors.
- availability String - Availability type used for all subsequent nodes past the first_on_demand ones. Valid values are SPOT, SPOT_WITH_FALLBACK and ON_DEMAND. Note: If first_on_demand is zero, this availability type will be used for the entire cluster. Backend default value is SPOT_WITH_FALLBACK and could change in the future.
- ebsVolumeCount Number - The number of volumes launched for each instance. You can choose up to 10 volumes. This feature is only enabled for supported node types. Legacy node types cannot specify custom EBS volumes. For node types with no instance store, at least one EBS volume needs to be specified; otherwise, cluster creation will fail. These EBS volumes will be mounted at /ebs0, /ebs1, etc. Instance store volumes will be mounted at /local_disk0, /local_disk1, etc. If EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for scratch storage because heterogeneously sized scratch devices can lead to inefficient disk utilization. If no EBS volumes are attached, Databricks will configure Spark to use instance store volumes. If EBS volumes are specified, then the Spark configuration spark.local.dir will be overridden.
- ebsVolumeIops Number
- ebsVolumeSize Number - The size of each EBS volume (in GiB) launched for each instance. For general purpose SSD, this value must be within the range 100 - 4096. For throughput optimized HDD, this value must be within the range 500 - 4096. Custom EBS volumes cannot be specified for the legacy node types (memory-optimized and compute-optimized).
- ebsVolumeThroughput Number
- ebsVolumeType String - The type of EBS volumes that will be launched with this cluster. Valid values are GENERAL_PURPOSE_SSD or THROUGHPUT_OPTIMIZED_HDD. Use this option only if you're not picking Delta Optimized i3.* node types.
- firstOnDemand Number - The first first_on_demand nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, first_on_demand nodes will be placed on on-demand instances, and the remainder will be placed on availability instances. This value does not affect cluster size and cannot be mutated over the lifetime of a cluster. Backend default value is 1 and could change in the future.
- instanceProfileArn String - Nodes for this cluster will only be placed on AWS instances with this instance profile. Please see databricks.InstanceProfile resource documentation for extended examples on adding a valid instance profile using Pulumi.
- spotBidPricePercent Number - The max price for AWS spot instances, as a percentage of the corresponding instance type’s on-demand price. For example, if this field is set to 50, and the cluster needs a new i3.xlarge spot instance, then the max price is half of the price of on-demand i3.xlarge instances. Similarly, if this field is set to 200, the max price is twice the price of on-demand i3.xlarge instances. If not specified, the default value is 100. When spot instances are requested for this cluster, only spot instances whose max price percentage matches this field will be considered. For safety, we enforce this field to be no more than 10000.
- zoneId String - Identifier for the availability zone/datacenter in which the cluster resides. This string will be of a form like us-west-2a. The provided availability zone must be in the same region as the Databricks deployment. For example, us-west-2a is not a valid zone ID if the Databricks deployment resides in the us-east-1 region. Enable automatic availability zone selection ("Auto-AZ") by setting the value auto. Databricks selects the AZ based on available IPs in the workspace subnets and retries in other availability zones if AWS returns insufficient capacity errors.
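Taken together, a minimal Python sketch of how these awsAttributes fields combine on a cluster might look like the following; the runtime version, node type, and other literal values are placeholders for illustration, not values taken from this page:
import pulumi_databricks as databricks

# Hypothetical values throughout; adjust for your workspace.
spot_cluster = databricks.Cluster("spot-cluster",
    cluster_name="spot-cluster",
    spark_version="14.3.x-scala2.12",        # placeholder runtime version
    node_type_id="i3.xlarge",                # placeholder node type
    num_workers=2,
    autotermination_minutes=30,
    aws_attributes=databricks.ClusterAwsAttributesArgs(
        availability="SPOT_WITH_FALLBACK",   # fall back to on-demand if spot capacity is unavailable
        first_on_demand=1,                   # keep the driver on an on-demand instance
        spot_bid_price_percent=100,          # bid up to 100% of the on-demand price
        zone_id="auto",                      # let Databricks choose the availability zone
        ebs_volume_count=1,
        ebs_volume_type="GENERAL_PURPOSE_SSD",
        ebs_volume_size=100,                 # GiB; must be 100-4096 for general purpose SSD
    ))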
ClusterAzureAttributes, ClusterAzureAttributesArgs
- Availability string - Availability type used for all subsequent nodes past the first_on_demand ones. Valid values are SPOT_AZURE, SPOT_WITH_FALLBACK_AZURE, and ON_DEMAND_AZURE. Note: If first_on_demand is zero, this availability type will be used for the entire cluster.
- FirstOnDemand int - The first first_on_demand nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, first_on_demand nodes will be placed on on-demand instances, and the remainder will be placed on availability instances. This value does not affect cluster size and cannot be mutated over the lifetime of a cluster.
- LogAnalyticsInfo ClusterAzureAttributesLogAnalyticsInfo
- SpotBidMaxPrice double - The max bid price used for Azure spot instances. You can set this to greater than or equal to the current spot price. You can also set this to -1, which specifies that the instance cannot be evicted on the basis of price. The price for the instance will be the current price for spot instances or the price for a standard instance.
- Availability string - Availability type used for all subsequent nodes past the first_on_demand ones. Valid values are SPOT_AZURE, SPOT_WITH_FALLBACK_AZURE, and ON_DEMAND_AZURE. Note: If first_on_demand is zero, this availability type will be used for the entire cluster.
- FirstOnDemand int - The first first_on_demand nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, first_on_demand nodes will be placed on on-demand instances, and the remainder will be placed on availability instances. This value does not affect cluster size and cannot be mutated over the lifetime of a cluster.
- LogAnalyticsInfo ClusterAzureAttributesLogAnalyticsInfo
- SpotBidMaxPrice float64 - The max bid price used for Azure spot instances. You can set this to greater than or equal to the current spot price. You can also set this to -1, which specifies that the instance cannot be evicted on the basis of price. The price for the instance will be the current price for spot instances or the price for a standard instance.
- availability String - Availability type used for all subsequent nodes past the first_on_demand ones. Valid values are SPOT_AZURE, SPOT_WITH_FALLBACK_AZURE, and ON_DEMAND_AZURE. Note: If first_on_demand is zero, this availability type will be used for the entire cluster.
- firstOnDemand Integer - The first first_on_demand nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, first_on_demand nodes will be placed on on-demand instances, and the remainder will be placed on availability instances. This value does not affect cluster size and cannot be mutated over the lifetime of a cluster.
- logAnalyticsInfo ClusterAzureAttributesLogAnalyticsInfo
- spotBidMaxPrice Double - The max bid price used for Azure spot instances. You can set this to greater than or equal to the current spot price. You can also set this to -1, which specifies that the instance cannot be evicted on the basis of price. The price for the instance will be the current price for spot instances or the price for a standard instance.
- availability string - Availability type used for all subsequent nodes past the first_on_demand ones. Valid values are SPOT_AZURE, SPOT_WITH_FALLBACK_AZURE, and ON_DEMAND_AZURE. Note: If first_on_demand is zero, this availability type will be used for the entire cluster.
- firstOnDemand number - The first first_on_demand nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, first_on_demand nodes will be placed on on-demand instances, and the remainder will be placed on availability instances. This value does not affect cluster size and cannot be mutated over the lifetime of a cluster.
- logAnalyticsInfo ClusterAzureAttributesLogAnalyticsInfo
- spotBidMaxPrice number - The max bid price used for Azure spot instances. You can set this to greater than or equal to the current spot price. You can also set this to -1, which specifies that the instance cannot be evicted on the basis of price. The price for the instance will be the current price for spot instances or the price for a standard instance.
- availability str - Availability type used for all subsequent nodes past the first_on_demand ones. Valid values are SPOT_AZURE, SPOT_WITH_FALLBACK_AZURE, and ON_DEMAND_AZURE. Note: If first_on_demand is zero, this availability type will be used for the entire cluster.
- first_on_demand int - The first first_on_demand nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, first_on_demand nodes will be placed on on-demand instances, and the remainder will be placed on availability instances. This value does not affect cluster size and cannot be mutated over the lifetime of a cluster.
- log_analytics_info ClusterAzureAttributesLogAnalyticsInfo
- spot_bid_max_price float - The max bid price used for Azure spot instances. You can set this to greater than or equal to the current spot price. You can also set this to -1, which specifies that the instance cannot be evicted on the basis of price. The price for the instance will be the current price for spot instances or the price for a standard instance.
- availability String - Availability type used for all subsequent nodes past the first_on_demand ones. Valid values are SPOT_AZURE, SPOT_WITH_FALLBACK_AZURE, and ON_DEMAND_AZURE. Note: If first_on_demand is zero, this availability type will be used for the entire cluster.
- firstOnDemand Number - The first first_on_demand nodes of the cluster will be placed on on-demand instances. If this value is greater than 0, the cluster driver node will be placed on an on-demand instance. If this value is greater than or equal to the current cluster size, all nodes will be placed on on-demand instances. If this value is less than the current cluster size, first_on_demand nodes will be placed on on-demand instances, and the remainder will be placed on availability instances. This value does not affect cluster size and cannot be mutated over the lifetime of a cluster.
- logAnalyticsInfo Property Map
- spotBidMaxPrice Number - The max bid price used for Azure spot instances. You can set this to greater than or equal to the current spot price. You can also set this to -1, which specifies that the instance cannot be evicted on the basis of price. The price for the instance will be the current price for spot instances or the price for a standard instance.
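As a rough Python sketch of how these azureAttributes fields fit together (the runtime and node type below are placeholders, not values from this page):
import pulumi_databricks as databricks

# Hypothetical values; adjust for your workspace.
azure_spot_cluster = databricks.Cluster("azure-spot",
    cluster_name="azure-spot",
    spark_version="14.3.x-scala2.12",            # placeholder runtime version
    node_type_id="Standard_DS3_v2",              # placeholder node type
    num_workers=2,
    azure_attributes=databricks.ClusterAzureAttributesArgs(
        availability="SPOT_WITH_FALLBACK_AZURE", # fall back to on-demand if spot capacity is unavailable
        first_on_demand=1,                       # keep the driver on an on-demand instance
        spot_bid_max_price=-1,                   # never evict on the basis of price
    ))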
ClusterAzureAttributesLogAnalyticsInfo, ClusterAzureAttributesLogAnalyticsInfoArgs
- LogAnalyticsPrimaryKey string
- LogAnalyticsWorkspaceId string
- LogAnalyticsPrimaryKey string
- LogAnalyticsWorkspaceId string
- logAnalyticsPrimaryKey String
- logAnalyticsWorkspaceId String
- logAnalyticsPrimaryKey string
- logAnalyticsWorkspaceId string
- log_analytics_primary_key str
- log_analytics_workspace_id str
- logAnalyticsPrimaryKey String
- logAnalyticsWorkspaceId String
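Both keys are nested under the log_analytics_info block of azure_attributes; a hedged Python fragment, with an obviously fake workspace ID and key, might look like this:
import pulumi_databricks as databricks

# Fragment only: pass this object as log_analytics_info inside azure_attributes on a Cluster.
log_analytics_info = databricks.ClusterAzureAttributesLogAnalyticsInfoArgs(
    log_analytics_workspace_id="00000000-0000-0000-0000-000000000000",  # hypothetical workspace ID
    log_analytics_primary_key="<primary-key>",                          # hypothetical; keep this value in a Pulumi secret
)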
ClusterClusterLogConf, ClusterClusterLogConfArgs
ClusterClusterLogConfDbfs, ClusterClusterLogConfDbfsArgs
- Destination string - S3 destination, e.g., s3://my-bucket/some-prefix. You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys.
- Destination string - S3 destination, e.g., s3://my-bucket/some-prefix. You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys.
- destination String - S3 destination, e.g., s3://my-bucket/some-prefix. You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys.
- destination string - S3 destination, e.g., s3://my-bucket/some-prefix. You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys.
- destination str - S3 destination, e.g., s3://my-bucket/some-prefix. You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys.
- destination String - S3 destination, e.g., s3://my-bucket/some-prefix. You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys.
ClusterClusterLogConfS3, ClusterClusterLogConfS3Args
- Destination string - S3 destination, e.g., s3://my-bucket/some-prefix. You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys.
- CannedAcl string - Set canned access control list, e.g. bucket-owner-full-control. If canned_acl is set, the cluster instance profile must have s3:PutObjectAcl permission on the destination bucket and prefix. The full list of possible canned ACLs can be found here. By default, only the object owner gets full control. If you are using a cross-account role for writing data, you may want to set bucket-owner-full-control to make bucket owners able to read the logs.
- EnableEncryption bool - Enable server-side encryption, false by default.
- EncryptionType string - The encryption type. It can be sse-s3 or sse-kms. It is used only when encryption is enabled, and the default type is sse-s3.
- Endpoint string - S3 endpoint, e.g. https://s3-us-west-2.amazonaws.com. Either region or endpoint needs to be set. If both are set, the endpoint is used.
- KmsKey string - KMS key used if encryption is enabled and encryption type is set to sse-kms.
- Region string - S3 region, e.g. us-west-2. Either region or endpoint must be set. If both are set, the endpoint is used.
- Destination string - S3 destination, e.g., s3://my-bucket/some-prefix. You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys.
- CannedAcl string - Set canned access control list, e.g. bucket-owner-full-control. If canned_acl is set, the cluster instance profile must have s3:PutObjectAcl permission on the destination bucket and prefix. The full list of possible canned ACLs can be found here. By default, only the object owner gets full control. If you are using a cross-account role for writing data, you may want to set bucket-owner-full-control to make bucket owners able to read the logs.
- EnableEncryption bool - Enable server-side encryption, false by default.
- EncryptionType string - The encryption type. It can be sse-s3 or sse-kms. It is used only when encryption is enabled, and the default type is sse-s3.
- Endpoint string - S3 endpoint, e.g. https://s3-us-west-2.amazonaws.com. Either region or endpoint needs to be set. If both are set, the endpoint is used.
- KmsKey string - KMS key used if encryption is enabled and encryption type is set to sse-kms.
- Region string - S3 region, e.g. us-west-2. Either region or endpoint must be set. If both are set, the endpoint is used.
- destination String - S3 destination, e.g., s3://my-bucket/some-prefix. You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys.
- cannedAcl String - Set canned access control list, e.g. bucket-owner-full-control. If canned_acl is set, the cluster instance profile must have s3:PutObjectAcl permission on the destination bucket and prefix. The full list of possible canned ACLs can be found here. By default, only the object owner gets full control. If you are using a cross-account role for writing data, you may want to set bucket-owner-full-control to make bucket owners able to read the logs.
- enableEncryption Boolean - Enable server-side encryption, false by default.
- encryptionType String - The encryption type. It can be sse-s3 or sse-kms. It is used only when encryption is enabled, and the default type is sse-s3.
- endpoint String - S3 endpoint, e.g. https://s3-us-west-2.amazonaws.com. Either region or endpoint needs to be set. If both are set, the endpoint is used.
- kmsKey String - KMS key used if encryption is enabled and encryption type is set to sse-kms.
- region String - S3 region, e.g. us-west-2. Either region or endpoint must be set. If both are set, the endpoint is used.
- destination string - S3 destination, e.g., s3://my-bucket/some-prefix. You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys.
- cannedAcl string - Set canned access control list, e.g. bucket-owner-full-control. If canned_acl is set, the cluster instance profile must have s3:PutObjectAcl permission on the destination bucket and prefix. The full list of possible canned ACLs can be found here. By default, only the object owner gets full control. If you are using a cross-account role for writing data, you may want to set bucket-owner-full-control to make bucket owners able to read the logs.
- enableEncryption boolean - Enable server-side encryption, false by default.
- encryptionType string - The encryption type. It can be sse-s3 or sse-kms. It is used only when encryption is enabled, and the default type is sse-s3.
- endpoint string - S3 endpoint, e.g. https://s3-us-west-2.amazonaws.com. Either region or endpoint needs to be set. If both are set, the endpoint is used.
- kmsKey string - KMS key used if encryption is enabled and encryption type is set to sse-kms.
- region string - S3 region, e.g. us-west-2. Either region or endpoint must be set. If both are set, the endpoint is used.
- destination str - S3 destination, e.g., s3://my-bucket/some-prefix. You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys.
- canned_acl str - Set canned access control list, e.g. bucket-owner-full-control. If canned_acl is set, the cluster instance profile must have s3:PutObjectAcl permission on the destination bucket and prefix. The full list of possible canned ACLs can be found here. By default, only the object owner gets full control. If you are using a cross-account role for writing data, you may want to set bucket-owner-full-control to make bucket owners able to read the logs.
- enable_encryption bool - Enable server-side encryption, false by default.
- encryption_type str - The encryption type. It can be sse-s3 or sse-kms. It is used only when encryption is enabled, and the default type is sse-s3.
- endpoint str - S3 endpoint, e.g. https://s3-us-west-2.amazonaws.com. Either region or endpoint needs to be set. If both are set, the endpoint is used.
- kms_key str - KMS key used if encryption is enabled and encryption type is set to sse-kms.
- region str - S3 region, e.g. us-west-2. Either region or endpoint must be set. If both are set, the endpoint is used.
- destination String - S3 destination, e.g., s3://my-bucket/some-prefix. You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys.
- cannedAcl String - Set canned access control list, e.g. bucket-owner-full-control. If canned_acl is set, the cluster instance profile must have s3:PutObjectAcl permission on the destination bucket and prefix. The full list of possible canned ACLs can be found here. By default, only the object owner gets full control. If you are using a cross-account role for writing data, you may want to set bucket-owner-full-control to make bucket owners able to read the logs.
- enableEncryption Boolean - Enable server-side encryption, false by default.
- encryptionType String - The encryption type. It can be sse-s3 or sse-kms. It is used only when encryption is enabled, and the default type is sse-s3.
- endpoint String - S3 endpoint, e.g. https://s3-us-west-2.amazonaws.com. Either region or endpoint needs to be set. If both are set, the endpoint is used.
- kmsKey String - KMS key used if encryption is enabled and encryption type is set to sse-kms.
- region String - S3 region, e.g. us-west-2. Either region or endpoint must be set. If both are set, the endpoint is used.
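A minimal Python sketch of wiring an S3 cluster log destination together; the bucket, region, and instance profile ARN are hypothetical, and the sketch assumes that instance profile already has write access to the bucket:
import pulumi_databricks as databricks

logged_cluster = databricks.Cluster("logged-cluster",
    cluster_name="logged-cluster",
    spark_version="14.3.x-scala2.12",      # placeholder runtime version
    node_type_id="i3.xlarge",              # placeholder node type
    num_workers=1,
    aws_attributes=databricks.ClusterAwsAttributesArgs(
        # hypothetical instance profile with write access to the log bucket
        instance_profile_arn="arn:aws:iam::123456789012:instance-profile/cluster-log-writer",
    ),
    cluster_log_conf=databricks.ClusterClusterLogConfArgs(
        s3=databricks.ClusterClusterLogConfS3Args(
            destination="s3://my-bucket/cluster-logs",  # hypothetical bucket and prefix
            region="us-west-2",
            enable_encryption=True,                     # server-side encryption, sse-s3 by default
            canned_acl="bucket-owner-full-control",
        ),
    ))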
ClusterClusterMountInfo, ClusterClusterMountInfoArgs
- LocalMountDirPath string - path inside the Spark container.
For example, you can mount an Azure Data Lake Storage container using the following code:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const storageAccount = "ewfw3ggwegwg"; const storageContainer = "test"; const withNfs = new databricks.Cluster("with_nfs", {clusterMountInfos: [{ networkFilesystemInfo: { serverAddress:
${storageAccount}.blob.core.windows.net
, mountOptions: "sec=sys,vers=3,nolock,proto=tcp", }, remoteMountDirPath:${storageAccount}/${storageContainer}
, localMountDirPath: "/mnt/nfs-test", }]});import pulumi import pulumi_databricks as databricks storage_account = "ewfw3ggwegwg" storage_container = "test" with_nfs = databricks.Cluster("with_nfs", cluster_mount_infos=[{ "network_filesystem_info": { "server_address": f"{storage_account}.blob.core.windows.net", "mount_options": "sec=sys,vers=3,nolock,proto=tcp", }, "remote_mount_dir_path": f"{storage_account}/{storage_container}", "local_mount_dir_path": "/mnt/nfs-test", }])
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var storageAccount = "ewfw3ggwegwg"; var storageContainer = "test"; var withNfs = new Databricks.Cluster("with_nfs", new() { ClusterMountInfos = new[] { new Databricks.Inputs.ClusterClusterMountInfoArgs { NetworkFilesystemInfo = new Databricks.Inputs.ClusterClusterMountInfoNetworkFilesystemInfoArgs { ServerAddress = $"{storageAccount}.blob.core.windows.net", MountOptions = "sec=sys,vers=3,nolock,proto=tcp", }, RemoteMountDirPath = $"{storageAccount}/{storageContainer}", LocalMountDirPath = "/mnt/nfs-test", }, }, }); });
package main import ( "fmt" "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { storageAccount := "ewfw3ggwegwg" storageContainer := "test" _, err := databricks.NewCluster(ctx, "with_nfs", &databricks.ClusterArgs{ ClusterMountInfos: databricks.ClusterClusterMountInfoArray{ &databricks.ClusterClusterMountInfoArgs{ NetworkFilesystemInfo: &databricks.ClusterClusterMountInfoNetworkFilesystemInfoArgs{ ServerAddress: pulumi.Sprintf("%v.blob.core.windows.net", storageAccount), MountOptions: pulumi.String("sec=sys,vers=3,nolock,proto=tcp"), }, RemoteMountDirPath: pulumi.Sprintf("%v/%v", storageAccount, storageContainer), LocalMountDirPath: pulumi.String("/mnt/nfs-test"), }, }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterClusterMountInfoArgs; import com.pulumi.databricks.inputs.ClusterClusterMountInfoNetworkFilesystemInfoArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { final var storageAccount = "ewfw3ggwegwg"; final var storageContainer = "test"; var withNfs = new Cluster("withNfs", ClusterArgs.builder() .clusterMountInfos(ClusterClusterMountInfoArgs.builder() .networkFilesystemInfo(ClusterClusterMountInfoNetworkFilesystemInfoArgs.builder() .serverAddress(String.format("%s.blob.core.windows.net", storageAccount)) .mountOptions("sec=sys,vers=3,nolock,proto=tcp") .build()) .remoteMountDirPath(String.format("%s/%s", storageAccount,storageContainer)) .localMountDirPath("/mnt/nfs-test") .build()) .build()); } }
resources: withNfs: type: databricks:Cluster name: with_nfs properties: clusterMountInfos: - networkFilesystemInfo: serverAddress: ${storageAccount}.blob.core.windows.net mountOptions: sec=sys,vers=3,nolock,proto=tcp remoteMountDirPath: ${storageAccount}/${storageContainer} localMountDirPath: /mnt/nfs-test variables: storageAccount: ewfw3ggwegwg storageContainer: test
- NetworkFilesystemInfo ClusterClusterMountInfoNetworkFilesystemInfo - block specifying connection. It consists of:
- RemoteMountDirPath string - string specifying path to mount on the remote service.
- LocalMountDirPath string - path inside the Spark container.
- NetworkFilesystemInfo ClusterClusterMountInfoNetworkFilesystemInfo - block specifying connection. It consists of:
- RemoteMountDirPath string - string specifying path to mount on the remote service.
- localMountDirPath String - path inside the Spark container.
- networkFilesystemInfo ClusterClusterMountInfoNetworkFilesystemInfo - block specifying connection. It consists of:
- remoteMountDirPath String - string specifying path to mount on the remote service.
- localMountDirPath string - path inside the Spark container.
- networkFilesystemInfo ClusterClusterMountInfoNetworkFilesystemInfo - block specifying connection. It consists of:
- remoteMountDirPath string - string specifying path to mount on the remote service.
- local_mount_dir_path str - path inside the Spark container.
- network_filesystem_info ClusterClusterMountInfoNetworkFilesystemInfo - block specifying connection. It consists of:
- remote_mount_dir_path str - string specifying path to mount on the remote service.
- localMountDirPath String - path inside the Spark container.
- networkFilesystemInfo Property Map - block specifying connection. It consists of:
- remoteMountDirPath String - string specifying path to mount on the remote service.
ClusterClusterMountInfoNetworkFilesystemInfo, ClusterClusterMountInfoNetworkFilesystemInfoArgs
- ServerAddress string - host name.
- MountOptions string - string that will be passed as options to the mount command.
- ServerAddress string - host name.
- MountOptions string - string that will be passed as options to the mount command.
- serverAddress String - host name.
- mountOptions String - string that will be passed as options to the mount command.
- serverAddress string - host name.
- mountOptions string - string that will be passed as options to the mount command.
- server_address str - host name.
- mount_options str - string that will be passed as options to the mount command.
- serverAddress String - host name.
- mountOptions String - string that will be passed as options to the mount command.
ClusterDockerImage, ClusterDockerImageArgs
- Url string
- URL for the Docker image
- BasicAuth ClusterDockerImageBasicAuth - basic_auth.username and basic_auth.password for the Docker repository. Docker registry credentials are encrypted when they are stored in Databricks internal storage and when they are passed to a registry upon fetching Docker images at cluster launch. However, other authenticated and authorized API users of this workspace can access the username and password. Example usage with azurerm_container_registry and docker_registry_image, which you can adapt to your specific use case:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks"; import * as docker from "@pulumi/docker";
const _this = new docker.index.RegistryImage("this", { build: [{}], name:
${thisAzurermContainerRegistry.loginServer}/sample:latest
, }); const thisCluster = new databricks.Cluster("this", {dockerImage: { url: _this.name, basicAuth: { username: thisAzurermContainerRegistry.adminUsername, password: thisAzurermContainerRegistry.adminPassword, }, }});import pulumi import pulumi_databricks as databricks import pulumi_docker as docker this = docker.index.RegistryImage("this", build=[{}], name=f{this_azurerm_container_registry.login_server}/sample:latest) this_cluster = databricks.Cluster("this", docker_image={ "url": this["name"], "basic_auth": { "username": this_azurerm_container_registry["adminUsername"], "password": this_azurerm_container_registry["adminPassword"], }, })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; using Docker = Pulumi.Docker; return await Deployment.RunAsync(() => { var @this = new Docker.Index.RegistryImage("this", new() { Build = new[] { null, }, Name = $"{thisAzurermContainerRegistry.LoginServer}/sample:latest", }); var thisCluster = new Databricks.Cluster("this", new() { DockerImage = new Databricks.Inputs.ClusterDockerImageArgs { Url = @this.Name, BasicAuth = new Databricks.Inputs.ClusterDockerImageBasicAuthArgs { Username = thisAzurermContainerRegistry.AdminUsername, Password = thisAzurermContainerRegistry.AdminPassword, }, }, }); });
package main import ( "fmt" "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi-docker/sdk/v4/go/docker" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { this, err := docker.NewRegistryImage(ctx, "this", &docker.RegistryImageArgs{ Build: []map[string]interface{}{ map[string]interface{}{}, }, Name: fmt.Sprintf("%v/sample:latest", thisAzurermContainerRegistry.LoginServer), }) if err != nil { return err } _, err = databricks.NewCluster(ctx, "this", &databricks.ClusterArgs{ DockerImage: &databricks.ClusterDockerImageArgs{ Url: this.Name, BasicAuth: &databricks.ClusterDockerImageBasicAuthArgs{ Username: pulumi.Any(thisAzurermContainerRegistry.AdminUsername), Password: pulumi.Any(thisAzurermContainerRegistry.AdminPassword), }, }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.docker.registryImage; import com.pulumi.docker.RegistryImageArgs; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterDockerImageArgs; import com.pulumi.databricks.inputs.ClusterDockerImageBasicAuthArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var this_ = new RegistryImage("this", RegistryImageArgs.builder() .build() .name(String.format("%s/sample:latest", thisAzurermContainerRegistry.loginServer())) .build()); var thisCluster = new Cluster("thisCluster", ClusterArgs.builder() .dockerImage(ClusterDockerImageArgs.builder() .url(this_.name()) .basicAuth(ClusterDockerImageBasicAuthArgs.builder() .username(thisAzurermContainerRegistry.adminUsername()) .password(thisAzurermContainerRegistry.adminPassword()) .build()) .build()) .build()); } }
resources: this: type: docker:registryImage properties: build: - {} name: ${thisAzurermContainerRegistry.loginServer}/sample:latest thisCluster: type: databricks:Cluster name: this properties: dockerImage: url: ${this.name} basicAuth: username: ${thisAzurermContainerRegistry.adminUsername} password: ${thisAzurermContainerRegistry.adminPassword}
- Url string
- URL for the Docker image
- BasicAuth ClusterDockerImageBasicAuth - basic_auth.username and basic_auth.password for the Docker repository. Docker registry credentials are encrypted when they are stored in Databricks internal storage and when they are passed to a registry upon fetching Docker images at cluster launch. However, other authenticated and authorized API users of this workspace can access the username and password.
- url String
- URL for the Docker image
- basicAuth ClusterDockerImageBasicAuth - basic_auth.username and basic_auth.password for the Docker repository. Docker registry credentials are encrypted when they are stored in Databricks internal storage and when they are passed to a registry upon fetching Docker images at cluster launch. However, other authenticated and authorized API users of this workspace can access the username and password.
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks"; import * as docker from "@pulumi/docker";
const _this = new docker.index.RegistryImage("this", { build: [{}], name:
${thisAzurermContainerRegistry.loginServer}/sample:latest
, }); const thisCluster = new databricks.Cluster("this", {dockerImage: { url: _this.name, basicAuth: { username: thisAzurermContainerRegistry.adminUsername, password: thisAzurermContainerRegistry.adminPassword, }, }});import pulumi import pulumi_databricks as databricks import pulumi_docker as docker this = docker.index.RegistryImage("this", build=[{}], name=f{this_azurerm_container_registry.login_server}/sample:latest) this_cluster = databricks.Cluster("this", docker_image={ "url": this["name"], "basic_auth": { "username": this_azurerm_container_registry["adminUsername"], "password": this_azurerm_container_registry["adminPassword"], }, })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; using Docker = Pulumi.Docker; return await Deployment.RunAsync(() => { var @this = new Docker.Index.RegistryImage("this", new() { Build = new[] { null, }, Name = $"{thisAzurermContainerRegistry.LoginServer}/sample:latest", }); var thisCluster = new Databricks.Cluster("this", new() { DockerImage = new Databricks.Inputs.ClusterDockerImageArgs { Url = @this.Name, BasicAuth = new Databricks.Inputs.ClusterDockerImageBasicAuthArgs { Username = thisAzurermContainerRegistry.AdminUsername, Password = thisAzurermContainerRegistry.AdminPassword, }, }, }); });
package main import ( "fmt" "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi-docker/sdk/v4/go/docker" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { this, err := docker.NewRegistryImage(ctx, "this", &docker.RegistryImageArgs{ Build: []map[string]interface{}{ map[string]interface{}{}, }, Name: fmt.Sprintf("%v/sample:latest", thisAzurermContainerRegistry.LoginServer), }) if err != nil { return err } _, err = databricks.NewCluster(ctx, "this", &databricks.ClusterArgs{ DockerImage: &databricks.ClusterDockerImageArgs{ Url: this.Name, BasicAuth: &databricks.ClusterDockerImageBasicAuthArgs{ Username: pulumi.Any(thisAzurermContainerRegistry.AdminUsername), Password: pulumi.Any(thisAzurermContainerRegistry.AdminPassword), }, }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.docker.RegistryImage; import com.pulumi.docker.RegistryImageArgs; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterDockerImageArgs; import com.pulumi.databricks.inputs.ClusterDockerImageBasicAuthArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var this_ = new RegistryImage("this", RegistryImageArgs.builder() .name(String.format("%s/sample:latest", thisAzurermContainerRegistry.loginServer())) .build()); var thisCluster = new Cluster("thisCluster", ClusterArgs.builder() .dockerImage(ClusterDockerImageArgs.builder() .url(this_.name()) .basicAuth(ClusterDockerImageBasicAuthArgs.builder() .username(thisAzurermContainerRegistry.adminUsername()) .password(thisAzurermContainerRegistry.adminPassword()) .build()) .build()) .build()); } }
resources: this: type: docker:registryImage properties: build: - {} name: ${thisAzurermContainerRegistry.loginServer}/sample:latest thisCluster: type: databricks:Cluster name: this properties: dockerImage: url: ${this.name} basicAuth: username: ${thisAzurermContainerRegistry.adminUsername} password: ${thisAzurermContainerRegistry.adminPassword}
- url string
- URL for the Docker image
- basicAuth ClusterDockerImageBasicAuth
- basic_auth.username and basic_auth.password for Docker repository. Docker registry credentials are encrypted when they are stored in Databricks internal storage and when they are passed to a registry upon fetching Docker images at cluster launch. However, other authenticated and authorized API users of this workspace can access the username and password. Example usage with azurerm_container_registry and docker_registry_image, that you can adapt to your specific use-case:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks"; import * as docker from "@pulumi/docker";
const _this = new docker.index.RegistryImage("this", { build: [{}], name: `${thisAzurermContainerRegistry.loginServer}/sample:latest`, }); const thisCluster = new databricks.Cluster("this", {dockerImage: { url: _this.name, basicAuth: { username: thisAzurermContainerRegistry.adminUsername, password: thisAzurermContainerRegistry.adminPassword, }, }});
import pulumi import pulumi_databricks as databricks import pulumi_docker as docker this = docker.index.RegistryImage("this", build=[{}], name=f"{this_azurerm_container_registry.login_server}/sample:latest") this_cluster = databricks.Cluster("this", docker_image={ "url": this["name"], "basic_auth": { "username": this_azurerm_container_registry["adminUsername"], "password": this_azurerm_container_registry["adminPassword"], }, })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; using Docker = Pulumi.Docker; return await Deployment.RunAsync(() => { var @this = new Docker.Index.RegistryImage("this", new() { Build = new[] { null, }, Name = $"{thisAzurermContainerRegistry.LoginServer}/sample:latest", }); var thisCluster = new Databricks.Cluster("this", new() { DockerImage = new Databricks.Inputs.ClusterDockerImageArgs { Url = @this.Name, BasicAuth = new Databricks.Inputs.ClusterDockerImageBasicAuthArgs { Username = thisAzurermContainerRegistry.AdminUsername, Password = thisAzurermContainerRegistry.AdminPassword, }, }, }); });
package main import ( "fmt" "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi-docker/sdk/v4/go/docker" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { this, err := docker.NewRegistryImage(ctx, "this", &docker.RegistryImageArgs{ Build: []map[string]interface{}{ map[string]interface{}{}, }, Name: fmt.Sprintf("%v/sample:latest", thisAzurermContainerRegistry.LoginServer), }) if err != nil { return err } _, err = databricks.NewCluster(ctx, "this", &databricks.ClusterArgs{ DockerImage: &databricks.ClusterDockerImageArgs{ Url: this.Name, BasicAuth: &databricks.ClusterDockerImageBasicAuthArgs{ Username: pulumi.Any(thisAzurermContainerRegistry.AdminUsername), Password: pulumi.Any(thisAzurermContainerRegistry.AdminPassword), }, }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.docker.RegistryImage; import com.pulumi.docker.RegistryImageArgs; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterDockerImageArgs; import com.pulumi.databricks.inputs.ClusterDockerImageBasicAuthArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var this_ = new RegistryImage("this", RegistryImageArgs.builder() .name(String.format("%s/sample:latest", thisAzurermContainerRegistry.loginServer())) .build()); var thisCluster = new Cluster("thisCluster", ClusterArgs.builder() .dockerImage(ClusterDockerImageArgs.builder() .url(this_.name()) .basicAuth(ClusterDockerImageBasicAuthArgs.builder() .username(thisAzurermContainerRegistry.adminUsername()) .password(thisAzurermContainerRegistry.adminPassword()) .build()) .build()) .build()); } }
resources: this: type: docker:registryImage properties: build: - {} name: ${thisAzurermContainerRegistry.loginServer}/sample:latest thisCluster: type: databricks:Cluster name: this properties: dockerImage: url: ${this.name} basicAuth: username: ${thisAzurermContainerRegistry.adminUsername} password: ${thisAzurermContainerRegistry.adminPassword}
- url str
- URL for the Docker image
- basic_auth ClusterDockerImageBasicAuth
- basic_auth.username and basic_auth.password for Docker repository. Docker registry credentials are encrypted when they are stored in Databricks internal storage and when they are passed to a registry upon fetching Docker images at cluster launch. However, other authenticated and authorized API users of this workspace can access the username and password. Example usage with azurerm_container_registry and docker_registry_image, that you can adapt to your specific use-case:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks"; import * as docker from "@pulumi/docker";
const _this = new docker.index.RegistryImage("this", { build: [{}], name: `${thisAzurermContainerRegistry.loginServer}/sample:latest`, }); const thisCluster = new databricks.Cluster("this", {dockerImage: { url: _this.name, basicAuth: { username: thisAzurermContainerRegistry.adminUsername, password: thisAzurermContainerRegistry.adminPassword, }, }});
import pulumi import pulumi_databricks as databricks import pulumi_docker as docker this = docker.index.RegistryImage("this", build=[{}], name=f"{this_azurerm_container_registry.login_server}/sample:latest") this_cluster = databricks.Cluster("this", docker_image={ "url": this["name"], "basic_auth": { "username": this_azurerm_container_registry["adminUsername"], "password": this_azurerm_container_registry["adminPassword"], }, })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; using Docker = Pulumi.Docker; return await Deployment.RunAsync(() => { var @this = new Docker.Index.RegistryImage("this", new() { Build = new[] { null, }, Name = $"{thisAzurermContainerRegistry.LoginServer}/sample:latest", }); var thisCluster = new Databricks.Cluster("this", new() { DockerImage = new Databricks.Inputs.ClusterDockerImageArgs { Url = @this.Name, BasicAuth = new Databricks.Inputs.ClusterDockerImageBasicAuthArgs { Username = thisAzurermContainerRegistry.AdminUsername, Password = thisAzurermContainerRegistry.AdminPassword, }, }, }); });
package main import ( "fmt" "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi-docker/sdk/v4/go/docker" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { this, err := docker.NewRegistryImage(ctx, "this", &docker.RegistryImageArgs{ Build: []map[string]interface{}{ map[string]interface{}{}, }, Name: fmt.Sprintf("%v/sample:latest", thisAzurermContainerRegistry.LoginServer), }) if err != nil { return err } _, err = databricks.NewCluster(ctx, "this", &databricks.ClusterArgs{ DockerImage: &databricks.ClusterDockerImageArgs{ Url: this.Name, BasicAuth: &databricks.ClusterDockerImageBasicAuthArgs{ Username: pulumi.Any(thisAzurermContainerRegistry.AdminUsername), Password: pulumi.Any(thisAzurermContainerRegistry.AdminPassword), }, }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.docker.RegistryImage; import com.pulumi.docker.RegistryImageArgs; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterDockerImageArgs; import com.pulumi.databricks.inputs.ClusterDockerImageBasicAuthArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var this_ = new RegistryImage("this", RegistryImageArgs.builder() .name(String.format("%s/sample:latest", thisAzurermContainerRegistry.loginServer())) .build()); var thisCluster = new Cluster("thisCluster", ClusterArgs.builder() .dockerImage(ClusterDockerImageArgs.builder() .url(this_.name()) .basicAuth(ClusterDockerImageBasicAuthArgs.builder() .username(thisAzurermContainerRegistry.adminUsername()) .password(thisAzurermContainerRegistry.adminPassword()) .build()) .build()) .build()); } }
resources: this: type: docker:registryImage properties: build: - {} name: ${thisAzurermContainerRegistry.loginServer}/sample:latest thisCluster: type: databricks:Cluster name: this properties: dockerImage: url: ${this.name} basicAuth: username: ${thisAzurermContainerRegistry.adminUsername} password: ${thisAzurermContainerRegistry.adminPassword}
- url String
- URL for the Docker image
- basicAuth Property Map
- basic_auth.username and basic_auth.password for Docker repository. Docker registry credentials are encrypted when they are stored in Databricks internal storage and when they are passed to a registry upon fetching Docker images at cluster launch. However, other authenticated and authorized API users of this workspace can access the username and password. Example usage with azurerm_container_registry and docker_registry_image, that you can adapt to your specific use-case:
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks"; import * as docker from "@pulumi/docker";
const _this = new docker.index.RegistryImage("this", { build: [{}], name: `${thisAzurermContainerRegistry.loginServer}/sample:latest`, }); const thisCluster = new databricks.Cluster("this", {dockerImage: { url: _this.name, basicAuth: { username: thisAzurermContainerRegistry.adminUsername, password: thisAzurermContainerRegistry.adminPassword, }, }});
import pulumi import pulumi_databricks as databricks import pulumi_docker as docker this = docker.index.RegistryImage("this", build=[{}], name=f"{this_azurerm_container_registry.login_server}/sample:latest") this_cluster = databricks.Cluster("this", docker_image={ "url": this["name"], "basic_auth": { "username": this_azurerm_container_registry["adminUsername"], "password": this_azurerm_container_registry["adminPassword"], }, })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; using Docker = Pulumi.Docker; return await Deployment.RunAsync(() => { var @this = new Docker.Index.RegistryImage("this", new() { Build = new[] { null, }, Name = $"{thisAzurermContainerRegistry.LoginServer}/sample:latest", }); var thisCluster = new Databricks.Cluster("this", new() { DockerImage = new Databricks.Inputs.ClusterDockerImageArgs { Url = @this.Name, BasicAuth = new Databricks.Inputs.ClusterDockerImageBasicAuthArgs { Username = thisAzurermContainerRegistry.AdminUsername, Password = thisAzurermContainerRegistry.AdminPassword, }, }, }); });
package main import ( "fmt" "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi-docker/sdk/v4/go/docker" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { this, err := docker.NewRegistryImage(ctx, "this", &docker.RegistryImageArgs{ Build: []map[string]interface{}{ map[string]interface{}{}, }, Name: fmt.Sprintf("%v/sample:latest", thisAzurermContainerRegistry.LoginServer), }) if err != nil { return err } _, err = databricks.NewCluster(ctx, "this", &databricks.ClusterArgs{ DockerImage: &databricks.ClusterDockerImageArgs{ Url: this.Name, BasicAuth: &databricks.ClusterDockerImageBasicAuthArgs{ Username: pulumi.Any(thisAzurermContainerRegistry.AdminUsername), Password: pulumi.Any(thisAzurermContainerRegistry.AdminPassword), }, }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.docker.RegistryImage; import com.pulumi.docker.RegistryImageArgs; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterDockerImageArgs; import com.pulumi.databricks.inputs.ClusterDockerImageBasicAuthArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var this_ = new RegistryImage("this", RegistryImageArgs.builder() .name(String.format("%s/sample:latest", thisAzurermContainerRegistry.loginServer())) .build()); var thisCluster = new Cluster("thisCluster", ClusterArgs.builder() .dockerImage(ClusterDockerImageArgs.builder() .url(this_.name()) .basicAuth(ClusterDockerImageBasicAuthArgs.builder() .username(thisAzurermContainerRegistry.adminUsername()) .password(thisAzurermContainerRegistry.adminPassword()) .build()) .build()) .build()); } }
resources: this: type: docker:registryImage properties: build: - {} name: ${thisAzurermContainerRegistry.loginServer}/sample:latest thisCluster: type: databricks:Cluster name: this properties: dockerImage: url: ${this.name} basicAuth: username: ${thisAzurermContainerRegistry.adminUsername} password: ${thisAzurermContainerRegistry.adminPassword}
ClusterDockerImageBasicAuth, ClusterDockerImageBasicAuthArgs
ClusterGcpAttributes, ClusterGcpAttributesArgs
- Availability string
- Availability type used for all nodes. Valid values are PREEMPTIBLE_GCP, PREEMPTIBLE_WITH_FALLBACK_GCP and ON_DEMAND_GCP, default: ON_DEMAND_GCP.
- BootDiskSize int
- Boot disk size in GB
- GoogleServiceAccount string
- Google Service Account email address that the cluster uses to authenticate with Google Identity. This field is used for authentication with the GCS and BigQuery data sources.
- LocalSsdCount int
- Number of local SSD disks (each is 375GB in size) that will be attached to each node of the cluster.
- UsePreemptibleExecutors bool
- if we should use preemptible executors (GCP documentation). Warning: this field is deprecated in favor of availability, and will be removed soon.
- ZoneId string
- Identifier for the availability zone in which the cluster resides. This can be one of the following: HA (default): High availability, spread nodes across availability zones for a Databricks deployment region; AUTO: Databricks picks an availability zone to schedule the cluster on; or the name of a GCP availability zone: pick one of the available zones from the list of available availability zones.
- Availability string
- Availability type used for all nodes. Valid values are PREEMPTIBLE_GCP, PREEMPTIBLE_WITH_FALLBACK_GCP and ON_DEMAND_GCP, default: ON_DEMAND_GCP.
- BootDiskSize int
- Boot disk size in GB
- GoogleServiceAccount string
- Google Service Account email address that the cluster uses to authenticate with Google Identity. This field is used for authentication with the GCS and BigQuery data sources.
- LocalSsdCount int
- Number of local SSD disks (each is 375GB in size) that will be attached to each node of the cluster.
- UsePreemptibleExecutors bool
- if we should use preemptible executors (GCP documentation). Warning: this field is deprecated in favor of availability, and will be removed soon.
- ZoneId string
- Identifier for the availability zone in which the cluster resides. This can be one of the following: HA (default): High availability, spread nodes across availability zones for a Databricks deployment region; AUTO: Databricks picks an availability zone to schedule the cluster on; or the name of a GCP availability zone: pick one of the available zones from the list of available availability zones.
- availability String
- Availability type used for all nodes. Valid values are PREEMPTIBLE_GCP, PREEMPTIBLE_WITH_FALLBACK_GCP and ON_DEMAND_GCP, default: ON_DEMAND_GCP.
- bootDiskSize Integer
- Boot disk size in GB
- googleServiceAccount String
- Google Service Account email address that the cluster uses to authenticate with Google Identity. This field is used for authentication with the GCS and BigQuery data sources.
- localSsdCount Integer
- Number of local SSD disks (each is 375GB in size) that will be attached to each node of the cluster.
- usePreemptibleExecutors Boolean
- if we should use preemptible executors (GCP documentation). Warning: this field is deprecated in favor of availability, and will be removed soon.
- zoneId String
- Identifier for the availability zone in which the cluster resides. This can be one of the following: HA (default): High availability, spread nodes across availability zones for a Databricks deployment region; AUTO: Databricks picks an availability zone to schedule the cluster on; or the name of a GCP availability zone: pick one of the available zones from the list of available availability zones.
- availability string
- Availability type used for all nodes. Valid values are PREEMPTIBLE_GCP, PREEMPTIBLE_WITH_FALLBACK_GCP and ON_DEMAND_GCP, default: ON_DEMAND_GCP.
- bootDiskSize number
- Boot disk size in GB
- googleServiceAccount string
- Google Service Account email address that the cluster uses to authenticate with Google Identity. This field is used for authentication with the GCS and BigQuery data sources.
- localSsdCount number
- Number of local SSD disks (each is 375GB in size) that will be attached to each node of the cluster.
- usePreemptibleExecutors boolean
- if we should use preemptible executors (GCP documentation). Warning: this field is deprecated in favor of availability, and will be removed soon.
- zoneId string
- Identifier for the availability zone in which the cluster resides. This can be one of the following: HA (default): High availability, spread nodes across availability zones for a Databricks deployment region; AUTO: Databricks picks an availability zone to schedule the cluster on; or the name of a GCP availability zone: pick one of the available zones from the list of available availability zones.
- availability str
- Availability type used for all nodes. Valid values are PREEMPTIBLE_GCP, PREEMPTIBLE_WITH_FALLBACK_GCP and ON_DEMAND_GCP, default: ON_DEMAND_GCP.
- boot_disk_size int
- Boot disk size in GB
- google_service_account str
- Google Service Account email address that the cluster uses to authenticate with Google Identity. This field is used for authentication with the GCS and BigQuery data sources.
- local_ssd_count int
- Number of local SSD disks (each is 375GB in size) that will be attached to each node of the cluster.
- use_preemptible_executors bool
- if we should use preemptible executors (GCP documentation). Warning: this field is deprecated in favor of availability, and will be removed soon.
- zone_id str
- Identifier for the availability zone in which the cluster resides. This can be one of the following: HA (default): High availability, spread nodes across availability zones for a Databricks deployment region; AUTO: Databricks picks an availability zone to schedule the cluster on; or the name of a GCP availability zone: pick one of the available zones from the list of available availability zones.
- availability String
- Availability type used for all nodes. Valid values are PREEMPTIBLE_GCP, PREEMPTIBLE_WITH_FALLBACK_GCP and ON_DEMAND_GCP, default: ON_DEMAND_GCP.
- bootDiskSize Number
- Boot disk size in GB
- googleServiceAccount String
- Google Service Account email address that the cluster uses to authenticate with Google Identity. This field is used for authentication with the GCS and BigQuery data sources.
- localSsdCount Number
- Number of local SSD disks (each is 375GB in size) that will be attached to each node of the cluster.
- usePreemptibleExecutors Boolean
- if we should use preemptible executors (GCP documentation). Warning: this field is deprecated in favor of availability, and will be removed soon.
- zoneId String
- Identifier for the availability zone in which the cluster resides. This can be one of the following: HA (default): High availability, spread nodes across availability zones for a Databricks deployment region; AUTO: Databricks picks an availability zone to schedule the cluster on; or the name of a GCP availability zone: pick one of the available zones from the list of available availability zones.
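As an illustration, here is a minimal sketch (Python SDK) of a GCP cluster that sets these attributes; the Spark version, node type, zone choice and service-account e-mail below are placeholder values, not taken from this page:

import pulumi_databricks as databricks

# Hypothetical GCP cluster; the node type, runtime and service account are illustrative only.
gcp_cluster = databricks.Cluster("gcp-example",
    cluster_name="gcp-example",
    spark_version="14.3.x-scala2.12",
    node_type_id="n2-highmem-4",
    num_workers=1,
    autotermination_minutes=20,
    gcp_attributes={
        # Fall back to on-demand nodes if preemptible capacity is unavailable.
        "availability": "PREEMPTIBLE_WITH_FALLBACK_GCP",
        # Let Databricks choose the zone.
        "zone_id": "AUTO",
        "google_service_account": "cluster-sa@my-project.iam.gserviceaccount.com",
        "local_ssd_count": 1,
    })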
ClusterInitScript, ClusterInitScriptArgs
ClusterInitScriptAbfss, ClusterInitScriptAbfssArgs
- Destination string
- ABFSS destination, e.g. abfss://<container>@<storage-account>.dfs.core.windows.net/<path-to-init-script>.
- Destination string
- ABFSS destination, e.g. abfss://<container>@<storage-account>.dfs.core.windows.net/<path-to-init-script>.
- destination String
- ABFSS destination, e.g. abfss://<container>@<storage-account>.dfs.core.windows.net/<path-to-init-script>.
- destination string
- ABFSS destination, e.g. abfss://<container>@<storage-account>.dfs.core.windows.net/<path-to-init-script>.
- destination str
- ABFSS destination, e.g. abfss://<container>@<storage-account>.dfs.core.windows.net/<path-to-init-script>.
- destination String
- ABFSS destination, e.g. abfss://<container>@<storage-account>.dfs.core.windows.net/<path-to-init-script>.
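As an illustration, a minimal sketch (Python SDK) of an init script read from ADLS through an abfss destination; the storage account, container and script path are placeholders:

import pulumi_databricks as databricks

# Hypothetical Azure cluster with an ABFSS-hosted init script; all names are illustrative.
abfss_cluster = databricks.Cluster("abfss-init-example",
    spark_version="14.3.x-scala2.12",
    node_type_id="Standard_DS3_v2",
    num_workers=1,
    init_scripts=[{
        "abfss": {
            "destination": "abfss://scripts@mystorageaccount.dfs.core.windows.net/init/setup.sh",
        },
    }])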
ClusterInitScriptDbfs, ClusterInitScriptDbfsArgs
- Destination string
- DBFS destination, e.g. dbfs:/FileStore/scripts/init.sh. Init scripts stored on DBFS are deprecated; prefer workspace files or Unity Catalog volumes.
- Destination string
- DBFS destination, e.g. dbfs:/FileStore/scripts/init.sh. Init scripts stored on DBFS are deprecated; prefer workspace files or Unity Catalog volumes.
- destination String
- DBFS destination, e.g. dbfs:/FileStore/scripts/init.sh. Init scripts stored on DBFS are deprecated; prefer workspace files or Unity Catalog volumes.
- destination string
- DBFS destination, e.g. dbfs:/FileStore/scripts/init.sh. Init scripts stored on DBFS are deprecated; prefer workspace files or Unity Catalog volumes.
- destination str
- DBFS destination, e.g. dbfs:/FileStore/scripts/init.sh. Init scripts stored on DBFS are deprecated; prefer workspace files or Unity Catalog volumes.
- destination String
- DBFS destination, e.g. dbfs:/FileStore/scripts/init.sh. Init scripts stored on DBFS are deprecated; prefer workspace files or Unity Catalog volumes.
ClusterInitScriptFile, ClusterInitScriptFileArgs
- Destination string
- Local file destination, e.g. file:/my/local/file.sh.
- Destination string
- Local file destination, e.g. file:/my/local/file.sh.
- destination String
- Local file destination, e.g. file:/my/local/file.sh.
- destination string
- Local file destination, e.g. file:/my/local/file.sh.
- destination str
- Local file destination, e.g. file:/my/local/file.sh.
- destination String
- Local file destination, e.g. file:/my/local/file.sh.
ClusterInitScriptGcs, ClusterInitScriptGcsArgs
- Destination string
- GCS destination, e.g. gs://my-bucket/some-prefix/init.sh.
- Destination string
- GCS destination, e.g. gs://my-bucket/some-prefix/init.sh.
- destination String
- GCS destination, e.g. gs://my-bucket/some-prefix/init.sh.
- destination string
- GCS destination, e.g. gs://my-bucket/some-prefix/init.sh.
- destination str
- GCS destination, e.g. gs://my-bucket/some-prefix/init.sh.
- destination String
- GCS destination, e.g. gs://my-bucket/some-prefix/init.sh.
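As an illustration, a minimal sketch (Python SDK) of an init script read from a GCS bucket; the bucket and object names are placeholders:

import pulumi_databricks as databricks

# Hypothetical GCP cluster with an init script stored in GCS; the bucket path is illustrative.
gcs_cluster = databricks.Cluster("gcs-init-example",
    spark_version="14.3.x-scala2.12",
    node_type_id="n2-highmem-4",
    num_workers=1,
    init_scripts=[{
        "gcs": {"destination": "gs://my-init-scripts/setup.sh"},
    }])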
ClusterInitScriptS3, ClusterInitScriptS3Args
- Destination string
- S3 destination, e.g. s3://my-bucket/some-prefix. You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys.
- CannedAcl string
- Set canned access control list, e.g. bucket-owner-full-control. If canned_acl is set, the cluster instance profile must have s3:PutObjectAcl permission on the destination bucket and prefix. The full list of possible canned ACLs can be found here. By default, only the object owner gets full control. If you are using a cross-account role for writing data, you may want to set bucket-owner-full-control to make bucket owners able to read the logs.
- EnableEncryption bool
- Enable server-side encryption, false by default.
- EncryptionType string
- The encryption type, it could be sse-s3 or sse-kms. It is used only when encryption is enabled, and the default type is sse-s3.
- Endpoint string
- S3 endpoint, e.g. https://s3-us-west-2.amazonaws.com. Either region or endpoint needs to be set. If both are set, the endpoint is used.
- KmsKey string
- KMS key used if encryption is enabled and encryption type is set to sse-kms.
- Region string
- S3 region, e.g. us-west-2. Either region or endpoint must be set. If both are set, the endpoint is used.
- Destination string
- S3 destination, e.g. s3://my-bucket/some-prefix. You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys.
- CannedAcl string
- Set canned access control list, e.g. bucket-owner-full-control. If canned_acl is set, the cluster instance profile must have s3:PutObjectAcl permission on the destination bucket and prefix. The full list of possible canned ACLs can be found here. By default, only the object owner gets full control. If you are using a cross-account role for writing data, you may want to set bucket-owner-full-control to make bucket owners able to read the logs.
- EnableEncryption bool
- Enable server-side encryption, false by default.
- EncryptionType string
- The encryption type, it could be sse-s3 or sse-kms. It is used only when encryption is enabled, and the default type is sse-s3.
- Endpoint string
- S3 endpoint, e.g. https://s3-us-west-2.amazonaws.com. Either region or endpoint needs to be set. If both are set, the endpoint is used.
- KmsKey string
- KMS key used if encryption is enabled and encryption type is set to sse-kms.
- Region string
- S3 region, e.g. us-west-2. Either region or endpoint must be set. If both are set, the endpoint is used.
- destination String
- S3 destination, e.g. s3://my-bucket/some-prefix. You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys.
- cannedAcl String
- Set canned access control list, e.g. bucket-owner-full-control. If canned_acl is set, the cluster instance profile must have s3:PutObjectAcl permission on the destination bucket and prefix. The full list of possible canned ACLs can be found here. By default, only the object owner gets full control. If you are using a cross-account role for writing data, you may want to set bucket-owner-full-control to make bucket owners able to read the logs.
- enableEncryption Boolean
- Enable server-side encryption, false by default.
- encryptionType String
- The encryption type, it could be sse-s3 or sse-kms. It is used only when encryption is enabled, and the default type is sse-s3.
- endpoint String
- S3 endpoint, e.g. https://s3-us-west-2.amazonaws.com. Either region or endpoint needs to be set. If both are set, the endpoint is used.
- kmsKey String
- KMS key used if encryption is enabled and encryption type is set to sse-kms.
- region String
- S3 region, e.g. us-west-2. Either region or endpoint must be set. If both are set, the endpoint is used.
- destination string
- S3 destination, e.g. s3://my-bucket/some-prefix. You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys.
- cannedAcl string
- Set canned access control list, e.g. bucket-owner-full-control. If canned_acl is set, the cluster instance profile must have s3:PutObjectAcl permission on the destination bucket and prefix. The full list of possible canned ACLs can be found here. By default, only the object owner gets full control. If you are using a cross-account role for writing data, you may want to set bucket-owner-full-control to make bucket owners able to read the logs.
- enableEncryption boolean
- Enable server-side encryption, false by default.
- encryptionType string
- The encryption type, it could be sse-s3 or sse-kms. It is used only when encryption is enabled, and the default type is sse-s3.
- endpoint string
- S3 endpoint, e.g. https://s3-us-west-2.amazonaws.com. Either region or endpoint needs to be set. If both are set, the endpoint is used.
- kmsKey string
- KMS key used if encryption is enabled and encryption type is set to sse-kms.
- region string
- S3 region, e.g. us-west-2. Either region or endpoint must be set. If both are set, the endpoint is used.
- destination str
- S3 destination, e.g. s3://my-bucket/some-prefix. You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys.
- canned_acl str
- Set canned access control list, e.g. bucket-owner-full-control. If canned_acl is set, the cluster instance profile must have s3:PutObjectAcl permission on the destination bucket and prefix. The full list of possible canned ACLs can be found here. By default, only the object owner gets full control. If you are using a cross-account role for writing data, you may want to set bucket-owner-full-control to make bucket owners able to read the logs.
- enable_encryption bool
- Enable server-side encryption, false by default.
- encryption_type str
- The encryption type, it could be sse-s3 or sse-kms. It is used only when encryption is enabled, and the default type is sse-s3.
- endpoint str
- S3 endpoint, e.g. https://s3-us-west-2.amazonaws.com. Either region or endpoint needs to be set. If both are set, the endpoint is used.
- kms_key str
- KMS key used if encryption is enabled and encryption type is set to sse-kms.
- region str
- S3 region, e.g. us-west-2. Either region or endpoint must be set. If both are set, the endpoint is used.
- destination String
- S3 destination, e.g. s3://my-bucket/some-prefix. You must configure the cluster with an instance profile, and the instance profile must have write access to the destination. You cannot use AWS keys.
- cannedAcl String
- Set canned access control list, e.g. bucket-owner-full-control. If canned_acl is set, the cluster instance profile must have s3:PutObjectAcl permission on the destination bucket and prefix. The full list of possible canned ACLs can be found here. By default, only the object owner gets full control. If you are using a cross-account role for writing data, you may want to set bucket-owner-full-control to make bucket owners able to read the logs.
- enableEncryption Boolean
- Enable server-side encryption, false by default.
- encryptionType String
- The encryption type, it could be sse-s3 or sse-kms. It is used only when encryption is enabled, and the default type is sse-s3.
- endpoint String
- S3 endpoint, e.g. https://s3-us-west-2.amazonaws.com. Either region or endpoint needs to be set. If both are set, the endpoint is used.
- kmsKey String
- KMS key used if encryption is enabled and encryption type is set to sse-kms.
- region String
- S3 region, e.g. us-west-2. Either region or endpoint must be set. If both are set, the endpoint is used.
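As an illustration, a minimal sketch (Python SDK) of a cluster whose init script is read from S3 under an instance profile; the bucket, prefix, instance-profile ARN and region are placeholders:

import pulumi_databricks as databricks

# Hypothetical AWS cluster reading an init script from S3; all identifiers are illustrative.
s3_cluster = databricks.Cluster("s3-init-example",
    spark_version="14.3.x-scala2.12",
    node_type_id="i3.xlarge",
    num_workers=1,
    aws_attributes={
        # Instance profile that can read the script location.
        "instance_profile_arn": "arn:aws:iam::123456789012:instance-profile/databricks-init",
    },
    init_scripts=[{
        "s3": {
            "destination": "s3://my-bucket/some-prefix/install-deps.sh",
            "region": "us-west-2",
        },
    }])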
ClusterInitScriptVolumes, ClusterInitScriptVolumesArgs
- Destination string
- Unity Catalog volume destination, e.g. /Volumes/<catalog>/<schema>/<volume>/<path-to-init-script>.
- Destination string
- Unity Catalog volume destination, e.g. /Volumes/<catalog>/<schema>/<volume>/<path-to-init-script>.
- destination String
- Unity Catalog volume destination, e.g. /Volumes/<catalog>/<schema>/<volume>/<path-to-init-script>.
- destination string
- Unity Catalog volume destination, e.g. /Volumes/<catalog>/<schema>/<volume>/<path-to-init-script>.
- destination str
- Unity Catalog volume destination, e.g. /Volumes/<catalog>/<schema>/<volume>/<path-to-init-script>.
- destination String
- Unity Catalog volume destination, e.g. /Volumes/<catalog>/<schema>/<volume>/<path-to-init-script>.
ClusterInitScriptWorkspace, ClusterInitScriptWorkspaceArgs
- Destination string
- Workspace file destination, e.g. /Users/user@domain.com/install-my-library.sh.
- Destination string
- Workspace file destination, e.g. /Users/user@domain.com/install-my-library.sh.
- destination String
- Workspace file destination, e.g. /Users/user@domain.com/install-my-library.sh.
- destination string
- Workspace file destination, e.g. /Users/user@domain.com/install-my-library.sh.
- destination str
- Workspace file destination, e.g. /Users/user@domain.com/install-my-library.sh.
- destination String
- Workspace file destination, e.g. /Users/user@domain.com/install-my-library.sh.
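As an illustration, a minimal sketch (Python SDK) combining a Unity Catalog volume init script with a workspace-file init script; both paths are placeholders:

import pulumi_databricks as databricks

# Hypothetical cluster with two init scripts: one from a UC volume, one from a workspace file.
uc_cluster = databricks.Cluster("uc-init-example",
    spark_version="14.3.x-scala2.12",
    node_type_id="i3.xlarge",
    num_workers=1,
    init_scripts=[
        {"volumes": {"destination": "/Volumes/main/default/scripts/setup.sh"}},
        {"workspace": {"destination": "/Users/someone@example.com/init/setup.sh"}},
    ])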
ClusterLibrary, ClusterLibraryArgs
- Cran ClusterLibraryCran
- Egg string
- Jar string
- Maven ClusterLibraryMaven
- Pypi ClusterLibraryPypi
- Requirements string
- Whl string
- Cran ClusterLibraryCran
- Egg string
- Jar string
- Maven ClusterLibraryMaven
- Pypi ClusterLibraryPypi
- Requirements string
- Whl string
- cran ClusterLibraryCran
- egg String
- jar String
- maven ClusterLibraryMaven
- pypi ClusterLibraryPypi
- requirements String
- whl String
- cran ClusterLibraryCran
- egg string
- jar string
- maven ClusterLibraryMaven
- pypi ClusterLibraryPypi
- requirements string
- whl string
- cran Property Map
- egg String
- jar String
- maven Property Map
- pypi Property Map
- requirements String
- whl String
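As an illustration, a minimal sketch (Python SDK) attaching one library of each common flavour to a cluster; the PyPI package, Maven coordinates/exclusion and wheel path are placeholders:

import pulumi_databricks as databricks

# Hypothetical cluster with PyPI, Maven and wheel libraries; all artifact names are illustrative.
libs_cluster = databricks.Cluster("libraries-example",
    spark_version="14.3.x-scala2.12",
    node_type_id="i3.xlarge",
    num_workers=1,
    libraries=[
        {"pypi": {"package": "fbprophet==0.6"}},
        {"maven": {
            "coordinates": "com.amazon.deequ:deequ:1.0.4",
            # Exclude a transitive dependency that conflicts with the runtime.
            "exclusions": ["org.apache.avro:avro"],
        }},
        {"whl": "dbfs:/FileStore/jars/my_lib-0.1-py3-none-any.whl"},
    ])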
ClusterLibraryCran, ClusterLibraryCranArgs
ClusterLibraryMaven, ClusterLibraryMavenArgs
- Coordinates string
- Exclusions List<string>
- Repo string
- Coordinates string
- Exclusions []string
- Repo string
- coordinates String
- exclusions List<String>
- repo String
- coordinates string
- exclusions string[]
- repo string
- coordinates str
- exclusions Sequence[str]
- repo str
- coordinates String
- exclusions List<String>
- repo String
ClusterLibraryPypi, ClusterLibraryPypiArgs
ClusterWorkloadType, ClusterWorkloadTypeArgs
ClusterWorkloadTypeClients, ClusterWorkloadTypeClientsArgs
- Jobs bool
- boolean flag defining if it's possible to run Databricks Jobs on this cluster. Default: true.
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const withNfs = new databricks.Cluster("with_nfs", {workloadType: { clients: { jobs: false, notebooks: true, }, }});
import pulumi import pulumi_databricks as databricks with_nfs = databricks.Cluster("with_nfs", workload_type={ "clients": { "jobs": False, "notebooks": True, }, })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var withNfs = new Databricks.Cluster("with_nfs", new() { WorkloadType = new Databricks.Inputs.ClusterWorkloadTypeArgs { Clients = new Databricks.Inputs.ClusterWorkloadTypeClientsArgs { Jobs = false, Notebooks = true, }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { _, err := databricks.NewCluster(ctx, "with_nfs", &databricks.ClusterArgs{ WorkloadType: &databricks.ClusterWorkloadTypeArgs{ Clients: &databricks.ClusterWorkloadTypeClientsArgs{ Jobs: pulumi.Bool(false), Notebooks: pulumi.Bool(true), }, }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterWorkloadTypeArgs; import com.pulumi.databricks.inputs.ClusterWorkloadTypeClientsArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var withNfs = new Cluster("withNfs", ClusterArgs.builder() .workloadType(ClusterWorkloadTypeArgs.builder() .clients(ClusterWorkloadTypeClientsArgs.builder() .jobs(false) .notebooks(true) .build()) .build()) .build()); } }
resources: withNfs: type: databricks:Cluster name: with_nfs properties: workloadType: clients: jobs: false notebooks: true
- Notebooks bool
- boolean flag defining if it's possible to run notebooks on this cluster. Default: true.
- Jobs bool
- boolean flag defining if it's possible to run Databricks Jobs on this cluster. Default: true.
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const withNfs = new databricks.Cluster("with_nfs", {workloadType: { clients: { jobs: false, notebooks: true, }, }});
import pulumi import pulumi_databricks as databricks with_nfs = databricks.Cluster("with_nfs", workload_type={ "clients": { "jobs": False, "notebooks": True, }, })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var withNfs = new Databricks.Cluster("with_nfs", new() { WorkloadType = new Databricks.Inputs.ClusterWorkloadTypeArgs { Clients = new Databricks.Inputs.ClusterWorkloadTypeClientsArgs { Jobs = false, Notebooks = true, }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { _, err := databricks.NewCluster(ctx, "with_nfs", &databricks.ClusterArgs{ WorkloadType: &databricks.ClusterWorkloadTypeArgs{ Clients: &databricks.ClusterWorkloadTypeClientsArgs{ Jobs: pulumi.Bool(false), Notebooks: pulumi.Bool(true), }, }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterWorkloadTypeArgs; import com.pulumi.databricks.inputs.ClusterWorkloadTypeClientsArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var withNfs = new Cluster("withNfs", ClusterArgs.builder() .workloadType(ClusterWorkloadTypeArgs.builder() .clients(ClusterWorkloadTypeClientsArgs.builder() .jobs(false) .notebooks(true) .build()) .build()) .build()); } }
resources: withNfs: type: databricks:Cluster name: with_nfs properties: workloadType: clients: jobs: false notebooks: true
- Notebooks bool
- boolean flag defining if it's possible to run notebooks on this cluster. Default: true.
- jobs Boolean
- boolean flag defining if it's possible to run Databricks Jobs on this cluster. Default: true.
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const withNfs = new databricks.Cluster("with_nfs", {workloadType: { clients: { jobs: false, notebooks: true, }, }});
import pulumi import pulumi_databricks as databricks with_nfs = databricks.Cluster("with_nfs", workload_type={ "clients": { "jobs": False, "notebooks": True, }, })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var withNfs = new Databricks.Cluster("with_nfs", new() { WorkloadType = new Databricks.Inputs.ClusterWorkloadTypeArgs { Clients = new Databricks.Inputs.ClusterWorkloadTypeClientsArgs { Jobs = false, Notebooks = true, }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { _, err := databricks.NewCluster(ctx, "with_nfs", &databricks.ClusterArgs{ WorkloadType: &databricks.ClusterWorkloadTypeArgs{ Clients: &databricks.ClusterWorkloadTypeClientsArgs{ Jobs: pulumi.Bool(false), Notebooks: pulumi.Bool(true), }, }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterWorkloadTypeArgs; import com.pulumi.databricks.inputs.ClusterWorkloadTypeClientsArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var withNfs = new Cluster("withNfs", ClusterArgs.builder() .workloadType(ClusterWorkloadTypeArgs.builder() .clients(ClusterWorkloadTypeClientsArgs.builder() .jobs(false) .notebooks(true) .build()) .build()) .build()); } }
resources: withNfs: type: databricks:Cluster name: with_nfs properties: workloadType: clients: jobs: false notebooks: true
- notebooks Boolean
- boolean flag defining if it's possible to run notebooks on this cluster. Default: true.
- jobs boolean
- boolean flag defining if it's possible to run Databricks Jobs on this cluster. Default: true.
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const withNfs = new databricks.Cluster("with_nfs", {workloadType: { clients: { jobs: false, notebooks: true, }, }});
import pulumi import pulumi_databricks as databricks with_nfs = databricks.Cluster("with_nfs", workload_type={ "clients": { "jobs": False, "notebooks": True, }, })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var withNfs = new Databricks.Cluster("with_nfs", new() { WorkloadType = new Databricks.Inputs.ClusterWorkloadTypeArgs { Clients = new Databricks.Inputs.ClusterWorkloadTypeClientsArgs { Jobs = false, Notebooks = true, }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { _, err := databricks.NewCluster(ctx, "with_nfs", &databricks.ClusterArgs{ WorkloadType: &databricks.ClusterWorkloadTypeArgs{ Clients: &databricks.ClusterWorkloadTypeClientsArgs{ Jobs: pulumi.Bool(false), Notebooks: pulumi.Bool(true), }, }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterWorkloadTypeArgs; import com.pulumi.databricks.inputs.ClusterWorkloadTypeClientsArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var withNfs = new Cluster("withNfs", ClusterArgs.builder() .workloadType(ClusterWorkloadTypeArgs.builder() .clients(ClusterWorkloadTypeClientsArgs.builder() .jobs(false) .notebooks(true) .build()) .build()) .build()); } }
resources: withNfs: type: databricks:Cluster name: with_nfs properties: workloadType: clients: jobs: false notebooks: true
- notebooks boolean
- boolean flag defining if it's possible to run notebooks on this cluster. Default: true.
- jobs bool
- boolean flag defining if it's possible to run Databricks Jobs on this cluster. Default: true.
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const withNfs = new databricks.Cluster("with_nfs", {workloadType: { clients: { jobs: false, notebooks: true, }, }});
import pulumi import pulumi_databricks as databricks with_nfs = databricks.Cluster("with_nfs", workload_type={ "clients": { "jobs": False, "notebooks": True, }, })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var withNfs = new Databricks.Cluster("with_nfs", new() { WorkloadType = new Databricks.Inputs.ClusterWorkloadTypeArgs { Clients = new Databricks.Inputs.ClusterWorkloadTypeClientsArgs { Jobs = false, Notebooks = true, }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { _, err := databricks.NewCluster(ctx, "with_nfs", &databricks.ClusterArgs{ WorkloadType: &databricks.ClusterWorkloadTypeArgs{ Clients: &databricks.ClusterWorkloadTypeClientsArgs{ Jobs: pulumi.Bool(false), Notebooks: pulumi.Bool(true), }, }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterWorkloadTypeArgs; import com.pulumi.databricks.inputs.ClusterWorkloadTypeClientsArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var withNfs = new Cluster("withNfs", ClusterArgs.builder() .workloadType(ClusterWorkloadTypeArgs.builder() .clients(ClusterWorkloadTypeClientsArgs.builder() .jobs(false) .notebooks(true) .build()) .build()) .build()); } }
resources: withNfs: type: databricks:Cluster name: with_nfs properties: workloadType: clients: jobs: false notebooks: true
- notebooks bool
- boolean flag defining if it's possible to run notebooks on this cluster. Default: true.
- jobs Boolean
- boolean flag defining if it's possible to run Databricks Jobs on this cluster. Default: true.
import * as pulumi from "@pulumi/pulumi"; import * as databricks from "@pulumi/databricks";
const withNfs = new databricks.Cluster("with_nfs", {workloadType: { clients: { jobs: false, notebooks: true, }, }});
import pulumi import pulumi_databricks as databricks with_nfs = databricks.Cluster("with_nfs", workload_type={ "clients": { "jobs": False, "notebooks": True, }, })
using System.Collections.Generic; using System.Linq; using Pulumi; using Databricks = Pulumi.Databricks; return await Deployment.RunAsync(() => { var withNfs = new Databricks.Cluster("with_nfs", new() { WorkloadType = new Databricks.Inputs.ClusterWorkloadTypeArgs { Clients = new Databricks.Inputs.ClusterWorkloadTypeClientsArgs { Jobs = false, Notebooks = true, }, }, }); });
package main import ( "github.com/pulumi/pulumi-databricks/sdk/go/databricks" "github.com/pulumi/pulumi/sdk/v3/go/pulumi" ) func main() { pulumi.Run(func(ctx *pulumi.Context) error { _, err := databricks.NewCluster(ctx, "with_nfs", &databricks.ClusterArgs{ WorkloadType: &databricks.ClusterWorkloadTypeArgs{ Clients: &databricks.ClusterWorkloadTypeClientsArgs{ Jobs: pulumi.Bool(false), Notebooks: pulumi.Bool(true), }, }, }) if err != nil { return err } return nil }) }
package generated_program; import com.pulumi.Context; import com.pulumi.Pulumi; import com.pulumi.core.Output; import com.pulumi.databricks.Cluster; import com.pulumi.databricks.ClusterArgs; import com.pulumi.databricks.inputs.ClusterWorkloadTypeArgs; import com.pulumi.databricks.inputs.ClusterWorkloadTypeClientsArgs; import java.util.List; import java.util.ArrayList; import java.util.Map; import java.io.File; import java.nio.file.Files; import java.nio.file.Paths; public class App { public static void main(String[] args) { Pulumi.run(App::stack); } public static void stack(Context ctx) { var withNfs = new Cluster("withNfs", ClusterArgs.builder() .workloadType(ClusterWorkloadTypeArgs.builder() .clients(ClusterWorkloadTypeClientsArgs.builder() .jobs(false) .notebooks(true) .build()) .build()) .build()); } }
resources: withNfs: type: databricks:Cluster name: with_nfs properties: workloadType: clients: jobs: false notebooks: true
- notebooks Boolean
- boolean flag defining if it's possible to run notebooks on this cluster. Default: true.
Package Details
- Repository
- databricks pulumi/pulumi-databricks
- License
- Apache-2.0
- Notes
- This Pulumi package is based on the databricks Terraform Provider.