Packages › AWS › API Docs › glue › Crawler
AWS v6.60.0 published on Tuesday, Nov 19, 2024 by Pulumi

aws.glue.Crawler

Explore with Pulumi AI

[AWS logo]
AWS v6.60.0 published on Tuesday, Nov 19, 2024 by Pulumi

    Manages a Glue Crawler. More information can be found in the AWS Glue Developer Guide.

    Example Usage

    DynamoDB Target Example

    import * as pulumi from "@pulumi/pulumi";
    import * as aws from "@pulumi/aws";
    
    const example = new aws.glue.Crawler("example", {
        databaseName: exampleAwsGlueCatalogDatabase.name,
        name: "example",
        role: exampleAwsIamRole.arn,
        dynamodbTargets: [{
            path: "table-name",
        }],
    });
    
    import pulumi
    import pulumi_aws as aws
    
    example = aws.glue.Crawler("example",
        database_name=example_aws_glue_catalog_database["name"],
        name="example",
        role=example_aws_iam_role["arn"],
        dynamodb_targets=[{
            "path": "table-name",
        }])
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
    			DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
    			Name:         pulumi.String("example"),
    			Role:         pulumi.Any(exampleAwsIamRole.Arn),
    			DynamodbTargets: glue.CrawlerDynamodbTargetArray{
    				&glue.CrawlerDynamodbTargetArgs{
    					Path: pulumi.String("table-name"),
    				},
    			},
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Aws = Pulumi.Aws;
    
    return await Deployment.RunAsync(() => 
    {
        var example = new Aws.Glue.Crawler("example", new()
        {
            DatabaseName = exampleAwsGlueCatalogDatabase.Name,
            Name = "example",
            Role = exampleAwsIamRole.Arn,
            DynamodbTargets = new[]
            {
                new Aws.Glue.Inputs.CrawlerDynamodbTargetArgs
                {
                    Path = "table-name",
                },
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.aws.glue.Crawler;
    import com.pulumi.aws.glue.CrawlerArgs;
    import com.pulumi.aws.glue.inputs.CrawlerDynamodbTargetArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var example = new Crawler("example", CrawlerArgs.builder()
                .databaseName(exampleAwsGlueCatalogDatabase.name())
                .name("example")
                .role(exampleAwsIamRole.arn())
                .dynamodbTargets(CrawlerDynamodbTargetArgs.builder()
                    .path("table-name")
                    .build())
                .build());
    
        }
    }
    
    resources:
      example:
        type: aws:glue:Crawler
        properties:
          databaseName: ${exampleAwsGlueCatalogDatabase.name}
          name: example
          role: ${exampleAwsIamRole.arn}
          dynamodbTargets:
            - path: table-name
    

    JDBC Target Example

    import * as pulumi from "@pulumi/pulumi";
    import * as aws from "@pulumi/aws";
    
    const example = new aws.glue.Crawler("example", {
        databaseName: exampleAwsGlueCatalogDatabase.name,
        name: "example",
        role: exampleAwsIamRole.arn,
        jdbcTargets: [{
            connectionName: exampleAwsGlueConnection.name,
            path: "database-name/%",
        }],
    });
    
    import pulumi
    import pulumi_aws as aws
    
    example = aws.glue.Crawler("example",
        database_name=example_aws_glue_catalog_database["name"],
        name="example",
        role=example_aws_iam_role["arn"],
        jdbc_targets=[{
            "connection_name": example_aws_glue_connection["name"],
            "path": "database-name/%",
        }])
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
    			DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
    			Name:         pulumi.String("example"),
    			Role:         pulumi.Any(exampleAwsIamRole.Arn),
    			JdbcTargets: glue.CrawlerJdbcTargetArray{
    				&glue.CrawlerJdbcTargetArgs{
    					ConnectionName: pulumi.Any(exampleAwsGlueConnection.Name),
    					Path:           pulumi.String("database-name/%"),
    				},
    			},
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Aws = Pulumi.Aws;
    
    return await Deployment.RunAsync(() => 
    {
        var example = new Aws.Glue.Crawler("example", new()
        {
            DatabaseName = exampleAwsGlueCatalogDatabase.Name,
            Name = "example",
            Role = exampleAwsIamRole.Arn,
            JdbcTargets = new[]
            {
                new Aws.Glue.Inputs.CrawlerJdbcTargetArgs
                {
                    ConnectionName = exampleAwsGlueConnection.Name,
                    Path = "database-name/%",
                },
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.aws.glue.Crawler;
    import com.pulumi.aws.glue.CrawlerArgs;
    import com.pulumi.aws.glue.inputs.CrawlerJdbcTargetArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var example = new Crawler("example", CrawlerArgs.builder()
                .databaseName(exampleAwsGlueCatalogDatabase.name())
                .name("example")
                .role(exampleAwsIamRole.arn())
                .jdbcTargets(CrawlerJdbcTargetArgs.builder()
                    .connectionName(exampleAwsGlueConnection.name())
                    .path("database-name/%")
                    .build())
                .build());
    
        }
    }
    
    resources:
      example:
        type: aws:glue:Crawler
        properties:
          databaseName: ${exampleAwsGlueCatalogDatabase.name}
          name: example
          role: ${exampleAwsIamRole.arn}
          jdbcTargets:
            - connectionName: ${exampleAwsGlueConnection.name}
              path: database-name/%
    

    S3 Target Example

    import * as pulumi from "@pulumi/pulumi";
    import * as aws from "@pulumi/aws";
    
    const example = new aws.glue.Crawler("example", {
        databaseName: exampleAwsGlueCatalogDatabase.name,
        name: "example",
        role: exampleAwsIamRole.arn,
        s3Targets: [{
            path: `s3://${exampleAwsS3Bucket.bucket}`,
        }],
    });
    
    import pulumi
    import pulumi_aws as aws
    
    example = aws.glue.Crawler("example",
        database_name=example_aws_glue_catalog_database["name"],
        name="example",
        role=example_aws_iam_role["arn"],
        s3_targets=[{
            "path": f"s3://{example_aws_s3_bucket['bucket']}",
        }])
    
    package main
    
    import (
    	"fmt"
    
    	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
    			DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
    			Name:         pulumi.String("example"),
    			Role:         pulumi.Any(exampleAwsIamRole.Arn),
    			S3Targets: glue.CrawlerS3TargetArray{
    				&glue.CrawlerS3TargetArgs{
    					Path: pulumi.Sprintf("s3://%v", exampleAwsS3Bucket.Bucket),
    				},
    			},
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Aws = Pulumi.Aws;
    
    return await Deployment.RunAsync(() => 
    {
        var example = new Aws.Glue.Crawler("example", new()
        {
            DatabaseName = exampleAwsGlueCatalogDatabase.Name,
            Name = "example",
            Role = exampleAwsIamRole.Arn,
            S3Targets = new[]
            {
                new Aws.Glue.Inputs.CrawlerS3TargetArgs
                {
                    Path = $"s3://{exampleAwsS3Bucket.Bucket}",
                },
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.aws.glue.Crawler;
    import com.pulumi.aws.glue.CrawlerArgs;
    import com.pulumi.aws.glue.inputs.CrawlerS3TargetArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var example = new Crawler("example", CrawlerArgs.builder()
                .databaseName(exampleAwsGlueCatalogDatabase.name())
                .name("example")
                .role(exampleAwsIamRole.arn())
                .s3Targets(CrawlerS3TargetArgs.builder()
                    .path(String.format("s3://%s", exampleAwsS3Bucket.bucket()))
                    .build())
                .build());
    
        }
    }
    
    resources:
      example:
        type: aws:glue:Crawler
        properties:
          databaseName: ${exampleAwsGlueCatalogDatabase.name}
          name: example
          role: ${exampleAwsIamRole.arn}
          s3Targets:
            - path: s3://${exampleAwsS3Bucket.bucket}
    

    Catalog Target Example

    import * as pulumi from "@pulumi/pulumi";
    import * as aws from "@pulumi/aws";
    
    const example = new aws.glue.Crawler("example", {
        databaseName: exampleAwsGlueCatalogDatabase.name,
        name: "example",
        role: exampleAwsIamRole.arn,
        catalogTargets: [{
            databaseName: exampleAwsGlueCatalogDatabase.name,
            tables: [exampleAwsGlueCatalogTable.name],
        }],
        schemaChangePolicy: {
            deleteBehavior: "LOG",
        },
        configuration: `{
      "Version":1.0,
      "Grouping": {
        "TableGroupingPolicy": "CombineCompatibleSchemas"
      }
    }
    `,
    });
    
    import pulumi
    import pulumi_aws as aws
    
    example = aws.glue.Crawler("example",
        database_name=example_aws_glue_catalog_database["name"],
        name="example",
        role=example_aws_iam_role["arn"],
        catalog_targets=[{
            "database_name": example_aws_glue_catalog_database["name"],
            "tables": [example_aws_glue_catalog_table["name"]],
        }],
        schema_change_policy={
            "delete_behavior": "LOG",
        },
        configuration="""{
      "Version":1.0,
      "Grouping": {
        "TableGroupingPolicy": "CombineCompatibleSchemas"
      }
    }
    """)
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
    			DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
    			Name:         pulumi.String("example"),
    			Role:         pulumi.Any(exampleAwsIamRole.Arn),
    			CatalogTargets: glue.CrawlerCatalogTargetArray{
    				&glue.CrawlerCatalogTargetArgs{
    					DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
    					Tables: pulumi.StringArray{
    						exampleAwsGlueCatalogTable.Name,
    					},
    				},
    			},
    			SchemaChangePolicy: &glue.CrawlerSchemaChangePolicyArgs{
    				DeleteBehavior: pulumi.String("LOG"),
    			},
    			Configuration: pulumi.String(`{
      "Version":1.0,
      "Grouping": {
        "TableGroupingPolicy": "CombineCompatibleSchemas"
      }
    }
    `),
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Aws = Pulumi.Aws;
    
    return await Deployment.RunAsync(() => 
    {
        var example = new Aws.Glue.Crawler("example", new()
        {
            DatabaseName = exampleAwsGlueCatalogDatabase.Name,
            Name = "example",
            Role = exampleAwsIamRole.Arn,
            CatalogTargets = new[]
            {
                new Aws.Glue.Inputs.CrawlerCatalogTargetArgs
                {
                    DatabaseName = exampleAwsGlueCatalogDatabase.Name,
                    Tables = new[]
                    {
                        exampleAwsGlueCatalogTable.Name,
                    },
                },
            },
            SchemaChangePolicy = new Aws.Glue.Inputs.CrawlerSchemaChangePolicyArgs
            {
                DeleteBehavior = "LOG",
            },
            Configuration = @"{
      ""Version"":1.0,
      ""Grouping"": {
        ""TableGroupingPolicy"": ""CombineCompatibleSchemas""
      }
    }
    ",
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.aws.glue.Crawler;
    import com.pulumi.aws.glue.CrawlerArgs;
    import com.pulumi.aws.glue.inputs.CrawlerCatalogTargetArgs;
    import com.pulumi.aws.glue.inputs.CrawlerSchemaChangePolicyArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var example = new Crawler("example", CrawlerArgs.builder()
                .databaseName(exampleAwsGlueCatalogDatabase.name())
                .name("example")
                .role(exampleAwsIamRole.arn())
                .catalogTargets(CrawlerCatalogTargetArgs.builder()
                    .databaseName(exampleAwsGlueCatalogDatabase.name())
                    .tables(exampleAwsGlueCatalogTable.name())
                    .build())
                .schemaChangePolicy(CrawlerSchemaChangePolicyArgs.builder()
                    .deleteBehavior("LOG")
                    .build())
                .configuration("""
    {
      "Version":1.0,
      "Grouping": {
        "TableGroupingPolicy": "CombineCompatibleSchemas"
      }
    }
                """)
                .build());
    
        }
    }
    
    resources:
      example:
        type: aws:glue:Crawler
        properties:
          databaseName: ${exampleAwsGlueCatalogDatabase.name}
          name: example
          role: ${exampleAwsIamRole.arn}
          catalogTargets:
            - databaseName: ${exampleAwsGlueCatalogDatabase.name}
              tables:
                - ${exampleAwsGlueCatalogTable.name}
          schemaChangePolicy:
            deleteBehavior: LOG
          configuration: |
            {
              "Version":1.0,
              "Grouping": {
                "TableGroupingPolicy": "CombineCompatibleSchemas"
              }
            }        
    

    MongoDB Target Example

    import * as pulumi from "@pulumi/pulumi";
    import * as aws from "@pulumi/aws";
    
    const example = new aws.glue.Crawler("example", {
        databaseName: exampleAwsGlueCatalogDatabase.name,
        name: "example",
        role: exampleAwsIamRole.arn,
        mongodbTargets: [{
            connectionName: exampleAwsGlueConnection.name,
            path: "database-name/%",
        }],
    });
    
    import pulumi
    import pulumi_aws as aws
    
    example = aws.glue.Crawler("example",
        database_name=example_aws_glue_catalog_database["name"],
        name="example",
        role=example_aws_iam_role["arn"],
        mongodb_targets=[{
            "connection_name": example_aws_glue_connection["name"],
            "path": "database-name/%",
        }])
    
    package main
    
    import (
    	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
    			DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
    			Name:         pulumi.String("example"),
    			Role:         pulumi.Any(exampleAwsIamRole.Arn),
    			MongodbTargets: glue.CrawlerMongodbTargetArray{
    				&glue.CrawlerMongodbTargetArgs{
    					ConnectionName: pulumi.Any(exampleAwsGlueConnection.Name),
    					Path:           pulumi.String("database-name/%"),
    				},
    			},
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using Pulumi;
    using Aws = Pulumi.Aws;
    
    return await Deployment.RunAsync(() => 
    {
        var example = new Aws.Glue.Crawler("example", new()
        {
            DatabaseName = exampleAwsGlueCatalogDatabase.Name,
            Name = "example",
            Role = exampleAwsIamRole.Arn,
            MongodbTargets = new[]
            {
                new Aws.Glue.Inputs.CrawlerMongodbTargetArgs
                {
                    ConnectionName = exampleAwsGlueConnection.Name,
                    Path = "database-name/%",
                },
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.aws.glue.Crawler;
    import com.pulumi.aws.glue.CrawlerArgs;
    import com.pulumi.aws.glue.inputs.CrawlerMongodbTargetArgs;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var example = new Crawler("example", CrawlerArgs.builder()
                .databaseName(exampleAwsGlueCatalogDatabase.name())
                .name("example")
                .role(exampleAwsIamRole.arn())
                .mongodbTargets(CrawlerMongodbTargetArgs.builder()
                    .connectionName(exampleAwsGlueConnection.name())
                    .path("database-name/%")
                    .build())
                .build());
    
        }
    }
    
    resources:
      example:
        type: aws:glue:Crawler
        properties:
          databaseName: ${exampleAwsGlueCatalogDatabase.name}
          name: example
          role: ${exampleAwsIamRole.arn}
          mongodbTargets:
            - connectionName: ${exampleAwsGlueConnection.name}
              path: database-name/%
    

    Configuration Settings Example

    import * as pulumi from "@pulumi/pulumi";
    import * as aws from "@pulumi/aws";
    
    const eventsCrawler = new aws.glue.Crawler("events_crawler", {
        databaseName: glueDatabase.name,
        schedule: "cron(0 1 * * ? *)",
        name: `events_crawler_${environmentName}`,
        role: glueRole.arn,
        tags: tags,
        configuration: JSON.stringify({
            Grouping: {
                TableGroupingPolicy: "CombineCompatibleSchemas",
            },
            CrawlerOutput: {
                Partitions: {
                    AddOrUpdateBehavior: "InheritFromTable",
                },
            },
            Version: 1,
        }),
        s3Targets: [{
            path: `s3://${dataLakeBucket.bucket}`,
        }],
    });
    
    import pulumi
    import json
    import pulumi_aws as aws
    
    events_crawler = aws.glue.Crawler("events_crawler",
        database_name=glue_database["name"],
        schedule="cron(0 1 * * ? *)",
        name=f"events_crawler_{environment_name}",
        role=glue_role["arn"],
        tags=tags,
        configuration=json.dumps({
            "Grouping": {
                "TableGroupingPolicy": "CombineCompatibleSchemas",
            },
            "CrawlerOutput": {
                "Partitions": {
                    "AddOrUpdateBehavior": "InheritFromTable",
                },
            },
            "Version": 1,
        }),
        s3_targets=[{
            "path": f"s3://{data_lake_bucket['bucket']}",
        }])
    
    package main
    
    import (
    	"encoding/json"
    	"fmt"
    
    	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
    	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
    )
    
    func main() {
    	pulumi.Run(func(ctx *pulumi.Context) error {
    		tmpJSON0, err := json.Marshal(map[string]interface{}{
    			"Grouping": map[string]interface{}{
    				"TableGroupingPolicy": "CombineCompatibleSchemas",
    			},
    			"CrawlerOutput": map[string]interface{}{
    				"Partitions": map[string]interface{}{
    					"AddOrUpdateBehavior": "InheritFromTable",
    				},
    			},
    			"Version": 1,
    		})
    		if err != nil {
    			return err
    		}
    		json0 := string(tmpJSON0)
    		_, err = glue.NewCrawler(ctx, "events_crawler", &glue.CrawlerArgs{
    			DatabaseName:  pulumi.Any(glueDatabase.Name),
    			Schedule:      pulumi.String("cron(0 1 * * ? *)"),
    			Name:          pulumi.Sprintf("events_crawler_%v", environmentName),
    			Role:          pulumi.Any(glueRole.Arn),
    			Tags:          pulumi.Any(tags),
    			Configuration: pulumi.String(json0),
    			S3Targets: glue.CrawlerS3TargetArray{
    				&glue.CrawlerS3TargetArgs{
    					Path: pulumi.Sprintf("s3://%v", dataLakeBucket.Bucket),
    				},
    			},
    		})
    		if err != nil {
    			return err
    		}
    		return nil
    	})
    }
    
    using System.Collections.Generic;
    using System.Linq;
    using System.Text.Json;
    using Pulumi;
    using Aws = Pulumi.Aws;
    
    return await Deployment.RunAsync(() => 
    {
        var eventsCrawler = new Aws.Glue.Crawler("events_crawler", new()
        {
            DatabaseName = glueDatabase.Name,
            Schedule = "cron(0 1 * * ? *)",
            Name = $"events_crawler_{environmentName}",
            Role = glueRole.Arn,
            Tags = tags,
            Configuration = JsonSerializer.Serialize(new Dictionary<string, object?>
            {
                ["Grouping"] = new Dictionary<string, object?>
                {
                    ["TableGroupingPolicy"] = "CombineCompatibleSchemas",
                },
                ["CrawlerOutput"] = new Dictionary<string, object?>
                {
                    ["Partitions"] = new Dictionary<string, object?>
                    {
                        ["AddOrUpdateBehavior"] = "InheritFromTable",
                    },
                },
                ["Version"] = 1,
            }),
            S3Targets = new[]
            {
                new Aws.Glue.Inputs.CrawlerS3TargetArgs
                {
                    Path = $"s3://{dataLakeBucket.Bucket}",
                },
            },
        });
    
    });
    
    package generated_program;
    
    import com.pulumi.Context;
    import com.pulumi.Pulumi;
    import com.pulumi.core.Output;
    import com.pulumi.aws.glue.Crawler;
    import com.pulumi.aws.glue.CrawlerArgs;
    import com.pulumi.aws.glue.inputs.CrawlerS3TargetArgs;
    import static com.pulumi.codegen.internal.Serialization.*;
    import java.util.List;
    import java.util.ArrayList;
    import java.util.Map;
    import java.io.File;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    
    public class App {
        public static void main(String[] args) {
            Pulumi.run(App::stack);
        }
    
        public static void stack(Context ctx) {
            var eventsCrawler = new Crawler("eventsCrawler", CrawlerArgs.builder()
                .databaseName(glueDatabase.name())
                .schedule("cron(0 1 * * ? *)")
                .name(String.format("events_crawler_%s", environmentName))
                .role(glueRole.arn())
                .tags(tags)
                .configuration(serializeJson(
                    jsonObject(
                        jsonProperty("Grouping", jsonObject(
                            jsonProperty("TableGroupingPolicy", "CombineCompatibleSchemas")
                        )),
                        jsonProperty("CrawlerOutput", jsonObject(
                            jsonProperty("Partitions", jsonObject(
                                jsonProperty("AddOrUpdateBehavior", "InheritFromTable")
                            ))
                        )),
                        jsonProperty("Version", 1)
                    )))
                .s3Targets(CrawlerS3TargetArgs.builder()
                    .path(String.format("s3://%s", dataLakeBucket.bucket()))
                    .build())
                .build());
    
        }
    }
    
    resources:
      eventsCrawler:
        type: aws:glue:Crawler
        name: events_crawler
        properties:
          databaseName: ${glueDatabase.name}
          schedule: cron(0 1 * * ? *)
          name: events_crawler_${environmentName}
          role: ${glueRole.arn}
          tags: ${tags}
          configuration:
            fn::toJSON:
              Grouping:
                TableGroupingPolicy: CombineCompatibleSchemas
              CrawlerOutput:
                Partitions:
                  AddOrUpdateBehavior: InheritFromTable
              Version: 1
          s3Targets:
            - path: s3://${dataLakeBucket.bucket}
    

    Create Crawler Resource

    Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.

    Constructor syntax

    new Crawler(name: string, args: CrawlerArgs, opts?: CustomResourceOptions);
    @overload
    def Crawler(resource_name: str,
                args: CrawlerArgs,
                opts: Optional[ResourceOptions] = None)
    
    @overload
    def Crawler(resource_name: str,
                opts: Optional[ResourceOptions] = None,
                database_name: Optional[str] = None,
                role: Optional[str] = None,
                description: Optional[str] = None,
                lineage_configuration: Optional[CrawlerLineageConfigurationArgs] = None,
                delta_targets: Optional[Sequence[CrawlerDeltaTargetArgs]] = None,
                catalog_targets: Optional[Sequence[CrawlerCatalogTargetArgs]] = None,
                dynamodb_targets: Optional[Sequence[CrawlerDynamodbTargetArgs]] = None,
                hudi_targets: Optional[Sequence[CrawlerHudiTargetArgs]] = None,
                iceberg_targets: Optional[Sequence[CrawlerIcebergTargetArgs]] = None,
                jdbc_targets: Optional[Sequence[CrawlerJdbcTargetArgs]] = None,
                lake_formation_configuration: Optional[CrawlerLakeFormationConfigurationArgs] = None,
                configuration: Optional[str] = None,
                mongodb_targets: Optional[Sequence[CrawlerMongodbTargetArgs]] = None,
                name: Optional[str] = None,
                recrawl_policy: Optional[CrawlerRecrawlPolicyArgs] = None,
                classifiers: Optional[Sequence[str]] = None,
                s3_targets: Optional[Sequence[CrawlerS3TargetArgs]] = None,
                schedule: Optional[str] = None,
                schema_change_policy: Optional[CrawlerSchemaChangePolicyArgs] = None,
                security_configuration: Optional[str] = None,
                table_prefix: Optional[str] = None,
                tags: Optional[Mapping[str, str]] = None)
    func NewCrawler(ctx *Context, name string, args CrawlerArgs, opts ...ResourceOption) (*Crawler, error)
    public Crawler(string name, CrawlerArgs args, CustomResourceOptions? opts = null)
    public Crawler(String name, CrawlerArgs args)
    public Crawler(String name, CrawlerArgs args, CustomResourceOptions options)
    
    type: aws:glue:Crawler
    properties: # The arguments to resource properties.
    options: # Bag of options to control resource's behavior.
    
    

    Parameters

    name string
    The unique name of the resource.
    args CrawlerArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    resource_name str
    The unique name of the resource.
    args CrawlerArgs
    The arguments to resource properties.
    opts ResourceOptions
    Bag of options to control resource's behavior.
    ctx Context
    Context object for the current deployment.
    name string
    The unique name of the resource.
    args CrawlerArgs
    The arguments to resource properties.
    opts ResourceOption
    Bag of options to control resource's behavior.
    name string
    The unique name of the resource.
    args CrawlerArgs
    The arguments to resource properties.
    opts CustomResourceOptions
    Bag of options to control resource's behavior.
    name String
    The unique name of the resource.
    args CrawlerArgs
    The arguments to resource properties.
    options CustomResourceOptions
    Bag of options to control resource's behavior.

    Constructor example

    The following reference example uses placeholder values for all input properties.

    var crawlerResource = new Aws.Glue.Crawler("crawlerResource", new()
    {
        DatabaseName = "string",
        Role = "string",
        Description = "string",
        LineageConfiguration = new Aws.Glue.Inputs.CrawlerLineageConfigurationArgs
        {
            CrawlerLineageSettings = "string",
        },
        DeltaTargets = new[]
        {
            new Aws.Glue.Inputs.CrawlerDeltaTargetArgs
            {
                DeltaTables = new[]
                {
                    "string",
                },
                WriteManifest = false,
                ConnectionName = "string",
                CreateNativeDeltaTable = false,
            },
        },
        CatalogTargets = new[]
        {
            new Aws.Glue.Inputs.CrawlerCatalogTargetArgs
            {
                DatabaseName = "string",
                Tables = new[]
                {
                    "string",
                },
                ConnectionName = "string",
                DlqEventQueueArn = "string",
                EventQueueArn = "string",
            },
        },
        DynamodbTargets = new[]
        {
            new Aws.Glue.Inputs.CrawlerDynamodbTargetArgs
            {
                Path = "string",
                ScanAll = false,
                ScanRate = 0,
            },
        },
        HudiTargets = new[]
        {
            new Aws.Glue.Inputs.CrawlerHudiTargetArgs
            {
                MaximumTraversalDepth = 0,
                Paths = new[]
                {
                    "string",
                },
                ConnectionName = "string",
                Exclusions = new[]
                {
                    "string",
                },
            },
        },
        IcebergTargets = new[]
        {
            new Aws.Glue.Inputs.CrawlerIcebergTargetArgs
            {
                MaximumTraversalDepth = 0,
                Paths = new[]
                {
                    "string",
                },
                ConnectionName = "string",
                Exclusions = new[]
                {
                    "string",
                },
            },
        },
        JdbcTargets = new[]
        {
            new Aws.Glue.Inputs.CrawlerJdbcTargetArgs
            {
                ConnectionName = "string",
                Path = "string",
                EnableAdditionalMetadatas = new[]
                {
                    "string",
                },
                Exclusions = new[]
                {
                    "string",
                },
            },
        },
        LakeFormationConfiguration = new Aws.Glue.Inputs.CrawlerLakeFormationConfigurationArgs
        {
            AccountId = "string",
            UseLakeFormationCredentials = false,
        },
        Configuration = "string",
        MongodbTargets = new[]
        {
            new Aws.Glue.Inputs.CrawlerMongodbTargetArgs
            {
                ConnectionName = "string",
                Path = "string",
                ScanAll = false,
            },
        },
        Name = "string",
        RecrawlPolicy = new Aws.Glue.Inputs.CrawlerRecrawlPolicyArgs
        {
            RecrawlBehavior = "string",
        },
        Classifiers = new[]
        {
            "string",
        },
        S3Targets = new[]
        {
            new Aws.Glue.Inputs.CrawlerS3TargetArgs
            {
                Path = "string",
                ConnectionName = "string",
                DlqEventQueueArn = "string",
                EventQueueArn = "string",
                Exclusions = new[]
                {
                    "string",
                },
                SampleSize = 0,
            },
        },
        Schedule = "string",
        SchemaChangePolicy = new Aws.Glue.Inputs.CrawlerSchemaChangePolicyArgs
        {
            DeleteBehavior = "string",
            UpdateBehavior = "string",
        },
        SecurityConfiguration = "string",
        TablePrefix = "string",
        Tags = 
        {
            { "string", "string" },
        },
    });
    
    example, err := glue.NewCrawler(ctx, "crawlerResource", &glue.CrawlerArgs{
    	DatabaseName: pulumi.String("string"),
    	Role:         pulumi.String("string"),
    	Description:  pulumi.String("string"),
    	LineageConfiguration: &glue.CrawlerLineageConfigurationArgs{
    		CrawlerLineageSettings: pulumi.String("string"),
    	},
    	DeltaTargets: glue.CrawlerDeltaTargetArray{
    		&glue.CrawlerDeltaTargetArgs{
    			DeltaTables: pulumi.StringArray{
    				pulumi.String("string"),
    			},
    			WriteManifest:          pulumi.Bool(false),
    			ConnectionName:         pulumi.String("string"),
    			CreateNativeDeltaTable: pulumi.Bool(false),
    		},
    	},
    	CatalogTargets: glue.CrawlerCatalogTargetArray{
    		&glue.CrawlerCatalogTargetArgs{
    			DatabaseName: pulumi.String("string"),
    			Tables: pulumi.StringArray{
    				pulumi.String("string"),
    			},
    			ConnectionName:   pulumi.String("string"),
    			DlqEventQueueArn: pulumi.String("string"),
    			EventQueueArn:    pulumi.String("string"),
    		},
    	},
    	DynamodbTargets: glue.CrawlerDynamodbTargetArray{
    		&glue.CrawlerDynamodbTargetArgs{
    			Path:     pulumi.String("string"),
    			ScanAll:  pulumi.Bool(false),
    			ScanRate: pulumi.Float64(0),
    		},
    	},
    	HudiTargets: glue.CrawlerHudiTargetArray{
    		&glue.CrawlerHudiTargetArgs{
    			MaximumTraversalDepth: pulumi.Int(0),
    			Paths: pulumi.StringArray{
    				pulumi.String("string"),
    			},
    			ConnectionName: pulumi.String("string"),
    			Exclusions: pulumi.StringArray{
    				pulumi.String("string"),
    			},
    		},
    	},
    	IcebergTargets: glue.CrawlerIcebergTargetArray{
    		&glue.CrawlerIcebergTargetArgs{
    			MaximumTraversalDepth: pulumi.Int(0),
    			Paths: pulumi.StringArray{
    				pulumi.String("string"),
    			},
    			ConnectionName: pulumi.String("string"),
    			Exclusions: pulumi.StringArray{
    				pulumi.String("string"),
    			},
    		},
    	},
    	JdbcTargets: glue.CrawlerJdbcTargetArray{
    		&glue.CrawlerJdbcTargetArgs{
    			ConnectionName: pulumi.String("string"),
    			Path:           pulumi.String("string"),
    			EnableAdditionalMetadatas: pulumi.StringArray{
    				pulumi.String("string"),
    			},
    			Exclusions: pulumi.StringArray{
    				pulumi.String("string"),
    			},
    		},
    	},
    	LakeFormationConfiguration: &glue.CrawlerLakeFormationConfigurationArgs{
    		AccountId:                   pulumi.String("string"),
    		UseLakeFormationCredentials: pulumi.Bool(false),
    	},
    	Configuration: pulumi.String("string"),
    	MongodbTargets: glue.CrawlerMongodbTargetArray{
    		&glue.CrawlerMongodbTargetArgs{
    			ConnectionName: pulumi.String("string"),
    			Path:           pulumi.String("string"),
    			ScanAll:        pulumi.Bool(false),
    		},
    	},
    	Name: pulumi.String("string"),
    	RecrawlPolicy: &glue.CrawlerRecrawlPolicyArgs{
    		RecrawlBehavior: pulumi.String("string"),
    	},
    	Classifiers: pulumi.StringArray{
    		pulumi.String("string"),
    	},
    	S3Targets: glue.CrawlerS3TargetArray{
    		&glue.CrawlerS3TargetArgs{
    			Path:             pulumi.String("string"),
    			ConnectionName:   pulumi.String("string"),
    			DlqEventQueueArn: pulumi.String("string"),
    			EventQueueArn:    pulumi.String("string"),
    			Exclusions: pulumi.StringArray{
    				pulumi.String("string"),
    			},
    			SampleSize: pulumi.Int(0),
    		},
    	},
    	Schedule: pulumi.String("string"),
    	SchemaChangePolicy: &glue.CrawlerSchemaChangePolicyArgs{
    		DeleteBehavior: pulumi.String("string"),
    		UpdateBehavior: pulumi.String("string"),
    	},
    	SecurityConfiguration: pulumi.String("string"),
    	TablePrefix:           pulumi.String("string"),
    	Tags: pulumi.StringMap{
    		"string": pulumi.String("string"),
    	},
    })
    
    var crawlerResource = new Crawler("crawlerResource", CrawlerArgs.builder()
        .databaseName("string")
        .role("string")
        .description("string")
        .lineageConfiguration(CrawlerLineageConfigurationArgs.builder()
            .crawlerLineageSettings("string")
            .build())
        .deltaTargets(CrawlerDeltaTargetArgs.builder()
            .deltaTables("string")
            .writeManifest(false)
            .connectionName("string")
            .createNativeDeltaTable(false)
            .build())
        .catalogTargets(CrawlerCatalogTargetArgs.builder()
            .databaseName("string")
            .tables("string")
            .connectionName("string")
            .dlqEventQueueArn("string")
            .eventQueueArn("string")
            .build())
        .dynamodbTargets(CrawlerDynamodbTargetArgs.builder()
            .path("string")
            .scanAll(false)
            .scanRate(0)
            .build())
        .hudiTargets(CrawlerHudiTargetArgs.builder()
            .maximumTraversalDepth(0)
            .paths("string")
            .connectionName("string")
            .exclusions("string")
            .build())
        .icebergTargets(CrawlerIcebergTargetArgs.builder()
            .maximumTraversalDepth(0)
            .paths("string")
            .connectionName("string")
            .exclusions("string")
            .build())
        .jdbcTargets(CrawlerJdbcTargetArgs.builder()
            .connectionName("string")
            .path("string")
            .enableAdditionalMetadatas("string")
            .exclusions("string")
            .build())
        .lakeFormationConfiguration(CrawlerLakeFormationConfigurationArgs.builder()
            .accountId("string")
            .useLakeFormationCredentials(false)
            .build())
        .configuration("string")
        .mongodbTargets(CrawlerMongodbTargetArgs.builder()
            .connectionName("string")
            .path("string")
            .scanAll(false)
            .build())
        .name("string")
        .recrawlPolicy(CrawlerRecrawlPolicyArgs.builder()
            .recrawlBehavior("string")
            .build())
        .classifiers("string")
        .s3Targets(CrawlerS3TargetArgs.builder()
            .path("string")
            .connectionName("string")
            .dlqEventQueueArn("string")
            .eventQueueArn("string")
            .exclusions("string")
            .sampleSize(0)
            .build())
        .schedule("string")
        .schemaChangePolicy(CrawlerSchemaChangePolicyArgs.builder()
            .deleteBehavior("string")
            .updateBehavior("string")
            .build())
        .securityConfiguration("string")
        .tablePrefix("string")
        .tags(Map.of("string", "string"))
        .build());
    
    crawler_resource = aws.glue.Crawler("crawlerResource",
        database_name="string",
        role="string",
        description="string",
        lineage_configuration={
            "crawler_lineage_settings": "string",
        },
        delta_targets=[{
            "delta_tables": ["string"],
            "write_manifest": False,
            "connection_name": "string",
            "create_native_delta_table": False,
        }],
        catalog_targets=[{
            "database_name": "string",
            "tables": ["string"],
            "connection_name": "string",
            "dlq_event_queue_arn": "string",
            "event_queue_arn": "string",
        }],
        dynamodb_targets=[{
            "path": "string",
            "scan_all": False,
            "scan_rate": 0,
        }],
        hudi_targets=[{
            "maximum_traversal_depth": 0,
            "paths": ["string"],
            "connection_name": "string",
            "exclusions": ["string"],
        }],
        iceberg_targets=[{
            "maximum_traversal_depth": 0,
            "paths": ["string"],
            "connection_name": "string",
            "exclusions": ["string"],
        }],
        jdbc_targets=[{
            "connection_name": "string",
            "path": "string",
            "enable_additional_metadatas": ["string"],
            "exclusions": ["string"],
        }],
        lake_formation_configuration={
            "account_id": "string",
            "use_lake_formation_credentials": False,
        },
        configuration="string",
        mongodb_targets=[{
            "connection_name": "string",
            "path": "string",
            "scan_all": False,
        }],
        name="string",
        recrawl_policy={
            "recrawl_behavior": "string",
        },
        classifiers=["string"],
        s3_targets=[{
            "path": "string",
            "connection_name": "string",
            "dlq_event_queue_arn": "string",
            "event_queue_arn": "string",
            "exclusions": ["string"],
            "sample_size": 0,
        }],
        schedule="string",
        schema_change_policy={
            "delete_behavior": "string",
            "update_behavior": "string",
        },
        security_configuration="string",
        table_prefix="string",
        tags={
            "string": "string",
        })
    
    const crawlerResource = new aws.glue.Crawler("crawlerResource", {
        databaseName: "string",
        role: "string",
        description: "string",
        lineageConfiguration: {
            crawlerLineageSettings: "string",
        },
        deltaTargets: [{
            deltaTables: ["string"],
            writeManifest: false,
            connectionName: "string",
            createNativeDeltaTable: false,
        }],
        catalogTargets: [{
            databaseName: "string",
            tables: ["string"],
            connectionName: "string",
            dlqEventQueueArn: "string",
            eventQueueArn: "string",
        }],
        dynamodbTargets: [{
            path: "string",
            scanAll: false,
            scanRate: 0,
        }],
        hudiTargets: [{
            maximumTraversalDepth: 0,
            paths: ["string"],
            connectionName: "string",
            exclusions: ["string"],
        }],
        icebergTargets: [{
            maximumTraversalDepth: 0,
            paths: ["string"],
            connectionName: "string",
            exclusions: ["string"],
        }],
        jdbcTargets: [{
            connectionName: "string",
            path: "string",
            enableAdditionalMetadatas: ["string"],
            exclusions: ["string"],
        }],
        lakeFormationConfiguration: {
            accountId: "string",
            useLakeFormationCredentials: false,
        },
        configuration: "string",
        mongodbTargets: [{
            connectionName: "string",
            path: "string",
            scanAll: false,
        }],
        name: "string",
        recrawlPolicy: {
            recrawlBehavior: "string",
        },
        classifiers: ["string"],
        s3Targets: [{
            path: "string",
            connectionName: "string",
            dlqEventQueueArn: "string",
            eventQueueArn: "string",
            exclusions: ["string"],
            sampleSize: 0,
        }],
        schedule: "string",
        schemaChangePolicy: {
            deleteBehavior: "string",
            updateBehavior: "string",
        },
        securityConfiguration: "string",
        tablePrefix: "string",
        tags: {
            string: "string",
        },
    });
    
    type: aws:glue:Crawler
    properties:
        catalogTargets:
            - connectionName: string
              databaseName: string
              dlqEventQueueArn: string
              eventQueueArn: string
              tables:
                - string
        classifiers:
            - string
        configuration: string
        databaseName: string
        deltaTargets:
            - connectionName: string
              createNativeDeltaTable: false
              deltaTables:
                - string
              writeManifest: false
        description: string
        dynamodbTargets:
            - path: string
              scanAll: false
              scanRate: 0
        hudiTargets:
            - connectionName: string
              exclusions:
                - string
              maximumTraversalDepth: 0
              paths:
                - string
        icebergTargets:
            - connectionName: string
              exclusions:
                - string
              maximumTraversalDepth: 0
              paths:
                - string
        jdbcTargets:
            - connectionName: string
              enableAdditionalMetadatas:
                - string
              exclusions:
                - string
              path: string
        lakeFormationConfiguration:
            accountId: string
            useLakeFormationCredentials: false
        lineageConfiguration:
            crawlerLineageSettings: string
        mongodbTargets:
            - connectionName: string
              path: string
              scanAll: false
        name: string
        recrawlPolicy:
            recrawlBehavior: string
        role: string
        s3Targets:
            - connectionName: string
              dlqEventQueueArn: string
              eventQueueArn: string
              exclusions:
                - string
              path: string
              sampleSize: 0
        schedule: string
        schemaChangePolicy:
            deleteBehavior: string
            updateBehavior: string
        securityConfiguration: string
        tablePrefix: string
        tags:
            string: string
    

    Crawler Resource Properties

    To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.

    Inputs

    In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.

    The Crawler resource accepts the following input properties:

    DatabaseName string
    Glue database where results are written.
    Role string
    The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
    CatalogTargets List<CrawlerCatalogTarget>
    List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
    Classifiers List<string>
    List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
    Configuration string
    JSON string of configuration information. For more details see Setting Crawler Configuration Options.
    DeltaTargets List<CrawlerDeltaTarget>
    List of nested Delta Lake target arguments. See Delta Target below.
    Description string
    Description of the crawler.
    DynamodbTargets List<CrawlerDynamodbTarget>
    List of nested DynamoDB target arguments. See Dynamodb Target below.
    HudiTargets List<CrawlerHudiTarget>
    List of nested Hudi target arguments. See Hudi Target below.
    IcebergTargets List<CrawlerIcebergTarget>
    List of nested Iceberg target arguments. See Iceberg Target below.
    JdbcTargets List<CrawlerJdbcTarget>
    List of nested JDBC target arguments. See JDBC Target below.
    LakeFormationConfiguration CrawlerLakeFormationConfiguration
    Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
    LineageConfiguration CrawlerLineageConfiguration
    Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
    MongodbTargets List<CrawlerMongodbTarget>
    List of nested MongoDB target arguments. See MongoDB Target below.
    Name string
    Name of the crawler.
    RecrawlPolicy CrawlerRecrawlPolicy
    A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
    S3Targets List<CrawlerS3Target>
    List of nested Amazon S3 target arguments. See S3 Target below.
    Schedule string
    A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
    SchemaChangePolicy CrawlerSchemaChangePolicy
    Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
    SecurityConfiguration string
    The name of the Security Configuration to be used by the crawler.
    TablePrefix string
    The table prefix used for catalog tables that are created.
    Tags Dictionary<string, string>
    Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider level.
    DatabaseName string
    Glue database where results are written.
    Role string
    The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
    CatalogTargets []CrawlerCatalogTargetArgs
    List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
    Classifiers []string
    List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
    Configuration string
    JSON string of configuration information. For more details see Setting Crawler Configuration Options.
    DeltaTargets []CrawlerDeltaTargetArgs
    List of nested Delta Lake target arguments. See Delta Target below.
    Description string
    Description of the crawler.
    DynamodbTargets []CrawlerDynamodbTargetArgs
    List of nested DynamoDB target arguments. See Dynamodb Target below.
    HudiTargets []CrawlerHudiTargetArgs
    List of nested Hudi target arguments. See Hudi Target below.
    IcebergTargets []CrawlerIcebergTargetArgs
    List of nested Iceberg target arguments. See Iceberg Target below.
    JdbcTargets []CrawlerJdbcTargetArgs
    List of nested JDBC target arguments. See JDBC Target below.
    LakeFormationConfiguration CrawlerLakeFormationConfigurationArgs
    Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
    LineageConfiguration CrawlerLineageConfigurationArgs
    Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
    MongodbTargets []CrawlerMongodbTargetArgs
    List of nested MongoDB target arguments. See MongoDB Target below.
    Name string
    Name of the crawler.
    RecrawlPolicy CrawlerRecrawlPolicyArgs
    A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
    S3Targets []CrawlerS3TargetArgs
    List of nested Amazon S3 target arguments. See S3 Target below.
    Schedule string
    A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
    SchemaChangePolicy CrawlerSchemaChangePolicyArgs
    Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
    SecurityConfiguration string
    The name of the Security Configuration to be used by the crawler.
    TablePrefix string
    The table prefix used for catalog tables that are created.
    Tags map[string]string
    Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider level.
    databaseName String
    Glue database where results are written.
    role String
    The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
    catalogTargets List<CrawlerCatalogTarget>
    List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
    classifiers List<String>
    List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
    configuration String
    JSON string of configuration information. For more details see Setting Crawler Configuration Options.
    deltaTargets List<CrawlerDeltaTarget>
    List of nested Delta Lake target arguments. See Delta Target below.
    description String
    Description of the crawler.
    dynamodbTargets List<CrawlerDynamodbTarget>
    List of nested DynamoDB target arguments. See Dynamodb Target below.
    hudiTargets List<CrawlerHudiTarget>
    List of nested Hudi target arguments. See Hudi Target below.
    icebergTargets List<CrawlerIcebergTarget>
    List of nested Iceberg target arguments. See Iceberg Target below.
    jdbcTargets List<CrawlerJdbcTarget>
    List of nested JDBC target arguments. See JDBC Target below.
    lakeFormationConfiguration CrawlerLakeFormationConfiguration
    Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
    lineageConfiguration CrawlerLineageConfiguration
    Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
    mongodbTargets List<CrawlerMongodbTarget>
    List of nested MongoDB target arguments. See MongoDB Target below.
    name String
    Name of the crawler.
    recrawlPolicy CrawlerRecrawlPolicy
    A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
    s3Targets List<CrawlerS3Target>
    List of nested Amazon S3 target arguments. See S3 Target below.
    schedule String
    A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
    schemaChangePolicy CrawlerSchemaChangePolicy
    Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
    securityConfiguration String
    The name of the Security Configuration to be used by the crawler.
    tablePrefix String
    The table prefix used for catalog tables that are created.
    tags Map<String,String>
    Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider level.
    databaseName string
    Glue database where results are written.
    role string
    The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
    catalogTargets CrawlerCatalogTarget[]
    List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
    classifiers string[]
    List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
    configuration string
    JSON string of configuration information. For more details see Setting Crawler Configuration Options.
    deltaTargets CrawlerDeltaTarget[]
    List of nested Delta Lake target arguments. See Delta Target below.
    description string
    Description of the crawler.
    dynamodbTargets CrawlerDynamodbTarget[]
    List of nested DynamoDB target arguments. See Dynamodb Target below.
    hudiTargets CrawlerHudiTarget[]
    List of nested Hudi target arguments. See Hudi Target below.
    icebergTargets CrawlerIcebergTarget[]
    List of nested Iceberg target arguments. See Iceberg Target below.
    jdbcTargets CrawlerJdbcTarget[]
    List of nested JDBC target arguments. See JDBC Target below.
    lakeFormationConfiguration CrawlerLakeFormationConfiguration
    Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
    lineageConfiguration CrawlerLineageConfiguration
    Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
    mongodbTargets CrawlerMongodbTarget[]
    List of nested MongoDB target arguments. See MongoDB Target below.
    name string
    Name of the crawler.
    recrawlPolicy CrawlerRecrawlPolicy
    A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
    s3Targets CrawlerS3Target[]
    List of nested Amazon S3 target arguments. See S3 Target below.
    schedule string
    A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
    schemaChangePolicy CrawlerSchemaChangePolicy
    Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
    securityConfiguration string
    The name of the Security Configuration to be used by the crawler.
    tablePrefix string
    The table prefix used for catalog tables that are created.
    tags {[key: string]: string}
    Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider level.
    database_name str
    Glue database where results are written.
    role str
    The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
    catalog_targets Sequence[CrawlerCatalogTargetArgs]
    List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
    classifiers Sequence[str]
    List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
    configuration str
    JSON string of configuration information. For more details see Setting Crawler Configuration Options.
    delta_targets Sequence[CrawlerDeltaTargetArgs]
    List of nested Delta Lake target arguments. See Delta Target below.
    description str
    Description of the crawler.
    dynamodb_targets Sequence[CrawlerDynamodbTargetArgs]
    List of nested DynamoDB target arguments. See Dynamodb Target below.
    hudi_targets Sequence[CrawlerHudiTargetArgs]
    List of nested Hudi target arguments. See Hudi Target below.
    iceberg_targets Sequence[CrawlerIcebergTargetArgs]
    List of nested Iceberg target arguments. See Iceberg Target below.
    jdbc_targets Sequence[CrawlerJdbcTargetArgs]
    List of nested JDBC target arguments. See JDBC Target below.
    lake_formation_configuration CrawlerLakeFormationConfigurationArgs
    Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
    lineage_configuration CrawlerLineageConfigurationArgs
    Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
    mongodb_targets Sequence[CrawlerMongodbTargetArgs]
    List of nested MongoDB target arguments. See MongoDB Target below.
    name str
    Name of the crawler.
    recrawl_policy CrawlerRecrawlPolicyArgs
    A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
    s3_targets Sequence[CrawlerS3TargetArgs]
    List of nested Amazon S3 target arguments. See S3 Target below.
    schedule str
    A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
    schema_change_policy CrawlerSchemaChangePolicyArgs
    Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
    security_configuration str
    The name of the Security Configuration to be used by the crawler.
    table_prefix str
    The table prefix used for catalog tables that are created.
    tags Mapping[str, str]
    Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider level.
    databaseName String
    Glue database where results are written.
    role String
    The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
    catalogTargets List<Property Map>
    List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
    classifiers List<String>
    List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
    configuration String
    JSON string of configuration information. For more details see Setting Crawler Configuration Options.
    deltaTargets List<Property Map>
    List of nested Delta Lake target arguments. See Delta Target below.
    description String
    Description of the crawler.
    dynamodbTargets List<Property Map>
    List of nested DynamoDB target arguments. See Dynamodb Target below.
    hudiTargets List<Property Map>
    List of nested Hudi target arguments. See Hudi Target below.
    icebergTargets List<Property Map>
    List of nested Iceberg target arguments. See Iceberg Target below.
    jdbcTargets List<Property Map>
    List of nested JDBC target arguments. See JDBC Target below.
    lakeFormationConfiguration Property Map
    Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
    lineageConfiguration Property Map
    Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
    mongodbTargets List<Property Map>
    List of nested MongoDB target arguments. See MongoDB Target below.
    name String
    Name of the crawler.
    recrawlPolicy Property Map
    A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
    s3Targets List<Property Map>
    List of nested Amazon S3 target arguments. See S3 Target below.
    schedule String
    A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
    schemaChangePolicy Property Map
    Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
    securityConfiguration String
    The name of the Security Configuration to be used by the crawler.
    tablePrefix String
    The table prefix used for catalog tables that are created.
    tags Map<String>
    Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.

    Outputs

    All input properties are implicitly available as output properties. Additionally, the Crawler resource produces the following output properties:

    Arn string
    The ARN of the crawler
    Id string
    The provider-assigned unique ID for this managed resource.
    TagsAll Dictionary<string, string>
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    Arn string
    The ARN of the crawler
    Id string
    The provider-assigned unique ID for this managed resource.
    TagsAll map[string]string
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    arn String
    The ARN of the crawler
    id String
    The provider-assigned unique ID for this managed resource.
    tagsAll Map<String,String>
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    arn string
    The ARN of the crawler
    id string
    The provider-assigned unique ID for this managed resource.
    tagsAll {[key: string]: string}
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    arn str
    The ARN of the crawler
    id str
    The provider-assigned unique ID for this managed resource.
    tags_all Mapping[str, str]
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    arn String
    The ARN of the crawler
    id String
    The provider-assigned unique ID for this managed resource.
    tagsAll Map<String>
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    Look up Existing Crawler Resource

    Get an existing Crawler resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

    public static get(name: string, id: Input<ID>, state?: CrawlerState, opts?: CustomResourceOptions): Crawler
    @staticmethod
    def get(resource_name: str,
            id: str,
            opts: Optional[ResourceOptions] = None,
            arn: Optional[str] = None,
            catalog_targets: Optional[Sequence[CrawlerCatalogTargetArgs]] = None,
            classifiers: Optional[Sequence[str]] = None,
            configuration: Optional[str] = None,
            database_name: Optional[str] = None,
            delta_targets: Optional[Sequence[CrawlerDeltaTargetArgs]] = None,
            description: Optional[str] = None,
            dynamodb_targets: Optional[Sequence[CrawlerDynamodbTargetArgs]] = None,
            hudi_targets: Optional[Sequence[CrawlerHudiTargetArgs]] = None,
            iceberg_targets: Optional[Sequence[CrawlerIcebergTargetArgs]] = None,
            jdbc_targets: Optional[Sequence[CrawlerJdbcTargetArgs]] = None,
            lake_formation_configuration: Optional[CrawlerLakeFormationConfigurationArgs] = None,
            lineage_configuration: Optional[CrawlerLineageConfigurationArgs] = None,
            mongodb_targets: Optional[Sequence[CrawlerMongodbTargetArgs]] = None,
            name: Optional[str] = None,
            recrawl_policy: Optional[CrawlerRecrawlPolicyArgs] = None,
            role: Optional[str] = None,
            s3_targets: Optional[Sequence[CrawlerS3TargetArgs]] = None,
            schedule: Optional[str] = None,
            schema_change_policy: Optional[CrawlerSchemaChangePolicyArgs] = None,
            security_configuration: Optional[str] = None,
            table_prefix: Optional[str] = None,
            tags: Optional[Mapping[str, str]] = None,
            tags_all: Optional[Mapping[str, str]] = None) -> Crawler
    func GetCrawler(ctx *Context, name string, id IDInput, state *CrawlerState, opts ...ResourceOption) (*Crawler, error)
    public static Crawler Get(string name, Input<string> id, CrawlerState? state, CustomResourceOptions? opts = null)
    public static Crawler get(String name, Output<String> id, CrawlerState state, CustomResourceOptions options)
    Resource lookup is not supported in YAML
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    resource_name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    name
    The unique name of the resulting resource.
    id
    The unique provider ID of the resource to lookup.
    state
    Any extra arguments used during the lookup.
    opts
    A bag of options that control this resource's behavior.
    The following state arguments are supported:
    Arn string
    The ARN of the crawler
    CatalogTargets List<CrawlerCatalogTarget>
    List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
    Classifiers List<string>
    List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
    Configuration string
    JSON string of configuration information. For more details see Setting Crawler Configuration Options.
    DatabaseName string
    Glue database where results are written.
    DeltaTargets List<CrawlerDeltaTarget>
    List of nested Delta Lake target arguments. See Delta Target below.
    Description string
    Description of the crawler.
    DynamodbTargets List<CrawlerDynamodbTarget>
    List of nested DynamoDB target arguments. See Dynamodb Target below.
    HudiTargets List<CrawlerHudiTarget>
    List of nested Hudi target arguments. See Hudi Target below.
    IcebergTargets List<CrawlerIcebergTarget>
    List of nested Iceberg target arguments. See Iceberg Target below.
    JdbcTargets List<CrawlerJdbcTarget>
    List of nested JDBC target arguments. See JDBC Target below.
    LakeFormationConfiguration CrawlerLakeFormationConfiguration
    Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
    LineageConfiguration CrawlerLineageConfiguration
    Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
    MongodbTargets List<CrawlerMongodbTarget>
    List of nested MongoDB target arguments. See MongoDB Target below.
    Name string
    Name of the crawler.
    RecrawlPolicy CrawlerRecrawlPolicy
    A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
    Role string
    The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
    S3Targets List<CrawlerS3Target>
    List of nested Amazon S3 target arguments. See S3 Target below.
    Schedule string
    A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
    SchemaChangePolicy CrawlerSchemaChangePolicy
    Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
    SecurityConfiguration string
    The name of the Security Configuration to be used by the crawler.
    TablePrefix string
    The table prefix used for catalog tables that are created.
    Tags Dictionary<string, string>
    Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    TagsAll Dictionary<string, string>
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    Arn string
    The ARN of the crawler
    CatalogTargets []CrawlerCatalogTargetArgs
    List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
    Classifiers []string
    List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
    Configuration string
    JSON string of configuration information. For more details see Setting Crawler Configuration Options.
    DatabaseName string
    Glue database where results are written.
    DeltaTargets []CrawlerDeltaTargetArgs
    List of nested Delta Lake target arguments. See Delta Target below.
    Description string
    Description of the crawler.
    DynamodbTargets []CrawlerDynamodbTargetArgs
    List of nested DynamoDB target arguments. See Dynamodb Target below.
    HudiTargets []CrawlerHudiTargetArgs
    List of nested Hudi target arguments. See Hudi Target below.
    IcebergTargets []CrawlerIcebergTargetArgs
    List of nested Iceberg target arguments. See Iceberg Target below.
    JdbcTargets []CrawlerJdbcTargetArgs
    List of nested JDBC target arguments. See JDBC Target below.
    LakeFormationConfiguration CrawlerLakeFormationConfigurationArgs
    Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
    LineageConfiguration CrawlerLineageConfigurationArgs
    Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
    MongodbTargets []CrawlerMongodbTargetArgs
    List of nested MongoDB target arguments. See MongoDB Target below.
    Name string
    Name of the crawler.
    RecrawlPolicy CrawlerRecrawlPolicyArgs
    A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
    Role string
    The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
    S3Targets []CrawlerS3TargetArgs
    List of nested Amazon S3 target arguments. See S3 Target below.
    Schedule string
    A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
    SchemaChangePolicy CrawlerSchemaChangePolicyArgs
    Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
    SecurityConfiguration string
    The name of the Security Configuration to be used by the crawler.
    TablePrefix string
    The table prefix used for catalog tables that are created.
    Tags map[string]string
    Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    TagsAll map[string]string
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    arn String
    The ARN of the crawler
    catalogTargets List<CrawlerCatalogTarget>
    List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
    classifiers List<String>
    List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
    configuration String
    JSON string of configuration information. For more details see Setting Crawler Configuration Options.
    databaseName String
    Glue database where results are written.
    deltaTargets List<CrawlerDeltaTarget>
    List of nested Delta Lake target arguments. See Delta Target below.
    description String
    Description of the crawler.
    dynamodbTargets List<CrawlerDynamodbTarget>
    List of nested DynamoDB target arguments. See Dynamodb Target below.
    hudiTargets List<CrawlerHudiTarget>
    List of nested Hudi target arguments. See Hudi Target below.
    icebergTargets List<CrawlerIcebergTarget>
    List of nested Iceberg target arguments. See Iceberg Target below.
    jdbcTargets List<CrawlerJdbcTarget>
    List of nested JDBC target arguments. See JDBC Target below.
    lakeFormationConfiguration CrawlerLakeFormationConfiguration
    Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
    lineageConfiguration CrawlerLineageConfiguration
    Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
    mongodbTargets List<CrawlerMongodbTarget>
    List of nested MongoDB target arguments. See MongoDB Target below.
    name String
    Name of the crawler.
    recrawlPolicy CrawlerRecrawlPolicy
    A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
    role String
    The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
    s3Targets List<CrawlerS3Target>
    List of nested Amazon S3 target arguments. See S3 Target below.
    schedule String
    A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
    schemaChangePolicy CrawlerSchemaChangePolicy
    Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
    securityConfiguration String
    The name of the Security Configuration to be used by the crawler.
    tablePrefix String
    The table prefix used for catalog tables that are created.
    tags Map<String,String>
    Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    tagsAll Map<String,String>
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    arn string
    The ARN of the crawler
    catalogTargets CrawlerCatalogTarget[]
    List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
    classifiers string[]
    List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
    configuration string
    JSON string of configuration information. For more details see Setting Crawler Configuration Options.
    databaseName string
    Glue database where results are written.
    deltaTargets CrawlerDeltaTarget[]
    List of nested Delta Lake target arguments. See Delta Target below.
    description string
    Description of the crawler.
    dynamodbTargets CrawlerDynamodbTarget[]
    List of nested DynamoDB target arguments. See Dynamodb Target below.
    hudiTargets CrawlerHudiTarget[]
    List of nested Hudi target arguments. See Hudi Target below.
    icebergTargets CrawlerIcebergTarget[]
    List of nested Iceberg target arguments. See Iceberg Target below.
    jdbcTargets CrawlerJdbcTarget[]
    List of nested JDBC target arguments. See JDBC Target below.
    lakeFormationConfiguration CrawlerLakeFormationConfiguration
    Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
    lineageConfiguration CrawlerLineageConfiguration
    Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
    mongodbTargets CrawlerMongodbTarget[]
    List of nested MongoDB target arguments. See MongoDB Target below.
    name string
    Name of the crawler.
    recrawlPolicy CrawlerRecrawlPolicy
    A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
    role string
    The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
    s3Targets CrawlerS3Target[]
    List of nested Amazon S3 target arguments. See S3 Target below.
    schedule string
    A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
    schemaChangePolicy CrawlerSchemaChangePolicy
    Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
    securityConfiguration string
    The name of the Security Configuration to be used by the crawler.
    tablePrefix string
    The table prefix used for catalog tables that are created.
    tags {[key: string]: string}
    Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    tagsAll {[key: string]: string}
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    arn str
    The ARN of the crawler
    catalog_targets Sequence[CrawlerCatalogTargetArgs]
    List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
    classifiers Sequence[str]
    List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
    configuration str
    JSON string of configuration information. For more details see Setting Crawler Configuration Options.
    database_name str
    Glue database where results are written.
    delta_targets Sequence[CrawlerDeltaTargetArgs]
    List of nested Delta Lake target arguments. See Delta Target below.
    description str
    Description of the crawler.
    dynamodb_targets Sequence[CrawlerDynamodbTargetArgs]
    List of nested DynamoDB target arguments. See Dynamodb Target below.
    hudi_targets Sequence[CrawlerHudiTargetArgs]
    List of nested Hudi target arguments. See Hudi Target below.
    iceberg_targets Sequence[CrawlerIcebergTargetArgs]
    List of nested Iceberg target arguments. See Iceberg Target below.
    jdbc_targets Sequence[CrawlerJdbcTargetArgs]
    List of nested JDBC target arguments. See JDBC Target below.
    lake_formation_configuration CrawlerLakeFormationConfigurationArgs
    Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
    lineage_configuration CrawlerLineageConfigurationArgs
    Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
    mongodb_targets Sequence[CrawlerMongodbTargetArgs]
    List of nested MongoDB target arguments. See MongoDB Target below.
    name str
    Name of the crawler.
    recrawl_policy CrawlerRecrawlPolicyArgs
    A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
    role str
    The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
    s3_targets Sequence[CrawlerS3TargetArgs]
    List of nested Amazon S3 target arguments. See S3 Target below.
    schedule str
    A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
    schema_change_policy CrawlerSchemaChangePolicyArgs
    Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
    security_configuration str
    The name of the Security Configuration to be used by the crawler.
    table_prefix str
    The table prefix used for catalog tables that are created.
    tags Mapping[str, str]
    Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    tags_all Mapping[str, str]
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    arn String
    The ARN of the crawler
    catalogTargets List<Property Map>
    List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
    classifiers List<String>
    List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
    configuration String
    JSON string of configuration information. For more details see Setting Crawler Configuration Options.
    databaseName String
    Glue database where results are written.
    deltaTargets List<Property Map>
    List of nested Delta Lake target arguments. See Delta Target below.
    description String
    Description of the crawler.
    dynamodbTargets List<Property Map>
    List of nested DynamoDB target arguments. See Dynamodb Target below.
    hudiTargets List<Property Map>
    List of nested Hudi target arguments. See Hudi Target below.
    icebergTargets List<Property Map>
    List of nested Iceberg target arguments. See Iceberg Target below.
    jdbcTargets List<Property Map>
    List of nested JDBC target arguments. See JDBC Target below.
    lakeFormationConfiguration Property Map
    Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
    lineageConfiguration Property Map
    Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
    mongodbTargets List<Property Map>
    List of nested MongoDB target arguments. See MongoDB Target below.
    name String
    Name of the crawler.
    recrawlPolicy Property Map
    A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
    role String
    The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
    s3Targets List<Property Map>
    List of nested Amazon S3 target arguments. See S3 Target below.
    schedule String
    A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
    schemaChangePolicy Property Map
    Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
    securityConfiguration String
    The name of the Security Configuration to be used by the crawler.
    tablePrefix String
    The table prefix used for catalog tables that are created.
    tags Map<String>
    Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
    tagsAll Map<String>
    A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.

    Deprecated: Please use tags instead.

    Supporting Types

    CrawlerCatalogTarget, CrawlerCatalogTargetArgs

    DatabaseName string
    The name of the Glue database to be synchronized.
    Tables List<string>
    A list of catalog tables to be synchronized.
    ConnectionName string
    The name of the connection for an Amazon S3-backed Data Catalog table to be a target of the crawl when using a Catalog connection type paired with a NETWORK Connection type.
    DlqEventQueueArn string

    A valid Amazon SQS ARN.

    Note: deletion_behavior of catalog target doesn't support DEPRECATE_IN_DATABASE.

    Note: configuration for catalog target crawlers will have { ... "Grouping": { "TableGroupingPolicy": "CombineCompatibleSchemas"} } by default.

    EventQueueArn string
    A valid Amazon SQS ARN.
    DatabaseName string
    The name of the Glue database to be synchronized.
    Tables []string
    A list of catalog tables to be synchronized.
    ConnectionName string
    The name of the connection for an Amazon S3-backed Data Catalog table to be a target of the crawl when using a Catalog connection type paired with a NETWORK Connection type.
    DlqEventQueueArn string

    A valid Amazon SQS ARN.

    Note: deletion_behavior of catalog target doesn't support DEPRECATE_IN_DATABASE.

    Note: configuration for catalog target crawlers will have { ... "Grouping": { "TableGroupingPolicy": "CombineCompatibleSchemas"} } by default.

    EventQueueArn string
    A valid Amazon SQS ARN.
    databaseName String
    The name of the Glue database to be synchronized.
    tables List<String>
    A list of catalog tables to be synchronized.
    connectionName String
    The name of the connection for an Amazon S3-backed Data Catalog table to be a target of the crawl when using a Catalog connection type paired with a NETWORK Connection type.
    dlqEventQueueArn String

    A valid Amazon SQS ARN.

    Note: deletion_behavior of catalog target doesn't support DEPRECATE_IN_DATABASE.

    Note: configuration for catalog target crawlers will have { ... "Grouping": { "TableGroupingPolicy": "CombineCompatibleSchemas"} } by default.

    eventQueueArn String
    A valid Amazon SQS ARN.
    databaseName string
    The name of the Glue database to be synchronized.
    tables string[]
    A list of catalog tables to be synchronized.
    connectionName string
    The name of the connection for an Amazon S3-backed Data Catalog table to be a target of the crawl when using a Catalog connection type paired with a NETWORK Connection type.
    dlqEventQueueArn string

    A valid Amazon SQS ARN.

    Note: deletion_behavior of catalog target doesn't support DEPRECATE_IN_DATABASE.

    Note: configuration for catalog target crawlers will have { ... "Grouping": { "TableGroupingPolicy": "CombineCompatibleSchemas"} } by default.

    eventQueueArn string
    A valid Amazon SQS ARN.
    database_name str
    The name of the Glue database to be synchronized.
    tables Sequence[str]
    A list of catalog tables to be synchronized.
    connection_name str
    The name of the connection for an Amazon S3-backed Data Catalog table to be a target of the crawl when using a Catalog connection type paired with a NETWORK Connection type.
    dlq_event_queue_arn str

    A valid Amazon SQS ARN.

    Note: deletion_behavior of catalog target doesn't support DEPRECATE_IN_DATABASE.

    Note: configuration for catalog target crawlers will have { ... "Grouping": { "TableGroupingPolicy": "CombineCompatibleSchemas"} } by default.

    event_queue_arn str
    A valid Amazon SQS ARN.
    databaseName String
    The name of the Glue database to be synchronized.
    tables List<String>
    A list of catalog tables to be synchronized.
    connectionName String
    The name of the connection for an Amazon S3-backed Data Catalog table to be a target of the crawl when using a Catalog connection type paired with a NETWORK Connection type.
    dlqEventQueueArn String

    A valid Amazon SQS ARN.

    Note: deletion_behavior of catalog target doesn't support DEPRECATE_IN_DATABASE.

    Note: configuration for catalog target crawlers will have { ... "Grouping": { "TableGroupingPolicy": "CombineCompatibleSchemas"} } by default.

    eventQueueArn String
    A valid Amazon SQS ARN.

    CrawlerDeltaTarget, CrawlerDeltaTargetArgs

    DeltaTables List<string>
    A list of the Amazon S3 paths to the Delta tables.
    WriteManifest bool
    Specifies whether to write the manifest files to the Delta table path.
    ConnectionName string
    The name of the connection to use to connect to the Delta table target.
    CreateNativeDeltaTable bool
    Specifies whether the crawler will create native tables, to allow integration with query engines that support querying of the Delta transaction log directly.
    DeltaTables []string
    A list of the Amazon S3 paths to the Delta tables.
    WriteManifest bool
    Specifies whether to write the manifest files to the Delta table path.
    ConnectionName string
    The name of the connection to use to connect to the Delta table target.
    CreateNativeDeltaTable bool
    Specifies whether the crawler will create native tables, to allow integration with query engines that support querying of the Delta transaction log directly.
    deltaTables List<String>
    A list of the Amazon S3 paths to the Delta tables.
    writeManifest Boolean
    Specifies whether to write the manifest files to the Delta table path.
    connectionName String
    The name of the connection to use to connect to the Delta table target.
    createNativeDeltaTable Boolean
    Specifies whether the crawler will create native tables, to allow integration with query engines that support querying of the Delta transaction log directly.
    deltaTables string[]
    A list of the Amazon S3 paths to the Delta tables.
    writeManifest boolean
    Specifies whether to write the manifest files to the Delta table path.
    connectionName string
    The name of the connection to use to connect to the Delta table target.
    createNativeDeltaTable boolean
    Specifies whether the crawler will create native tables, to allow integration with query engines that support querying of the Delta transaction log directly.
    delta_tables Sequence[str]
    A list of the Amazon S3 paths to the Delta tables.
    write_manifest bool
    Specifies whether to write the manifest files to the Delta table path.
    connection_name str
    The name of the connection to use to connect to the Delta table target.
    create_native_delta_table bool
    Specifies whether the crawler will create native tables, to allow integration with query engines that support querying of the Delta transaction log directly.
    deltaTables List<String>
    A list of the Amazon S3 paths to the Delta tables.
    writeManifest Boolean
    Specifies whether to write the manifest files to the Delta table path.
    connectionName String
    The name of the connection to use to connect to the Delta table target.
    createNativeDeltaTable Boolean
    Specifies whether the crawler will create native tables, to allow integration with query engines that support querying of the Delta transaction log directly.

    CrawlerDynamodbTarget, CrawlerDynamodbTargetArgs

    Path string
    The name of the DynamoDB table to crawl.
    ScanAll bool
    Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Defaults to true.
    ScanRate double
    The percentage of the configured read capacity units used by the AWS Glue crawler. The valid values are null or a value between 0.1 and 1.5.
    Path string
    The name of the DynamoDB table to crawl.
    ScanAll bool
    Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Defaults to true.
    ScanRate float64
    The percentage of the configured read capacity units used by the AWS Glue crawler. The valid values are null or a value between 0.1 and 1.5.
    path String
    The name of the DynamoDB table to crawl.
    scanAll Boolean
    Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Defaults to true.
    scanRate Double
    The percentage of the configured read capacity units used by the AWS Glue crawler. The valid values are null or a value between 0.1 and 1.5.
    path string
    The name of the DynamoDB table to crawl.
    scanAll boolean
    Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Defaults to true.
    scanRate number
    The percentage of the configured read capacity units used by the AWS Glue crawler. The valid values are null or a value between 0.1 and 1.5.
    path str
    The name of the DynamoDB table to crawl.
    scan_all bool
    Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Defaults to true.
    scan_rate float
    The percentage of the configured read capacity units used by the AWS Glue crawler. The valid values are null or a value between 0.1 and 1.5.
    path String
    The name of the DynamoDB table to crawl.
    scanAll Boolean
    Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Defaults to true.
    scanRate Number
    The percentage of the configured read capacity units used by the AWS Glue crawler. The valid values are null or a value between 0.1 and 1.5.

    CrawlerHudiTarget, CrawlerHudiTargetArgs

    MaximumTraversalDepth int
    The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Hudi metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
    Paths List<string>
    One or more Amazon S3 paths that contain Hudi metadata folders as s3://bucket/prefix.
    ConnectionName string
    The name of the connection to use to connect to the Hudi target.
    Exclusions List<string>
    A list of glob patterns used to exclude from the crawl.
    MaximumTraversalDepth int
    The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Hudi metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
    Paths []string
    One or more Amazon S3 paths that contain Hudi metadata folders as s3://bucket/prefix.
    ConnectionName string
    The name of the connection to use to connect to the Hudi target.
    Exclusions []string
    A list of glob patterns used to exclude from the crawl.
    maximumTraversalDepth Integer
    The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Hudi metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
    paths List<String>
    One or more Amazon S3 paths that contain Hudi metadata folders as s3://bucket/prefix.
    connectionName String
    The name of the connection to use to connect to the Hudi target.
    exclusions List<String>
    A list of glob patterns used to exclude from the crawl.
    maximumTraversalDepth number
    The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Hudi metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
    paths string[]
    One or more Amazon S3 paths that contain Hudi metadata folders as s3://bucket/prefix.
    connectionName string
    The name of the connection to use to connect to the Hudi target.
    exclusions string[]
    A list of glob patterns used to exclude from the crawl.
    maximum_traversal_depth int
    The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Hudi metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
    paths Sequence[str]
    One or more Amazon S3 paths that contain Hudi metadata folders as s3://bucket/prefix.
    connection_name str
    The name of the connection to use to connect to the Hudi target.
    exclusions Sequence[str]
    A list of glob patterns used to exclude from the crawl.
    maximumTraversalDepth Number
    The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Hudi metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
    paths List<String>
    One or more Amazon S3 paths that contain Hudi metadata folders as s3://bucket/prefix.
    connectionName String
    The name of the connection to use to connect to the Hudi target.
    exclusions List<String>
    A list of glob patterns used to exclude from the crawl.

    CrawlerIcebergTarget, CrawlerIcebergTargetArgs

    MaximumTraversalDepth int
    The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
    Paths List<string>
    One or more Amazon S3 paths that contain Iceberg metadata folders as s3://bucket/prefix.
    ConnectionName string
    The name of the connection to use to connect to the Iceberg target.
    Exclusions List<string>
    A list of glob patterns used to exclude from the crawl.
    MaximumTraversalDepth int
    The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
    Paths []string
    One or more Amazon S3 paths that contain Iceberg metadata folders as s3://bucket/prefix.
    ConnectionName string
    The name of the connection to use to connect to the Iceberg target.
    Exclusions []string
    A list of glob patterns used to exclude from the crawl.
    maximumTraversalDepth Integer
    The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
    paths List<String>
    One or more Amazon S3 paths that contain Iceberg metadata folders as s3://bucket/prefix.
    connectionName String
    The name of the connection to use to connect to the Iceberg target.
    exclusions List<String>
    A list of glob patterns used to exclude from the crawl.
    maximumTraversalDepth number
    The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
    paths string[]
    One or more Amazon S3 paths that contain Iceberg metadata folders as s3://bucket/prefix.
    connectionName string
    The name of the connection to use to connect to the Iceberg target.
    exclusions string[]
    A list of glob patterns used to exclude from the crawl.
    maximum_traversal_depth int
    The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
    paths Sequence[str]
    One or more Amazon S3 paths that contain Iceberg metadata folders as s3://bucket/prefix.
    connection_name str
    The name of the connection to use to connect to the Iceberg target.
    exclusions Sequence[str]
    A list of glob patterns used to exclude from the crawl.
    maximumTraversalDepth Number
    The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
    paths List<String>
    One or more Amazon S3 paths that contain Iceberg metadata folders as s3://bucket/prefix.
    connectionName String
    The name of the connection to use to connect to the Iceberg target.
    exclusions List<String>
    A list of glob patterns used to exclude from the crawl.

    CrawlerJdbcTarget, CrawlerJdbcTargetArgs

    ConnectionName string
    The name of the connection to use to connect to the JDBC target.
    Path string
    The path of the JDBC target.
    EnableAdditionalMetadatas List<string>
    Specify a value of RAWTYPES or COMMENTS to enable additional metadata in table responses. RAWTYPES provides the native-level datatype. COMMENTS provides comments associated with a column or table in the database.
    Exclusions List<string>
    A list of glob patterns used to exclude from the crawl.
    ConnectionName string
    The name of the connection to use to connect to the JDBC target.
    Path string
    The path of the JDBC target.
    EnableAdditionalMetadatas []string
    Specify a value of RAWTYPES or COMMENTS to enable additional metadata in table responses. RAWTYPES provides the native-level datatype. COMMENTS provides comments associated with a column or table in the database.
    Exclusions []string
    A list of glob patterns used to exclude from the crawl.
    connectionName String
    The name of the connection to use to connect to the JDBC target.
    path String
    The path of the JDBC target.
    enableAdditionalMetadatas List<String>
    Specify a value of RAWTYPES or COMMENTS to enable additional metadata in table responses. RAWTYPES provides the native-level datatype. COMMENTS provides comments associated with a column or table in the database.
    exclusions List<String>
    A list of glob patterns used to exclude from the crawl.
    connectionName string
    The name of the connection to use to connect to the JDBC target.
    path string
    The path of the JDBC target.
    enableAdditionalMetadatas string[]
    Specify a value of RAWTYPES or COMMENTS to enable additional metadata in table responses. RAWTYPES provides the native-level datatype. COMMENTS provides comments associated with a column or table in the database.
    exclusions string[]
    A list of glob patterns used to exclude from the crawl.
    connection_name str
    The name of the connection to use to connect to the JDBC target.
    path str
    The path of the JDBC target.
    enable_additional_metadatas Sequence[str]
    Specify a value of RAWTYPES or COMMENTS to enable additional metadata in table responses. RAWTYPES provides the native-level datatype. COMMENTS provides comments associated with a column or table in the database.
    exclusions Sequence[str]
    A list of glob patterns used to exclude from the crawl.
    connectionName String
    The name of the connection to use to connect to the JDBC target.
    path String
    The path of the JDBC target.
    enableAdditionalMetadatas List<String>
    Specify a value of RAWTYPES or COMMENTS to enable additional metadata in table responses. RAWTYPES provides the native-level datatype. COMMENTS provides comments associated with a column or table in the database.
    exclusions List<String>
    A list of glob patterns used to exclude from the crawl.

    CrawlerLakeFormationConfiguration, CrawlerLakeFormationConfigurationArgs

    AccountId string
    Required for cross account crawls. For same account crawls as the target data, this can be omitted.
    UseLakeFormationCredentials bool
    Specifies whether to use Lake Formation credentials for the crawler instead of the IAM role credentials.
    AccountId string
    Required for cross account crawls. For same account crawls as the target data, this can be omitted.
    UseLakeFormationCredentials bool
    Specifies whether to use Lake Formation credentials for the crawler instead of the IAM role credentials.
    accountId String
    Required for cross account crawls. For same account crawls as the target data, this can be omitted.
    useLakeFormationCredentials Boolean
    Specifies whether to use Lake Formation credentials for the crawler instead of the IAM role credentials.
    accountId string
    Required for cross account crawls. For same account crawls as the target data, this can be omitted.
    useLakeFormationCredentials boolean
    Specifies whether to use Lake Formation credentials for the crawler instead of the IAM role credentials.
    account_id str
    Required for cross account crawls. For same account crawls as the target data, this can be omitted.
    use_lake_formation_credentials bool
    Specifies whether to use Lake Formation credentials for the crawler instead of the IAM role credentials.
    accountId String
    Required for cross account crawls. For same account crawls as the target data, this can be omitted.
    useLakeFormationCredentials Boolean
    Specifies whether to use Lake Formation credentials for the crawler instead of the IAM role credentials.

    CrawlerLineageConfiguration, CrawlerLineageConfigurationArgs

    CrawlerLineageSettings string
    Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.
    CrawlerLineageSettings string
    Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.
    crawlerLineageSettings String
    Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.
    crawlerLineageSettings string
    Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.
    crawler_lineage_settings str
    Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.
    crawlerLineageSettings String
    Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.

    CrawlerMongodbTarget, CrawlerMongodbTargetArgs

    ConnectionName string
    The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.
    Path string
    The path of the Amazon DocumentDB or MongoDB target (database/collection).
    ScanAll bool
    Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.
    ConnectionName string
    The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.
    Path string
    The path of the Amazon DocumentDB or MongoDB target (database/collection).
    ScanAll bool
    Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.
    connectionName String
    The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.
    path String
    The path of the Amazon DocumentDB or MongoDB target (database/collection).
    scanAll Boolean
    Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.
    connectionName string
    The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.
    path string
    The path of the Amazon DocumentDB or MongoDB target (database/collection).
    scanAll boolean
    Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.
    connection_name str
    The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.
    path str
    The path of the Amazon DocumentDB or MongoDB target (database/collection).
    scan_all bool
    Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.
    connectionName String
    The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.
    path String
    The path of the Amazon DocumentDB or MongoDB target (database/collection).
    scanAll Boolean
    Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.

    CrawlerRecrawlPolicy, CrawlerRecrawlPolicyArgs

    RecrawlBehavior string
    Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid Values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.
    RecrawlBehavior string
    Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid Values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.
    recrawlBehavior String
    Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid Values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.
    recrawlBehavior string
    Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid Values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.
    recrawl_behavior str
    Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid Values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.
    recrawlBehavior String
    Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid Values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.

    CrawlerS3Target, CrawlerS3TargetArgs

    Path string
    The path to the Amazon S3 target.
    ConnectionName string
    The name of a connection which allows crawler to access data in S3 within a VPC.
    DlqEventQueueArn string
    The ARN of the dead-letter SQS queue.
    EventQueueArn string
    The ARN of the SQS queue to receive S3 notifications from.
    Exclusions List<string>
    A list of glob patterns used to exclude from the crawl.
    SampleSize int
    Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.
    Path string
    The path to the Amazon S3 target.
    ConnectionName string
    The name of a connection which allows crawler to access data in S3 within a VPC.
    DlqEventQueueArn string
    The ARN of the dead-letter SQS queue.
    EventQueueArn string
    The ARN of the SQS queue to receive S3 notifications from.
    Exclusions []string
    A list of glob patterns used to exclude from the crawl.
    SampleSize int
    Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.
    path String
    The path to the Amazon S3 target.
    connectionName String
    The name of a connection which allows crawler to access data in S3 within a VPC.
    dlqEventQueueArn String
    The ARN of the dead-letter SQS queue.
    eventQueueArn String
    The ARN of the SQS queue to receive S3 notifications from.
    exclusions List<String>
    A list of glob patterns used to exclude from the crawl.
    sampleSize Integer
    Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.
    path string
    The path to the Amazon S3 target.
    connectionName string
    The name of a connection which allows crawler to access data in S3 within a VPC.
    dlqEventQueueArn string
    The ARN of the dead-letter SQS queue.
    eventQueueArn string
    The ARN of the SQS queue to receive S3 notifications from.
    exclusions string[]
    A list of glob patterns used to exclude from the crawl.
    sampleSize number
    Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.
    path str
    The path to the Amazon S3 target.
    connection_name str
    The name of a connection which allows crawler to access data in S3 within a VPC.
    dlq_event_queue_arn str
    The ARN of the dead-letter SQS queue.
    event_queue_arn str
    The ARN of the SQS queue to receive S3 notifications from.
    exclusions Sequence[str]
    A list of glob patterns used to exclude from the crawl.
    sample_size int
    Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.
    path String
    The path to the Amazon S3 target.
    connectionName String
    The name of a connection which allows crawler to access data in S3 within a VPC.
    dlqEventQueueArn String
    The ARN of the dead-letter SQS queue.
    eventQueueArn String
    The ARN of the SQS queue to receive S3 notifications from.
    exclusions List<String>
    A list of glob patterns used to exclude from the crawl.
    sampleSize Number
    Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.

    CrawlerSchemaChangePolicy, CrawlerSchemaChangePolicyArgs

    DeleteBehavior string
    The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.
    UpdateBehavior string
    The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.
    DeleteBehavior string
    The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.
    UpdateBehavior string
    The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.
    deleteBehavior String
    The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.
    updateBehavior String
    The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.
    deleteBehavior string
    The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.
    updateBehavior string
    The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.
    delete_behavior str
    The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.
    update_behavior str
    The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.
    deleteBehavior String
    The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.
    updateBehavior String
    The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.

    Import

    Using pulumi import, import Glue Crawlers using name. For example:

    $ pulumi import aws:glue/crawler:Crawler MyJob MyJob
    

    To learn more about importing existing cloud resources, see Importing resources.

    Package Details

    Repository
    AWS Classic pulumi/pulumi-aws
    License
    Apache-2.0
    Notes
    This Pulumi package is based on the aws Terraform Provider.
    aws logo
    AWS v6.60.0 published on Tuesday, Nov 19, 2024 by Pulumi