databricks.LakehouseMonitor
NOTE: This resource has been deprecated and will be removed soon. Please use the databricks.QualityMonitor resource instead.
This resource allows you to manage Lakehouse Monitors in Databricks.
A databricks.LakehouseMonitor is attached to a databricks.SqlTable and can be of type timeseries, snapshot, or inference.
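Because this resource is deprecated in favor of databricks.QualityMonitor, migration is usually a rename: the replacement resource mirrors this resource's arguments. The Python sketch below is a minimal, non-authoritative example that assumes this parity; the table and directory names are placeholders matching the example that follows.
import pulumi_databricks as databricks

# Migration sketch, assuming databricks.QualityMonitor accepts the same core
# arguments as databricks.LakehouseMonitor. Names below are placeholders.
monitor = databricks.QualityMonitor("testTimeseriesMonitor",
    table_name="sandbox.things.bar",
    assets_dir="/Shared/provider-test/databricks_lakehouse_monitoring/bar",
    output_schema_name="sandbox.things",
    time_series={
        "granularities": ["1 hour"],
        "timestamp_col": "timestamp",
    })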
Example Usage
import * as pulumi from "@pulumi/pulumi";
import * as databricks from "@pulumi/databricks";
const sandbox = new databricks.Catalog("sandbox", {
name: "sandbox",
comment: "this catalog is managed by terraform",
properties: {
purpose: "testing",
},
});
const things = new databricks.Schema("things", {
catalogName: sandbox.id,
name: "things",
comment: "this database is managed by terraform",
properties: {
kind: "various",
},
});
const myTestTable = new databricks.SqlTable("myTestTable", {
catalogName: sandbox.name,
schemaName: things.name,
name: "bar",
tableType: "MANAGED",
dataSourceFormat: "DELTA",
columns: [{
name: "timestamp",
type: "int",
}],
});
const testTimeseriesMonitor = new databricks.LakehouseMonitor("testTimeseriesMonitor", {
tableName: pulumi.interpolate`${sandbox.name}.${things.name}.${myTestTable.name}`,
assetsDir: pulumi.interpolate`/Shared/provider-test/databricks_lakehouse_monitoring/${myTestTable.name}`,
outputSchemaName: pulumi.interpolate`${sandbox.name}.${things.name}`,
timeSeries: {
granularities: ["1 hour"],
timestampCol: "timestamp",
},
});
import pulumi
import pulumi_databricks as databricks
sandbox = databricks.Catalog("sandbox",
name="sandbox",
comment="this catalog is managed by terraform",
properties={
"purpose": "testing",
})
things = databricks.Schema("things",
catalog_name=sandbox.id,
name="things",
comment="this database is managed by terraform",
properties={
"kind": "various",
})
my_test_table = databricks.SqlTable("myTestTable",
catalog_name=sandbox.name,
schema_name=things.name,
name="bar",
table_type="MANAGED",
data_source_format="DELTA",
columns=[{
"name": "timestamp",
"type": "int",
}])
test_timeseries_monitor = databricks.LakehouseMonitor("testTimeseriesMonitor",
table_name=pulumi.Output.all(
sandboxName=sandbox.name,
thingsName=things.name,
myTestTableName=my_test_table.name
).apply(lambda resolved_outputs: f"{resolved_outputs['sandboxName']}.{resolved_outputs['thingsName']}.{resolved_outputs['myTestTableName']}")
,
assets_dir=my_test_table.name.apply(lambda name: f"/Shared/provider-test/databricks_lakehouse_monitoring/{name}"),
output_schema_name=pulumi.Output.all(
sandboxName=sandbox.name,
thingsName=things.name
).apply(lambda resolved_outputs: f"{resolved_outputs['sandboxName']}.{resolved_outputs['thingsName']}")
,
time_series={
"granularities": ["1 hour"],
"timestamp_col": "timestamp",
})
package main
import (
"fmt"
"github.com/pulumi/pulumi-databricks/sdk/go/databricks"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
sandbox, err := databricks.NewCatalog(ctx, "sandbox", &databricks.CatalogArgs{
Name: pulumi.String("sandbox"),
Comment: pulumi.String("this catalog is managed by terraform"),
Properties: pulumi.StringMap{
"purpose": pulumi.String("testing"),
},
})
if err != nil {
return err
}
things, err := databricks.NewSchema(ctx, "things", &databricks.SchemaArgs{
CatalogName: sandbox.ID(),
Name: pulumi.String("things"),
Comment: pulumi.String("this database is managed by terraform"),
Properties: pulumi.StringMap{
"kind": pulumi.String("various"),
},
})
if err != nil {
return err
}
myTestTable, err := databricks.NewSqlTable(ctx, "myTestTable", &databricks.SqlTableArgs{
CatalogName: sandbox.Name,
SchemaName: things.Name,
Name: pulumi.String("bar"),
TableType: pulumi.String("MANAGED"),
DataSourceFormat: pulumi.String("DELTA"),
Columns: databricks.SqlTableColumnArray{
&databricks.SqlTableColumnArgs{
Name: pulumi.String("timestamp"),
Type: pulumi.String("int"),
},
},
})
if err != nil {
return err
}
_, err = databricks.NewLakehouseMonitor(ctx, "testTimeseriesMonitor", &databricks.LakehouseMonitorArgs{
TableName: pulumi.All(sandbox.Name, things.Name, myTestTable.Name).ApplyT(func(_args []interface{}) (string, error) {
sandboxName := _args[0].(string)
thingsName := _args[1].(string)
myTestTableName := _args[2].(string)
return fmt.Sprintf("%v.%v.%v", sandboxName, thingsName, myTestTableName), nil
}).(pulumi.StringOutput),
AssetsDir: myTestTable.Name.ApplyT(func(name string) (string, error) {
return fmt.Sprintf("/Shared/provider-test/databricks_lakehouse_monitoring/%v", name), nil
}).(pulumi.StringOutput),
OutputSchemaName: pulumi.All(sandbox.Name, things.Name).ApplyT(func(_args []interface{}) (string, error) {
sandboxName := _args[0].(string)
thingsName := _args[1].(string)
return fmt.Sprintf("%v.%v", sandboxName, thingsName), nil
}).(pulumi.StringOutput),
TimeSeries: &databricks.LakehouseMonitorTimeSeriesArgs{
Granularities: pulumi.StringArray{
pulumi.String("1 hour"),
},
TimestampCol: pulumi.String("timestamp"),
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Databricks = Pulumi.Databricks;
return await Deployment.RunAsync(() =>
{
var sandbox = new Databricks.Catalog("sandbox", new()
{
Name = "sandbox",
Comment = "this catalog is managed by terraform",
Properties =
{
{ "purpose", "testing" },
},
});
var things = new Databricks.Schema("things", new()
{
CatalogName = sandbox.Id,
Name = "things",
Comment = "this database is managed by terraform",
Properties =
{
{ "kind", "various" },
},
});
var myTestTable = new Databricks.SqlTable("myTestTable", new()
{
CatalogName = sandbox.Name,
SchemaName = things.Name,
Name = "bar",
TableType = "MANAGED",
DataSourceFormat = "DELTA",
Columns = new[]
{
new Databricks.Inputs.SqlTableColumnArgs
{
Name = "timestamp",
Type = "int",
},
},
});
var testTimeseriesMonitor = new Databricks.LakehouseMonitor("testTimeseriesMonitor", new()
{
TableName = Output.Tuple(sandbox.Name, things.Name, myTestTable.Name).Apply(values =>
{
var sandboxName = values.Item1;
var thingsName = values.Item2;
var myTestTableName = values.Item3;
return $"{sandboxName}.{thingsName}.{myTestTableName}";
}),
AssetsDir = myTestTable.Name.Apply(name => $"/Shared/provider-test/databricks_lakehouse_monitoring/{name}"),
OutputSchemaName = Output.Tuple(sandbox.Name, things.Name).Apply(values =>
{
var sandboxName = values.Item1;
var thingsName = values.Item2;
return $"{sandboxName}.{thingsName}";
}),
TimeSeries = new Databricks.Inputs.LakehouseMonitorTimeSeriesArgs
{
Granularities = new[]
{
"1 hour",
},
TimestampCol = "timestamp",
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.databricks.Catalog;
import com.pulumi.databricks.CatalogArgs;
import com.pulumi.databricks.Schema;
import com.pulumi.databricks.SchemaArgs;
import com.pulumi.databricks.SqlTable;
import com.pulumi.databricks.SqlTableArgs;
import com.pulumi.databricks.inputs.SqlTableColumnArgs;
import com.pulumi.databricks.LakehouseMonitor;
import com.pulumi.databricks.LakehouseMonitorArgs;
import com.pulumi.databricks.inputs.LakehouseMonitorTimeSeriesArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var sandbox = new Catalog("sandbox", CatalogArgs.builder()
.name("sandbox")
.comment("this catalog is managed by terraform")
.properties(Map.of("purpose", "testing"))
.build());
var things = new Schema("things", SchemaArgs.builder()
.catalogName(sandbox.id())
.name("things")
.comment("this database is managed by terraform")
.properties(Map.of("kind", "various"))
.build());
var myTestTable = new SqlTable("myTestTable", SqlTableArgs.builder()
.catalogName(sandbox.name())
.schemaName(things.name())
.name("bar")
.tableType("MANAGED")
.dataSourceFormat("DELTA")
.columns(SqlTableColumnArgs.builder()
.name("timestamp")
.type("int")
.build())
.build());
var testTimeseriesMonitor = new LakehouseMonitor("testTimeseriesMonitor", LakehouseMonitorArgs.builder()
.tableName(Output.tuple(sandbox.name(), things.name(), myTestTable.name()).applyValue(values -> {
var sandboxName = values.t1;
var thingsName = values.t2;
var myTestTableName = values.t3;
return String.format("%s.%s.%s", sandboxName,thingsName,myTestTableName);
}))
.assetsDir(myTestTable.name().applyValue(name -> String.format("/Shared/provider-test/databricks_lakehouse_monitoring/%s", name)))
.outputSchemaName(Output.tuple(sandbox.name(), things.name()).applyValue(values -> {
var sandboxName = values.t1;
var thingsName = values.t2;
return String.format("%s.%s", sandboxName,thingsName);
}))
.timeSeries(LakehouseMonitorTimeSeriesArgs.builder()
.granularities("1 hour")
.timestampCol("timestamp")
.build())
.build());
}
}
resources:
sandbox:
type: databricks:Catalog
properties:
name: sandbox
comment: this catalog is managed by terraform
properties:
purpose: testing
things:
type: databricks:Schema
properties:
catalogName: ${sandbox.id}
name: things
comment: this database is managed by terraform
properties:
kind: various
myTestTable:
type: databricks:SqlTable
properties:
catalogName: ${sandbox.name}
schemaName: ${things.name}
name: bar
tableType: MANAGED
dataSourceFormat: DELTA
columns:
- name: timestamp
type: int
testTimeseriesMonitor:
type: databricks:LakehouseMonitor
properties:
tableName: ${sandbox.name}.${things.name}.${myTestTable.name}
assetsDir: /Shared/provider-test/databricks_lakehouse_monitoring/${myTestTable.name}
outputSchemaName: ${sandbox.name}.${things.name}
timeSeries:
granularities:
- 1 hour
timestampCol: timestamp
Inference Monitor
This example reuses the sandbox catalog, things schema, and myTestTable table defined in the example above.
import * as pulumi from "@pulumi/pulumi";
import * as databricks from "@pulumi/databricks";
const testMonitorInference = new databricks.LakehouseMonitor("testMonitorInference", {
tableName: `${sandbox.name}.${things.name}.${myTestTable.name}`,
assetsDir: `/Shared/provider-test/databricks_lakehouse_monitoring/${myTestTable.name}`,
outputSchemaName: `${sandbox.name}.${things.name}`,
inferenceLog: {
granularities: ["1 hour"],
timestampCol: "timestamp",
predictionCol: "prediction",
modelIdCol: "model_id",
problemType: "PROBLEM_TYPE_REGRESSION",
},
});
import pulumi
import pulumi_databricks as databricks
test_monitor_inference = databricks.LakehouseMonitor("testMonitorInference",
table_name=f"{sandbox['name']}.{things['name']}.{my_test_table['name']}",
assets_dir=f"/Shared/provider-test/databricks_lakehouse_monitoring/{my_test_table['name']}",
output_schema_name=f"{sandbox['name']}.{things['name']}",
inference_log={
"granularities": ["1 hour"],
"timestamp_col": "timestamp",
"prediction_col": "prediction",
"model_id_col": "model_id",
"problem_type": "PROBLEM_TYPE_REGRESSION",
})
package main
import (
"fmt"
"github.com/pulumi/pulumi-databricks/sdk/go/databricks"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := databricks.NewLakehouseMonitor(ctx, "testMonitorInference", &databricks.LakehouseMonitorArgs{
TableName: pulumi.Sprintf("%v.%v.%v", sandbox.Name, things.Name, myTestTable.Name),
AssetsDir: pulumi.Sprintf("/Shared/provider-test/databricks_lakehouse_monitoring/%v", myTestTable.Name),
OutputSchemaName: pulumi.Sprintf("%v.%v", sandbox.Name, things.Name),
InferenceLog: &databricks.LakehouseMonitorInferenceLogArgs{
Granularities: pulumi.StringArray{
pulumi.String("1 hour"),
},
TimestampCol: pulumi.String("timestamp"),
PredictionCol: pulumi.String("prediction"),
ModelIdCol: pulumi.String("model_id"),
ProblemType: pulumi.String("PROBLEM_TYPE_REGRESSION"),
},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Databricks = Pulumi.Databricks;
return await Deployment.RunAsync(() =>
{
var testMonitorInference = new Databricks.LakehouseMonitor("testMonitorInference", new()
{
TableName = $"{sandbox.Name}.{things.Name}.{myTestTable.Name}",
AssetsDir = $"/Shared/provider-test/databricks_lakehouse_monitoring/{myTestTable.Name}",
OutputSchemaName = $"{sandbox.Name}.{things.Name}",
InferenceLog = new Databricks.Inputs.LakehouseMonitorInferenceLogArgs
{
Granularities = new[]
{
"1 hour",
},
TimestampCol = "timestamp",
PredictionCol = "prediction",
ModelIdCol = "model_id",
ProblemType = "PROBLEM_TYPE_REGRESSION",
},
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.databricks.LakehouseMonitor;
import com.pulumi.databricks.LakehouseMonitorArgs;
import com.pulumi.databricks.inputs.LakehouseMonitorInferenceLogArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var testMonitorInference = new LakehouseMonitor("testMonitorInference", LakehouseMonitorArgs.builder()
.tableName(String.format("%s.%s.%s", sandbox.name(),things.name(),myTestTable.name()))
.assetsDir(String.format("/Shared/provider-test/databricks_lakehouse_monitoring/%s", myTestTable.name()))
.outputSchemaName(String.format("%s.%s", sandbox.name(),things.name()))
.inferenceLog(LakehouseMonitorInferenceLogArgs.builder()
.granularities("1 hour")
.timestampCol("timestamp")
.predictionCol("prediction")
.modelIdCol("model_id")
.problemType("PROBLEM_TYPE_REGRESSION")
.build())
.build());
}
}
resources:
testMonitorInference:
type: databricks:LakehouseMonitor
properties:
tableName: ${sandbox.name}.${things.name}.${myTestTable.name}
assetsDir: /Shared/provider-test/databricks_lakehouse_monitoring/${myTestTable.name}
outputSchemaName: ${sandbox.name}.${things.name}
inferenceLog:
granularities:
- 1 hour
timestampCol: timestamp
predictionCol: prediction
modelIdCol: model_id
problemType: PROBLEM_TYPE_REGRESSION
Snapshot Monitor
This example also builds on the catalog, schema, and table defined in the first example.
import * as pulumi from "@pulumi/pulumi";
import * as databricks from "@pulumi/databricks";
const testMonitorInference = new databricks.LakehouseMonitor("testMonitorInference", {
tableName: `${sandbox.name}.${things.name}.${myTestTable.name}`,
assetsDir: `/Shared/provider-test/databricks_lakehouse_monitoring/${myTestTable.name}`,
outputSchemaName: `${sandbox.name}.${things.name}`,
snapshot: {},
});
import pulumi
import pulumi_databricks as databricks
test_monitor_inference = databricks.LakehouseMonitor("testMonitorInference",
table_name=f"{sandbox['name']}.{things['name']}.{my_test_table['name']}",
assets_dir=f"/Shared/provider-test/databricks_lakehouse_monitoring/{my_test_table['name']}",
output_schema_name=f"{sandbox['name']}.{things['name']}",
snapshot={})
package main
import (
"fmt"
"github.com/pulumi/pulumi-databricks/sdk/go/databricks"
"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
pulumi.Run(func(ctx *pulumi.Context) error {
_, err := databricks.NewLakehouseMonitor(ctx, "testMonitorInference", &databricks.LakehouseMonitorArgs{
TableName: pulumi.Sprintf("%v.%v.%v", sandbox.Name, things.Name, myTestTable.Name),
AssetsDir: pulumi.Sprintf("/Shared/provider-test/databricks_lakehouse_monitoring/%v", myTestTable.Name),
OutputSchemaName: pulumi.Sprintf("%v.%v", sandbox.Name, things.Name),
Snapshot: &databricks.LakehouseMonitorSnapshotArgs{},
})
if err != nil {
return err
}
return nil
})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Databricks = Pulumi.Databricks;
return await Deployment.RunAsync(() =>
{
var testMonitorInference = new Databricks.LakehouseMonitor("testMonitorInference", new()
{
TableName = $"{sandbox.Name}.{things.Name}.{myTestTable.Name}",
AssetsDir = $"/Shared/provider-test/databricks_lakehouse_monitoring/{myTestTable.Name}",
OutputSchemaName = $"{sandbox.Name}.{things.Name}",
Snapshot = new Databricks.Inputs.LakehouseMonitorSnapshotArgs(),
});
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.databricks.LakehouseMonitor;
import com.pulumi.databricks.LakehouseMonitorArgs;
import com.pulumi.databricks.inputs.LakehouseMonitorSnapshotArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var testMonitorInference = new LakehouseMonitor("testMonitorInference", LakehouseMonitorArgs.builder()
.tableName(String.format("%s.%s.%s", sandbox.name(),things.name(),myTestTable.name()))
.assetsDir(String.format("/Shared/provider-test/databricks_lakehouse_monitoring/%s", myTestTable.name()))
.outputSchemaName(String.format("%s.%s", sandbox.name(),things.name()))
.snapshot(LakehouseMonitorSnapshotArgs.builder().build())
.build());
}
}
resources:
testMonitorInference:
type: databricks:LakehouseMonitor
properties:
tableName: ${sandbox.name}.${things.name}.${myTestTable.name}
assetsDir: /Shared/provider-test/databricks_lakehouse_monitoring/${myTestTable.name}
outputSchemaName: ${sandbox.name}.${things.name}
snapshot: {}
Related Resources
The following resources are often used in the same context:
- databricks.Catalog
- databricks.Schema
- databricks.SqlTable
Create LakehouseMonitor Resource
Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.
Constructor syntax
new LakehouseMonitor(name: string, args: LakehouseMonitorArgs, opts?: CustomResourceOptions);
@overload
def LakehouseMonitor(resource_name: str,
args: LakehouseMonitorArgs,
opts: Optional[ResourceOptions] = None)
@overload
def LakehouseMonitor(resource_name: str,
opts: Optional[ResourceOptions] = None,
assets_dir: Optional[str] = None,
table_name: Optional[str] = None,
output_schema_name: Optional[str] = None,
notifications: Optional[LakehouseMonitorNotificationsArgs] = None,
inference_log: Optional[LakehouseMonitorInferenceLogArgs] = None,
latest_monitor_failure_msg: Optional[str] = None,
data_classification_config: Optional[LakehouseMonitorDataClassificationConfigArgs] = None,
custom_metrics: Optional[Sequence[LakehouseMonitorCustomMetricArgs]] = None,
schedule: Optional[LakehouseMonitorScheduleArgs] = None,
skip_builtin_dashboard: Optional[bool] = None,
slicing_exprs: Optional[Sequence[str]] = None,
snapshot: Optional[LakehouseMonitorSnapshotArgs] = None,
baseline_table_name: Optional[str] = None,
time_series: Optional[LakehouseMonitorTimeSeriesArgs] = None,
warehouse_id: Optional[str] = None)
func NewLakehouseMonitor(ctx *Context, name string, args LakehouseMonitorArgs, opts ...ResourceOption) (*LakehouseMonitor, error)
public LakehouseMonitor(string name, LakehouseMonitorArgs args, CustomResourceOptions? opts = null)
public LakehouseMonitor(String name, LakehouseMonitorArgs args)
public LakehouseMonitor(String name, LakehouseMonitorArgs args, CustomResourceOptions options)
type: databricks:LakehouseMonitor
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
Parameters
- name string
- The unique name of the resource.
- args LakehouseMonitorArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args LakehouseMonitorArgs
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args LakehouseMonitorArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args LakehouseMonitorArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args LakehouseMonitorArgs
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
Constructor example
The following reference example uses placeholder values for all input properties.
var lakehouseMonitorResource = new Databricks.LakehouseMonitor("lakehouseMonitorResource", new()
{
AssetsDir = "string",
TableName = "string",
OutputSchemaName = "string",
Notifications = new Databricks.Inputs.LakehouseMonitorNotificationsArgs
{
OnFailure = new Databricks.Inputs.LakehouseMonitorNotificationsOnFailureArgs
{
EmailAddresses = new[]
{
"string",
},
},
OnNewClassificationTagDetected = new Databricks.Inputs.LakehouseMonitorNotificationsOnNewClassificationTagDetectedArgs
{
EmailAddresses = new[]
{
"string",
},
},
},
InferenceLog = new Databricks.Inputs.LakehouseMonitorInferenceLogArgs
{
Granularities = new[]
{
"string",
},
ModelIdCol = "string",
PredictionCol = "string",
ProblemType = "string",
TimestampCol = "string",
LabelCol = "string",
PredictionProbaCol = "string",
},
LatestMonitorFailureMsg = "string",
DataClassificationConfig = new Databricks.Inputs.LakehouseMonitorDataClassificationConfigArgs
{
Enabled = false,
},
CustomMetrics = new[]
{
new Databricks.Inputs.LakehouseMonitorCustomMetricArgs
{
Definition = "string",
InputColumns = new[]
{
"string",
},
Name = "string",
OutputDataType = "string",
Type = "string",
},
},
Schedule = new Databricks.Inputs.LakehouseMonitorScheduleArgs
{
QuartzCronExpression = "string",
TimezoneId = "string",
PauseStatus = "string",
},
SkipBuiltinDashboard = false,
SlicingExprs = new[]
{
"string",
},
Snapshot = null,
BaselineTableName = "string",
TimeSeries = new Databricks.Inputs.LakehouseMonitorTimeSeriesArgs
{
Granularities = new[]
{
"string",
},
TimestampCol = "string",
},
WarehouseId = "string",
});
example, err := databricks.NewLakehouseMonitor(ctx, "lakehouseMonitorResource", &databricks.LakehouseMonitorArgs{
AssetsDir: pulumi.String("string"),
TableName: pulumi.String("string"),
OutputSchemaName: pulumi.String("string"),
Notifications: &databricks.LakehouseMonitorNotificationsArgs{
OnFailure: &databricks.LakehouseMonitorNotificationsOnFailureArgs{
EmailAddresses: pulumi.StringArray{
pulumi.String("string"),
},
},
OnNewClassificationTagDetected: &databricks.LakehouseMonitorNotificationsOnNewClassificationTagDetectedArgs{
EmailAddresses: pulumi.StringArray{
pulumi.String("string"),
},
},
},
InferenceLog: &databricks.LakehouseMonitorInferenceLogArgs{
Granularities: pulumi.StringArray{
pulumi.String("string"),
},
ModelIdCol: pulumi.String("string"),
PredictionCol: pulumi.String("string"),
ProblemType: pulumi.String("string"),
TimestampCol: pulumi.String("string"),
LabelCol: pulumi.String("string"),
PredictionProbaCol: pulumi.String("string"),
},
LatestMonitorFailureMsg: pulumi.String("string"),
DataClassificationConfig: &databricks.LakehouseMonitorDataClassificationConfigArgs{
Enabled: pulumi.Bool(false),
},
CustomMetrics: databricks.LakehouseMonitorCustomMetricArray{
&databricks.LakehouseMonitorCustomMetricArgs{
Definition: pulumi.String("string"),
InputColumns: pulumi.StringArray{
pulumi.String("string"),
},
Name: pulumi.String("string"),
OutputDataType: pulumi.String("string"),
Type: pulumi.String("string"),
},
},
Schedule: &databricks.LakehouseMonitorScheduleArgs{
QuartzCronExpression: pulumi.String("string"),
TimezoneId: pulumi.String("string"),
PauseStatus: pulumi.String("string"),
},
SkipBuiltinDashboard: pulumi.Bool(false),
SlicingExprs: pulumi.StringArray{
pulumi.String("string"),
},
Snapshot: &databricks.LakehouseMonitorSnapshotArgs{},
BaselineTableName: pulumi.String("string"),
TimeSeries: &databricks.LakehouseMonitorTimeSeriesArgs{
Granularities: pulumi.StringArray{
pulumi.String("string"),
},
TimestampCol: pulumi.String("string"),
},
WarehouseId: pulumi.String("string"),
})
var lakehouseMonitorResource = new LakehouseMonitor("lakehouseMonitorResource", LakehouseMonitorArgs.builder()
.assetsDir("string")
.tableName("string")
.outputSchemaName("string")
.notifications(LakehouseMonitorNotificationsArgs.builder()
.onFailure(LakehouseMonitorNotificationsOnFailureArgs.builder()
.emailAddresses("string")
.build())
.onNewClassificationTagDetected(LakehouseMonitorNotificationsOnNewClassificationTagDetectedArgs.builder()
.emailAddresses("string")
.build())
.build())
.inferenceLog(LakehouseMonitorInferenceLogArgs.builder()
.granularities("string")
.modelIdCol("string")
.predictionCol("string")
.problemType("string")
.timestampCol("string")
.labelCol("string")
.predictionProbaCol("string")
.build())
.latestMonitorFailureMsg("string")
.dataClassificationConfig(LakehouseMonitorDataClassificationConfigArgs.builder()
.enabled(false)
.build())
.customMetrics(LakehouseMonitorCustomMetricArgs.builder()
.definition("string")
.inputColumns("string")
.name("string")
.outputDataType("string")
.type("string")
.build())
.schedule(LakehouseMonitorScheduleArgs.builder()
.quartzCronExpression("string")
.timezoneId("string")
.pauseStatus("string")
.build())
.skipBuiltinDashboard(false)
.slicingExprs("string")
.snapshot()
.baselineTableName("string")
.timeSeries(LakehouseMonitorTimeSeriesArgs.builder()
.granularities("string")
.timestampCol("string")
.build())
.warehouseId("string")
.build());
lakehouse_monitor_resource = databricks.LakehouseMonitor("lakehouseMonitorResource",
assets_dir="string",
table_name="string",
output_schema_name="string",
notifications={
"on_failure": {
"email_addresses": ["string"],
},
"on_new_classification_tag_detected": {
"email_addresses": ["string"],
},
},
inference_log={
"granularities": ["string"],
"model_id_col": "string",
"prediction_col": "string",
"problem_type": "string",
"timestamp_col": "string",
"label_col": "string",
"prediction_proba_col": "string",
},
latest_monitor_failure_msg="string",
data_classification_config={
"enabled": False,
},
custom_metrics=[{
"definition": "string",
"input_columns": ["string"],
"name": "string",
"output_data_type": "string",
"type": "string",
}],
schedule={
"quartz_cron_expression": "string",
"timezone_id": "string",
"pause_status": "string",
},
skip_builtin_dashboard=False,
slicing_exprs=["string"],
snapshot={},
baseline_table_name="string",
time_series={
"granularities": ["string"],
"timestamp_col": "string",
},
warehouse_id="string")
const lakehouseMonitorResource = new databricks.LakehouseMonitor("lakehouseMonitorResource", {
assetsDir: "string",
tableName: "string",
outputSchemaName: "string",
notifications: {
onFailure: {
emailAddresses: ["string"],
},
onNewClassificationTagDetected: {
emailAddresses: ["string"],
},
},
inferenceLog: {
granularities: ["string"],
modelIdCol: "string",
predictionCol: "string",
problemType: "string",
timestampCol: "string",
labelCol: "string",
predictionProbaCol: "string",
},
latestMonitorFailureMsg: "string",
dataClassificationConfig: {
enabled: false,
},
customMetrics: [{
definition: "string",
inputColumns: ["string"],
name: "string",
outputDataType: "string",
type: "string",
}],
schedule: {
quartzCronExpression: "string",
timezoneId: "string",
pauseStatus: "string",
},
skipBuiltinDashboard: false,
slicingExprs: ["string"],
snapshot: {},
baselineTableName: "string",
timeSeries: {
granularities: ["string"],
timestampCol: "string",
},
warehouseId: "string",
});
type: databricks:LakehouseMonitor
properties:
assetsDir: string
baselineTableName: string
customMetrics:
- definition: string
inputColumns:
- string
name: string
outputDataType: string
type: string
dataClassificationConfig:
enabled: false
inferenceLog:
granularities:
- string
labelCol: string
modelIdCol: string
predictionCol: string
predictionProbaCol: string
problemType: string
timestampCol: string
latestMonitorFailureMsg: string
notifications:
onFailure:
emailAddresses:
- string
onNewClassificationTagDetected:
emailAddresses:
- string
outputSchemaName: string
schedule:
pauseStatus: string
quartzCronExpression: string
timezoneId: string
skipBuiltinDashboard: false
slicingExprs:
- string
snapshot: {}
tableName: string
timeSeries:
granularities:
- string
timestampCol: string
warehouseId: string
LakehouseMonitor Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.
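For example, the time_series input can be supplied in either form; the snippet below is purely illustrative.
import pulumi_databricks as databricks

# Typed argument class...
ts_args = databricks.LakehouseMonitorTimeSeriesArgs(
    granularities=["1 hour"],
    timestamp_col="timestamp",
)

# ...or an equivalent dictionary literal with the same keys.
ts_dict = {
    "granularities": ["1 hour"],
    "timestamp_col": "timestamp",
}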
The LakehouseMonitor resource accepts the following input properties:
Property names below use the camelCase form shown in the TypeScript and YAML examples; the other SDKs expose the same properties in their own naming conventions (for example assets_dir in Python, AssetsDir in C# and Go).
- assetsDir (string) - The directory to store the monitoring assets (e.g. the dashboard and metric tables).
- outputSchemaName (string) - Schema where output metric tables are created.
- tableName (string) - The full name of the table to attach the monitor to, in the format {catalog}.{schema}.{tableName}.
- baselineTableName (string) - Name of the baseline table from which drift metrics are computed. Columns in the monitored table should also be present in the baseline table.
- customMetrics (list of LakehouseMonitorCustomMetric) - Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows).
- dataClassificationConfig (LakehouseMonitorDataClassificationConfig) - The data classification config for the monitor.
- inferenceLog (LakehouseMonitorInferenceLog) - Configuration for the inference log monitor.
- latestMonitorFailureMsg (string)
- notifications (LakehouseMonitorNotifications) - The notification settings for the monitor. The optional onFailure and onNewClassificationTagDetected blocks are supported, each consisting of a single string array field named email_addresses containing the list of emails to notify.
- schedule (LakehouseMonitorSchedule) - The schedule for automatically updating and refreshing metric tables, consisting of the quartzCronExpression, timezoneId, and pauseStatus fields.
- skipBuiltinDashboard (bool) - Whether to skip creating a default dashboard summarizing data quality metrics.
- slicingExprs (list of string) - List of column expressions to slice data with for targeted analysis. The data is grouped by each expression independently, resulting in a separate slice for each predicate and its complements. For high-cardinality columns, only the top 100 unique values by frequency will generate slices.
- snapshot (LakehouseMonitorSnapshot) - Configuration for monitoring snapshot tables.
- timeSeries (LakehouseMonitorTimeSeries) - Configuration for monitoring timeseries tables.
- warehouseId (string) - Optional argument to specify the warehouse for dashboard creation. If not specified, the first running warehouse will be used.
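As a concrete illustration of several optional inputs used together, the following Python sketch sets a refresh schedule, failure notifications, and slicing expressions on a snapshot monitor; the cron expression, email address, and slicing column are placeholders rather than values from this page.
import pulumi_databricks as databricks

# Illustrative only: combines the optional schedule, notifications, and slicing
# inputs described above. The cron expression, email address, and slicing column
# are placeholder values.
scheduled_monitor = databricks.LakehouseMonitor("scheduledMonitor",
    table_name="sandbox.things.bar",
    assets_dir="/Shared/provider-test/databricks_lakehouse_monitoring/bar",
    output_schema_name="sandbox.things",
    snapshot={},
    schedule={
        "quartz_cron_expression": "0 0 12 * * ?",
        "timezone_id": "UTC",
    },
    notifications={
        "on_failure": {
            "email_addresses": ["data-team@example.com"],
        },
    },
    slicing_exprs=["region"],
    skip_builtin_dashboard=False)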
Outputs
All input properties are implicitly available as output properties. Additionally, the LakehouseMonitor resource produces the following output properties:
As above, property names follow each SDK's naming convention.
- dashboardId (string) - The ID of the generated dashboard.
- driftMetricsTableName (string) - The full name of the drift metrics table, in the format catalog_name.schema_name.table_name.
- id (string) - The provider-assigned unique ID for this managed resource.
- monitorVersion (string) - The version of the monitor config (e.g. 1, 2, 3). If negative, the monitor may be corrupted.
- profileMetricsTableName (string) - The full name of the profile metrics table, in the format catalog_name.schema_name.table_name.
- status (string) - Status of the monitor.
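The computed outputs can be used like any other Pulumi outputs, for example exported from the stack. A minimal Python sketch, assuming a snapshot monitor similar to the one defined earlier:
import pulumi
import pulumi_databricks as databricks

# A minimal snapshot monitor; the table and directory names are placeholders.
monitor = databricks.LakehouseMonitor("exampleMonitor",
    table_name="sandbox.things.bar",
    assets_dir="/Shared/provider-test/databricks_lakehouse_monitoring/bar",
    output_schema_name="sandbox.things",
    snapshot={})

# Export the generated dashboard ID and metric table names.
pulumi.export("dashboardId", monitor.dashboard_id)
pulumi.export("driftMetricsTable", monitor.drift_metrics_table_name)
pulumi.export("profileMetricsTable", monitor.profile_metrics_table_name)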
Look up Existing LakehouseMonitor Resource
Get an existing LakehouseMonitor resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: LakehouseMonitorState, opts?: CustomResourceOptions): LakehouseMonitor
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
assets_dir: Optional[str] = None,
baseline_table_name: Optional[str] = None,
custom_metrics: Optional[Sequence[LakehouseMonitorCustomMetricArgs]] = None,
dashboard_id: Optional[str] = None,
data_classification_config: Optional[LakehouseMonitorDataClassificationConfigArgs] = None,
drift_metrics_table_name: Optional[str] = None,
inference_log: Optional[LakehouseMonitorInferenceLogArgs] = None,
latest_monitor_failure_msg: Optional[str] = None,
monitor_version: Optional[str] = None,
notifications: Optional[LakehouseMonitorNotificationsArgs] = None,
output_schema_name: Optional[str] = None,
profile_metrics_table_name: Optional[str] = None,
schedule: Optional[LakehouseMonitorScheduleArgs] = None,
skip_builtin_dashboard: Optional[bool] = None,
slicing_exprs: Optional[Sequence[str]] = None,
snapshot: Optional[LakehouseMonitorSnapshotArgs] = None,
status: Optional[str] = None,
table_name: Optional[str] = None,
time_series: Optional[LakehouseMonitorTimeSeriesArgs] = None,
warehouse_id: Optional[str] = None) -> LakehouseMonitor
func GetLakehouseMonitor(ctx *Context, name string, id IDInput, state *LakehouseMonitorState, opts ...ResourceOption) (*LakehouseMonitor, error)
public static LakehouseMonitor Get(string name, Input<string> id, LakehouseMonitorState? state, CustomResourceOptions? opts = null)
public static LakehouseMonitor get(String name, Output<String> id, LakehouseMonitorState state, CustomResourceOptions options)
Resource lookup is not supported in YAML
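For example, in Python an existing monitor can be brought into a program with the static get method; the resource ID below is a placeholder, not a value from this page.
import pulumi_databricks as databricks

# Placeholder ID: substitute the actual provider ID of the monitor to look up.
existing = databricks.LakehouseMonitor.get("existingMonitor", "sandbox.things.bar")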
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
The state accepts all of the input properties plus the computed outputs (property names again follow each SDK's naming convention):
- assetsDir (string) - The directory to store the monitoring assets (e.g. the dashboard and metric tables).
- baselineTableName (string) - Name of the baseline table from which drift metrics are computed. Columns in the monitored table should also be present in the baseline table.
- customMetrics (list of LakehouseMonitorCustomMetric) - Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows).
- dashboardId (string) - The ID of the generated dashboard.
- dataClassificationConfig (LakehouseMonitorDataClassificationConfig) - The data classification config for the monitor.
- driftMetricsTableName (string) - The full name of the drift metrics table, in the format catalog_name.schema_name.table_name.
- inferenceLog (LakehouseMonitorInferenceLog) - Configuration for the inference log monitor.
- latestMonitorFailureMsg (string)
- monitorVersion (string) - The version of the monitor config (e.g. 1, 2, 3). If negative, the monitor may be corrupted.
- notifications (LakehouseMonitorNotifications) - The notification settings for the monitor. The optional onFailure and onNewClassificationTagDetected blocks are supported, each consisting of a single string array field named email_addresses containing the list of emails to notify.
- outputSchemaName (string) - Schema where output metric tables are created.
- profileMetricsTableName (string) - The full name of the profile metrics table, in the format catalog_name.schema_name.table_name.
- schedule (LakehouseMonitorSchedule) - The schedule for automatically updating and refreshing metric tables, consisting of the quartzCronExpression, timezoneId, and pauseStatus fields.
- skipBuiltinDashboard (bool) - Whether to skip creating a default dashboard summarizing data quality metrics.
- slicingExprs (list of string) - List of column expressions to slice data with for targeted analysis. The data is grouped by each expression independently, resulting in a separate slice for each predicate and its complements. For high-cardinality columns, only the top 100 unique values by frequency will generate slices.
- snapshot (LakehouseMonitorSnapshot) - Configuration for monitoring snapshot tables.
- status (string) - Status of the monitor.
- tableName (string) - The full name of the table to attach the monitor to, in the format {catalog}.{schema}.{tableName}.
- timeSeries (LakehouseMonitorTimeSeries) - Configuration for monitoring timeseries tables.
- warehouseId (string) - Optional argument to specify the warehouse for dashboard creation. If not specified, the first running warehouse will be used.
- warehouse_
id str - Optional argument to specify the warehouse for dashboard creation. If not specified, the first running warehouse will be used.
- assets
Dir String - The directory to store the monitoring assets (Eg. Dashboard and Metric Tables)
- baseline
Table StringName - Name of the baseline table from which drift metrics are computed from.Columns in the monitored table should also be present in the baseline table.
- custom
Metrics List<Property Map> - Custom metrics to compute on the monitored table. These can be aggregate metrics, derived metrics (from already computed aggregate metrics), or drift metrics (comparing metrics across time windows).
- dashboard
Id String - The ID of the generated dashboard.
- data
Classification Property MapConfig - The data classification config for the monitor
- drift
Metrics StringTable Name - The full name of the drift metrics table. Format: catalog_name.schema_name.table_name.
- inference
Log Property Map - Configuration for the inference log monitor
- latest
Monitor StringFailure Msg - monitor
Version String - The version of the monitor config (e.g. 1,2,3). If negative, the monitor may be corrupted
- notifications Property Map
- The notification settings for the monitor. The following optional blocks are supported, each consisting of the single string array field with name
email_addresses
containing a list of emails to notify: - output
Schema StringName - Schema where output metric tables are created
- profile
Metrics StringTable Name - The full name of the profile metrics table. Format: catalog_name.schema_name.table_name.
- schedule Property Map
- The schedule for automatically updating and refreshing metric tables. This block consists of following fields:
- skip
Builtin BooleanDashboard - Whether to skip creating a default dashboard summarizing data quality metrics.
- slicing
Exprs List<String> - List of column expressions to slice data with for targeted analysis. The data is grouped by each expression independently, resulting in a separate slice for each predicate and its complements. For high-cardinality columns, only the top 100 unique values by frequency will generate slices.
- snapshot Property Map
- Configuration for monitoring snapshot tables.
- status String
- Status of the Monitor
- table
Name String - The full name of the table to attach the monitor too. Its of the format {catalog}.{schema}.{tableName}
- time
Series Property Map - Configuration for monitoring timeseries tables.
- warehouse
Id String - Optional argument to specify the warehouse for dashboard creation. If not specified, the first running warehouse will be used.
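The generated-asset outputs above (metric tables, dashboard, status) can be read back off the resource like any other Pulumi output. Below is a minimal TypeScript sketch that looks up an already-provisioned monitor by a placeholder resource ID and exports its artifacts; the resource name and ID are illustrative, not values from this page.
import * as databricks from "@pulumi/databricks";

// Look up an existing monitor by resource ID (placeholder value) and surface
// its generated artifacts as stack outputs.
const existing = databricks.LakehouseMonitor.get("existingMonitor", "main.things.bar");

export const profileMetricsTable = existing.profileMetricsTableName;
export const driftMetricsTable = existing.driftMetricsTableName;
export const monitorDashboardId = existing.dashboardId;
export const monitorStatus = existing.status;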
Supporting Types
LakehouseMonitorCustomMetric, LakehouseMonitorCustomMetricArgs
- Definition string - The definition of the custom metric.
- InputColumns List<string> - Columns on the monitored table to apply the custom metrics to.
- Name string - Name of the custom metric.
- OutputDataType string - The output type of the custom metric.
- Type string - The type of the custom metric.
- Definition string - The definition of the custom metric.
- InputColumns []string - Columns on the monitored table to apply the custom metrics to.
- Name string - Name of the custom metric.
- OutputDataType string - The output type of the custom metric.
- Type string - The type of the custom metric.
- definition String - The definition of the custom metric.
- inputColumns List<String> - Columns on the monitored table to apply the custom metrics to.
- name String - Name of the custom metric.
- outputDataType String - The output type of the custom metric.
- type String - The type of the custom metric.
- definition string - The definition of the custom metric.
- inputColumns string[] - Columns on the monitored table to apply the custom metrics to.
- name string - Name of the custom metric.
- outputDataType string - The output type of the custom metric.
- type string - The type of the custom metric.
- definition str - The definition of the custom metric.
- input_columns Sequence[str] - Columns on the monitored table to apply the custom metrics to.
- name str - Name of the custom metric.
- output_data_type str - The output type of the custom metric.
- type str - The type of the custom metric.
- definition String - The definition of the custom metric.
- inputColumns List<String> - Columns on the monitored table to apply the custom metrics to.
- name String - Name of the custom metric.
- outputDataType String - The output type of the custom metric.
- type String - The type of the custom metric.
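Putting these fields together, a custom aggregate metric can be wired into a monitor as in the following TypeScript sketch. The table and schema names are placeholders, and the type and outputDataType values are assumptions about the underlying Databricks enum and SQL type, not values documented on this page.
import * as databricks from "@pulumi/databricks";

// Illustrative aggregate metric; `type` and `outputDataType` are assumed values.
const avgAbsDelta = {
    name: "avg_abs_delta",
    definition: "avg(abs(delta))",          // SQL expression evaluated per time window
    inputColumns: ["delta"],
    outputDataType: "double",               // assumed SQL output type
    type: "CUSTOM_METRIC_TYPE_AGGREGATE",   // assumed enum value
};

// Placeholder fully qualified names; a snapshot monitor keeps the example short.
const monitorWithMetric = new databricks.LakehouseMonitor("monitorWithMetric", {
    tableName: "main.things.bar",
    assetsDir: "/Shared/monitoring/bar",
    outputSchemaName: "main.things",
    snapshot: {},
    customMetrics: [avgAbsDelta],
});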
LakehouseMonitorDataClassificationConfig, LakehouseMonitorDataClassificationConfigArgs
- Enabled bool
- Enabled bool
- enabled Boolean
- enabled boolean
- enabled bool
- enabled Boolean
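The block carries only the enabled flag. As a rough TypeScript fragment of the monitor arguments, turning classification on looks like this:
// Fragment of the LakehouseMonitor arguments: enable data classification.
const withClassification = {
    dataClassificationConfig: {
        enabled: true,
    },
};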
LakehouseMonitorInferenceLog, LakehouseMonitorInferenceLogArgs
- Granularities List<string> - List of granularities to use when aggregating data into time windows based on their timestamp.
- ModelIdCol string - Column of the model id or version.
- PredictionCol string - Column of the model prediction.
- ProblemType string - Problem type the model aims to solve. Either PROBLEM_TYPE_CLASSIFICATION or PROBLEM_TYPE_REGRESSION.
- TimestampCol string - Column of the timestamp of predictions.
- LabelCol string - Column of the model label.
- PredictionProbaCol string - Column of the model prediction probabilities.
- Granularities []string - List of granularities to use when aggregating data into time windows based on their timestamp.
- ModelIdCol string - Column of the model id or version.
- PredictionCol string - Column of the model prediction.
- ProblemType string - Problem type the model aims to solve. Either PROBLEM_TYPE_CLASSIFICATION or PROBLEM_TYPE_REGRESSION.
- TimestampCol string - Column of the timestamp of predictions.
- LabelCol string - Column of the model label.
- PredictionProbaCol string - Column of the model prediction probabilities.
- granularities List<String> - List of granularities to use when aggregating data into time windows based on their timestamp.
- modelIdCol String - Column of the model id or version.
- predictionCol String - Column of the model prediction.
- problemType String - Problem type the model aims to solve. Either PROBLEM_TYPE_CLASSIFICATION or PROBLEM_TYPE_REGRESSION.
- timestampCol String - Column of the timestamp of predictions.
- labelCol String - Column of the model label.
- predictionProbaCol String - Column of the model prediction probabilities.
- granularities string[] - List of granularities to use when aggregating data into time windows based on their timestamp.
- modelIdCol string - Column of the model id or version.
- predictionCol string - Column of the model prediction.
- problemType string - Problem type the model aims to solve. Either PROBLEM_TYPE_CLASSIFICATION or PROBLEM_TYPE_REGRESSION.
- timestampCol string - Column of the timestamp of predictions.
- labelCol string - Column of the model label.
- predictionProbaCol string - Column of the model prediction probabilities.
- granularities Sequence[str] - List of granularities to use when aggregating data into time windows based on their timestamp.
- model_id_col str - Column of the model id or version.
- prediction_col str - Column of the model prediction.
- problem_type str - Problem type the model aims to solve. Either PROBLEM_TYPE_CLASSIFICATION or PROBLEM_TYPE_REGRESSION.
- timestamp_col str - Column of the timestamp of predictions.
- label_col str - Column of the model label.
- prediction_proba_col str - Column of the model prediction probabilities.
- granularities List<String> - List of granularities to use when aggregating data into time windows based on their timestamp.
- modelIdCol String - Column of the model id or version.
- predictionCol String - Column of the model prediction.
- problemType String - Problem type the model aims to solve. Either PROBLEM_TYPE_CLASSIFICATION or PROBLEM_TYPE_REGRESSION.
- timestampCol String - Column of the timestamp of predictions.
- labelCol String - Column of the model label.
- predictionProbaCol String - Column of the model prediction probabilities.
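For an inference monitor, the column mappings above are combined in a single inferenceLog block. The TypeScript sketch below uses hypothetical table and column names; the problem-type constant comes from the field documentation above.
import * as databricks from "@pulumi/databricks";

// Hypothetical prediction-log table and column names.
const inferenceMonitor = new databricks.LakehouseMonitor("inferenceMonitor", {
    tableName: "main.things.model_predictions",
    assetsDir: "/Shared/monitoring/model_predictions",
    outputSchemaName: "main.things",
    inferenceLog: {
        granularities: ["1 day"],
        timestampCol: "ts",
        modelIdCol: "model_version",
        predictionCol: "prediction",
        labelCol: "label",                      // optional ground-truth column
        predictionProbaCol: "prediction_proba", // optional, classification only
        problemType: "PROBLEM_TYPE_CLASSIFICATION",
    },
});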
LakehouseMonitorNotifications, LakehouseMonitorNotificationsArgs
- OnFailure LakehouseMonitorNotificationsOnFailure - Who to send notifications to on monitor failure.
- OnNewClassificationTagDetected LakehouseMonitorNotificationsOnNewClassificationTagDetected - Who to send notifications to when new data classification tags are detected.
- OnFailure LakehouseMonitorNotificationsOnFailure - Who to send notifications to on monitor failure.
- OnNewClassificationTagDetected LakehouseMonitorNotificationsOnNewClassificationTagDetected - Who to send notifications to when new data classification tags are detected.
- onFailure LakehouseMonitorNotificationsOnFailure - Who to send notifications to on monitor failure.
- onNewClassificationTagDetected LakehouseMonitorNotificationsOnNewClassificationTagDetected - Who to send notifications to when new data classification tags are detected.
- onFailure LakehouseMonitorNotificationsOnFailure - Who to send notifications to on monitor failure.
- onNewClassificationTagDetected LakehouseMonitorNotificationsOnNewClassificationTagDetected - Who to send notifications to when new data classification tags are detected.
- on_failure LakehouseMonitorNotificationsOnFailure - Who to send notifications to on monitor failure.
- on_new_classification_tag_detected LakehouseMonitorNotificationsOnNewClassificationTagDetected - Who to send notifications to when new data classification tags are detected.
- onFailure Property Map - Who to send notifications to on monitor failure.
- onNewClassificationTagDetected Property Map - Who to send notifications to when new data classification tags are detected.
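Both notification targets carry nothing but an email_addresses list. A hedged TypeScript fragment of the monitor arguments, with placeholder addresses:
// Fragment of the LakehouseMonitor arguments; addresses are placeholders.
const notificationSettings = {
    notifications: {
        onFailure: {
            emailAddresses: ["data-platform-oncall@example.com"],
        },
        onNewClassificationTagDetected: {
            emailAddresses: ["data-governance@example.com"],
        },
    },
};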
LakehouseMonitorNotificationsOnFailure, LakehouseMonitorNotificationsOnFailureArgs
- EmailAddresses List<string>
- EmailAddresses []string
- emailAddresses List<String>
- emailAddresses string[]
- email_addresses Sequence[str]
- emailAddresses List<String>
LakehouseMonitorNotificationsOnNewClassificationTagDetected, LakehouseMonitorNotificationsOnNewClassificationTagDetectedArgs
- EmailAddresses List<string>
- EmailAddresses []string
- emailAddresses List<String>
- emailAddresses string[]
- email_addresses Sequence[str]
- emailAddresses List<String>
LakehouseMonitorSchedule, LakehouseMonitorScheduleArgs
- QuartzCronExpression string - String expression that determines when to run the monitor. See the Quartz documentation for examples.
- TimezoneId string - String with the timezone ID (e.g. PST) in which to evaluate the Quartz expression.
- PauseStatus string - Optional string field that indicates whether a schedule is paused (PAUSED) or not (UNPAUSED).
- QuartzCronExpression string - String expression that determines when to run the monitor. See the Quartz documentation for examples.
- TimezoneId string - String with the timezone ID (e.g. PST) in which to evaluate the Quartz expression.
- PauseStatus string - Optional string field that indicates whether a schedule is paused (PAUSED) or not (UNPAUSED).
- quartzCronExpression String - String expression that determines when to run the monitor. See the Quartz documentation for examples.
- timezoneId String - String with the timezone ID (e.g. PST) in which to evaluate the Quartz expression.
- pauseStatus String - Optional string field that indicates whether a schedule is paused (PAUSED) or not (UNPAUSED).
- quartzCronExpression string - String expression that determines when to run the monitor. See the Quartz documentation for examples.
- timezoneId string - String with the timezone ID (e.g. PST) in which to evaluate the Quartz expression.
- pauseStatus string - Optional string field that indicates whether a schedule is paused (PAUSED) or not (UNPAUSED).
- quartz_cron_expression str - String expression that determines when to run the monitor. See the Quartz documentation for examples.
- timezone_id str - String with the timezone ID (e.g. PST) in which to evaluate the Quartz expression.
- pause_status str - Optional string field that indicates whether a schedule is paused (PAUSED) or not (UNPAUSED).
- quartzCronExpression String - String expression that determines when to run the monitor. See the Quartz documentation for examples.
- timezoneId String - String with the timezone ID (e.g. PST) in which to evaluate the Quartz expression.
- pauseStatus String - Optional string field that indicates whether a schedule is paused (PAUSED) or not (UNPAUSED).
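The schedule uses standard Quartz cron syntax (seconds, minutes, hours, day-of-month, month, day-of-week). As a sketch, a TypeScript fragment of the monitor arguments that refreshes the metric tables daily at midnight UTC could look like this; the expression and timezone are illustrative.
// Fragment of the LakehouseMonitor arguments: refresh metric tables daily at 00:00 UTC.
const refreshSchedule = {
    schedule: {
        quartzCronExpression: "0 0 0 * * ?",
        timezoneId: "UTC",
        pauseStatus: "UNPAUSED",   // optional; set to "PAUSED" to suspend refreshes
    },
};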
LakehouseMonitorTimeSeries, LakehouseMonitorTimeSeriesArgs
- Granularities List<string> - List of granularities to use when aggregating data into time windows based on their timestamp.
- TimestampCol string - Column of the timestamp of predictions.
- Granularities []string - List of granularities to use when aggregating data into time windows based on their timestamp.
- TimestampCol string - Column of the timestamp of predictions.
- granularities List<String> - List of granularities to use when aggregating data into time windows based on their timestamp.
- timestampCol String - Column of the timestamp of predictions.
- granularities string[] - List of granularities to use when aggregating data into time windows based on their timestamp.
- timestampCol string - Column of the timestamp of predictions.
- granularities Sequence[str] - List of granularities to use when aggregating data into time windows based on their timestamp.
- timestamp_col str - Column of the timestamp of predictions.
- granularities List<String> - List of granularities to use when aggregating data into time windows based on their timestamp.
- timestampCol String - Column of the timestamp of predictions.
Package Details
- Repository
- databricks pulumi/pulumi-databricks
- License
- Apache-2.0
- Notes
- This Pulumi package is based on the databricks Terraform Provider.