-
Notifications
You must be signed in to change notification settings - Fork 286
feat(snowflake): add scheduler support for dynamic tables #1710
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
3b2447d
09f35e2
309d6a5
2bb2bed
d9454a6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| kind: Features | ||
| body: Add scheduler configuration support for Snowflake dynamic tables, enabling dbt-managed refresh scheduling that is decoupled from upstream and downstream dynamic table dependencies. | ||
| time: 2026-03-12T15:07:24-07:00 | ||
| custom: | ||
| Author: ibelianski | ||
| Issue: none |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -33,6 +33,11 @@ def default(cls) -> Self: | |
| return cls("ON_CREATE") | ||
|
|
||
|
|
||
| class Scheduler(StrEnum): | ||
| ENABLE = "ENABLE" | ||
| DISABLE = "DISABLE" | ||
|
|
||
|
|
||
| @dataclass(frozen=True, eq=True, unsafe_hash=True) | ||
| class SnowflakeDynamicTableConfig(SnowflakeRelationConfigBase): | ||
| """ | ||
|
|
@@ -47,6 +52,7 @@ class SnowflakeDynamicTableConfig(SnowflakeRelationConfigBase): | |
| - snowflake_initialization_warehouse: the name of the warehouse used for the initializations and reinitializations of the dynamic table | ||
| - refresh_mode: specifies the refresh type for the dynamic table | ||
| - initialize: specifies the behavior of the initial refresh of the dynamic table | ||
| - scheduler: specifies whether to ENABLE or DISABLE the dynamic table's scheduler | ||
| - cluster_by: specifies the columns to cluster on | ||
| - immutable_where: specifies an immutability constraint expression | ||
| - transient: specifies whether the dynamic table is transient (no fail-safe). snowflake_default_transient_dynamic_tables determines the default value | ||
|
|
@@ -58,11 +64,12 @@ class SnowflakeDynamicTableConfig(SnowflakeRelationConfigBase): | |
| schema_name: str | ||
| database_name: str | ||
| query: str | ||
| target_lag: str | ||
| snowflake_warehouse: str | ||
| target_lag: Optional[str] = None | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. target lag is not mandatory any more PS |
||
| snowflake_initialization_warehouse: Optional[str] = None | ||
| refresh_mode: Optional[RefreshMode] = RefreshMode.default() | ||
| initialize: Optional[Initialize] = Initialize.default() | ||
| scheduler: Optional[Scheduler] = None | ||
| row_access_policy: Optional[str] = None | ||
| table_tag: Optional[str] = None | ||
| cluster_by: Optional[Union[str, list[str]]] = None | ||
|
|
@@ -89,6 +96,7 @@ def from_dict(cls, config_dict: Dict[str, Any]) -> Self: | |
| ), | ||
| "refresh_mode": config_dict.get("refresh_mode"), | ||
| "initialize": config_dict.get("initialize"), | ||
| "scheduler": config_dict.get("scheduler"), | ||
| "row_access_policy": config_dict.get("row_access_policy"), | ||
| "table_tag": config_dict.get("table_tag"), | ||
| "cluster_by": config_dict.get("cluster_by"), | ||
|
|
@@ -129,6 +137,13 @@ def parse_relation_config(cls, relation_config: RelationConfig) -> Dict[str, Any | |
| if initialize := relation_config.config.extra.get("initialize"): # type:ignore | ||
| config_dict["initialize"] = initialize.upper() | ||
|
|
||
| if scheduler := relation_config.config.extra.get("scheduler"): # type:ignore | ||
| config_dict["scheduler"] = scheduler.upper() | ||
| elif config_dict.get("target_lag"): | ||
| config_dict["scheduler"] = Scheduler.ENABLE.value | ||
| else: | ||
| config_dict["scheduler"] = Scheduler.DISABLE.value | ||
|
|
||
| return config_dict | ||
|
|
||
| @classmethod | ||
|
|
@@ -163,15 +178,21 @@ def parse_relation_results(cls, relation_results: RelationResults) -> Dict[str, | |
| else: | ||
| cluster_by = None | ||
|
|
||
| scheduler = dynamic_table.get("scheduler") | ||
| target_lag = dynamic_table.get("target_lag") | ||
| if scheduler is None: | ||
| scheduler = Scheduler.ENABLE.value if target_lag else Scheduler.DISABLE.value | ||
|
|
||
| config_dict = { | ||
| "name": dynamic_table.get("name"), | ||
| "schema_name": dynamic_table.get("schema_name"), | ||
| "database_name": dynamic_table.get("database_name"), | ||
| "query": dynamic_table.get("text"), | ||
| "target_lag": dynamic_table.get("target_lag"), | ||
| "target_lag": target_lag, | ||
| "snowflake_warehouse": dynamic_table.get("warehouse"), | ||
| "snowflake_initialization_warehouse": init_warehouse, | ||
| "refresh_mode": dynamic_table.get("refresh_mode"), | ||
| "scheduler": scheduler, | ||
| "row_access_policy": dynamic_table.get("row_access_policy"), | ||
| "table_tag": dynamic_table.get("table_tag"), | ||
| "cluster_by": cluster_by, | ||
|
|
@@ -239,6 +260,15 @@ def requires_full_refresh(self) -> bool: | |
| return False | ||
|
|
||
|
|
||
| @dataclass(frozen=True, eq=True, unsafe_hash=True) | ||
| class SnowflakeDynamicTableSchedulerConfigChange(RelationConfigChange): | ||
| context: Optional[str] = None | ||
|
|
||
| @property | ||
| def requires_full_refresh(self) -> bool: | ||
| return False | ||
|
|
||
|
|
||
| @dataclass(frozen=True, eq=True, unsafe_hash=True) | ||
| class SnowflakeDynamicTableTransientConfigChange(RelationConfigChange): | ||
| context: Optional[bool] = None | ||
|
|
@@ -257,6 +287,7 @@ class SnowflakeDynamicTableConfigChangeset: | |
| SnowflakeDynamicTableInitializationWarehouseConfigChange | ||
| ] = None | ||
| refresh_mode: Optional[SnowflakeDynamicTableRefreshModeConfigChange] = None | ||
| scheduler: Optional[SnowflakeDynamicTableSchedulerConfigChange] = None | ||
| immutable_where: Optional[SnowflakeDynamicTableImmutableWhereConfigChange] = None | ||
| cluster_by: Optional[SnowflakeDynamicTableClusterByConfigChange] = None | ||
| transient: Optional[SnowflakeDynamicTableTransientConfigChange] = None | ||
|
|
@@ -277,6 +308,7 @@ def requires_full_refresh(self) -> bool: | |
| else False | ||
| ), | ||
| self.refresh_mode.requires_full_refresh if self.refresh_mode else False, | ||
| self.scheduler.requires_full_refresh if self.scheduler else False, | ||
| self.immutable_where.requires_full_refresh if self.immutable_where else False, | ||
| self.cluster_by.requires_full_refresh if self.cluster_by else False, | ||
| self.transient.requires_full_refresh if self.transient else False, | ||
|
|
@@ -291,6 +323,7 @@ def has_changes(self) -> bool: | |
| self.snowflake_warehouse, | ||
| self.snowflake_initialization_warehouse, | ||
| self.refresh_mode, | ||
| self.scheduler, | ||
| self.immutable_where, | ||
| self.cluster_by, | ||
| self.transient, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if there's actually three states:
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There are 2 states:
ENABLE ( default like Dynamic TAbles /MV ) work today subject to be scheduled by Snowflake directly or part of the downstream schedule
DISABLED ( only manual refreshes allowed , Dynamic table scheduler will never touch it. directly or through upstream dependency). => DT's
i do not see a meaningful scenario to justify 3rd mode where we want to disable Dynamic tables from both DBT refreshes and Snowflake Dynamic table scheduler.
( i guess target_lag = downstream on all DTs in the pipeline achieves similar effect functionally , but i am not seeing meaningful scenario for that case)
moreover 2 modes provide really simple syntax for users ( they just need to specify warehouse, absence of target lag implies disabling Dynmic table scheduler and DBT running refreshes)