Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 5a8ec27

Browse files
authored
Merge pull request #584 from dlawin/issue_542
handle column type changes
2 parents 6e33035 + e9f97f9 commit 5a8ec27

File tree

3 files changed

+76
-34
lines changed

3 files changed

+76
-34
lines changed

data_diff/dbt.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
columns_added_template,
1313
columns_removed_template,
1414
no_differences_template,
15+
columns_type_changed_template,
1516
)
16-
from pathlib import Path
1717

1818
import keyring
1919

@@ -191,26 +191,36 @@ def _local_diff(diff_vars: TDiffVars) -> None:
191191
diff_vars.connection, prod_qualified_str, tuple(diff_vars.primary_keys), diff_vars.threads
192192
)
193193

194-
table1_columns = list(table1.get_schema())
194+
table1_columns = table1.get_schema()
195195
try:
196-
table2_columns = list(table2.get_schema())
196+
table2_columns = table2.get_schema()
197197
# Not ideal, but we don't have more specific exceptions yet
198198
except Exception as ex:
199199
logger.debug(ex)
200200
diff_output_str += "[red]New model or no access to prod table.[/] \n"
201201
rich.print(diff_output_str)
202202
return
203203

204-
column_set = set(table1_columns).intersection(table2_columns)
205-
columns_added = set(table1_columns).difference(table2_columns)
206-
columns_removed = set(table2_columns).difference(table1_columns)
204+
table1_column_names = set(table1_columns.keys())
205+
table2_column_names = set(table2_columns.keys())
206+
column_set = table1_column_names.intersection(table2_column_names)
207+
columns_added = table1_column_names.difference(table2_column_names)
208+
columns_removed = table2_column_names.difference(table1_column_names)
209+
# col type is i = 1 in tuple
210+
columns_type_changed = {
211+
k for k, v in table1_columns.items() if k in table2_columns and v[1] != table2_columns[k][1]
212+
}
207213

208214
if columns_added:
209215
diff_output_str += columns_added_template(columns_added)
210216

211217
if columns_removed:
212218
diff_output_str += columns_removed_template(columns_removed)
213219

220+
if columns_type_changed:
221+
diff_output_str += columns_type_changed_template(columns_type_changed)
222+
column_set = column_set.difference(columns_type_changed)
223+
214224
column_set = column_set - set(diff_vars.primary_keys)
215225

216226
if diff_vars.include_columns:
@@ -321,7 +331,7 @@ def _cloud_diff(diff_vars: TDiffVars, datasource_id: int, api: DatafoldAPI, org_
321331
diff_output_str += columns_removed_template(columns_removed)
322332

323333
if column_type_changes:
324-
diff_output_str += "Type change: " + str(column_type_changes) + "\n"
334+
diff_output_str += columns_type_changed_template(column_type_changes)
325335

326336
if any([rows_added_count, rows_removed_count, rows_updated]):
327337
diff_output = dbt_diff_string_template(

data_diff/utils.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -171,14 +171,19 @@ def diffs_are_equiv_jsons(diff: list, json_cols: dict):
171171
return match, overriden_diff_cols
172172

173173

174-
def columns_removed_template(table2_set_diff) -> str:
175-
columns_removed = "Column(s) removed: " + str(table2_set_diff) + "\n"
176-
return columns_removed
174+
def columns_removed_template(columns_removed) -> str:
175+
columns_removed_str = f"Column(s) removed: {columns_removed}\n"
176+
return columns_removed_str
177177

178178

179-
def columns_added_template(table1_set_diff) -> str:
180-
columns_added = "Column(s) added: " + str(table1_set_diff) + "\n"
181-
return columns_added
179+
def columns_added_template(columns_added) -> str:
180+
columns_added_str = f"Column(s) added: {columns_added}\n"
181+
return columns_added_str
182+
183+
184+
def columns_type_changed_template(columns_type_changed) -> str:
185+
columns_type_changed_str = f"Type change: {columns_type_changed}\n"
186+
return columns_type_changed_str
182187

183188

184189
def no_differences_template() -> str:

tests/test_dbt.py

Lines changed: 48 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -442,21 +442,6 @@ def test_get_connection_no_type(self, mock_open, mock_profile_renderer, mock_yam
442442
_, _ = DbtParser.get_connection_creds(mock_self)
443443

444444

445-
EXAMPLE_DIFF_RESULTS = {
446-
"pks": {"exclusives": [5, 3]},
447-
"values": {
448-
"rows_with_differences": 2,
449-
"total_rows": 10,
450-
"columns_diff_stats": [
451-
{"column_name": "name", "match": 80.0},
452-
{"column_name": "age", "match": 100.0},
453-
{"column_name": "city", "match": 0.0},
454-
{"column_name": "country", "match": 100.0},
455-
],
456-
},
457-
}
458-
459-
460445
class TestDbtDiffer(unittest.TestCase):
461446
# Set DATA_DIFF_DBT_PROJ to use your own dbt project, otherwise uses the duckdb project in tests/dbt_artifacts
462447
def test_integration_basic_dbt(self):
@@ -488,10 +473,10 @@ def test_integration_cloud_dbt(self):
488473
def test_local_diff(self, mock_diff_tables):
489474
connection = {}
490475
mock_table1 = Mock()
491-
column_set = {"col1", "col2"}
492-
mock_table1.get_schema.return_value = column_set
476+
column_dictionary = {"col1": ("col1", "type"), "col2": ("col2", "type")}
477+
mock_table1.get_schema.return_value = column_dictionary
493478
mock_table2 = Mock()
494-
mock_table2.get_schema.return_value = column_set
479+
mock_table2.get_schema.return_value = column_dictionary
495480
mock_diff = MagicMock()
496481
mock_diff_tables.return_value = mock_diff
497482
mock_diff.__iter__.return_value = [1, 2, 3]
@@ -527,14 +512,56 @@ def test_local_diff(self, mock_diff_tables):
527512
mock_connect.assert_any_call(connection, ".".join(prod_qualified_list), tuple(expected_primary_keys), threads)
528513
mock_diff.get_stats_string.assert_called_once()
529514

515+
@patch("data_diff.dbt.diff_tables")
516+
def test_local_diff_types_differ(self, mock_diff_tables):
517+
connection = {}
518+
mock_table1 = Mock()
519+
mock_table2 = Mock()
520+
table1_column_dictionary = {"col1": ("col1", "type"), "col2": ("col2", "type")}
521+
table2_column_dictionary = {"col1": ("col1", "type"), "col2": ("col2", "differing_type")}
522+
mock_table1.get_schema.return_value = table1_column_dictionary
523+
mock_table2.get_schema.return_value = table2_column_dictionary
524+
mock_diff = MagicMock()
525+
mock_diff_tables.return_value = mock_diff
526+
mock_diff.__iter__.return_value = [1, 2, 3]
527+
threads = None
528+
where = "a_string"
529+
dev_qualified_list = ["dev_db", "dev_schema", "dev_table"]
530+
prod_qualified_list = ["prod_db", "prod_schema", "prod_table"]
531+
expected_primary_keys = ["key"]
532+
diff_vars = TDiffVars(
533+
dev_path=dev_qualified_list,
534+
prod_path=prod_qualified_list,
535+
primary_keys=expected_primary_keys,
536+
connection=connection,
537+
threads=threads,
538+
where_filter=where,
539+
include_columns=[],
540+
exclude_columns=[],
541+
)
542+
with patch("data_diff.dbt.connect_to_table", side_effect=[mock_table1, mock_table2]) as mock_connect:
543+
_local_diff(diff_vars)
544+
545+
mock_diff_tables.assert_called_once_with(
546+
mock_table1,
547+
mock_table2,
548+
threaded=True,
549+
algorithm=Algorithm.JOINDIFF,
550+
extra_columns=ANY,
551+
where=where,
552+
)
553+
self.assertEqual(len(mock_diff_tables.call_args[1]["extra_columns"]), 1)
554+
self.assertEqual(mock_connect.call_count, 2)
555+
mock_diff.get_stats_string.assert_called_once()
556+
530557
@patch("data_diff.dbt.diff_tables")
531558
def test_local_diff_no_diffs(self, mock_diff_tables):
532559
connection = {}
533-
column_set = {"col1", "col2"}
560+
column_dictionary = {"col1": ("col1", "type"), "col2": ("col2", "type")}
534561
mock_table1 = Mock()
535-
mock_table1.get_schema.return_value = column_set
562+
mock_table1.get_schema.return_value = column_dictionary
536563
mock_table2 = Mock()
537-
mock_table2.get_schema.return_value = column_set
564+
mock_table2.get_schema.return_value = column_dictionary
538565
mock_diff = MagicMock()
539566
mock_diff_tables.return_value = mock_diff
540567
mock_diff.__iter__.return_value = []

0 commit comments

Comments
 (0)