Skip to content
This repository was archived by the owner on May 17, 2024. It is now read-only.

Commit 3234f27

Browse files
committed
Merge branch 'master' into issue_527
2 parents d388905 + 5a8ec27 commit 3234f27

File tree

3 files changed

+76
-34
lines changed

3 files changed

+76
-34
lines changed

data_diff/dbt.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
columns_added_template,
1313
columns_removed_template,
1414
no_differences_template,
15+
columns_type_changed_template,
1516
)
16-
from pathlib import Path
1717

1818
import keyring
1919

@@ -191,26 +191,36 @@ def _local_diff(diff_vars: TDiffVars) -> None:
191191
diff_vars.connection, prod_qualified_str, tuple(diff_vars.primary_keys), diff_vars.threads
192192
)
193193

194-
table1_columns = list(table1.get_schema())
194+
table1_columns = table1.get_schema()
195195
try:
196-
table2_columns = list(table2.get_schema())
196+
table2_columns = table2.get_schema()
197197
# Not ideal, but we don't have more specific exceptions yet
198198
except Exception as ex:
199199
logger.debug(ex)
200200
diff_output_str += "[red]New model or no access to prod table.[/] \n"
201201
rich.print(diff_output_str)
202202
return
203203

204-
column_set = set(table1_columns).intersection(table2_columns)
205-
columns_added = set(table1_columns).difference(table2_columns)
206-
columns_removed = set(table2_columns).difference(table1_columns)
204+
table1_column_names = set(table1_columns.keys())
205+
table2_column_names = set(table2_columns.keys())
206+
column_set = table1_column_names.intersection(table2_column_names)
207+
columns_added = table1_column_names.difference(table2_column_names)
208+
columns_removed = table2_column_names.difference(table1_column_names)
209+
# col type is i = 1 in tuple
210+
columns_type_changed = {
211+
k for k, v in table1_columns.items() if k in table2_columns and v[1] != table2_columns[k][1]
212+
}
207213

208214
if columns_added:
209215
diff_output_str += columns_added_template(columns_added)
210216

211217
if columns_removed:
212218
diff_output_str += columns_removed_template(columns_removed)
213219

220+
if columns_type_changed:
221+
diff_output_str += columns_type_changed_template(columns_type_changed)
222+
column_set = column_set.difference(columns_type_changed)
223+
214224
column_set = column_set - set(diff_vars.primary_keys)
215225

216226
if diff_vars.include_columns:
@@ -322,7 +332,7 @@ def _cloud_diff(diff_vars: TDiffVars, datasource_id: int, api: DatafoldAPI, org_
322332
diff_output_str += columns_removed_template(columns_removed)
323333

324334
if column_type_changes:
325-
diff_output_str += "Type change: " + str(column_type_changes) + "\n"
335+
diff_output_str += columns_type_changed_template(column_type_changes)
326336

327337
if any([rows_added_count, rows_removed_count, rows_updated]):
328338
diff_output = dbt_diff_string_template(

data_diff/utils.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -171,14 +171,19 @@ def diffs_are_equiv_jsons(diff: list, json_cols: dict):
171171
return match, overriden_diff_cols
172172

173173

174-
def columns_removed_template(table2_set_diff) -> str:
175-
columns_removed = "Column(s) removed: " + str(table2_set_diff) + "\n"
176-
return columns_removed
174+
def columns_removed_template(columns_removed) -> str:
175+
columns_removed_str = f"Column(s) removed: {columns_removed}\n"
176+
return columns_removed_str
177177

178178

179-
def columns_added_template(table1_set_diff) -> str:
180-
columns_added = "Column(s) added: " + str(table1_set_diff) + "\n"
181-
return columns_added
179+
def columns_added_template(columns_added) -> str:
180+
columns_added_str = f"Column(s) added: {columns_added}\n"
181+
return columns_added_str
182+
183+
184+
def columns_type_changed_template(columns_type_changed) -> str:
185+
columns_type_changed_str = f"Type change: {columns_type_changed}\n"
186+
return columns_type_changed_str
182187

183188

184189
def no_differences_template() -> str:

tests/test_dbt.py

Lines changed: 48 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -442,21 +442,6 @@ def test_get_connection_no_type(self, mock_open, mock_profile_renderer, mock_yam
442442
_, _ = DbtParser.get_connection_creds(mock_self)
443443

444444

445-
EXAMPLE_DIFF_RESULTS = {
446-
"pks": {"exclusives": [5, 3]},
447-
"values": {
448-
"rows_with_differences": 2,
449-
"total_rows": 10,
450-
"columns_diff_stats": [
451-
{"column_name": "name", "match": 80.0},
452-
{"column_name": "age", "match": 100.0},
453-
{"column_name": "city", "match": 0.0},
454-
{"column_name": "country", "match": 100.0},
455-
],
456-
},
457-
}
458-
459-
460445
class TestDbtDiffer(unittest.TestCase):
461446
# Set DATA_DIFF_DBT_PROJ to use your own dbt project, otherwise uses the duckdb project in tests/dbt_artifacts
462447
def test_integration_basic_dbt(self):
@@ -488,10 +473,10 @@ def test_integration_cloud_dbt(self):
488473
def test_local_diff(self, mock_diff_tables):
489474
connection = {}
490475
mock_table1 = Mock()
491-
column_set = {"col1", "col2"}
492-
mock_table1.get_schema.return_value = column_set
476+
column_dictionary = {"col1": ("col1", "type"), "col2": ("col2", "type")}
477+
mock_table1.get_schema.return_value = column_dictionary
493478
mock_table2 = Mock()
494-
mock_table2.get_schema.return_value = column_set
479+
mock_table2.get_schema.return_value = column_dictionary
495480
mock_diff = MagicMock()
496481
mock_diff_tables.return_value = mock_diff
497482
mock_diff.__iter__.return_value = [1, 2, 3]
@@ -528,14 +513,56 @@ def test_local_diff(self, mock_diff_tables):
528513
mock_connect.assert_any_call(connection, ".".join(prod_qualified_list), tuple(expected_primary_keys), threads)
529514
mock_diff.get_stats_string.assert_called_once()
530515

516+
@patch("data_diff.dbt.diff_tables")
517+
def test_local_diff_types_differ(self, mock_diff_tables):
518+
connection = {}
519+
mock_table1 = Mock()
520+
mock_table2 = Mock()
521+
table1_column_dictionary = {"col1": ("col1", "type"), "col2": ("col2", "type")}
522+
table2_column_dictionary = {"col1": ("col1", "type"), "col2": ("col2", "differing_type")}
523+
mock_table1.get_schema.return_value = table1_column_dictionary
524+
mock_table2.get_schema.return_value = table2_column_dictionary
525+
mock_diff = MagicMock()
526+
mock_diff_tables.return_value = mock_diff
527+
mock_diff.__iter__.return_value = [1, 2, 3]
528+
threads = None
529+
where = "a_string"
530+
dev_qualified_list = ["dev_db", "dev_schema", "dev_table"]
531+
prod_qualified_list = ["prod_db", "prod_schema", "prod_table"]
532+
expected_primary_keys = ["key"]
533+
diff_vars = TDiffVars(
534+
dev_path=dev_qualified_list,
535+
prod_path=prod_qualified_list,
536+
primary_keys=expected_primary_keys,
537+
connection=connection,
538+
threads=threads,
539+
where_filter=where,
540+
include_columns=[],
541+
exclude_columns=[],
542+
)
543+
with patch("data_diff.dbt.connect_to_table", side_effect=[mock_table1, mock_table2]) as mock_connect:
544+
_local_diff(diff_vars)
545+
546+
mock_diff_tables.assert_called_once_with(
547+
mock_table1,
548+
mock_table2,
549+
threaded=True,
550+
algorithm=Algorithm.JOINDIFF,
551+
extra_columns=ANY,
552+
where=where,
553+
)
554+
self.assertEqual(len(mock_diff_tables.call_args[1]["extra_columns"]), 1)
555+
self.assertEqual(mock_connect.call_count, 2)
556+
mock_diff.get_stats_string.assert_called_once()
557+
531558
@patch("data_diff.dbt.diff_tables")
532559
def test_local_diff_no_diffs(self, mock_diff_tables):
533560
connection = {}
534-
column_set = {"col1", "col2"}
561+
column_dictionary = {"col1": ("col1", "type"), "col2": ("col2", "type")}
535562
mock_table1 = Mock()
536-
mock_table1.get_schema.return_value = column_set
563+
mock_table1.get_schema.return_value = column_dictionary
537564
mock_table2 = Mock()
538-
mock_table2.get_schema.return_value = column_set
565+
mock_table2.get_schema.return_value = column_dictionary
539566
mock_diff = MagicMock()
540567
mock_diff_tables.return_value = mock_diff
541568
mock_diff.__iter__.return_value = []

0 commit comments

Comments
 (0)