Skip to content

Commit 5ab7878

Browse files
authored
Added --fields and --ignorefields options to specify which fields are compared
1 parent 6bae40a commit 5ab7878

File tree

2 files changed

+58
-16
lines changed

2 files changed

+58
-16
lines changed

db_diff/__init__.py

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def _simplify_json_row(r, common_keys):
5252
return r
5353

5454

55-
def compare(previous, current, show_unchanged=False):
55+
def compare(previous, current, show_unchanged=False, fields=None, ignorefields=None):
5656
result = {
5757
"added": [],
5858
"removed": [],
@@ -64,45 +64,60 @@ def compare(previous, current, show_unchanged=False):
6464
previous_columns = set(next(iter(previous.values())).keys())
6565
current_columns = set(next(iter(current.values())).keys())
6666
ignore_columns = None
67-
if previous_columns != current_columns:
68-
result["columns_added"] = [
69-
c for c in current_columns if c not in previous_columns
70-
]
71-
result["columns_removed"] = [
72-
c for c in previous_columns if c not in current_columns
73-
]
74-
ignore_columns = current_columns.symmetric_difference(previous_columns)
67+
68+
# Apply fields/ignorefields filtering
69+
if fields:
70+
compare_columns = set(fields)
71+
elif ignorefields:
72+
compare_columns = previous_columns | current_columns
73+
compare_columns = compare_columns - set(ignorefields)
74+
else:
75+
compare_columns = previous_columns | current_columns
76+
77+
# Adjust columns_added/removed based on compare_columns
78+
result["columns_added"] = [c for c in current_columns if c not in previous_columns and c in compare_columns]
79+
result["columns_removed"] = [c for c in previous_columns if c not in current_columns and c in compare_columns]
80+
ignore_columns = (previous_columns | current_columns) - compare_columns
81+
7582
# Have any rows been removed or added?
7683
removed = [id for id in previous if id not in current]
7784
added = [id for id in current if id not in previous]
78-
# How about changed?
7985
removed_or_added = set(removed) | set(added)
8086
potential_changes = [id for id in current if id not in removed_or_added]
81-
changed = [id for id in potential_changes if current[id] != previous[id]]
87+
changed = [
88+
id for id in potential_changes
89+
if any(
90+
(k in compare_columns) and (current[id].get(k) != previous[id].get(k))
91+
for k in compare_columns
92+
)
93+
]
8294
if added:
8395
result["added"] = [current[id] for id in added]
8496
if removed:
8597
result["removed"] = [previous[id] for id in removed]
8698
if changed:
8799
for id in changed:
88-
diffs = list(diff(previous[id], current[id], ignore=ignore_columns))
100+
diffs = list(diff(
101+
previous[id], current[id],
102+
ignore=ignore_columns if ignore_columns else None
103+
))
89104
if diffs:
90105
changes = {
91106
"key": id,
92107
"changes": {
93-
# field can be a list if id contained '.' - #7
94108
field[0] if isinstance(field, list) else field: [
95109
prev_value,
96110
current_value,
97111
]
98112
for _, field, (prev_value, current_value) in diffs
113+
if (field[0] if isinstance(field, list) else field) in compare_columns
99114
},
100115
}
101116
if show_unchanged:
102117
changes["unchanged"] = {
103118
field: value
104119
for field, value in previous[id].items()
105-
if field not in changes["changes"] and field != "id"
120+
if field not in changes["changes"] and field != "id" and field in compare_columns
106121
}
107122
result["changed"].append(changes)
108123
return result

db_diff/cli.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,23 @@
6969
show_default=True,
7070
help="Input File Encoding. Available: (utf-8|utf-16|utf-16le|utf-16be|latin1|cp1252|ascii|...).",
7171
)
72+
@click.option(
73+
"--fields",
74+
type=str,
75+
default=None,
76+
help="Comma-separated list of fields to compare (all others ignored). Mutually exclusive with --ignorefields.",
77+
)
78+
@click.option(
79+
"--ignorefields",
80+
type=str,
81+
default=None,
82+
help="Comma-separated list of fields to ignore during comparison. Mutually exclusive with --fields.",
83+
)
7284
@click.version_option()
7385
def cli(
7486
previous, current, key, input_format,
75-
show_unchanged, encoding, show_time, output, output_file, output_path
87+
show_unchanged, encoding, show_time, output, output_file, output_path,
88+
fields, ignorefields
7689
):
7790
"""Compare the differences between two CSV or JSON files."""
7891
dialect = {
@@ -127,9 +140,23 @@ def load(filename, key):
127140
if not key:
128141
raise click.ClickException("Could not determine key column from the previous file.")
129142

143+
# Enforce mutual exclusivity
144+
if fields and ignorefields:
145+
raise click.ClickException("You cannot use --fields and --ignorefields at the same time.")
146+
147+
# Parse fields/ignorefields into sets
148+
fields_set = set(f.strip() for f in fields.split(",")) if fields else None
149+
ignorefields_set = set(f.strip() for f in ignorefields.split(",")) if ignorefields else None
150+
130151
previous_data = load(previous, key)
131152
current_data = load(current, key)
132-
diff = compare(previous_data, current_data, show_unchanged)
153+
diff = compare(
154+
previous_data,
155+
current_data,
156+
show_unchanged,
157+
fields=fields_set,
158+
ignorefields=ignorefields_set,
159+
)
133160

134161
if output == "json":
135162
print(std_json.dumps(diff, indent=4))

0 commit comments

Comments
 (0)