@@ -123,6 +123,76 @@ def compare(previous, current, show_unchanged=False, fields=None, ignorefields=N
123
123
return result
124
124
125
125
126
def streaming_compare_csv(prev_path, curr_path, key, compare_columns=None, ignorefields=None, encoding='utf-8', dialect='excel'):
    """
    Compare two sorted CSV files by streaming, returning a diff dict.

    Both files are read row by row with ``csv.DictReader`` and merged on the
    ``key`` column, so neither file is loaded into memory as a whole (the
    collected differences are, however, accumulated in the result).

    Key values are compared as strings, so both files must already be sorted
    in ascending *string* order of the key column for the merge to be correct.

    Args:
        prev_path: Path of the "previous" CSV file.
        curr_path: Path of the "current" CSV file.
        key: Name of the column that identifies a row in both files.
        compare_columns: Optional iterable of column names to compare. When
            given (and non-empty) it takes precedence over ``ignorefields``.
        ignorefields: Optional iterable of column names to exclude from the
            comparison; only used when ``compare_columns`` is not given.
        encoding: Text encoding used to open both files.
        dialect: ``csv`` dialect passed to both readers.

    Returns:
        A dict with the keys:
            ``added``: rows (dicts) present only in the current file.
            ``removed``: rows (dicts) present only in the previous file.
            ``changed``: ``{"key": <key>, "changes": {col: [old, new]}}``
                entries for rows whose compared columns differ.
            ``columns_added`` / ``columns_removed``: compared columns that
                exist in only one header, listed in header order.

    Raises:
        KeyError: If both files contain rows and ``key`` is missing from
            either header.
    """
    import csv

    result = {
        "added": [],
        "removed": [],
        "changed": [],
        "columns_added": [],
        "columns_removed": [],
    }

    with open(prev_path, newline='', encoding=encoding) as f1, \
            open(curr_path, newline='', encoding=encoding) as f2:
        reader1 = csv.DictReader(f1, dialect=dialect)
        reader2 = csv.DictReader(f2, dialect=dialect)
        prev_row = next(reader1, None)
        curr_row = next(reader2, None)

        # ``fieldnames`` is None for a completely empty file; treat that as
        # "no columns". dict.fromkeys de-duplicates while keeping header order.
        prev_fields = list(dict.fromkeys(reader1.fieldnames or ()))
        curr_fields = list(dict.fromkeys(reader2.fieldnames or ()))
        prev_columns = set(prev_fields)
        curr_columns = set(curr_fields)

        # Determine columns to compare
        if compare_columns:
            compare_columns = set(compare_columns)
        elif ignorefields:
            compare_columns = (prev_columns | curr_columns) - set(ignorefields)
        else:
            compare_columns = prev_columns | curr_columns

        # Iterate the headers (not the sets) so the output order is stable.
        result["columns_added"] = [
            c for c in curr_fields
            if c not in prev_columns and c in compare_columns
        ]
        result["columns_removed"] = [
            c for c in prev_fields
            if c not in curr_columns and c in compare_columns
        ]

        # DictReader gives every row all header keys, so the key column only
        # needs to be validated once, and only when rows will be matched up.
        if prev_row is not None and curr_row is not None:
            if key not in prev_row or key not in curr_row:
                raise KeyError(f"Key column '{key}' missing in one of the rows.")

        while prev_row is not None or curr_row is not None:
            if prev_row is None:
                # Previous file exhausted: everything left was added.
                result["added"].append(curr_row)
                curr_row = next(reader2, None)
                continue
            if curr_row is None:
                # Current file exhausted: everything left was removed.
                result["removed"].append(prev_row)
                prev_row = next(reader1, None)
                continue

            prev_key = str(prev_row[key])
            curr_key = str(curr_row[key])
            if prev_key == curr_key:
                # Same row in both files: record per-column differences.
                changed_fields = {
                    col: [prev_row.get(col), curr_row.get(col)]
                    for col in compare_columns
                    if prev_row.get(col) != curr_row.get(col)
                }
                if changed_fields:
                    result["changed"].append({
                        "key": prev_key,
                        "changes": changed_fields,
                    })
                prev_row = next(reader1, None)
                curr_row = next(reader2, None)
            elif prev_key < curr_key:
                # Row removed
                result["removed"].append(prev_row)
                prev_row = next(reader1, None)
            else:
                # Row added
                result["added"].append(curr_row)
                curr_row = next(reader2, None)
    return result
194
+
195
+
126
196
def human_text (result , key = None , current = None , extras = None ):
127
197
title = []
128
198
summary = []
0 commit comments