|
1 | 1 | import click
|
2 | 2 | import json as std_json
|
| 3 | +import time as time_module |
3 | 4 | from . import load_csv, load_json, compare, human_text
|
4 | 5 |
|
5 |
| - |
6 |
| -@click.command() |
7 |
| -@click.version_option() |
| 6 | +@click.command( |
| 7 | + context_settings={"help_option_names": ["-h", "--help"]}, |
| 8 | + help="Compare the differences between two CSV or JSON files.", |
| 9 | + epilog="Example: csv-diff old.csv new.csv --key=Id --output=jsonfile --output-file=diff.json", |
| 10 | +) |
8 | 11 | @click.argument(
|
9 | 12 | "previous",
|
10 | 13 | type=click.Path(exists=True, file_okay=True, dir_okay=False, allow_dash=False),
|
|
14 | 17 | type=click.Path(exists=True, file_okay=True, dir_okay=False, allow_dash=False),
|
15 | 18 | )
|
16 | 19 | @click.option(
|
17 |
| - "--key", type=str, default=None, help="Column to use as a unique ID for each row" |
| 20 | + "-k", "--key", type=str, default=None, help="Column to use as a unique ID for each row (default: first column if not specified) (Example: --key=Id)" |
18 | 21 | )
|
19 | 22 | @click.option(
|
20 | 23 | "--format",
|
21 |
| - type=click.Choice(["csv", "tsv", "json"]), |
22 |
| - default=None, |
23 |
| - help="Explicitly specify input format (csv, tsv, json) instead of auto-detecting", |
24 |
| -) |
25 |
| -@click.option( |
26 |
| - "--json", type=bool, default=False, help="Output changes as JSON", is_flag=True |
27 |
| -) |
28 |
| -@click.option( |
29 |
| - "--singular", |
30 |
| - type=str, |
| 24 | + "input_format", |
31 | 25 | default=None,
|
32 |
| - help="Singular word to use, e.g. 'tree' for '1 tree'", |
33 |
| -) |
34 |
| -@click.option( |
35 |
| - "--plural", |
36 |
| - type=str, |
37 |
| - default=None, |
38 |
| - help="Plural word to use, e.g. 'trees' for '2 trees'", |
| 26 | + help="Explicitly specify input format. Available (csv|tsv|json) (default: auto-detect based on file extension)", |
39 | 27 | )
|
40 | 28 | @click.option(
|
41 | 29 | "--show-unchanged",
|
|
49 | 37 | multiple=True,
|
50 | 38 | help="key: format string - define extra fields to display",
|
51 | 39 | )
|
52 |
| -def cli(previous, current, key, format, json, singular, plural, show_unchanged, extras): |
53 |
| - "Diff two CSV or JSON files" |
| 40 | +@click.option( |
| 41 | + "--encoding", |
| 42 | + type=str, |
| 43 | + default="utf-8", |
| 44 | + help="Input File Encoding. Available: (utf-8|utf-16|utf-16le|utf-16be|latin1|cp1252|ascii|...) (default: utf-8)", |
| 45 | +) |
| 46 | +@click.option( |
| 47 | + "--time", |
| 48 | + "show_time", |
| 49 | + is_flag=True, |
| 50 | + help="Measure and display elapsed time for the diff operation", |
| 51 | +) |
| 52 | +@click.option( |
| 53 | + "-o", "--output", |
| 54 | + default="readable", |
| 55 | + show_default=True, |
| 56 | + help="Output format. Available: (readable|json|jsonfile) (default: readable)", |
| 57 | +) |
| 58 | +@click.option( |
| 59 | + "--output-file", |
| 60 | + type=click.Path(file_okay=True, dir_okay=False, writable=True, resolve_path=True), |
| 61 | + default=None, |
| 62 | + help="File to write JSON output to (only used with --output=jsonfile)", |
| 63 | +) |
| 64 | +@click.version_option() |
def cli(
    previous, current, key, input_format,
    show_unchanged, extras, encoding, show_time, output, output_file
):
    """Compare the differences between two CSV or JSON files.

    Args:
        previous: Path of the older file.
        current: Path of the newer file.
        key: Column used as the unique row ID. When ``None`` the first
            column of the first row loaded from ``previous`` is used.
        input_format: ``"csv"``, ``"tsv"``, ``"json"`` or ``None``. ``"json"``
            selects ``load_json``; anything else goes through ``load_csv``
            with the matching dialect (``None`` for unknown/unspecified).
        show_unchanged: Passed through to ``compare``.
        extras: Extra field definitions passed to ``human_text``; rejected
            for the JSON output modes.
        encoding: Text encoding used to read both input files.
        show_time: When true, report the elapsed time of the operation.
        output: ``"readable"``, ``"json"`` or ``"jsonfile"``.
        output_file: Destination path, required when ``output`` is
            ``"jsonfile"``.

    Raises:
        click.UsageError: On an unknown ``output`` value, on extras combined
            with a JSON output mode, or on ``jsonfile`` without
            ``output_file``.
        click.ClickException: When a file cannot be loaded or no key column
            can be determined.
    """
    dialect = {
        "csv": "excel",
        "tsv": "excel-tab",
    }
    json_outputs = ("json", "jsonfile")

    # The option is declared without a Choice type, so an unknown value would
    # otherwise fall through silently to the readable branch below.
    if output not in ("readable",) + json_outputs:
        raise click.UsageError(
            f"Invalid value for --output: '{output}'. "
            "Available: readable, json, jsonfile",
            ctx=click.get_current_context(),
        )

    if extras and output in json_outputs:
        raise click.UsageError(
            "Extra fields are not supported in JSON output mode",
            ctx=click.get_current_context(),
        )

    if output == "jsonfile" and not output_file:
        raise click.UsageError(
            "--output-file must be specified when --output=jsonfile",
            ctx=click.get_current_context(),
        )

    def load(filename, row_key):
        """Load ``filename`` with the selected loader, using ``row_key`` as key."""
        try:
            if input_format == "json":
                with open(filename, encoding=encoding) as f:
                    return load_json(f, key=row_key)
            with open(filename, newline="", encoding=encoding) as f:
                return load_csv(f, key=row_key, dialect=dialect.get(input_format))
        except Exception as e:
            # Surface any read/parse failure as a clean CLI error while
            # keeping the original exception chained for debugging.
            raise click.ClickException(f"Failed to load '{filename}': {e}") from e

    # perf_counter is monotonic, so the measurement cannot be skewed by
    # system clock adjustments the way time.time() can.
    start = time_module.perf_counter() if show_time else None

    previous_data = load(previous, key)
    if key is None:
        # No key given: fall back to the first column of the first row.
        if not (previous_data and isinstance(previous_data, dict)):
            raise click.ClickException(
                "Could not determine key column from the previous file."
            )
        first_row = next(iter(previous_data.values()))
        key = next(iter(first_row), None)
        if key is None:
            raise click.ClickException(
                "Could not determine key column from the previous file."
            )
        # The first pass was loaded with key=None. Reload it with the chosen
        # key so both files are loaded with the same key before comparing.
        previous_data = load(previous, key)
    current_data = load(current, key)

    diff = compare(previous_data, current_data, show_unchanged)

    if output == "json":
        print(std_json.dumps(diff, indent=4))
    elif output == "jsonfile":
        # --encoding describes the *input* files; JSON output is always
        # written as UTF-8 so the result stays a valid interchange document.
        with open(output_file, "w", encoding="utf-8") as f:
            std_json.dump(diff, f, indent=4)
    else:
        print(
            human_text(diff, key, current=current_data, extras=extras)
        )

    if show_time:
        elapsed = time_module.perf_counter() - start
        # In JSON-to-stdout mode the timing line goes to stderr so stdout
        # remains a single parseable JSON document.
        click.echo(
            f"\nElapsed time: {elapsed:.3f} seconds",
            err=(output == "json"),
        )
0 commit comments