From b006a4c685dd3244fef823c0bd44b4daef7592a4 Mon Sep 17 00:00:00 2001 From: lbellomo Date: Sun, 31 Mar 2024 17:45:23 -0300 Subject: [PATCH 1/4] add lazy=True to avoid opening the file early in case of failure --- sqlite_utils/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlite_utils/cli.py b/sqlite_utils/cli.py index b7de4191..5678d3f7 100644 --- a/sqlite_utils/cli.py +++ b/sqlite_utils/cli.py @@ -865,7 +865,7 @@ def inner(fn): required=True, ), click.argument("table"), - click.argument("file", type=click.File("rb"), required=True), + click.argument("file", type=click.File("rb", lazy=True), required=True), click.option( "--pk", help="Columns to use as the primary key, e.g. id", From d3f097901ccc2366078399775cc682d0297d8e45 Mon Sep 17 00:00:00 2001 From: lbellomo Date: Sun, 31 Mar 2024 18:35:43 -0300 Subject: [PATCH 2/4] add io.buffer in _extra_key_strategy to ensure is closed in the end --- sqlite_utils/utils.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sqlite_utils/utils.py b/sqlite_utils/utils.py index 9e9882a9..8c010f08 100644 --- a/sqlite_utils/utils.py +++ b/sqlite_utils/utils.py @@ -212,6 +212,7 @@ def _extra_key_strategy( reader: Iterable[dict], ignore_extras: Optional[bool] = False, extras_key: Optional[str] = None, + buffer: Optional[io.TextIOWrapper] = None, ) -> Iterable[dict]: # Logic for handling CSV rows with more values than there are headings for row in reader: @@ -231,6 +232,8 @@ def _extra_key_strategy( else: row[extras_key] = row.pop(None) # type: ignore yield row + if buffer: + buffer.close() def rows_from_file( @@ -299,7 +302,10 @@ class Format(enum.Enum): reader = csv.DictReader(decoded_fp, dialect=dialect) else: reader = csv.DictReader(decoded_fp) - return _extra_key_strategy(reader, ignore_extras, extras_key), Format.CSV + return ( + _extra_key_strategy(reader, ignore_extras, extras_key, decoded_fp), + Format.CSV, + ) elif format == Format.TSV: rows = rows_from_file( fp, format=Format.CSV, dialect=csv.excel_tab, encoding=encoding From 6e342b89eed2b25e8fd73120df152708863463d8 Mon Sep 17 00:00:00 2001 From: lbellomo Date: Sun, 31 Mar 2024 19:07:52 -0300 Subject: [PATCH 3/4] make sure to close fp in case of UnicodeDecodeError --- sqlite_utils/cli.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/sqlite_utils/cli.py b/sqlite_utils/cli.py index 5678d3f7..17b7866a 100644 --- a/sqlite_utils/cli.py +++ b/sqlite_utils/cli.py @@ -1949,12 +1949,16 @@ def memory( fp = file_path.open("rb") rows, format_used = rows_from_file(fp, format=format, encoding=encoding) tracker = None - if format_used in (Format.CSV, Format.TSV) and not no_detect_types: - tracker = TypeTracker() - rows = tracker.wrap(rows) - if flatten: - rows = (_flatten(row) for row in rows) - db[file_table].insert_all(rows, alter=True) + try: + if format_used in (Format.CSV, Format.TSV) and not no_detect_types: + tracker = TypeTracker() + rows = tracker.wrap(rows) + if flatten: + rows = (_flatten(row) for row in rows) + db[file_table].insert_all(rows, alter=True) + except UnicodeDecodeError as e: + fp.close() + raise e if tracker is not None: db[file_table].transform(types=tracker.types) # Add convenient t / t1 / t2 views From 478372e4a8dbe3c84d8649c393f2e40d6f095b2b Mon Sep 17 00:00:00 2001 From: lbellomo Date: Sun, 31 Mar 2024 19:39:46 -0300 Subject: [PATCH 4/4] change deprecated datetime.utcfromtimestamp to datetime.fromtimestamp --- sqlite_utils/cli.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sqlite_utils/cli.py b/sqlite_utils/cli.py index 17b7866a..5c772c88 100644 --- a/sqlite_utils/cli.py +++ b/sqlite_utils/cli.py @@ -1,7 +1,7 @@ import base64 import click from click_default_group import DefaultGroup # type: ignore -from datetime import datetime +from datetime import datetime, timezone import hashlib import pathlib from runpy import run_module @@ -3200,8 +3200,12 @@ def __init__(self, exception, path): "ctime": lambda p: p.stat().st_ctime, "mtime_int": lambda p: int(p.stat().st_mtime), "ctime_int": lambda p: int(p.stat().st_ctime), - "mtime_iso": lambda p: datetime.utcfromtimestamp(p.stat().st_mtime).isoformat(), - "ctime_iso": lambda p: datetime.utcfromtimestamp(p.stat().st_ctime).isoformat(), + "mtime_iso": lambda p: datetime.fromtimestamp(p.stat().st_mtime, timezone.utc) + .isoformat() + .rstrip("+00:00"), + "ctime_iso": lambda p: datetime.fromtimestamp(p.stat().st_mtime, timezone.utc) + .isoformat() + .rstrip("+00:00"), "size": lambda p: p.stat().st_size, "stem": lambda p: p.stem, "suffix": lambda p: p.suffix,