|
1 | 1 | # ruff: noqa: TRY003, EM101 |
| 2 | +import json |
2 | 3 | import logging |
3 | 4 | from datetime import timedelta |
4 | 5 | from time import perf_counter |
5 | 6 |
|
6 | 7 | import rich_click as click |
7 | | - |
8 | 8 | from tim import errors, helpers |
9 | 9 | from tim import opensearch as tim_os |
10 | 10 | from tim.config import PRIMARY_ALIAS, VALID_SOURCES, configure_logger, configure_sentry |
| 11 | +from tim.errors import BulkIndexingError |
| 12 | +from timdex_dataset_api import TIMDEXDataset |
11 | 13 |
|
12 | 14 | logger = logging.getLogger(__name__) |
13 | 15 |
|
@@ -252,6 +254,7 @@ def promote(ctx: click.Context, index: str, alias: list[str]) -> None: |
252 | 254 | # Bulk record processing commands |
253 | 255 |
|
254 | 256 |
|
| 257 | +# NOTE: FEATURE FLAG: 'bulk_index' may be removed entirely when v2 work done |
255 | 258 | @main.command() |
256 | 259 | @click.option("-i", "--index", help="Name of the index to bulk index records into.") |
257 | 260 | @click.option( |
@@ -295,6 +298,7 @@ def bulk_index(ctx: click.Context, index: str, source: str, filepath: str) -> No |
295 | 298 | ) |
296 | 299 |
|
297 | 300 |
|
| 301 | +# NOTE: FEATURE FLAG: 'bulk_index' may be removed entirely when v2 work done |
298 | 302 | @main.command() |
299 | 303 | @click.option("-i", "--index", help="Name of the index to bulk delete records from.") |
300 | 304 | @click.option( |
@@ -334,3 +338,62 @@ def bulk_delete(ctx: click.Context, index: str, source: str, filepath: str) -> N |
334 | 338 | results["deleted"], |
335 | 339 | results["total"], |
336 | 340 | ) |
| 341 | + |
| 342 | + |
| 343 | +@main.command() |
| 344 | +@click.option("-i", "--index", help="Name of the index to bulk index records into.") |
| 345 | +@click.option( |
| 346 | + "-s", |
| 347 | + "--source", |
| 348 | + type=click.Choice(VALID_SOURCES), |
| 349 | + help="Source whose primary-aliased index to bulk index records into.", |
| 350 | +) |
| 351 | +@click.option("-d", "--run-date", help="Run date, formatted as YYYY-MM-DD.") |
| 352 | +@click.option("-rid", "--run-id", help="Run ID.") |
| 353 | +@click.argument("dataset_path", type=click.Path()) |
| 354 | +@click.pass_context |
| 355 | +def bulk_update( |
| 356 | + ctx: click.Context, |
| 357 | + index: str, |
| 358 | + source: str, |
| 359 | + run_date: str, |
| 360 | + run_id: str, |
| 361 | + dataset_path: str, |
| 362 | +): |
| 363 | + """Bulk update records for an index. |
| 364 | +
|
| 365 | + Must provide either the name of an existing index in the cluster or a valid source. |
| 366 | + If source is provided, it will perform indexing and/or deletion of records for |
| 367 | + the primary-aliased index for the source. |
| 368 | +
|
| 369 | + The method will read transformed records from a TIMDEXDataset |
| 370 | + located at dataset_path using the 'timdex-dataset-api' library. The dataset |
| 371 | + is filtered by run date and run ID. |
| 372 | +
|
| 373 | + Logs an error and aborts if the provided index doesn't exist in the cluster. |
| 374 | + """ |
| 375 | + client = ctx.obj["CLIENT"] |
| 376 | + index = helpers.validate_bulk_cli_options(index, source, client) |
| 377 | + |
| 378 | + logger.info(f"Bulk updating records from dataset '{dataset_path}' into '{index}'") |
| 379 | + |
| 380 | + index_results = {"created": 0, "updated": 0, "errors": 0, "total": 0} |
| 381 | + delete_results = {"deleted": 0, "errors": 0, "total": 0} |
| 382 | + |
| 383 | + td = TIMDEXDataset(location=dataset_path) |
| 384 | + td.load(run_date=run_date, run_id=run_id) |
| 385 | + |
| 386 | + # bulk index records |
| 387 | + records_to_index = td.read_transformed_records_iter(action="index") |
| 388 | + try: |
| 389 | + index_results.update(tim_os.bulk_index(client, index, records_to_index)) |
| 390 | + except BulkIndexingError as exception: |
| 391 | + logger.info(f"Bulk indexing failed: {exception}") |
| 392 | + |
| 393 | + # bulk delete records |
| 394 | + records_to_delete = td.read_transformed_records_iter(action="delete") |
| 395 | + delete_results.update(tim_os.bulk_delete(client, index, records_to_delete)) |
| 396 | + |
| 397 | + logger.info( |
| 398 | + f"Bulk update complete: {helpers.sum_bulk_update_results(index_results, delete_results)}" |
| 399 | + ) |
0 commit comments