diff --git a/aodn_cloud_optimised/config/dataset/nrmn_m13_survey.json b/aodn_cloud_optimised/config/dataset/nrmn_m13_survey.json new file mode 100644 index 00000000..2b24072b --- /dev/null +++ b/aodn_cloud_optimised/config/dataset/nrmn_m13_survey.json @@ -0,0 +1,249 @@ +{ + "dataset_name": "nrmn_m13_survey_qc", + "logger_name": "nrmn_m13_survey_qc", + "cloud_optimised_format": "parquet", + "gattrs_to_variables": [], + "partition_keys": [ + "timestamp", + "polygon" + ], + "time_extent": { + "time": "date", + "partition_timestamp_period": "Y" + }, + "spatial_extent": { + "lat": "lat", + "lon": "lon", + "spatial_resolution": 5 + }, + "metadata_uuid": "", + "pandas_read_csv_config": { + "delimiter": ",", + "header": 0, + "index_col": null, + "na_values": null, + "encoding": "utf-8", + "parse_dates": [ + "date" + ], + "date_format": "ISO8601", + "low_memory": false + }, + "schema": { + "index": { + "type": "int64", + "nullable": false + }, + "survey_id": { + "name": "survey_id", + "type": "int64", + "nullable": false, + "long_name": "Survey Identifier" + }, + "dataset_id": { + "type": "string", + "nullable": false, + "long_name": "Dataset Identifier" + }, + "country": { + "type": "string", + "nullable": true, + "long_name": "Country of the survey" + }, + "area": { + "type": "string", + "nullable": true, + "long_name": "Area of the survey" + }, + "location": { + "type": "string", + "nullable": true, + "long_name": "Location of the survey" + }, + "site_code": { + "type": "string", + "nullable": true, + "long_name": "NRMN Site Code identifier" + }, + "site_name": { + "type": "string", + "nullable": true, + "long_name": "NRMN Site Name identifier" + }, + "date": { + "type": "string", + "nullable": false, + "long_name": "Survey Date" + }, + "lat": { + "type": "float", + "nullable": false, + "long_name": "Latitude of the survey site", + "standard_name": "latitude", + "units": "degree_north" + }, + "lon": { + "type": "float", + "nullable": false, + "long_name": "Longitude of 
the survey site", + "standard_name": "longitude", + "units": "degree_east" + }, + "depth": { + "type": "float", + "nullable": false, + "description": "Depth below sea level", + "standard_name": "sea_floor_depth_below_sea_level", + "units": "m" + }, + "program": { + "type": "string", + "nullable": false, + "long_name": "Program Name" + }, + "visibility": { + "type": "float", + "nullable": true, + "long_name": "Visibility during survey", + "units": "m" + }, + "original_label_scheme": { + "type": "string", + "nullable": false, + "description": "The original labeling scheme before translation to RLS and CATAMI", + "long_name": "Original Labeling Scheme" + }, + "rls_category": { + "type": "string", + "nullable": false, + "long_name": "RLS Category" + }, + "rls_lineage": { + "type": "string", + "nullable": false, + "long_name": "RLS Lineage", + "standard_name": "rls_lineage" + }, + "catami_category": { + "type": "string", + "nullable": false, + "long_name": "CATAMI Category" + }, + "catami_lineage": { + "type": "string", + "nullable": false, + "long_name": "CATAMI Lineage" + }, + "dead": { + "type": "bool", + "nullable": false, + "long_name": "Is the organism dead" + }, + "bleached": { + "type": "bool", + "nullable": false, + "long_name": "Is the organism bleached" + }, + "category_annotation_count": { + "type": "int64", + "nullable": true, + "long_name": "Count of organisms", + "standard_name": "count" + }, + "survey_annotation_count": { + "type": "int64", + "nullable": true, + "long_name": "Label Count" + }, + "coverage": { + "type": "float", + "nullable": false, + "long_name": "Coverage Percentage", + "units": "%" + }, + "source": { + "type": "string", + "nullable": false, + "description": "The data source of the row. 
Can be SQ+ or NRMN", + "long_name": "Data Source" + }, + "deployment_id": { + "type": "float", + "nullable": true, + "long_name": "Deployment Identifier" + }, + "campaign_id": { + "type": "float", + "nullable": true, + "long_name": "Campaign Identifier" + }, + "annotation_method": { + "type": "string", + "nullable": true, + "description": "The annotation method applied to the survey", + "long_name": "Annotation Method" + }, + "sq_annotation_set_id": { + "type": "float", + "nullable": true, + "long_name": "SQ Annotation Set Identifier" + }, + "sq_annotation_set_owner": { + "type": "string", + "nullable": true, + "long_name": "SQ Annotation Set Owner" + }, + "filename": { + "type": "string" + }, + "timestamp": { + "type": "int64" + }, + "polygon": { + "type": "string" + } + }, + "aws_opendata_registry": { + "Name": "test", + "Description": "test", + "Documentation": "test", + "Contact": "info@aodn.org.au", + "ManagedBy": "AODN", + "UpdateFrequency": "never", + "Tags": [ + "coral", + "macroalgae" + ], + "License": "http://creativecommons.org/licenses/by/4.0/", + "Resources": [ + { + "Description": "test", + "ARN": "test", + "Region": "test", + "Type": "S3 Bucket" + } + ], + "DataAtWork": { + "Tutorials": [] + }, + "Citation": "test" + }, + "run_settings": { + "batch_size": 1, + "cluster": { + "mode": "local", + "restart_every_path": false + }, + "paths": [ + { + "s3_uri": "s3://aodn-dataflow-dev/thomas.galindo/processing/stored/", + "filter": [ + "transformed_surveys.csv" + ] + } + ], + "clear_existing_data": true, + "raise_error": true, + "force_previous_parquet_deletion": true + } +}