
Commit 075defc

fix: case insensitivity (#106)
* feat: add AUDIT log level for upload
* chore: update outdated tests
* fix: allow empty str as RESULT_PATH_PREFIX & replace w/ default val
* fix: allowing optional original stac item
* feat: add catalog endpoint + manual test for case insensitive
* fix: ancillary upload needs valid bbox
* fix: ancillary upload bugfix
* fix: <URN>:<ORG>:<Project>:<Tenant>:<Venue> are always upper case
* fix: update tests
1 parent ec8ba60 commit 075defc

File tree

5 files changed: +119 / -39 lines changed


mdps_ds_lib/ds_client/ds_client_user.py

Lines changed: 11 additions & 0 deletions
@@ -93,6 +93,17 @@ def query_collections(self, limit=10):
 
         return response
 
+    def query_catalog(self):
+        request_url = f'{self._uds_url}catalog/'
+        s = requests.session()
+        s.trust_env = self._trust_env
+        response = s.get(url=request_url, headers={
+            'Authorization': f'Bearer {self._token_retriever.get_token()}',
+        }, verify=self._trust_env)
+        response.raise_for_status()
+        response = json.loads(response.text)
+        return response
+
     def query_collections_next(self):
         if self.__collection_query_next_page is None:
             return None
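
As a rough usage sketch, the new query_catalog method boils down to the standalone requests call below. The base URL and token are placeholders; the catalog/ path, Bearer header, and trust_env handling come from the diff above.

import json
import requests

# Placeholders: substitute the real UDS base URL (trailing slash required) and a valid token.
uds_url = 'https://uds.example.org/'
token = '<bearer-token>'

# Mirrors query_catalog: GET {uds_url}catalog/ with an 'Authorization: Bearer <token>' header.
session = requests.session()
session.trust_env = False                       # stands in for self._trust_env when it is disabled
response = session.get(url=f'{uds_url}catalog/',
                       headers={'Authorization': f'Bearer {token}'},
                       verify=False)            # the method forwards self._trust_env here as well
response.raise_for_status()                     # surface HTTP errors early
catalog = json.loads(response.text)             # parsed JSON catalog document
print(catalog)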

mdps_ds_lib/lib/cumulus_stac/granules_catalog.py

Lines changed: 15 additions & 4 deletions
@@ -9,20 +9,31 @@
 
 
 class GranulesCatalog:
+
+    @staticmethod
+    def standardize_stage_out_collection_id_format(current_collection_id: str):
+        collection_identifier_parts = current_collection_id.split(':')
+        if len(collection_identifier_parts) < 6:
+            raise ValueError(
+                f'invalid collection ID. Need to be in <URN>:<ORG>:<Project>:<Tenant>:<Venue>:<Collection ID> but it is {current_collection_id}')
+        for i in range(5):
+            collection_identifier_parts[i] = collection_identifier_parts[i].upper()
+        current_collection_id = ':'.join(collection_identifier_parts)
+        current_collection_id = f'{current_collection_id}___001' if '___' not in current_collection_id else current_collection_id
+        return current_collection_id
+
     @staticmethod
     def get_unity_formatted_collection_id(current_collection_id: str, project_venue_set: tuple):
         if current_collection_id == '' or current_collection_id is None:
             raise ValueError(f'NULL or EMPTY collection_id: {current_collection_id}')
         collection_identifier_parts = current_collection_id.split(':')
         if len(collection_identifier_parts) >= 6:
             LOGGER.debug(f'current_collection_id is assumed to be in UNITY format: {current_collection_id}')
-            current_collection_id = f'{current_collection_id}___001' if '___' not in current_collection_id else current_collection_id
-            return current_collection_id
-
+            return GranulesCatalog.standardize_stage_out_collection_id_format(current_collection_id)
         LOGGER.info(f'current_collection_id is not UNITY formatted ID: {current_collection_id}')
         if project_venue_set[0] is None or project_venue_set[1] is None:
             raise ValueError(f'missing project or venue in ENV which is needed due to current_collection_id not UNITY format: {project_venue_set}')
-        new_collection = f'URN:NASA:UNITY:{project_venue_set[0]}:{project_venue_set[1]}:{current_collection_id}'
+        new_collection = f'URN:NASA:UNITY:{project_venue_set[0].upper()}:{project_venue_set[1].upper()}:{current_collection_id}'
         new_collection = f'{new_collection}___001' if '___' not in new_collection else new_collection
         LOGGER.info(f'UNITY formatted ID: {new_collection}')
         return new_collection
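
A quick illustration of the new standardize_stage_out_collection_id_format helper: the first five colon-separated URN parts are upper-cased, the collection name itself is left untouched, and a ___001 version suffix is appended when missing. The input values below are illustrative, not taken from the repository's tests.

from mdps_ds_lib.lib.cumulus_stac.granules_catalog import GranulesCatalog

# Mixed-case URN prefix is normalized; the trailing collection name keeps its case.
cid = GranulesCatalog.standardize_stage_out_collection_id_format(
    'urn:nasa:unity:my_project:dev:my_collection')           # illustrative input
print(cid)  # expected: URN:NASA:UNITY:MY_PROJECT:DEV:my_collection___001

# Fewer than six colon-separated parts is rejected with a ValueError.
try:
    GranulesCatalog.standardize_stage_out_collection_id_format('my_collection')
except ValueError as err:
    print(err)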

mdps_ds_lib/stage_in_out/upload_arbitrary_files_as_granules.py

Lines changed: 5 additions & 3 deletions
@@ -51,7 +51,7 @@ def generate_sample_stac(self, filepath: str):
                 "type": "Point",
                 "coordinates": [0.0, 0.0]
             },
-            bbox=[0.0, 0.0, 0.0, 0.0],
+            bbox=[-180, -90, 180, 90],
             datetime=TimeUtils().parse_from_unix(0, True).get_datetime_obj(),
             properties={
                 "start_datetime": TimeUtils.get_current_time(),
@@ -84,10 +84,11 @@ def execute_job(self, job_obj, lock) -> bool:
             s3_url = self.__s3.upload(job_obj, self.__staging_bucket, f'{self.__collection_id}/{sample_stac_item.id}', self.__delete_files)
             updating_assets[os.path.basename(s3_url)] = s3_url
             uploading_current_granule_stac = f'{s3_url}.stac.json'
-            self.__s3.set_s3_url(uploading_current_granule_stac)
-            self.__s3.upload_bytes(json.dumps(sample_stac_item.to_dict(False, False),indent=4).encode())
             updating_assets[os.path.basename(uploading_current_granule_stac)] = uploading_current_granule_stac
             self.__gc.update_assets_href(sample_stac_item, updating_assets)
+
+            self.__s3.set_s3_url(uploading_current_granule_stac)
+            self.__s3.upload_bytes(json.dumps(sample_stac_item.to_dict(False, False),indent=4).encode())
             self.__result_list.put(sample_stac_item.to_dict(False, False))
         except Exception as e:
             sample_stac_item.properties['upload_error'] = str(e)
@@ -116,6 +117,7 @@ def upload(self, **kwargs) -> str:
         self._set_props_from_env()
         if self._collection_id is None:
             raise ValueError(f'missing COLLECTION ID in ENV')
+        self._collection_id = GranulesCatalog.standardize_stage_out_collection_id_format(self._collection_id)
         output_dir = os.environ.get(self.OUTPUT_DIRECTORY)
         if not FileUtils.dir_exist(output_dir):
             raise ValueError(f'OUTPUT_DIRECTORY: {output_dir} does not exist')
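
Two notes on this file: the bbox fix swaps a degenerate [0, 0, 0, 0] box for the whole-globe extent, and the reordering in execute_job serializes the .stac.json only after update_assets_href has run, so the uploaded metadata presumably carries the final S3 asset hrefs. A minimal sketch of the new default geometry/bbox follows, assuming the items are pystac Items (as the to_dict(False, False) calls suggest); the item ID and datetime are placeholders.

import json
from datetime import datetime, timezone

import pystac

# Placeholder item mirroring the new defaults: point geometry at the origin with a
# whole-globe bbox (west, south, east, north) instead of the old zero-area [0, 0, 0, 0].
item = pystac.Item(
    id='sample-granule',                                   # placeholder ID
    geometry={"type": "Point", "coordinates": [0.0, 0.0]},
    bbox=[-180, -90, 180, 90],
    datetime=datetime(1970, 1, 1, tzinfo=timezone.utc),    # stands in for parse_from_unix(0, True)
    properties={},
)
print(json.dumps(item.to_dict(False, False), indent=4))    # same serialization call as in the diff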
