Skip to content

Commit b0b1cb1

Browse files
authored
feat: Using opensearch instead of elasticsearch (#116)
* feat: add AUDIT log level for upload
* chore: update outdated tests
* fix: allow empty str as RESULT_PATH_PREFIX & replace w/ default val
* fix: allowing optional original stac item
* fix: using opensearch
* fix: remove unused class
* fix: backward compatible
* fix: use_ssl option re-added
* fix: replace lsmd pointer with uds pointer
1 parent 348f7d3 commit b0b1cb1

File tree

8 files changed

+158
-70
lines changed

8 files changed

+158
-70
lines changed

mdps_ds_lib/lib/aws/es_abstract.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,9 @@
55

66

77
class ESAbstract(ABC):
8+
89
@abstractmethod
9-
def migrate_index_data(self, old_index, new_index):
10+
def migrate_index_data(self, old_index, new_index, remove_old_data=True):
1011
return
1112
@abstractmethod
1213
def create_index(self, index_name, index_body):
@@ -62,11 +63,11 @@ def query_with_scroll(self, dsl, querying_index=None):
6263
return
6364

6465
@abstractmethod
65-
def query(self, dsl, querying_index=None):
66+
def delete_by_query(self, dsl, querying_index=None):
6667
return
6768

6869
@abstractmethod
69-
def delete_by_query(self, dsl, querying_index=None):
70+
def query(self, dsl, querying_index=None):
7071
return
7172

7273
@abstractmethod

mdps_ds_lib/lib/aws/es_factory.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,20 @@
44
class ESFactory(FactoryAbstract):
    """Factory returning the search-engine middleware for a class-type key.

    Recognised keys (matched case-insensitively):
      * ``NO_AUTH`` -> ``ESMiddlewareNoAuth``
      * ``AWS``     -> ``OsMiddlewareAws``
      * ``AWS_ES``  -> ``EsMiddlewareAws``
    """
    NO_AUTH = 'NO_AUTH'
    AWS = 'AWS'
    AWS_ES = 'AWS_ES'

    def get_instance_from_env(self, **kwargs):
        """Not implemented yet; always raises ``NotImplementedError``."""
        raise NotImplementedError('not yet')

    def get_instance(self, class_type, **kwargs):
        """Build the middleware registered under ``class_type``.

        :param class_type: one of ``NO_AUTH``, ``AWS``, ``AWS_ES`` (any case).
        :param kwargs: must contain ``index``, ``base_url`` and ``port``;
            ``use_ssl`` is optional and defaults to ``True``.
        :return: the constructed middleware instance.
        :raises KeyError: if ``index``, ``base_url`` or ``port`` is missing.
        :raises ModuleNotFoundError: if ``class_type`` is not a known key.
        """
        # Callers that never passed `use_ssl` must keep working, so fall back
        # to SSL instead of failing with KeyError further down.
        use_ssl = kwargs.get('use_ssl', True)
        ct = class_type.upper()
        # Each middleware module is imported inside its own branch, so it is
        # only loaded when that backend is actually requested.
        if ct == self.NO_AUTH:
            from mdps_ds_lib.lib.aws.es_middleware_no_auth import ESMiddlewareNoAuth
            return ESMiddlewareNoAuth(kwargs['index'], kwargs['base_url'], port=kwargs['port'], use_ssl=use_ssl)
        if ct == self.AWS:
            from mdps_ds_lib.lib.aws.os_middleware_aws import OsMiddlewareAws
            return OsMiddlewareAws(kwargs['index'], kwargs['base_url'], port=kwargs['port'], use_ssl=use_ssl)
        if ct == self.AWS_ES:
            from mdps_ds_lib.lib.aws.es_middleware_aws import EsMiddlewareAws
            return EsMiddlewareAws(kwargs['index'], kwargs['base_url'], port=kwargs['port'], use_ssl=use_ssl)
        raise ModuleNotFoundError(f'cannot find ES class for {ct}')

mdps_ds_lib/lib/aws/es_middleware.py renamed to mdps_ds_lib/lib/aws/es_middleware_abstract.py

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,15 @@
11
import logging
2-
3-
from elasticsearch import Elasticsearch
4-
from elasticsearch.exceptions import NotFoundError
5-
62
from mdps_ds_lib.lib.aws.es_abstract import ESAbstract, DEFAULT_TYPE
73

84
LOGGER = logging.getLogger(__name__)
95

106

11-
class ESMiddleware(ESAbstract):
12-
7+
class ESMiddlewareAbstract(ESAbstract):
138
def __init__(self, index, base_url, port=443, use_ssl=True) -> None:
    """Record the default index and leave the engine client unset.

    :param index: default index name; must not be ``None``.
    :param base_url: engine host/URL; must not be ``None`` (only validated here).
    :param port: accepted for signature compatibility; not used in this body.
    :param use_ssl: accepted for signature compatibility; not used in this body.
    :raises ValueError: if ``index`` or ``base_url`` is ``None``.
    """
    if index is None or base_url is None:
        raise ValueError(f'index or base_url is None')
    self.__index = index
    # No client is created here; the subclasses in this change assign
    # `self._engine` themselves after calling this constructor.
    self._engine = None
2013

2114
def __validate_index(self, index):
2215
if index is not None:
@@ -73,9 +66,11 @@ def create_index(self, index_name, index_body):
7366
def get_index_mapping(self, index_name):
    """Return the mapping stored under ``index_name``, or ``None`` if the index is missing.

    :param index_name: name of the index whose mapping is requested.
    :return: the ``index_name`` entry of the engine's ``get_mapping`` response,
        or ``None`` when the engine reports ``index_not_found_exception``.
    :raises Exception: any other engine/client error is re-raised unchanged.
    """
    try:
        result = self._engine.indices.get_mapping(index=index_name)
    except Exception as e:
        # Not every exception carries an `error` attribute (connection errors,
        # programming errors, ...). Read it defensively so the original
        # exception is re-raised instead of being masked by AttributeError.
        if getattr(e, 'error', None) == 'index_not_found_exception':
            return None
        raise
    # NOTE(review): assumes the response is keyed by the name that was
    # requested; confirm behaviour when `index_name` is an alias.
    return result[index_name]
7974

8075
def has_index(self, index_name):
8176
result = self._engine.indices.exists(index=index_name)
@@ -84,18 +79,21 @@ def has_index(self, index_name):
8479
def swap_index_for_alias(self, alias_name, old_index_name, new_index_name):
    """Move ``alias_name`` from ``old_index_name`` to ``new_index_name``.

    The alias is first removed from the old index and then created on the new
    one. If the removal fails because the old index/alias does not exist,
    there is nothing to unlink, so the alias is still created on the new index.

    :param alias_name: alias to move.
    :param old_index_name: index currently holding the alias.
    :param new_index_name: index that should hold the alias afterwards.
    :return: whatever ``create_alias`` returns for the new index.
    :raises Exception: any unlink error other than "not found" is re-raised.
    """
    try:
        self._engine.indices.delete_alias(index=old_index_name, name=alias_name)
    except Exception as ee:
        # Matched on the rendered text rather than on an exception class, as
        # the rest of this module does.
        if 'NotFoundError' not in str(ee):
            raise
        # Returning early here would leave the alias pointing at nothing,
        # e.g. the first time an alias is set up. Fall through and link it.
    return self.create_alias(new_index_name, alias_name)
9288

9389
def get_alias(self, alias_name):
    """Fetch the engine's alias entry for ``alias_name``.

    :param alias_name: alias to look up.
    :return: the engine's ``get_alias`` response, or ``{}`` when the lookup
        fails with an error whose text contains ``NotFoundError``.
    :raises Exception: any other lookup error is re-raised.
    """
    try:
        return self._engine.indices.get_alias(name=alias_name)
    except Exception as lookup_error:
        # The error is matched on its rendered text, not on an exception class.
        if 'NotFoundError' not in str(lookup_error):
            raise lookup_error
    return {}
10098

10199
def create_alias(self, index_name, alias_name):
@@ -124,7 +122,7 @@ def index_many(self, docs=None, doc_ids=None, doc_dict=None, index=None):
124122
body=body, doc_type=DEFAULT_TYPE)
125123
LOGGER.info('indexed. result: {}'.format(index_result))
126124
return self.__check_errors_for_bulk(index_result)
127-
except:
125+
except Exception as e:
128126
LOGGER.exception('cannot add indices with ids: {} for index: {}'.format(list(doc_dict.keys()), index))
129127
return doc_dict
130128
return
@@ -136,9 +134,9 @@ def index_one(self, doc, doc_id, index=None):
136134
body=doc, doc_type=DEFAULT_TYPE, id=doc_id)
137135
LOGGER.info('indexed. result: {}'.format(index_result))
138136
pass
139-
except Exception as e:
137+
except:
140138
LOGGER.exception('cannot add a new index with id: {} for index: {}'.format(doc_id, index))
141-
raise e
139+
return None
142140
return self
143141

144142
def update_many(self, docs=None, doc_ids=None, doc_dict=None, index=None):
@@ -208,13 +206,13 @@ def query_with_scroll(self, dsl, querying_index=None):
208206
first_batch['hits']['hits'].extend(scrolled_result['hits']['hits'])
209207
return first_batch
210208

211-
def query(self, dsl, querying_index=None):
209+
def delete_by_query(self, dsl, querying_index=None):
    """Delete the documents matched by ``dsl``.

    :param dsl: query body forwarded to the engine as ``body``.
    :param querying_index: index to target; passed through the class's
        private index validator before use.
    :return: the engine's ``delete_by_query`` response.
    """
    target_index = self.__validate_index(querying_index)
    delete_options = {'conflicts': 'proceed', 'request_timeout': 120}
    return self._engine.delete_by_query(body=dsl, index=target_index, **delete_options)
214212

215-
def delete_by_query(self, dsl, querying_index=None):
213+
def query(self, dsl, querying_index=None):
    """Run a search with ``dsl`` and return the engine's response.

    :param dsl: query body forwarded to the engine as ``body``.
    :param querying_index: index to target; passed through the class's
        private index validator before use.
    :return: the engine's ``search`` response.
    """
    target_index = self.__validate_index(querying_index)
    search_response = self._engine.search(body=dsl, index=target_index)
    return search_response
218216

219217
def __is_querying_next_page(self, targeted_size: int, current_size: int, total_size: int):
220218
if targeted_size < 0:
Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,17 @@
11
import logging
22

3-
from requests_aws4auth import AWS4Auth
4-
53
from mdps_ds_lib.lib.aws.aws_cred import AwsCred
6-
from mdps_ds_lib.lib.aws.es_middleware import ESMiddleware
74
from elasticsearch import Elasticsearch, RequestsHttpConnection
5+
from requests_aws4auth import AWS4Auth
6+
from mdps_ds_lib.lib.aws.es_middleware_abstract import ESMiddlewareAbstract
87

98
LOGGER = logging.getLogger(__name__)
109

1110

12-
class EsMiddlewareAws(ESMiddleware):
11+
class EsMiddlewareAws(ESMiddlewareAbstract):
1312

1413
def __init__(self, index, base_url, port=443, use_ssl=True) -> None:
15-
super().__init__(index, base_url, port, use_ssl)
14+
super().__init__(index, base_url, port)
1615
base_url = base_url.replace('https://', '') # hide https
1716
self._index = index
1817
aws_cred = AwsCred()
@@ -23,7 +22,7 @@ def __init__(self, index, base_url, port=443, use_ssl=True) -> None:
2322
self._engine = Elasticsearch(
2423
hosts=[{'host': base_url, 'port': port}],
2524
http_auth=aws_auth,
26-
use_ssl=True,
25+
use_ssl=use_ssl,
2726
verify_certs=True,
2827
connection_class=RequestsHttpConnection
2928
)
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import logging
2+
3+
from elasticsearch import Elasticsearch
4+
from mdps_ds_lib.lib.aws.es_middleware_abstract import ESMiddlewareAbstract
5+
6+
LOGGER = logging.getLogger(__name__)
7+
8+
9+
class ESMiddlewareNoAuth(ESMiddlewareAbstract):
    """Elasticsearch middleware that connects without any authentication."""

    def __init__(self, index, base_url, port=443, use_ssl=True) -> None:
        """Create an unauthenticated Elasticsearch client.

        :param index: default index name (validated by the base class).
        :param base_url: engine host; a leading ``https://`` is stripped.
        :param port: engine port.
        :param use_ssl: whether the client should connect over SSL.
        :raises ValueError: from the base class if ``index`` or ``base_url`` is ``None``.
        """
        super().__init__(index, base_url, port, use_ssl)
        base_url = base_url.replace('https://', '')  # hide https
        # `use_ssl` used to be accepted and then dropped, so the caller's
        # choice never reached the client. Pass it in the host entry, as the
        # constructor this class replaces did.
        self._engine = Elasticsearch(hosts=[{'host': base_url, 'port': port, 'use_ssl': use_ssl}])
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import logging
2+
3+
from mdps_ds_lib.lib.aws.aws_cred import AwsCred
4+
from opensearchpy import AWSV4SignerAuth, OpenSearch, RequestsHttpConnection
5+
from mdps_ds_lib.lib.aws.es_middleware_abstract import ESMiddlewareAbstract
6+
7+
LOGGER = logging.getLogger(__name__)
8+
9+
10+
class OsMiddlewareAws(ESMiddlewareAbstract):
    """OpenSearch middleware whose requests are signed with AWS SigV4 credentials."""

    def __init__(self, index, base_url, port=443, use_ssl=True) -> None:
        """Create a SigV4-authenticated OpenSearch client.

        :param index: default index name (validated by the base class).
        :param base_url: engine host; a leading ``https://`` is stripped.
        :param port: engine port.
        :param use_ssl: whether the client should connect over SSL.
        :raises ValueError: from the base class if ``index`` or ``base_url`` is ``None``.
        """
        super().__init__(index, base_url, port, use_ssl)
        base_url = base_url.replace('https://', '')  # hide https
        self._index = index
        aws_cred = AwsCred()
        credentials = aws_cred.get_session().get_credentials()
        # https://opensearch.org/blog/aws-sigv4-support-for-clients/
        # NOTE(review): the signer is built without an explicit service name
        # (an unused local `service = 'es'` was removed here); confirm the
        # signer's default matches the target domain.
        auth = AWSV4SignerAuth(credentials, aws_cred.region)

        self._engine = OpenSearch(
            hosts=[{'host': base_url, 'port': port}],
            http_auth=auth,
            use_ssl=use_ssl,
            verify_certs=True,
            connection_class=RequestsHttpConnection
        )

0 commit comments

Comments
 (0)