Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 24 additions & 7 deletions src/retriever/data_tiers/tier_1/elasticsearch/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,10 +250,11 @@ async def legacy_get_operations(
return operations, nodes

@override
async def get_metadata(self) -> dict[str, Any] | None:
async def get_metadata(self, bypass_cache: bool = False) -> dict[str, Any] | None:
return await get_t1_metadata(
es_connection=self.es_connection,
indices_alias=CONFIG.tier1.elasticsearch.index_name,
bypass_cache=bypass_cache,
)

@override
Expand All @@ -263,11 +264,11 @@ async def get_operations(
# return await self.legacy_get_metadata()
return await self.get_t1_operations()

async def get_t1_operations(
self,
) -> tuple[list[Operation], dict[BiolinkEntity, OperationNode]]:
"""Get tier1 operations based on metadata."""
metadata_blob = await self.get_metadata()
async def get_valid_metadata(
self, bypass_cache: bool = False
) -> tuple[dict[str, Any], list[str]]:
"""Get valid metadata and raise exception if failed."""
metadata_blob = await self.get_metadata(bypass_cache)

if metadata_blob is None:
raise ValueError(
Expand All @@ -280,8 +281,24 @@ async def get_t1_operations(

indices = await get_t1_indices(self.es_connection)

metadata_list = extract_metadata_entries_from_blob(metadata_blob, indices)
# ensure metadata matches indices
mismatched = any(metadata_blob.get(i) is None for i in indices)

if mismatched:
if not bypass_cache:
log.error("Possibly stale data got from cache. Refetching remotely.")
return await self.get_valid_metadata(bypass_cache=True)
else:
raise ValueError("Invalid metadata retrieved.")

return metadata_blob, indices

async def get_t1_operations(
    self,
) -> tuple[list[Operation], dict[BiolinkEntity, OperationNode]]:
    """Build tier-1 operations and their operation nodes from validated metadata.

    Returns:
        A tuple of (operations, nodes keyed by Biolink entity).

    Raises:
        ValueError: propagated from ``get_valid_metadata`` when the
            metadata cannot be retrieved or validated.
    """
    # Fetch the metadata blob together with the index names it was
    # validated against; this raises rather than returning bad data.
    blob, index_names = await self.get_valid_metadata()
    entries = extract_metadata_entries_from_blob(blob, index_names)
    return await generate_operations(entries)
Expand Down
11 changes: 8 additions & 3 deletions src/retriever/data_tiers/tier_1/elasticsearch/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,13 @@ async def retrieve_metadata_from_es(


async def get_t1_metadata(
es_connection: AsyncElasticsearch | None, indices_alias: str, retries: int = 0
es_connection: AsyncElasticsearch | None,
indices_alias: str,
bypass_cache: bool,
retries: int = 0,
) -> T1MetaData | None:
"""Caller to orchestrate retrieving t1 metadata."""
meta_blob = await read_metadata_cache(CACHE_KEY)
meta_blob = None if bypass_cache else await read_metadata_cache(CACHE_KEY)
if not meta_blob:
try:
if es_connection is None:
Expand All @@ -133,7 +136,9 @@ async def get_t1_metadata(
"Invalid Elasticsearch connection"
):
return None
return await get_t1_metadata(es_connection, indices_alias, retries + 1)
return await get_t1_metadata(
es_connection, indices_alias, bypass_cache=True, retries=retries + 1
)

log.success("DINGO Metadata retrieved!")
return meta_blob
Expand Down
13 changes: 9 additions & 4 deletions tests/data_tiers/tier_1/elasticsearch_tests/test_tier1_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,12 @@ def mock_elasticsearch_config(monkeypatch: pytest.MonkeyPatch) -> Iterator[None]
@pytest.mark.parametrize(
"payload, expected",
[
(PAYLOAD_0, 1),
(PAYLOAD_0, 0),
(PAYLOAD_1, 4),
(PAYLOAD_2, 32),
(
[PAYLOAD_0, PAYLOAD_1, PAYLOAD_2],
[1, 4, 32]
[0, 4, 32]
)
],
ids=[
Expand All @@ -117,6 +117,11 @@ async def test_elasticsearch_driver(payload: ESPayload | list[ESPayload], expect
hits: list[ESEdge] | list[ESEdge] = await driver.run_query(payload)

def assert_single_result(res, expected_result_num: int):
if res is None:
if expected_result_num != 0:
raise AssertionError(f"Expected empty result, got {type(res)}")
else:
return
if not isinstance(res, list):
raise AssertionError(f"Expected results to be list, got {type(res)}")
if not len(res) == expected_result_num:
Expand Down Expand Up @@ -201,7 +206,7 @@ async def test_metadata_retrieval():
"qgraph, expected_hits",
[
(DINGO_QGRAPH, 8),
(ID_BYPASS_PAYLOAD, 6395), # <-- adjust to the real number
(ID_BYPASS_PAYLOAD, 6776), # <-- adjust to the real number
],
)
async def test_end_to_end(qgraph, expected_hits):
Expand Down Expand Up @@ -244,4 +249,4 @@ async def test_ubergraph_info_retrieval():
# print(k, v)

assert "mapping" in info
assert len(info["mapping"]) == 581143
assert len(info["mapping"]) == 122707