Skip to content

Commit a068aa1

Browse files
committed
perf(meta_process): parallelize SPARQL uploads and update oc-ocdm
- parallelize data and provenance SPARQL uploads using multiprocessing fork
- update oc-ocdm to 9.4.0, which uses set difference instead of graph isomorphism (no blank nodes in data model)
- fix plotting to handle null duration values
1 parent 5a02db1 commit a068aa1

File tree

7 files changed

+287
-1897
lines changed

7 files changed

+287
-1897
lines changed

oc_meta/run/benchmark/benchmark_config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ cache_db: 11
8080

8181
# Virtuoso bulk load settings
8282
virtuoso_bulk_load:
83-
enabled: True
83+
enabled: false
8484
data_container: oc-meta-test-virtuoso
8585
prov_container: oc-meta-test-virtuoso-prov
8686
bulk_load_dir: /database/bulk_load

oc_meta/run/benchmark/plotting.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ def get_phase_duration_by_name(run: Dict[str, Any], phase_name: str) -> float:
6666
"""Get phase duration by name instead of index."""
6767
for phase in run["phases"]:
6868
if phase["name"] == phase_name:
69-
return phase["duration_seconds"]
69+
return phase["duration_seconds"] or 0
7070
return 0
7171

7272

@@ -75,7 +75,7 @@ def get_curation_total(run: Dict[str, Any]) -> float:
7575
total = 0
7676
for phase in run["phases"]:
7777
if phase["name"].startswith("curation__"):
78-
total += phase["duration_seconds"]
78+
total += phase["duration_seconds"] or 0
7979
return total
8080

8181

@@ -356,7 +356,7 @@ def _get_phase_duration_from_report(report: Dict[str, Any], phase_name: str) ->
356356
"""Get phase duration from a report dict by phase name."""
357357
for phase in report.get("phases", []):
358358
if phase["name"] == phase_name:
359-
return phase["duration_seconds"]
359+
return phase["duration_seconds"] or 0.0
360360
return 0.0
361361

362362

0 commit comments

Comments
 (0)