Skip to content
This repository was archived by the owner on Apr 28, 2023. It is now read-only.

Commit 568cab7

Browse files
serjoshua and Josh Lagrimas authored
issue-550: fix duplicate Individuals issue (#574)
* issue-550: fix indexing duplicate Individuals fix for #550
* issue-550: display anonymous type for individuals related fix for #550
* issue-550: fix indexing duplication for related Individuals fix for #550
* issue-550: rename anonymous type key related fix for #550
* issue-550: fix indexing duplicate Individuals fix for #550
* issue-550: display anonymous type for individuals related fix for #550
* issue-550: fix indexing duplication for related Individuals fix for #550
* issue-550: rename anonymous type key related fix for #550

Co-authored-by: Josh Lagrimas <[email protected]>
1 parent 7d7456f commit 568cab7

File tree

9 files changed

+952
-909
lines changed

9 files changed

+952
-909
lines changed

ols-neo4j/src/main/java/uk/ac/ebi/spot/ols/loader/BatchNeo4JIndexer.java

Lines changed: 36 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,6 @@
11
package uk.ac.ebi.spot.ols.loader;
22

3-
import static uk.ac.ebi.spot.ols.loader.Neo4JIndexerConstants.*;
4-
import static uk.ac.ebi.spot.ols.config.OntologyDefaults.THING;
5-
6-
import java.util.Arrays;
7-
import java.util.Collection;
8-
import java.util.Collections;
9-
import java.util.HashMap;
10-
import java.util.LinkedList;
11-
import java.util.Map;
12-
import java.util.concurrent.TimeUnit;
13-
14-
import org.neo4j.graphdb.DynamicLabel;
15-
import org.neo4j.graphdb.GraphDatabaseService;
16-
import org.neo4j.graphdb.Label;
17-
import org.neo4j.graphdb.Result;
18-
import org.neo4j.graphdb.Transaction;
3+
import org.neo4j.graphdb.*;
194
import org.neo4j.graphdb.factory.GraphDatabaseFactory;
205
import org.neo4j.graphdb.index.IndexHits;
216
import org.neo4j.graphdb.schema.IndexDefinition;
@@ -30,12 +15,17 @@
3015
import org.slf4j.LoggerFactory;
3116
import org.springframework.beans.factory.annotation.Autowired;
3217
import org.springframework.stereotype.Component;
33-
3418
import uk.ac.ebi.spot.ols.config.OlsNeo4jConfiguration;
3519
import uk.ac.ebi.spot.ols.exception.IndexingException;
3620
import uk.ac.ebi.spot.ols.model.OntologyIndexer;
3721
import uk.ac.ebi.spot.ols.util.LocalizedStrings;
3822

23+
import java.util.*;
24+
import java.util.concurrent.TimeUnit;
25+
26+
import static uk.ac.ebi.spot.ols.config.OntologyDefaults.THING;
27+
import static uk.ac.ebi.spot.ols.loader.Neo4JIndexerConstants.*;
28+
3929
/**
4030
* @author Simon Jupp
4131
* @date 17/06/2015
@@ -110,7 +100,7 @@ private Long getOrCreateMergedNode(BatchInserter inserter, Map<String, Long> mer
110100
}
111101
}
112102

113-
properties.put("label", labels.getFirstString("en"));
103+
properties.put("label", labels.getFirstString("en"));
114104

115105
Long hit = inserter.createNode(properties, nodeLabel);
116106
index.add(hit, properties);
@@ -220,6 +210,11 @@ private void indexIndividuals(BatchInserter inserter, OntologyLoader loader,
220210

221211
for (IRI individualIri : loader.getAllIndividualIRIs()) {
222212

213+
// avoid duplicating individuals already related to a class
214+
if (classNodeMap.containsKey(individualIri.toString())) {
215+
nodeMap.put(individualIri.toString(), classNodeMap.get(individualIri.toString()));
216+
}
217+
223218
Long node = NodeCreator.getOrCreateNode(inserter, nodeMap, loader, individualIri,
224219
new LinkedList<Label>(Arrays.asList(instanceLabel, _instanceLabel,
225220
nodeOntologyLabel)));
@@ -250,14 +245,19 @@ private void indexIndividuals(BatchInserter inserter, OntologyLoader loader,
250245
}
251246

252247
// add relations
253-
indexRelations(node, loader.getRelatedIndividuals(individualIri),
254-
inserter, loader, nodeMap,
255-
new LinkedList<Label>(Arrays.asList(instanceLabel, nodeOntologyLabel, _instanceLabel)));
256-
257248
indexRelations(node, loader.getRelatedClassesToIndividual(individualIri),
258249
inserter, loader, classNodeMap,
259250
new LinkedList<Label>(Arrays.asList(nodeLabel, nodeOntologyLabel, _nodeLabel)));
260251
}
252+
253+
// add related individuals only after indexing all individuals to avoid duplication
254+
for (IRI individualIri : loader.getAllIndividualIRIs()) {
255+
if (loader.getRelatedIndividuals(individualIri).size() > 0) {
256+
indexRelations(nodeMap.get(individualIri.toString()), loader.getRelatedIndividuals(individualIri),
257+
inserter, loader, nodeMap,
258+
new LinkedList<Label>(Arrays.asList(instanceLabel, nodeOntologyLabel, _instanceLabel)));
259+
}
260+
}
261261
}
262262

263263
private void indexRelations(Long node, Map<IRI, Collection<IRI>> relatedIndividuals,
@@ -429,7 +429,7 @@ private static void addLocalizedProperties(
429429

430430
properties.put(propertyName, localizedStrings.getFirstString("en"));
431431

432-
for(String language : localizedStrings.getLanguages()) {
432+
for (String language : localizedStrings.getLanguages()) {
433433
properties.put(propertyName + "_" + language, localizedStrings.getFirstString(language));
434434
}
435435
}
@@ -449,7 +449,7 @@ public void createIndex(Collection<OntologyLoader> loaders) throws IndexingExcep
449449
for (OntologyLoader loader : loaders) {
450450

451451
BatchInserter inserter = getBatchIndexer(loader.getOntologyName());
452-
452+
453453
setOntologyLabel(loader.getOntologyName());
454454
// index classes
455455
indexClasses(inserter, loader, classNodeMap, mergedNodeMap);
@@ -492,27 +492,24 @@ public void createIndex(Collection<OntologyLoader> loaders) throws IndexingExcep
492492
} catch (IllegalStateException e) {
493493
throw new IndexingException("Building Neo4j index failed as the schema index didn't finish in time", e);
494494
}
495-
}
496-
else if (state.equals(Schema.IndexState.FAILED)) {
495+
} else if (state.equals(Schema.IndexState.FAILED)) {
497496
throw new Exception("Index failed: " + indexDefinition.getLabel().name());
498497
}
499498
}
500499

501500
tx.success();
502-
}
503-
catch (Exception e) {
504-
logger.debug(e.getMessage(), e);
501+
} catch (Exception e) {
502+
logger.debug(e.getMessage(), e);
505503
tx.failure();
506504
throw new IndexingException("Building Neo4j index failed as the schema index creation failed", e);
507-
}
508-
finally {
505+
} finally {
509506
tx.close();
510507
db.shutdown();
511508
}
512509
}
513510

514-
protected GraphDatabaseService getGraphDatabase () {
515-
return new GraphDatabaseFactory().newEmbeddedDatabase(neo4jConfiguration.getNeo4JPath());
511+
protected GraphDatabaseService getGraphDatabase() {
512+
return new GraphDatabaseFactory().newEmbeddedDatabase(neo4jConfiguration.getNeo4JPath());
516513
}
517514

518515
public void dropIndex(OntologyLoader loader) throws IndexingException {
@@ -538,14 +535,14 @@ private void deleteNodes(String ontologyName) {
538535
int count = getNodeCount(
539536
"match (n:" + ontologyName.toUpperCase() + ")-[r]->() return count(r) as count", ontologyName);
540537

541-
for (int x = 0; x < count ; x +=DELETE_SIZE) {
538+
for (int x = 0; x < count; x += DELETE_SIZE) {
542539

543540
Transaction tx = db.beginTx();
544541

545542
try {
546543
String cypherDelete =
547-
"match (n:" + ontologyName.toUpperCase() + ")-[r]->() with r limit " +
548-
DELETE_SIZE + " delete r";
544+
"match (n:" + ontologyName.toUpperCase() + ")-[r]->() with r limit " +
545+
DELETE_SIZE + " delete r";
549546
getLogger().info("executing delete: " + cypherDelete);
550547
Result result = db.execute(cypherDelete);
551548
getLogger().info(result.resultAsString());
@@ -561,7 +558,7 @@ private void deleteNodes(String ontologyName) {
561558
count = getNodeCount(
562559
"match (n:" + ontologyName.toUpperCase() + ") return count(n) as count", ontologyName
563560
);
564-
for (int x = 0; x < count ; x +=DELETE_SIZE) {
561+
for (int x = 0; x < count; x += DELETE_SIZE) {
565562

566563
Transaction tx = db.beginTx();
567564

@@ -593,12 +590,10 @@ private int getNodeCount(String nodeCountCypher, String ontologyName) {
593590
count = (Long) result.next().get("count");
594591
getLogger().debug("query count " + count);
595592
tx.success();
596-
}
597-
catch (Exception e) {
593+
} catch (Exception e) {
598594
tx.failure();
599595
throw new IndexingException("Couldn't count: " + ontologyName, e);
600-
}
601-
finally {
596+
} finally {
602597
tx.close();
603598
}
604599
return count.intValue();

0 commit comments

Comments
 (0)