Skip to content

Commit a8b3ad7

Browse files
committed
Fix simulate pipeline API
The simulate pipeline API uses a special deep copy method to write out its content via toXContent - this method does not support writing out sets, resulting in an error as this processor writes out data using sets. This commit fixes this issue by replacing the set with a list, so that the serialization works again, at the small price of creating more objects when running. Closes #27
1 parent d3d4371 commit a8b3ad7

File tree

3 files changed

+78
-20
lines changed

3 files changed

+78
-20
lines changed

src/main/java/de/spinscale/elasticsearch/ingest/opennlp/OpenNlpProcessor.java

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,10 @@
2222
import org.elasticsearch.ingest.IngestDocument;
2323
import org.elasticsearch.ingest.Processor;
2424

25-
import java.io.IOException;
25+
import java.util.ArrayList;
2626
import java.util.HashMap;
2727
import java.util.HashSet;
28+
import java.util.Iterator;
2829
import java.util.List;
2930
import java.util.Map;
3031
import java.util.Set;
@@ -34,15 +35,14 @@
3435

3536
public class OpenNlpProcessor extends AbstractProcessor {
3637

37-
public static final String TYPE = "opennlp";
38+
static final String TYPE = "opennlp";
3839

3940
private final OpenNlpService openNlpService;
4041
private final String sourceField;
4142
private final String targetField;
4243
private final Set<String> fields;
4344

44-
OpenNlpProcessor(OpenNlpService openNlpService, String tag, String sourceField, String targetField, Set<String> fields) throws
45-
IOException {
45+
OpenNlpProcessor(OpenNlpService openNlpService, String tag, String sourceField, String targetField, Set<String> fields) {
4646
super(tag);
4747
this.openNlpService = openNlpService;
4848
this.sourceField = sourceField;
@@ -51,7 +51,7 @@ public class OpenNlpProcessor extends AbstractProcessor {
5151
}
5252

5353
@Override
54-
public void execute(IngestDocument ingestDocument) throws Exception {
54+
public void execute(IngestDocument ingestDocument) {
5555
String content = ingestDocument.getFieldValue(sourceField, String.class);
5656

5757
if (Strings.hasLength(content)) {
@@ -63,7 +63,15 @@ public void execute(IngestDocument ingestDocument) throws Exception {
6363
merge(entities, field, data);
6464
}
6565

66-
ingestDocument.setFieldValue(targetField, entities);
66+
// convert set to list, otherwise toXContent serialization in simulate pipeline fails
67+
Map<String, List<String>> entitiesToStore = new HashMap<>();
68+
Iterator<Map.Entry<String, Set<String>>> iterator = entities.entrySet().iterator();
69+
while (iterator.hasNext()) {
70+
Map.Entry<String, Set<String>> entry = iterator.next();
71+
entitiesToStore.put(entry.getKey(), new ArrayList<>(entry.getValue()));
72+
}
73+
74+
ingestDocument.setFieldValue(targetField, entitiesToStore);
6775
}
6876
}
6977

@@ -81,8 +89,7 @@ public static final class Factory implements Processor.Factory {
8189
}
8290

8391
@Override
84-
public OpenNlpProcessor create(Map<String, Processor.Factory> registry, String processorTag, Map<String, Object> config)
85-
throws Exception {
92+
public OpenNlpProcessor create(Map<String, Processor.Factory> registry, String processorTag, Map<String, Object> config) {
8693
String field = readStringProperty(TYPE, processorTag, config, "field");
8794
String targetField = readStringProperty(TYPE, processorTag, config, "target_field", "entities");
8895
List<String> fields = readOptionalList(TYPE, processorTag, config, "fields");
@@ -104,8 +111,9 @@ private static void mergeExisting(Map<String, Set<String>> entities, IngestDocum
104111
private static void merge(Map<String, Set<String>> map, String key, Set<String> values) {
105112
if (values.size() == 0) return;
106113

107-
if (map.containsKey(key))
114+
if (map.containsKey(key)) {
108115
values.addAll(map.get(key));
116+
}
109117

110118
map.put(key, values);
111119
}

src/test/java/de/spinscale/elasticsearch/ingest/opennlp/OpenNlpProcessorTests.java

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -17,39 +17,46 @@
1717

1818
package de.spinscale.elasticsearch.ingest.opennlp;
1919

20+
import org.elasticsearch.action.ingest.SimulateProcessorResult;
21+
import org.elasticsearch.common.io.PathUtils;
2022
import org.elasticsearch.common.settings.Settings;
23+
import org.elasticsearch.common.xcontent.ToXContent;
24+
import org.elasticsearch.common.xcontent.XContentBuilder;
25+
import org.elasticsearch.common.xcontent.XContentFactory;
2126
import org.elasticsearch.ingest.IngestDocument;
2227
import org.elasticsearch.ingest.Processor;
2328
import org.elasticsearch.ingest.RandomDocumentPicks;
2429
import org.elasticsearch.test.ESTestCase;
25-
import org.junit.Before;
30+
import org.junit.BeforeClass;
2631

27-
import java.io.IOException;
32+
import java.nio.file.Path;
2833
import java.util.Arrays;
2934
import java.util.Collections;
3035
import java.util.HashMap;
3136
import java.util.HashSet;
37+
import java.util.List;
3238
import java.util.Map;
33-
import java.util.Set;
3439

3540
import static org.hamcrest.Matchers.containsInAnyOrder;
3641
import static org.hamcrest.Matchers.hasKey;
42+
import static org.hamcrest.Matchers.hasSize;
3743
import static org.hamcrest.Matchers.instanceOf;
3844
import static org.hamcrest.Matchers.not;
3945

4046
public class OpenNlpProcessorTests extends ESTestCase {
4147

42-
private OpenNlpService service;
48+
private static OpenNlpService service;
4349

44-
@Before
45-
public void createOpenNlpService() throws IOException {
50+
@BeforeClass
51+
public static void createOpenNlpService() throws Exception {
4652
Settings settings = Settings.builder()
4753
.put("ingest.opennlp.model.file.names", "en-ner-persons.bin")
4854
.put("ingest.opennlp.model.file.locations", "en-ner-locations.bin")
4955
.put("ingest.opennlp.model.file.dates", "en-ner-dates.bin")
5056
.build();
5157

52-
service = new OpenNlpService(getDataPath("/models/en-ner-persons.bin").getParent(), settings).start();
58+
Path path = PathUtils.get(OpenNlpProcessorTests.class.getResource("/models/en-ner-persons.bin").toURI());
59+
service = new OpenNlpService(path.getParent(), settings).start();
5360
}
5461

5562
public void testThatExtractionsWork() throws Exception {
@@ -110,7 +117,20 @@ public void testConstructorNoFieldsSpecified() throws Exception {
110117
assertThatHasElements(entityData, "names", "Kobe Bryant", "Michael Jordan");
111118
assertThatHasElements(entityData, "dates", "Yesterday");
112119
assertThatHasElements(entityData, "locations", "Munich", "New York");
120+
}
121+
122+
public void testToXContent() throws Exception {
123+
OpenNlpProcessor processor = new OpenNlpProcessor(service, randomAlphaOfLength(10), "source_field", "target_field",
124+
new HashSet<>(Arrays.asList("names", "dates", "locations")));
125+
126+
IngestDocument ingestDocument = getIngestDocument();
127+
processor.execute(ingestDocument);
128+
129+
SimulateProcessorResult result = new SimulateProcessorResult("tag", ingestDocument);
113130

131+
try (XContentBuilder builder = XContentFactory.jsonBuilder()) {
132+
result.toXContent(builder, ToXContent.EMPTY_PARAMS);
133+
}
114134
}
115135

116136
private Map<String, Object> getIngestDocumentData(OpenNlpProcessor processor) throws Exception {
@@ -138,15 +158,16 @@ private Map<String, Object> getIngestDocumentData(IngestDocument ingestDocument)
138158
}
139159

140160
private void assertThatHasElements(Map<String, Object> entityData, String field, String ... items) {
141-
Set<String> values = getValues(entityData, field);
161+
List<String> values = getValues(entityData, field);
162+
assertThat(values, hasSize(items.length));
142163
assertThat(values, containsInAnyOrder(items));
143164
}
144165

145-
private Set<String> getValues(Map<String, Object> entityData, String field) {
166+
private List<String> getValues(Map<String, Object> entityData, String field) {
146167
assertThat(entityData, hasKey(field));
147-
assertThat(entityData.get(field), instanceOf(Set.class));
168+
assertThat(entityData.get(field), instanceOf(List.class));
148169
@SuppressWarnings("unchecked")
149-
Set<String> values = (Set<String>) entityData.get(field);
170+
List<String> values = (List<String>) entityData.get(field);
150171
return values;
151172
}
152173
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
---
2+
"Test opennlp processor with defaults":
3+
- do:
4+
ingest.put_pipeline:
5+
id: "my_pipeline"
6+
body: >
7+
{
8+
"description": "_description",
9+
"processors": [
10+
{
11+
"opennlp" : {
12+
"field" : "field1"
13+
}
14+
}
15+
]
16+
}
17+
- match: { acknowledged: true }
18+
19+
- do:
20+
ingest.simulate:
21+
id: "my_pipeline"
22+
body: {docs: [ { _source: { field1: "Kobe Bryant was one of the best basketball players of all times. Not even Michael Jordan has ever scored 81 points in one game. Munich is really an awesome city, but New York is as well. Yesterday has been the hottest day of the year." } } ] }
23+
24+
- match: { docs.0.doc._source.field1: "Kobe Bryant was one of the best basketball players of all times. Not even Michael Jordan has ever scored 81 points in one game. Munich is really an awesome city, but New York is as well. Yesterday has been the hottest day of the year." }
25+
- length: { docs.0.doc._source.entities: 3 }
26+
- match: { docs.0.doc._source.entities.dates: [ "Yesterday" ] }
27+
- match: { docs.0.doc._source.entities.names: [ "Kobe Bryant", "Michael Jordan" ] }
28+
- match: { docs.0.doc._source.entities.locations: [ "Munich", "New York" ] }
29+

0 commit comments

Comments
 (0)