Skip to content

Commit 205f795

Browse files
committed
Abstracted how text fields use the keyword fields nested inside them
1 parent c9b45d8 commit 205f795

File tree

38 files changed

+758
-545
lines changed

38 files changed

+758
-545
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java

Lines changed: 177 additions & 128 deletions
Large diffs are not rendered by default.

plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java

Lines changed: 69 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,21 @@
2020
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
2121
import org.apache.lucene.document.Field;
2222
import org.apache.lucene.document.FieldType;
23+
import org.apache.lucene.document.StoredField;
2324
import org.apache.lucene.index.IndexOptions;
2425
import org.elasticsearch.ElasticsearchParseException;
2526
import org.elasticsearch.index.IndexVersion;
2627
import org.elasticsearch.index.analysis.AnalyzerScope;
2728
import org.elasticsearch.index.analysis.IndexAnalyzers;
2829
import org.elasticsearch.index.analysis.NamedAnalyzer;
30+
import org.elasticsearch.index.mapper.CompositeSyntheticFieldLoader;
2931
import org.elasticsearch.index.mapper.DocumentParserContext;
3032
import org.elasticsearch.index.mapper.FieldMapper;
3133
import org.elasticsearch.index.mapper.KeywordFieldMapper;
3234
import org.elasticsearch.index.mapper.MapperBuilderContext;
33-
import org.elasticsearch.index.mapper.SourceFieldMapper;
35+
import org.elasticsearch.index.mapper.SourceLoader;
3436
import org.elasticsearch.index.mapper.StringStoredFieldFieldLoader;
37+
import org.elasticsearch.index.mapper.TextFamilyFieldMapper;
3538
import org.elasticsearch.index.mapper.TextFieldMapper;
3639
import org.elasticsearch.index.mapper.TextParams;
3740
import org.elasticsearch.index.mapper.TextSearchInfo;
@@ -61,7 +64,7 @@
6164
* This code is largely a copy of TextFieldMapper which is less than ideal -
6265
* my attempts to subclass TextFieldMapper failed but we can revisit this.
6366
**/
64-
public class AnnotatedTextFieldMapper extends FieldMapper {
67+
public class AnnotatedTextFieldMapper extends TextFamilyFieldMapper {
6568

6669
public static final String CONTENT_TYPE = "annotated_text";
6770

@@ -84,28 +87,26 @@ public static class Builder extends FieldMapper.Builder {
8487
final Parameter<String> indexOptions = TextParams.textIndexOptions(m -> builder(m).indexOptions.getValue());
8588
final Parameter<Boolean> norms = TextParams.norms(true, m -> builder(m).norms.getValue());
8689
final Parameter<String> termVectors = TextParams.termVectors(m -> builder(m).termVectors.getValue());
90+
private final Parameter<Boolean> store = Parameter.storeParam(m -> builder(m).store.getValue(), false);
8791

8892
private final Parameter<Map<String, String>> meta = Parameter.metaParam();
8993

9094
private final IndexVersion indexCreatedVersion;
9195
private final TextParams.Analyzers analyzers;
92-
private final boolean isSyntheticSourceEnabled;
93-
private final Parameter<Boolean> store;
96+
private final boolean isWithinMultiField;
9497

95-
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean isSyntheticSourceEnabled) {
98+
private boolean isSyntheticSourceEnabled;
99+
100+
public Builder(String name, IndexVersion indexCreatedVersion, IndexAnalyzers indexAnalyzers, boolean isWithinMultiField) {
96101
super(name);
97102
this.indexCreatedVersion = indexCreatedVersion;
103+
this.isWithinMultiField = isWithinMultiField;
98104
this.analyzers = new TextParams.Analyzers(
99105
indexAnalyzers,
100106
m -> builder(m).analyzers.getIndexAnalyzer(),
101107
m -> builder(m).analyzers.positionIncrementGap.getValue(),
102108
indexCreatedVersion
103109
);
104-
this.isSyntheticSourceEnabled = isSyntheticSourceEnabled;
105-
this.store = Parameter.storeParam(
106-
m -> builder(m).store.getValue(),
107-
() -> isSyntheticSourceEnabled && multiFieldsBuilder.hasSyntheticSourceCompatibleKeywordField() == false
108-
);
109110
}
110111

111112
@Override
@@ -135,6 +136,7 @@ private AnnotatedTextFieldType buildFieldType(FieldType fieldType, MapperBuilder
135136
store.getValue(),
136137
tsi,
137138
context.isSourceSynthetic(),
139+
isWithinMultiField,
138140
TextFieldMapper.SyntheticSourceHelper.syntheticSourceDelegate(fieldType, multiFields),
139141
meta.getValue()
140142
);
@@ -154,6 +156,7 @@ public AnnotatedTextFieldMapper build(MapperBuilderContext context) {
154156
}
155157
}
156158
BuilderParams builderParams = builderParams(this, context);
159+
this.isSyntheticSourceEnabled = context.isSourceSynthetic();
157160
return new AnnotatedTextFieldMapper(
158161
leafName(),
159162
fieldType,
@@ -165,7 +168,7 @@ public AnnotatedTextFieldMapper build(MapperBuilderContext context) {
165168
}
166169

167170
public static final TypeParser PARSER = new TypeParser(
168-
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), SourceFieldMapper.isSynthetic(c.getIndexSettings()))
171+
(n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers(), c.isWithinMultiField())
169172
);
170173

171174
/**
@@ -484,15 +487,17 @@ private void emitAnnotation(int firstSpannedTextPosInc, int annotationPosLen) th
484487
}
485488

486489
public static final class AnnotatedTextFieldType extends TextFieldMapper.TextFieldType {
490+
487491
private AnnotatedTextFieldType(
488492
String name,
489493
boolean store,
490494
TextSearchInfo tsi,
491495
boolean isSyntheticSource,
496+
boolean isWithinMultiField,
492497
KeywordFieldMapper.KeywordFieldType syntheticSourceDelegate,
493498
Map<String, String> meta
494499
) {
495-
super(name, true, store, tsi, isSyntheticSource, syntheticSourceDelegate, meta, false, false);
500+
super(name, true, store, tsi, isSyntheticSource, isWithinMultiField, syntheticSourceDelegate, meta, false, false);
496501
}
497502

498503
public AnnotatedTextFieldType(String name, Map<String, String> meta) {
@@ -505,9 +510,9 @@ public String typeName() {
505510
}
506511
}
507512

513+
private final IndexVersion indexCreatedVersion;
508514
private final FieldType fieldType;
509515
private final Builder builder;
510-
511516
private final NamedAnalyzer indexAnalyzer;
512517

513518
protected AnnotatedTextFieldMapper(
@@ -517,11 +522,26 @@ protected AnnotatedTextFieldMapper(
517522
BuilderParams builderParams,
518523
Builder builder
519524
) {
520-
super(simpleName, mappedFieldType, builderParams);
525+
super(
526+
simpleName,
527+
builder.indexCreatedVersion,
528+
builder.isSyntheticSourceEnabled,
529+
builder.isWithinMultiField,
530+
mappedFieldType,
531+
builderParams
532+
);
533+
521534
assert fieldType.tokenized();
535+
522536
this.fieldType = freezeAndDeduplicateFieldType(fieldType);
523537
this.builder = builder;
524538
this.indexAnalyzer = wrapAnalyzer(builder.analyzers.getIndexAnalyzer());
539+
this.indexCreatedVersion = builder.indexCreatedVersion;
540+
}
541+
542+
@Override
543+
public AnnotatedTextFieldType fieldType() {
544+
return (AnnotatedTextFieldType) super.fieldType();
525545
}
526546

527547
@Override
@@ -543,6 +563,18 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
543563
if (fieldType.omitNorms()) {
544564
context.addToFieldNames(fieldType().name());
545565
}
566+
} else if (needsToSupportSyntheticSource() && fieldType.stored() == false) {
567+
// if synthetic source needs to be supported, yet the field isn't stored, then we need to rely on something
568+
// else to support synthetic source
569+
570+
// if we can rely on the synthetic source delegate for synthetic source, then return
571+
if (fieldType().canUseSyntheticSourceDelegateForSyntheticSource(value)) {
572+
return;
573+
}
574+
575+
// otherwise, we need to store a copy of this value so that synthetic source can load it
576+
final String fieldName = fieldType().syntheticSourceFallbackFieldName();
577+
context.doc().add(new StoredField(fieldName, value));
546578
}
547579
}
548580

@@ -553,8 +585,7 @@ protected String contentType() {
553585

554586
@Override
555587
public FieldMapper.Builder getMergeBuilder() {
556-
return new Builder(leafName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers, builder.isSyntheticSourceEnabled)
557-
.init(this);
588+
return new Builder(leafName(), builder.indexCreatedVersion, builder.analyzers.indexAnalyzers, isWithinMultiField).init(this);
558589
}
559590

560591
@Override
@@ -568,11 +599,31 @@ protected void write(XContentBuilder b, Object value) throws IOException {
568599
});
569600
}
570601

602+
return new SyntheticSourceSupport.Native(() -> syntheticFieldLoader(fullPath(), leafName()));
603+
}
604+
605+
private SourceLoader.SyntheticFieldLoader syntheticFieldLoader(String fullFieldName, String leafFieldName) {
606+
// since we don't know whether the delegate field loader can be used for synthetic source until parsing, we
607+
// need to check both this field and the delegate
608+
609+
// first field loader, representing this field
610+
final String fieldName = fieldType().syntheticSourceFallbackFieldName();
611+
final var thisFieldLayer = new CompositeSyntheticFieldLoader.StoredFieldLayer(fieldName) {
612+
@Override
613+
protected void writeValue(Object value, XContentBuilder b) throws IOException {
614+
b.value(value.toString());
615+
}
616+
};
617+
618+
final CompositeSyntheticFieldLoader fieldLoader = new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, thisFieldLayer);
619+
620+
// second loader, representing a delegate field, if one exists
571621
var kwd = TextFieldMapper.SyntheticSourceHelper.getKeywordFieldMapperForSyntheticSource(this);
572622
if (kwd != null) {
573-
return new SyntheticSourceSupport.Native(() -> kwd.syntheticFieldLoader(fullPath(), leafName()));
623+
// merge the two field loaders into one
624+
return fieldLoader.mergedWith(kwd.syntheticFieldLoader(fullPath(), leafName()));
574625
}
575626

576-
return super.syntheticSourceSupport();
627+
return fieldLoader;
577628
}
578629
}

server/src/main/java/org/elasticsearch/index/mapper/CompositeSyntheticFieldLoader.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,18 @@ public String fieldName() {
132132
return this.fullFieldName;
133133
}
134134

135+
/**
136+
* Returns a new {@link CompositeSyntheticFieldLoader} that merges this field loader with the given one.
137+
*/
138+
public CompositeSyntheticFieldLoader mergedWith(CompositeSyntheticFieldLoader other) {
139+
if (other == null) {
140+
return new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, List.copyOf(parts));
141+
}
142+
List<Layer> mergedParts = new ArrayList<>(parts);
143+
mergedParts.addAll(other.parts);
144+
return new CompositeSyntheticFieldLoader(leafFieldName, fullFieldName, mergedParts);
145+
}
146+
135147
/**
136148
* Represents one layer of loading synthetic source values for a field
137149
* as a part of {@link CompositeSyntheticFieldLoader}.

server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,20 @@ public final void addIgnoredField(IgnoredSourceFieldMapper.NameValue values) {
335335
}
336336
}
337337

338+
/**
339+
* This function acts as a more "readable" wrapper around adding ignored fields.
340+
*
341+
* This is useful when we want to reuse the existing logic that {@link IgnoredSourceFieldMapper} provides for synthetic source, without
342+
* explicitly calling addIgnoredField(). Without this, it's a bit confusing why fields that are not meant to be ignored, are being
343+
* added to ignored source.
344+
*/
345+
public final void storeFieldForSyntheticSource(String fullPath, String leafName, BytesRef valueBytes, LuceneDocument doc) {
346+
if (canAddIgnoredField()) {
347+
var fieldData = new IgnoredSourceFieldMapper.NameValue(fullPath, fullPath.lastIndexOf(leafName), valueBytes, doc);
348+
ignoredFieldValues.add(fieldData);
349+
}
350+
}
351+
338352
final void removeLastIgnoredField(String name) {
339353
if (ignoredFieldValues.isEmpty() == false && ignoredFieldValues.getLast().name().equals(name)) {
340354
ignoredFieldValues.removeLast();

server/src/main/java/org/elasticsearch/index/mapper/DynamicFieldsBuilder.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -335,10 +335,9 @@ public boolean newDynamicStringField(DocumentParserContext context, String name)
335335
);
336336
} else {
337337
return createDynamicField(
338-
new TextFieldMapper.Builder(name, context.indexAnalyzers(), SourceFieldMapper.isSynthetic(context.indexSettings()))
339-
.addMultiField(
340-
new KeywordFieldMapper.Builder("keyword", context.indexSettings().getIndexVersionCreated()).ignoreAbove(256)
341-
),
338+
new TextFieldMapper.Builder(name, context.indexAnalyzers()).addMultiField(
339+
new KeywordFieldMapper.Builder("keyword", context.indexSettings().getIndexVersionCreated(), true).ignoreAbove(256)
340+
),
342341
context
343342
);
344343
}

server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -610,28 +610,13 @@ public static class Builder {
610610

611611
private final Map<String, Function<MapperBuilderContext, FieldMapper>> mapperBuilders = new HashMap<>();
612612

613-
private boolean hasSyntheticSourceCompatibleKeywordField;
614-
615613
public Builder add(FieldMapper.Builder builder) {
616614
mapperBuilders.put(builder.leafName(), builder::build);
617-
618-
if (builder instanceof KeywordFieldMapper.Builder kwd) {
619-
if (kwd.hasNormalizer() == false && (kwd.hasDocValues() || kwd.isStored())) {
620-
hasSyntheticSourceCompatibleKeywordField = true;
621-
}
622-
}
623-
624615
return this;
625616
}
626617

627618
private void add(FieldMapper mapper) {
628619
mapperBuilders.put(mapper.leafName(), context -> mapper);
629-
630-
if (mapper instanceof KeywordFieldMapper kwd) {
631-
if (kwd.hasNormalizer() == false && (kwd.fieldType().hasDocValues() || kwd.fieldType().isStored())) {
632-
hasSyntheticSourceCompatibleKeywordField = true;
633-
}
634-
}
635620
}
636621

637622
private void update(FieldMapper toMerge, MapperMergeContext context) {
@@ -649,10 +634,6 @@ public boolean hasMultiFields() {
649634
return mapperBuilders.isEmpty() == false;
650635
}
651636

652-
public boolean hasSyntheticSourceCompatibleKeywordField() {
653-
return hasSyntheticSourceCompatibleKeywordField;
654-
}
655-
656637
public MultiFields build(Mapper.Builder mainFieldBuilder, MapperBuilderContext context) {
657638
if (mapperBuilders.isEmpty()) {
658639
return empty();

0 commit comments

Comments
 (0)