Skip to content

Commit 0c1b3ac

Browse files
authored
Properly handle multi fields in block loaders with synthetic source enabled (#127483)
1 parent ac6c7a9 commit 0c1b3ac

File tree

23 files changed

+535
-302
lines changed

23 files changed

+535
-302
lines changed

modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,8 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
378378
if (hasDocValues() && (blContext.fieldExtractPreference() != FieldExtractPreference.STORED || isSyntheticSource)) {
379379
return new BlockDocValuesReader.DoublesBlockLoader(name(), l -> l / scalingFactor);
380380
}
381-
if (isSyntheticSource) {
381+
// Multi fields don't have fallback synthetic source.
382+
if (isSyntheticSource && blContext.parentField(name()) == null) {
382383
return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) {
383384
@Override
384385
public Builder builder(BlockFactory factory, int expectedCount) {

server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,8 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
350350
return new BlockDocValuesReader.BooleansBlockLoader(name());
351351
}
352352

353-
if (isSyntheticSource) {
353+
// Multi fields don't have fallback synthetic source.
354+
if (isSyntheticSource && blContext.parentField(name()) == null) {
354355
return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) {
355356
@Override
356357
public Builder builder(BlockFactory factory, int expectedCount) {

server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -948,7 +948,8 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
948948
return new BlockDocValuesReader.LongsBlockLoader(name());
949949
}
950950

951-
if (isSyntheticSource) {
951+
// Multi fields don't have fallback synthetic source.
952+
if (isSyntheticSource && blContext.parentField(name()) == null) {
952953
return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) {
953954
@Override
954955
public Builder builder(BlockFactory factory, int expectedCount) {

server/src/main/java/org/elasticsearch/index/mapper/GeoPointFieldMapper.java

+9-3
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ protected void index(DocumentParserContext context, GeoPoint geometry) throws IO
316316

317317
/**
318318
* Parser that pretends to be the main document parser, but exposes the provided geohash regardless of how the geopoint was provided
319-
* in the incoming document. We rely on the fact that consumers are only ever call {@link XContentParser#textOrNull()} and never
319+
* in the incoming document. We rely on the fact that consumers only ever read text from the parser and never
320320
* advance tokens, which is explicitly disallowed by this parser.
321321
*/
322322
static class GeoHashMultiFieldParser extends FilterXContentParserWrapper {
@@ -332,6 +332,11 @@ public String textOrNull() throws IOException {
332332
return value;
333333
}
334334

335+
@Override
336+
public String text() throws IOException {
337+
return value;
338+
}
339+
335340
@Override
336341
public Token currentToken() {
337342
return Token.VALUE_STRING;
@@ -545,8 +550,9 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
545550
// So we have two subcases:
546551
// - doc_values are enabled - _ignored_source field does not exist since we have doc_values. We will use
547552
// blockLoaderFromSource which reads "native" synthetic source.
548-
// - doc_values are disabled - we know that _ignored_source field is present and use a special block loader.
549-
if (isSyntheticSource && hasDocValues() == false) {
553+
// - doc_values are disabled - we know that _ignored_source field is present and use a special block loader unless it's a multi
554+
// field.
555+
if (isSyntheticSource && hasDocValues() == false && blContext.parentField(name()) == null) {
550556
return blockLoaderFromFallbackSyntheticSource(blContext);
551557
}
552558

server/src/main/java/org/elasticsearch/index/mapper/IpFieldMapper.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,8 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
467467
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(name());
468468
}
469469

470-
if (isSyntheticSource) {
470+
// Multi fields don't have fallback synthetic source.
471+
if (isSyntheticSource && blContext.parentField(name()) == null) {
471472
return blockLoaderFromFallbackSyntheticSource(blContext);
472473
}
473474

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -755,7 +755,8 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
755755
return new BlockStoredFieldsReader.BytesFromBytesRefsBlockLoader(name());
756756
}
757757

758-
if (isSyntheticSource) {
758+
// Multi fields don't have fallback synthetic source.
759+
if (isSyntheticSource && blContext.parentField(name()) == null) {
759760
return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) {
760761
@Override
761762
public Builder builder(BlockFactory factory, int expectedCount) {

server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java

+2-1
Original file line numberDiff line numberDiff line change
@@ -1973,7 +1973,8 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) {
19731973
return type.blockLoaderFromDocValues(name());
19741974
}
19751975

1976-
if (isSyntheticSource) {
1976+
// Multi fields don't have fallback synthetic source.
1977+
if (isSyntheticSource && blContext.parentField(name()) == null) {
19771978
return type.blockLoaderFromFallbackSyntheticSource(name(), nullValue, coerce);
19781979
}
19791980

server/src/test/java/org/elasticsearch/index/mapper/blockloader/GeoPointFieldBlockLoaderTests.java

+40-18
Original file line numberDiff line numberDiff line change
@@ -35,28 +35,29 @@ protected Object expected(Map<String, Object> fieldMapping, Object value, TestCo
3535
var values = extractedFieldValues.values();
3636

3737
var nullValue = switch (fieldMapping.get("null_value")) {
38-
case String s -> convert(s, null);
38+
case String s -> convert(s, null, false);
3939
case null -> null;
4040
default -> throw new IllegalStateException("Unexpected null_value format");
4141
};
4242

4343
if (params.preference() == MappedFieldType.FieldExtractPreference.DOC_VALUES && hasDocValues(fieldMapping, true)) {
4444
if (values instanceof List<?> == false) {
45-
var point = convert(values, nullValue);
45+
var point = convert(values, nullValue, testContext.isMultifield());
4646
return point != null ? point.getEncoded() : null;
4747
}
4848

4949
var resultList = ((List<Object>) values).stream()
50-
.map(v -> convert(v, nullValue))
50+
.map(v -> convert(v, nullValue, testContext.isMultifield()))
5151
.filter(Objects::nonNull)
5252
.map(GeoPoint::getEncoded)
5353
.sorted()
5454
.toList();
5555
return maybeFoldList(resultList);
5656
}
5757

58+
// stored source is used
5859
if (params.syntheticSource() == false) {
59-
return exactValuesFromSource(values, nullValue);
60+
return exactValuesFromSource(values, nullValue, false);
6061
}
6162

6263
// Usually implementation of block loader from source adjusts values read from source
@@ -67,25 +68,25 @@ protected Object expected(Map<String, Object> fieldMapping, Object value, TestCo
6768
// That is unless "synthetic_source_keep" forces fallback synthetic source again.
6869

6970
if (testContext.forceFallbackSyntheticSource()) {
70-
return exactValuesFromSource(values, nullValue);
71+
return exactValuesFromSource(values, nullValue, false);
7172
}
7273

7374
String syntheticSourceKeep = (String) fieldMapping.getOrDefault("synthetic_source_keep", "none");
7475
if (syntheticSourceKeep.equals("all")) {
75-
return exactValuesFromSource(values, nullValue);
76+
return exactValuesFromSource(values, nullValue, false);
7677
}
7778
if (syntheticSourceKeep.equals("arrays") && extractedFieldValues.documentHasObjectArrays()) {
78-
return exactValuesFromSource(values, nullValue);
79+
return exactValuesFromSource(values, nullValue, false);
7980
}
8081

8182
// synthetic source and doc_values are present
8283
if (hasDocValues(fieldMapping, true)) {
8384
if (values instanceof List<?> == false) {
84-
return toWKB(normalize(convert(values, nullValue)));
85+
return toWKB(normalize(convert(values, nullValue, false)));
8586
}
8687

8788
var resultList = ((List<Object>) values).stream()
88-
.map(v -> convert(v, nullValue))
89+
.map(v -> convert(v, nullValue, false))
8990
.filter(Objects::nonNull)
9091
.sorted(Comparator.comparingLong(GeoPoint::getEncoded))
9192
.map(p -> toWKB(normalize(p)))
@@ -94,16 +95,20 @@ protected Object expected(Map<String, Object> fieldMapping, Object value, TestCo
9495
}
9596

9697
// synthetic source but no doc_values so using fallback synthetic source
97-
return exactValuesFromSource(values, nullValue);
98+
return exactValuesFromSource(values, nullValue, false);
9899
}
99100

100101
@SuppressWarnings("unchecked")
101-
private Object exactValuesFromSource(Object value, GeoPoint nullValue) {
102+
private Object exactValuesFromSource(Object value, GeoPoint nullValue, boolean needsMultifieldAdjustment) {
102103
if (value instanceof List<?> == false) {
103-
return toWKB(convert(value, nullValue));
104+
return toWKB(convert(value, nullValue, needsMultifieldAdjustment));
104105
}
105106

106-
var resultList = ((List<Object>) value).stream().map(v -> convert(v, nullValue)).filter(Objects::nonNull).map(this::toWKB).toList();
107+
var resultList = ((List<Object>) value).stream()
108+
.map(v -> convert(v, nullValue, needsMultifieldAdjustment))
109+
.filter(Objects::nonNull)
110+
.map(this::toWKB)
111+
.toList();
107112
return maybeFoldList(resultList);
108113
}
109114

@@ -163,14 +168,17 @@ private void processLeafLevel(Object value, ArrayList<Object> extracted) {
163168
}
164169

165170
@SuppressWarnings("unchecked")
166-
private GeoPoint convert(Object value, GeoPoint nullValue) {
171+
private GeoPoint convert(Object value, GeoPoint nullValue, boolean needsMultifieldAdjustment) {
167172
if (value == null) {
168-
return nullValue;
173+
if (nullValue == null) {
174+
return null;
175+
}
176+
return possiblyAdjustMultifieldValue(nullValue, needsMultifieldAdjustment);
169177
}
170178

171179
if (value instanceof String s) {
172180
try {
173-
return new GeoPoint(s);
181+
return possiblyAdjustMultifieldValue(new GeoPoint(s), needsMultifieldAdjustment);
174182
} catch (Exception e) {
175183
return null;
176184
}
@@ -180,16 +188,30 @@ private GeoPoint convert(Object value, GeoPoint nullValue) {
180188
if (m.get("type") != null) {
181189
var coordinates = (List<Double>) m.get("coordinates");
182190
// Order is GeoJSON is lon,lat
183-
return new GeoPoint(coordinates.get(1), coordinates.get(0));
191+
return possiblyAdjustMultifieldValue(new GeoPoint(coordinates.get(1), coordinates.get(0)), needsMultifieldAdjustment);
184192
} else {
185-
return new GeoPoint((Double) m.get("lat"), (Double) m.get("lon"));
193+
return possiblyAdjustMultifieldValue(new GeoPoint((Double) m.get("lat"), (Double) m.get("lon")), needsMultifieldAdjustment);
186194
}
187195
}
188196

189197
// Malformed values are excluded
190198
return null;
191199
}
192200

201+
private GeoPoint possiblyAdjustMultifieldValue(GeoPoint point, boolean isMultifield) {
202+
// geo_point multifields are parsed from a geohash representation of the original point (GeoPointFieldMapper#index)
203+
// and it's not exact.
204+
// So if this is a multifield we need another adjustment here.
205+
// Note that this does not apply to block loader from source because in this case we parse raw original values.
206+
// Same thing happens with synthetic source since it is generated from the parent field data that didn't go through multi field
207+
// parsing logic.
208+
if (isMultifield) {
209+
return point.resetFromString(point.geohash());
210+
}
211+
212+
return point;
213+
}
214+
193215
private GeoPoint normalize(GeoPoint point) {
194216
if (point == null) {
195217
return null;

server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldWithParentBlockLoaderTests.java

+79-27
Original file line numberDiff line numberDiff line change
@@ -9,51 +9,108 @@
99

1010
package org.elasticsearch.index.mapper.blockloader;
1111

12+
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
13+
14+
import org.elasticsearch.datageneration.DocumentGenerator;
1215
import org.elasticsearch.datageneration.FieldType;
16+
import org.elasticsearch.datageneration.MappingGenerator;
17+
import org.elasticsearch.datageneration.Template;
1318
import org.elasticsearch.datageneration.datasource.DataSourceHandler;
1419
import org.elasticsearch.datageneration.datasource.DataSourceRequest;
1520
import org.elasticsearch.datageneration.datasource.DataSourceResponse;
16-
import org.elasticsearch.datageneration.datasource.DefaultMappingParametersHandler;
1721
import org.elasticsearch.index.mapper.BlockLoaderTestCase;
22+
import org.elasticsearch.index.mapper.BlockLoaderTestRunner;
23+
import org.elasticsearch.index.mapper.MapperServiceTestCase;
24+
import org.elasticsearch.xcontent.XContentBuilder;
25+
import org.elasticsearch.xcontent.XContentType;
1826

19-
import java.util.HashMap;
27+
import java.io.IOException;
2028
import java.util.List;
2129
import java.util.Map;
2230

23-
public class TextFieldWithParentBlockLoaderTests extends BlockLoaderTestCase {
24-
public TextFieldWithParentBlockLoaderTests(Params params) {
25-
// keyword because we need a keyword parent field
26-
super(FieldType.KEYWORD.toString(), List.of(new DataSourceHandler() {
31+
import static org.elasticsearch.index.mapper.BlockLoaderTestCase.buildSpecification;
32+
import static org.elasticsearch.index.mapper.BlockLoaderTestCase.hasDocValues;
33+
34+
public class TextFieldWithParentBlockLoaderTests extends MapperServiceTestCase {
35+
private final BlockLoaderTestCase.Params params;
36+
private final BlockLoaderTestRunner runner;
37+
38+
@ParametersFactory(argumentFormatting = "preference=%s")
39+
public static List<Object[]> args() {
40+
return BlockLoaderTestCase.args();
41+
}
42+
43+
public TextFieldWithParentBlockLoaderTests(BlockLoaderTestCase.Params params) {
44+
this.params = params;
45+
this.runner = new BlockLoaderTestRunner(params);
46+
}
47+
48+
// This is similar to BlockLoaderTestCase#testBlockLoaderOfMultiField but has customizations required to properly test the case
49+
// of text multi field in a keyword field.
50+
public void testBlockLoaderOfParentField() throws IOException {
51+
var template = new Template(Map.of("parent", new Template.Leaf("parent", FieldType.KEYWORD.toString())));
52+
var specification = buildSpecification(List.of(new DataSourceHandler() {
2753
@Override
2854
public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceRequest.LeafMappingParametersGenerator request) {
29-
assert request.fieldType().equals(FieldType.KEYWORD.toString());
55+
// This is a bit tricky meta-logic.
56+
// We want to customize mapping but to do this we need the mapping for the same field type
57+
// so we use name to untangle this.
58+
if (request.fieldName().equals("parent") == false) {
59+
return null;
60+
}
3061

31-
// We need to force multi field generation
3262
return new DataSourceResponse.LeafMappingParametersGenerator(() -> {
33-
var defaultSupplier = DefaultMappingParametersHandler.keywordMapping(
34-
request,
35-
DefaultMappingParametersHandler.commonMappingParameters()
36-
);
37-
var mapping = defaultSupplier.get();
63+
var dataSource = request.dataSource();
64+
65+
var keywordParentMapping = dataSource.get(
66+
new DataSourceRequest.LeafMappingParametersGenerator(
67+
dataSource,
68+
"_field",
69+
FieldType.KEYWORD.toString(),
70+
request.eligibleCopyToFields(),
71+
request.dynamicMapping()
72+
)
73+
).mappingGenerator().get();
74+
75+
var textMultiFieldMapping = dataSource.get(
76+
new DataSourceRequest.LeafMappingParametersGenerator(
77+
dataSource,
78+
"_field",
79+
FieldType.TEXT.toString(),
80+
request.eligibleCopyToFields(),
81+
request.dynamicMapping()
82+
)
83+
).mappingGenerator().get();
84+
3885
// we don't need this here
39-
mapping.remove("copy_to");
86+
keywordParentMapping.remove("copy_to");
4087

41-
var textMultiFieldMappingSupplier = DefaultMappingParametersHandler.textMapping(request, new HashMap<>());
42-
var textMultiFieldMapping = textMultiFieldMappingSupplier.get();
4388
textMultiFieldMapping.put("type", "text");
4489
textMultiFieldMapping.remove("fields");
4590

46-
mapping.put("fields", Map.of("txt", textMultiFieldMapping));
91+
keywordParentMapping.put("fields", Map.of("mf", textMultiFieldMapping));
4792

48-
return mapping;
93+
return keywordParentMapping;
4994
});
5095
}
51-
}), params);
96+
}));
97+
var mapping = new MappingGenerator(specification).generate(template);
98+
var fieldMapping = mapping.lookup().get("parent");
99+
100+
var document = new DocumentGenerator(specification).generate(template, mapping);
101+
var fieldValue = document.get("parent");
102+
103+
Object expected = expected(fieldMapping, fieldValue, new BlockLoaderTestCase.TestContext(false, true));
104+
var mappingXContent = XContentBuilder.builder(XContentType.JSON.xContent()).map(mapping.raw());
105+
var mapperService = params.syntheticSource()
106+
? createSytheticSourceMapperService(mappingXContent)
107+
: createMapperService(mappingXContent);
108+
109+
runner.runTest(mapperService, document, expected, "parent.mf");
52110
}
53111

54-
@Override
55112
@SuppressWarnings("unchecked")
56-
protected Object expected(Map<String, Object> fieldMapping, Object value, TestContext testContext) {
113+
private Object expected(Map<String, Object> fieldMapping, Object value, BlockLoaderTestCase.TestContext testContext) {
57114
assert fieldMapping.containsKey("fields");
58115

59116
Object normalizer = fieldMapping.get("normalizer");
@@ -66,12 +123,7 @@ protected Object expected(Map<String, Object> fieldMapping, Object value, TestCo
66123
}
67124

68125
// we are using block loader of the text field itself
69-
var textFieldMapping = (Map<String, Object>) ((Map<String, Object>) fieldMapping.get("fields")).get("txt");
126+
var textFieldMapping = (Map<String, Object>) ((Map<String, Object>) fieldMapping.get("fields")).get("mf");
70127
return TextFieldBlockLoaderTests.expectedValue(textFieldMapping, value, params, testContext);
71128
}
72-
73-
@Override
74-
protected String blockLoaderFieldName(String originalName) {
75-
return originalName + ".txt";
76-
}
77129
}

test/framework/src/main/java/org/elasticsearch/datageneration/MappingGenerator.java

+1
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ private void generateMapping(
104104
var mappingParametersGenerator = specification.dataSource()
105105
.get(
106106
new DataSourceRequest.LeafMappingParametersGenerator(
107+
specification.dataSource(),
107108
fieldName,
108109
leaf.type(),
109110
context.eligibleCopyToDestinations(),

test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DataSourceRequest.java

+1
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ public DataSourceResponse.ObjectArrayGenerator accept(DataSourceHandler handler)
199199
}
200200

201201
record LeafMappingParametersGenerator(
202+
DataSource dataSource,
202203
String fieldName,
203204
String fieldType,
204205
Set<String> eligibleCopyToFields,

0 commit comments

Comments
 (0)