From 9fd42d2a4b3d4ee957a469ee4774fa9f3d87d53e Mon Sep 17 00:00:00 2001 From: Mehran Ali Banka Date: Fri, 18 Jul 2025 09:46:35 -0400 Subject: [PATCH 1/3] Changes for text structure finder to detect nested ndjson mappings --- temurin.deb | 0 .../structurefinder/TextStructureUtils.java | 91 ++++++++++++++++--- .../NdJsonTextStructureFinderTests.java | 54 +++++++++++ .../TextStructureTestCase.java | 6 ++ 4 files changed, 137 insertions(+), 14 deletions(-) create mode 100644 temurin.deb diff --git a/temurin.deb b/temurin.deb new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureUtils.java b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureUtils.java index 7a35721d59142..a11fe7c8dc6a9 100644 --- a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureUtils.java +++ b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureUtils.java @@ -333,20 +333,31 @@ static Tuple, SortedMap> guessMapp .filter(Objects::nonNull) .collect(Collectors.toList()); - Tuple, FieldStats> mappingAndFieldStats = guessMappingAndCalculateFieldStats( - explanation, - fieldName, - fieldValues, - timeoutChecker, - ecsCompatibility, - timestampFormatOverride - ); - if (mappingAndFieldStats != null) { - if (mappingAndFieldStats.v1() != null) { - mappings.put(fieldName, mappingAndFieldStats.v1()); - } - if (mappingAndFieldStats.v2() != null) { - fieldStats.put(fieldName, mappingAndFieldStats.v2()); + // Process nested fields dynamically (recursively if needed) + if (isNestedField(fieldValues)) { + // Recursively process nested fields + List> nestedFieldValues = extractNestedFieldValues(sampleRecords, fieldName); + + Tuple, SortedMap> nestedResult = guessMappingsAndCalculateFieldStats( + explanation, nestedFieldValues, timeoutChecker, ecsCompatibility, timestampFormatOverride + ); + // Create a nested mapping for the parent field and merge the nested field mappings + mappings.put(fieldName, createNestedMapping(nestedResult.v1(), determineNestedFieldType(nestedFieldValues))); // Apply type: nested for the parent field + fieldStats.putAll(nestedResult.v2()); + + } + else { + // For non-nested fields, process them normally + Tuple, FieldStats> mappingAndFieldStats = guessMappingAndCalculateFieldStats( + explanation, fieldName, fieldValues, timeoutChecker, ecsCompatibility, timestampFormatOverride + ); + if (mappingAndFieldStats != null) { + if (mappingAndFieldStats.v1() != null) { + mappings.put(fieldName, mappingAndFieldStats.v1()); + } + if (mappingAndFieldStats.v2() != null) { + fieldStats.put(fieldName, mappingAndFieldStats.v2()); + } } } } @@ -354,6 +365,58 @@ static Tuple, SortedMap> guessMapp return new Tuple<>(mappings, fieldStats); } + /** + * Extracts the nested field values for a given field from a list of sample records. + * + * @param sampleRecords The list of records, where each record is a map containing field names as keys. + * @param fieldName The name of the field whose values are to be extracted from each record. + * @return A list of Maps representing the nested field values for the specified field. + */ + private static List> extractNestedFieldValues(List> sampleRecords, String fieldName) { + @SuppressWarnings("unchecked") + List> extractedFieldValue = sampleRecords.stream() + .map(record -> record.get(fieldName)) + .filter(Objects::nonNull) + .filter(val -> val instanceof Map) + .map(val -> (Map) val) + .collect(Collectors.toList()); + return extractedFieldValue; + } + + /** + * Creates a mapping for a nested field, either as a "nested" or "object" field type. + * + * @param nestedFieldMappings A map containing the field mappings for the nested fields. + * @param nestingType The type of the nested field (either "nested" or "object"). + * @return A map representing the field mapping for the nested field with the specified type. + */ + private static Object createNestedMapping(SortedMap nestedFieldMappings, String nestingType) { + SortedMap nestedMapping = new TreeMap<>(); + nestedMapping.put(MAPPING_TYPE_SETTING, nestingType); + nestedMapping.put(MAPPING_PROPERTIES_SETTING, nestedFieldMappings); + return nestedMapping; + } + + /** + * @param fieldValues value of a field in the sample records + * @return boolean for whether the field is nested (i.e., Map or List of Maps) + */ + static boolean isNestedField(List fieldValues) { + return fieldValues.stream().anyMatch(val -> val instanceof Map || val instanceof List); + } + + + /** + * @param nestedFieldValues value of a nested field in the sample records + * @return "nested" or "object" based on the data structure + * @TODO: If the field values contain a List of Maps and need to be queried independently, set as "nested" + * If not, treat it as a regular "object" + */ + static String determineNestedFieldType(List> nestedFieldValues) { + // just supports object for now + return "object"; + } + /** * Given the sampled records, guess appropriate Elasticsearch mappings. * @param explanation List of reasons for choosing the overall text structure. This list diff --git a/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/NdJsonTextStructureFinderTests.java b/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/NdJsonTextStructureFinderTests.java index 222ca3f2794b5..dc727ab38b256 100644 --- a/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/NdJsonTextStructureFinderTests.java +++ b/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/NdJsonTextStructureFinderTests.java @@ -9,12 +9,20 @@ import org.elasticsearch.xpack.core.textstructure.structurefinder.TextStructure; import java.util.Collections; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; public class NdJsonTextStructureFinderTests extends TextStructureTestCase { private final TextStructureFinderFactory factory = new NdJsonTextStructureFinderFactory(); public void testCreateConfigsGivenGoodJson() throws Exception { + testCreateConfigsGivenFlatJson(); + testCreateConfigsGivenNestedJson(); + } + + public void testCreateConfigsGivenFlatJson() throws Exception { assertTrue(factory.canCreateFromSample(explanation, NDJSON_SAMPLE, 0.0)); String charset = randomFrom(POSSIBLE_CHARSETS); @@ -49,4 +57,50 @@ public void testCreateConfigsGivenGoodJson() throws Exception { assertEquals(Collections.singletonList("UNIX_MS"), structure.getJodaTimestampFormats()); assertEquals(Collections.singleton("properties"), structure.getMappings().keySet()); } + + + public void testCreateConfigsGivenNestedJson() throws Exception { + assertTrue(factory.canCreateFromSample(explanation, NESTED_NDJSON_SAMPLE, 0.0)); + + String charset = randomFrom(POSSIBLE_CHARSETS); + Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset); + TextStructureFinder structureFinder = factory.createFromSample( + explanation, + NESTED_NDJSON_SAMPLE, + charset, + hasByteOrderMarker, + TextStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, + TextStructureOverrides.EMPTY_OVERRIDES, + NOOP_TIMEOUT_CHECKER + ); + + TextStructure structure = structureFinder.getStructure(); + assertEquals(TextStructure.Format.NDJSON, structure.getFormat()); + assertEquals(charset, structure.getCharset()); + + if (hasByteOrderMarker == null) { + assertNull(structure.getHasByteOrderMarker()); + } else { + assertEquals(hasByteOrderMarker, structure.getHasByteOrderMarker()); + } + + assertEquals("timestamp", structure.getTimestampField()); + assertEquals(Collections.singletonList("UNIX_MS"), structure.getJodaTimestampFormats()); + assertEquals(1, structure.getMappings().size()); + + // Verify that the 'host' field is nested + @SuppressWarnings("unchecked") + Map props = (Map) structure.getMappings().get("properties"); + @SuppressWarnings("unchecked") + Map hostMapping = (Map) props.get("host"); + assertNotNull("Host should be a nested field", hostMapping); + assertEquals("object", hostMapping.get("type")); + + // Verify 'host' properties + @SuppressWarnings("unchecked") + Map hostProperties = (Map) hostMapping.get("properties"); + assertTrue("Host should have 'id' property", hostProperties.containsKey("id")); + assertTrue("Host should have 'category' property", hostProperties.containsKey("category")); + } + } diff --git a/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureTestCase.java b/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureTestCase.java index b942dcc46cd4a..0d78ef508c703 100644 --- a/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureTestCase.java +++ b/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureTestCase.java @@ -40,6 +40,12 @@ public abstract class TextStructureTestCase extends ESTestCase { "class":"ml","method":"core::SomeNoiseMaker","file":"Noisemaker.cc","line":333} """; + protected static final String NESTED_NDJSON_SAMPLE = """ + {"host": {"id": 1, "category": "NETWORKING DEVICE"}, "timestamp": "1478261151445"} + {"host": {"id": 2, "category": "COMPUTE NODE"}, "timestamp": "1478261151445"} + """; + + protected static final String PIPE_DELIMITED_SAMPLE = """ 2018-01-06 16:56:14.295748|INFO |VirtualServer |1 |listening on 0.0.0.0:9987, :::9987 2018-01-06 17:19:44.465252|INFO |VirtualServer |1 |client 'User1'(id:2) changed default admin channelgroup to 'Guest'(id:8) From 975dff9622a4d065bd0a669cf719eff72ed683d5 Mon Sep 17 00:00:00 2001 From: Mehran Ali Banka Date: Fri, 18 Jul 2025 09:54:41 -0400 Subject: [PATCH 2/3] deleted cached file --- temurin.deb | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 temurin.deb diff --git a/temurin.deb b/temurin.deb deleted file mode 100644 index e69de29bb2d1d..0000000000000 From 53dd63345aa30cd3d0900fb49bd8108ed0659aeb Mon Sep 17 00:00:00 2001 From: elasticsearchmachine Date: Fri, 18 Jul 2025 14:24:56 +0000 Subject: [PATCH 3/3] [CI] Auto commit changes from spotless --- .../structurefinder/TextStructureUtils.java | 23 ++++++++++++++----- .../NdJsonTextStructureFinderTests.java | 3 --- .../TextStructureTestCase.java | 7 +++--- 3 files changed, 20 insertions(+), 13 deletions(-) diff --git a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureUtils.java b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureUtils.java index a11fe7c8dc6a9..6708973103dc2 100644 --- a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureUtils.java +++ b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureUtils.java @@ -339,17 +339,29 @@ static Tuple, SortedMap> guessMapp List> nestedFieldValues = extractNestedFieldValues(sampleRecords, fieldName); Tuple, SortedMap> nestedResult = guessMappingsAndCalculateFieldStats( - explanation, nestedFieldValues, timeoutChecker, ecsCompatibility, timestampFormatOverride + explanation, + nestedFieldValues, + timeoutChecker, + ecsCompatibility, + timestampFormatOverride ); // Create a nested mapping for the parent field and merge the nested field mappings - mappings.put(fieldName, createNestedMapping(nestedResult.v1(), determineNestedFieldType(nestedFieldValues))); // Apply type: nested for the parent field + mappings.put(fieldName, createNestedMapping(nestedResult.v1(), determineNestedFieldType(nestedFieldValues))); // Apply + // type: + // nested for + // the parent + // field fieldStats.putAll(nestedResult.v2()); - } - else { + } else { // For non-nested fields, process them normally Tuple, FieldStats> mappingAndFieldStats = guessMappingAndCalculateFieldStats( - explanation, fieldName, fieldValues, timeoutChecker, ecsCompatibility, timestampFormatOverride + explanation, + fieldName, + fieldValues, + timeoutChecker, + ecsCompatibility, + timestampFormatOverride ); if (mappingAndFieldStats != null) { if (mappingAndFieldStats.v1() != null) { @@ -405,7 +417,6 @@ static boolean isNestedField(List fieldValues) { return fieldValues.stream().anyMatch(val -> val instanceof Map || val instanceof List); } - /** * @param nestedFieldValues value of a nested field in the sample records * @return "nested" or "object" based on the data structure diff --git a/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/NdJsonTextStructureFinderTests.java b/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/NdJsonTextStructureFinderTests.java index dc727ab38b256..f32f052940abe 100644 --- a/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/NdJsonTextStructureFinderTests.java +++ b/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/NdJsonTextStructureFinderTests.java @@ -10,8 +10,6 @@ import java.util.Collections; import java.util.Map; -import java.util.SortedMap; -import java.util.TreeMap; public class NdJsonTextStructureFinderTests extends TextStructureTestCase { @@ -58,7 +56,6 @@ public void testCreateConfigsGivenFlatJson() throws Exception { assertEquals(Collections.singleton("properties"), structure.getMappings().keySet()); } - public void testCreateConfigsGivenNestedJson() throws Exception { assertTrue(factory.canCreateFromSample(explanation, NESTED_NDJSON_SAMPLE, 0.0)); diff --git a/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureTestCase.java b/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureTestCase.java index 0d78ef508c703..b373f8f7962a9 100644 --- a/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureTestCase.java +++ b/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureTestCase.java @@ -41,10 +41,9 @@ public abstract class TextStructureTestCase extends ESTestCase { """; protected static final String NESTED_NDJSON_SAMPLE = """ - {"host": {"id": 1, "category": "NETWORKING DEVICE"}, "timestamp": "1478261151445"} - {"host": {"id": 2, "category": "COMPUTE NODE"}, "timestamp": "1478261151445"} - """; - + {"host": {"id": 1, "category": "NETWORKING DEVICE"}, "timestamp": "1478261151445"} + {"host": {"id": 2, "category": "COMPUTE NODE"}, "timestamp": "1478261151445"} + """; protected static final String PIPE_DELIMITED_SAMPLE = """ 2018-01-06 16:56:14.295748|INFO |VirtualServer |1 |listening on 0.0.0.0:9987, :::9987