diff --git a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureUtils.java b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureUtils.java index 7a35721d59142..6708973103dc2 100644 --- a/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureUtils.java +++ b/x-pack/plugin/text-structure/src/main/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureUtils.java @@ -333,20 +333,43 @@ static Tuple, SortedMap> guessMapp .filter(Objects::nonNull) .collect(Collectors.toList()); - Tuple, FieldStats> mappingAndFieldStats = guessMappingAndCalculateFieldStats( - explanation, - fieldName, - fieldValues, - timeoutChecker, - ecsCompatibility, - timestampFormatOverride - ); - if (mappingAndFieldStats != null) { - if (mappingAndFieldStats.v1() != null) { - mappings.put(fieldName, mappingAndFieldStats.v1()); - } - if (mappingAndFieldStats.v2() != null) { - fieldStats.put(fieldName, mappingAndFieldStats.v2()); + // Process nested fields dynamically (recursively if needed) + if (isNestedField(fieldValues)) { + // Recursively process nested fields + List> nestedFieldValues = extractNestedFieldValues(sampleRecords, fieldName); + + Tuple, SortedMap> nestedResult = guessMappingsAndCalculateFieldStats( + explanation, + nestedFieldValues, + timeoutChecker, + ecsCompatibility, + timestampFormatOverride + ); + // Create a nested mapping for the parent field and merge the nested field mappings + mappings.put(fieldName, createNestedMapping(nestedResult.v1(), determineNestedFieldType(nestedFieldValues))); // Apply + // type: + // nested for + // the parent + // field + fieldStats.putAll(nestedResult.v2()); + + } else { + // For non-nested fields, process them normally + Tuple, FieldStats> mappingAndFieldStats = guessMappingAndCalculateFieldStats( + explanation, + fieldName, + fieldValues, + timeoutChecker, + ecsCompatibility, + timestampFormatOverride + ); + if (mappingAndFieldStats != null) { + if (mappingAndFieldStats.v1() != null) { + mappings.put(fieldName, mappingAndFieldStats.v1()); + } + if (mappingAndFieldStats.v2() != null) { + fieldStats.put(fieldName, mappingAndFieldStats.v2()); + } } } } @@ -354,6 +377,57 @@ static Tuple, SortedMap> guessMapp return new Tuple<>(mappings, fieldStats); } + /** + * Extracts the nested field values for a given field from a list of sample records. + * + * @param sampleRecords The list of records, where each record is a map containing field names as keys. + * @param fieldName The name of the field whose values are to be extracted from each record. + * @return A list of Maps representing the nested field values for the specified field. + */ + private static List> extractNestedFieldValues(List> sampleRecords, String fieldName) { + @SuppressWarnings("unchecked") + List> extractedFieldValue = sampleRecords.stream() + .map(record -> record.get(fieldName)) + .filter(Objects::nonNull) + .filter(val -> val instanceof Map) + .map(val -> (Map) val) + .collect(Collectors.toList()); + return extractedFieldValue; + } + + /** + * Creates a mapping for a nested field, either as a "nested" or "object" field type. + * + * @param nestedFieldMappings A map containing the field mappings for the nested fields. + * @param nestingType The type of the nested field (either "nested" or "object"). + * @return A map representing the field mapping for the nested field with the specified type. + */ + private static Object createNestedMapping(SortedMap nestedFieldMappings, String nestingType) { + SortedMap nestedMapping = new TreeMap<>(); + nestedMapping.put(MAPPING_TYPE_SETTING, nestingType); + nestedMapping.put(MAPPING_PROPERTIES_SETTING, nestedFieldMappings); + return nestedMapping; + } + + /** + * @param fieldValues value of a field in the sample records + * @return boolean for whether the field is nested (i.e., Map or List of Maps) + */ + static boolean isNestedField(List fieldValues) { + return fieldValues.stream().anyMatch(val -> val instanceof Map || val instanceof List); + } + + /** + * @param nestedFieldValues value of a nested field in the sample records + * @return "nested" or "object" based on the data structure + * @TODO: If the field values contain a List of Maps and need to be queried independently, set as "nested" + * If not, treat it as a regular "object" + */ + static String determineNestedFieldType(List> nestedFieldValues) { + // just supports object for now + return "object"; + } + /** * Given the sampled records, guess appropriate Elasticsearch mappings. * @param explanation List of reasons for choosing the overall text structure. This list diff --git a/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/NdJsonTextStructureFinderTests.java b/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/NdJsonTextStructureFinderTests.java index 222ca3f2794b5..f32f052940abe 100644 --- a/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/NdJsonTextStructureFinderTests.java +++ b/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/NdJsonTextStructureFinderTests.java @@ -9,12 +9,18 @@ import org.elasticsearch.xpack.core.textstructure.structurefinder.TextStructure; import java.util.Collections; +import java.util.Map; public class NdJsonTextStructureFinderTests extends TextStructureTestCase { private final TextStructureFinderFactory factory = new NdJsonTextStructureFinderFactory(); public void testCreateConfigsGivenGoodJson() throws Exception { + testCreateConfigsGivenFlatJson(); + testCreateConfigsGivenNestedJson(); + } + + public void testCreateConfigsGivenFlatJson() throws Exception { assertTrue(factory.canCreateFromSample(explanation, NDJSON_SAMPLE, 0.0)); String charset = randomFrom(POSSIBLE_CHARSETS); @@ -49,4 +55,49 @@ public void testCreateConfigsGivenGoodJson() throws Exception { assertEquals(Collections.singletonList("UNIX_MS"), structure.getJodaTimestampFormats()); assertEquals(Collections.singleton("properties"), structure.getMappings().keySet()); } + + public void testCreateConfigsGivenNestedJson() throws Exception { + assertTrue(factory.canCreateFromSample(explanation, NESTED_NDJSON_SAMPLE, 0.0)); + + String charset = randomFrom(POSSIBLE_CHARSETS); + Boolean hasByteOrderMarker = randomHasByteOrderMarker(charset); + TextStructureFinder structureFinder = factory.createFromSample( + explanation, + NESTED_NDJSON_SAMPLE, + charset, + hasByteOrderMarker, + TextStructureFinderManager.DEFAULT_LINE_MERGE_SIZE_LIMIT, + TextStructureOverrides.EMPTY_OVERRIDES, + NOOP_TIMEOUT_CHECKER + ); + + TextStructure structure = structureFinder.getStructure(); + assertEquals(TextStructure.Format.NDJSON, structure.getFormat()); + assertEquals(charset, structure.getCharset()); + + if (hasByteOrderMarker == null) { + assertNull(structure.getHasByteOrderMarker()); + } else { + assertEquals(hasByteOrderMarker, structure.getHasByteOrderMarker()); + } + + assertEquals("timestamp", structure.getTimestampField()); + assertEquals(Collections.singletonList("UNIX_MS"), structure.getJodaTimestampFormats()); + assertEquals(1, structure.getMappings().size()); + + // Verify that the 'host' field is nested + @SuppressWarnings("unchecked") + Map props = (Map) structure.getMappings().get("properties"); + @SuppressWarnings("unchecked") + Map hostMapping = (Map) props.get("host"); + assertNotNull("Host should be a nested field", hostMapping); + assertEquals("object", hostMapping.get("type")); + + // Verify 'host' properties + @SuppressWarnings("unchecked") + Map hostProperties = (Map) hostMapping.get("properties"); + assertTrue("Host should have 'id' property", hostProperties.containsKey("id")); + assertTrue("Host should have 'category' property", hostProperties.containsKey("category")); + } + } diff --git a/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureTestCase.java b/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureTestCase.java index b942dcc46cd4a..b373f8f7962a9 100644 --- a/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureTestCase.java +++ b/x-pack/plugin/text-structure/src/test/java/org/elasticsearch/xpack/textstructure/structurefinder/TextStructureTestCase.java @@ -40,6 +40,11 @@ public abstract class TextStructureTestCase extends ESTestCase { "class":"ml","method":"core::SomeNoiseMaker","file":"Noisemaker.cc","line":333} """; + protected static final String NESTED_NDJSON_SAMPLE = """ + {"host": {"id": 1, "category": "NETWORKING DEVICE"}, "timestamp": "1478261151445"} + {"host": {"id": 2, "category": "COMPUTE NODE"}, "timestamp": "1478261151445"} + """; + protected static final String PIPE_DELIMITED_SAMPLE = """ 2018-01-06 16:56:14.295748|INFO |VirtualServer |1 |listening on 0.0.0.0:9987, :::9987 2018-01-06 17:19:44.465252|INFO |VirtualServer |1 |client 'User1'(id:2) changed default admin channelgroup to 'Guest'(id:8)