diff --git a/CHANGELOG.md b/CHANGELOG.md index 02c45d0e36057..62a66f6ef548f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -89,6 +89,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Use `new SecureRandom()` to avoid blocking ([18729](https://github.com/opensearch-project/OpenSearch/issues/18729)) - Use ScoreDoc instead of FieldDoc when creating TopScoreDocCollectorManager to avoid unnecessary conversion ([#18802](https://github.com/opensearch-project/OpenSearch/pull/18802)) - Fix leafSorter optimization for ReadOnlyEngine and NRTReplicationEngine ([#18639](https://github.com/opensearch-project/OpenSearch/pull/18639)) +- Fix query string regex queries incorrectly swallowing TooComplexToDeterminizeException ([#18883](https://github.com/opensearch-project/OpenSearch/pull/18883)) ### Security diff --git a/server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java b/server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java index 7bfb3475d6744..afa3acc28fc54 100644 --- a/server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java +++ b/server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java @@ -57,6 +57,7 @@ import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.automaton.RegExp; +import org.apache.lucene.util.automaton.TooComplexToDeterminizeException; import org.opensearch.common.lucene.search.Queries; import org.opensearch.common.regex.Regex; import org.opensearch.common.unit.Fuzziness; @@ -795,7 +796,9 @@ private Query getRegexpQuerySingle(String field, String termStr) throws ParseExc termStr = getAnalyzer().normalize(currentFieldType.name(), termStr).utf8ToString(); return currentFieldType.regexpQuery(termStr, RegExp.ALL, 0, getDeterminizeWorkLimit(), getMultiTermRewriteMethod(), context); } catch (RuntimeException e) { - if (lenient) { + // Lenient queries are intended for data type mismatches, but TooComplexToDeterminizeException + // comes up from the same place in the code. Don't create a lenient query in this case. + if (lenient && !(e instanceof TooComplexToDeterminizeException)) { return newLenientFieldQuery(field, e); } throw e; diff --git a/server/src/test/java/org/opensearch/index/query/QueryStringQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/QueryStringQueryBuilderTests.java index 267567f0c3c86..ea31d2680d4ec 100644 --- a/server/src/test/java/org/opensearch/index/query/QueryStringQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/QueryStringQueryBuilderTests.java @@ -803,8 +803,14 @@ public void testToQueryRegExpQueryTooComplex() throws Exception { TooComplexToDeterminizeException.class, () -> queryBuilder.toQuery(createShardContext()) ); - assertThat(e.getMessage(), containsString("Determinizing automaton")); - assertThat(e.getMessage(), containsString("would require more than 10000 effort")); + assertTrue(e.getMessage().contains("Determinizing automaton")); + assertTrue(e.getMessage().contains("would require more than 10000 effort")); + + // TooComplexToDeterminizeException should be thrown even if lenient is true + QueryStringQueryBuilder lenientQueryBuilder = queryStringQuery("/[ac]*a[ac]{50,200}/").defaultField(TEXT_FIELD_NAME).lenient(true); + e = expectThrows(TooComplexToDeterminizeException.class, () -> lenientQueryBuilder.toQuery(createShardContext())); + assertTrue(e.getMessage().contains("Determinizing automaton")); + assertTrue(e.getMessage().contains("would require more than 10000 effort")); } /**