Skip to content

Commit 938dc65

Browse files
peteralfonsi (Peter Alfonsi)
authored and committed
Propagate TooComplexToDeterminizeException in query_string regex queries (opensearch-project#18883)
Signed-off-by: Peter Alfonsi <[email protected]>
Co-authored-by: Peter Alfonsi <[email protected]>
1 parent de35b91 commit 938dc65

File tree

3 files changed: 13 additions and 3 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,6 +97,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
9797
- Ignore awareness attributes when a custom preference string is included with a search request ([#18848](https://github.com/opensearch-project/OpenSearch/pull/18848))
9898
- Use ScoreDoc instead of FieldDoc when creating TopScoreDocCollectorManager to avoid unnecessary conversion ([#18802](https://github.com/opensearch-project/OpenSearch/pull/18802))
9999
- Fix leafSorter optimization for ReadOnlyEngine and NRTReplicationEngine ([#18639](https://github.com/opensearch-project/OpenSearch/pull/18639))
100+
- Fix query string regex queries incorrectly swallowing TooComplexToDeterminizeException ([#18883](https://github.com/opensearch-project/OpenSearch/pull/18883))
100101

101102
### Security
102103

server/src/main/java/org/opensearch/index/search/QueryStringQueryParser.java

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,7 @@
5757
import org.apache.lucene.search.WildcardQuery;
5858
import org.apache.lucene.util.BytesRef;
5959
import org.apache.lucene.util.automaton.RegExp;
60+
import org.apache.lucene.util.automaton.TooComplexToDeterminizeException;
6061
import org.opensearch.common.lucene.search.Queries;
6162
import org.opensearch.common.regex.Regex;
6263
import org.opensearch.common.unit.Fuzziness;
@@ -795,7 +796,9 @@ private Query getRegexpQuerySingle(String field, String termStr) throws ParseExc
795796
termStr = getAnalyzer().normalize(currentFieldType.name(), termStr).utf8ToString();
796797
return currentFieldType.regexpQuery(termStr, RegExp.ALL, 0, getDeterminizeWorkLimit(), getMultiTermRewriteMethod(), context);
797798
} catch (RuntimeException e) {
798-
if (lenient) {
799+
// Lenient queries are intended for data type mismatches, but TooComplexToDeterminizeException
800+
// comes up from the same place in the code. Don't create a lenient query in this case.
801+
if (lenient && !(e instanceof TooComplexToDeterminizeException)) {
799802
return newLenientFieldQuery(field, e);
800803
}
801804
throw e;

server/src/test/java/org/opensearch/index/query/QueryStringQueryBuilderTests.java

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -803,8 +803,14 @@ public void testToQueryRegExpQueryTooComplex() throws Exception {
803803
TooComplexToDeterminizeException.class,
804804
() -> queryBuilder.toQuery(createShardContext())
805805
);
806-
assertThat(e.getMessage(), containsString("Determinizing automaton"));
807-
assertThat(e.getMessage(), containsString("would require more than 10000 effort"));
806+
assertTrue(e.getMessage().contains("Determinizing automaton"));
807+
assertTrue(e.getMessage().contains("would require more than 10000 effort"));
808+
809+
// TooComplexToDeterminizeException should be thrown even if lenient is true
810+
QueryStringQueryBuilder lenientQueryBuilder = queryStringQuery("/[ac]*a[ac]{50,200}/").defaultField(TEXT_FIELD_NAME).lenient(true);
811+
e = expectThrows(TooComplexToDeterminizeException.class, () -> lenientQueryBuilder.toQuery(createShardContext()));
812+
assertTrue(e.getMessage().contains("Determinizing automaton"));
813+
assertTrue(e.getMessage().contains("would require more than 10000 effort"));
808814
}
809815

810816
/**

0 commit comments

Comments
 (0)