From c2b93abd63366e35140ae20b704aee3acc33afb6 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Mon, 11 Dec 2023 13:45:22 -0700 Subject: [PATCH 1/4] Speed up quantification fast paths by unswitching the loop --- .../_StringProcessing/ConsumerInterface.swift | 11 +- .../_StringProcessing/Engine/MEQuantify.swift | 186 +++++++++++++++--- .../_StringProcessing/Engine/Processor.swift | 20 +- 3 files changed, 162 insertions(+), 55 deletions(-) diff --git a/Sources/_StringProcessing/ConsumerInterface.swift b/Sources/_StringProcessing/ConsumerInterface.swift index 808a1e498..c19996d44 100644 --- a/Sources/_StringProcessing/ConsumerInterface.swift +++ b/Sources/_StringProcessing/ConsumerInterface.swift @@ -391,9 +391,8 @@ extension DSLTree.CustomCharacterClass.Member { return { input, bounds in let curIdx = bounds.lowerBound - let nextIndex = isCharacterSemantic - ? input.index(after: curIdx) - : input.unicodeScalars.index(after: curIdx) + let nextIndex = input.index( + after: curIdx, isScalarSemantics: !isCharacterSemantic) // Under grapheme semantics, we compare based on single NFC scalars. If // such a character is not single scalar under NFC, the match fails. In @@ -603,9 +602,9 @@ extension AST.Atom.CharacterProperty { if p(input, bounds) != nil { return nil } // TODO: bounds check - return opts.semanticLevel == .graphemeCluster - ? input.index(after: bounds.lowerBound) - : input.unicodeScalars.index(after: bounds.lowerBound) + return input.index( + after: bounds.lowerBound, + isScalarSemantics: opts.semanticLevel == .unicodeScalar) } } diff --git a/Sources/_StringProcessing/Engine/MEQuantify.swift b/Sources/_StringProcessing/Engine/MEQuantify.swift index a0480cde6..7e2a1097a 100644 --- a/Sources/_StringProcessing/Engine/MEQuantify.swift +++ b/Sources/_StringProcessing/Engine/MEQuantify.swift @@ -1,5 +1,49 @@ +private typealias ASCIIBitset = DSLTree.CustomCharacterClass.AsciiBitset + extension Processor { - func _doQuantifyMatch(_ payload: QuantifyPayload) -> Input.Index? { + func _doASCIIBitsetMatch( + _: AsciiBitsetRegister + ) -> Input.Index? { + fatalError() + } +} + + +extension String { + func index(after idx: Index, isScalarSemantics: Bool) -> Index { + if isScalarSemantics { + return unicodeScalars.index(after: idx) + } else { + return index(after: idx) + } + } +} + + +extension Processor { + + internal mutating func runQuantify(_ payload: QuantifyPayload) -> Bool { + let matched: Bool + switch (payload.quantKind, payload.minTrips, payload.maxExtraTrips) { + case (.reluctant, _, _): + assertionFailure(".reluctant is not supported by .quantify") + // TODO: this was pre-refactoring behavior, should we fatal error + // instead? + return false + case (.eager, 0, nil): + runEagerZeroOrMoreQuantify(payload) + return true + case (.eager, 1, nil): + return runEagerOneOrMoreQuantify(payload) + case (_, 0, 1): + runZeroOrOneQuantify(payload) + return true + default: + return runGeneralQuantify(payload) + } + } + + private func doQuantifyMatch(_ payload: QuantifyPayload) -> Input.Index? { let isScalarSemantics = payload.isScalarSemantics switch payload.type { @@ -31,10 +75,8 @@ extension Processor { guard currentPosition < end else { return nil } if payload.anyMatchesNewline { - if isScalarSemantics { - return input.unicodeScalars.index(after: currentPosition) - } - return input.index(after: currentPosition) + return input.index( + after: currentPosition, isScalarSemantics: isScalarSemantics) } return input.matchAnyNonNewline( @@ -47,14 +89,14 @@ extension Processor { /// Generic quantify instruction interpreter /// - Handles .eager and .posessive /// - Handles arbitrary minTrips and maxExtraTrips - mutating func runQuantify(_ payload: QuantifyPayload) -> Bool { + private mutating func runGeneralQuantify(_ payload: QuantifyPayload) -> Bool { assert(payload.quantKind != .reluctant) var trips = 0 var maxExtraTrips = payload.maxExtraTrips while trips < payload.minTrips { - guard let next = _doQuantifyMatch(payload) else { + guard let next = doQuantifyMatch(payload) else { signalFailure() return false } @@ -67,7 +109,7 @@ extension Processor { return true } - guard let next = _doQuantifyMatch(payload) else { + guard let next = doQuantifyMatch(payload) else { return true } maxExtraTrips = maxExtraTrips.map { $0 - 1 } @@ -81,7 +123,7 @@ extension Processor { while true { if maxExtraTrips == 0 { break } - guard let next = _doQuantifyMatch(payload) else { + guard let next = doQuantifyMatch(payload) else { break } maxExtraTrips = maxExtraTrips.map({$0 - 1}) @@ -100,67 +142,147 @@ extension Processor { } /// Specialized quantify instruction interpreter for `*`, always succeeds - mutating func runEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) { + private mutating func runEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) { assert(payload.quantKind == .eager && payload.minTrips == 0 && payload.maxExtraTrips == nil) - _doRunEagerZeroOrMoreQuantify(payload) + _ = doRunEagerZeroOrMoreQuantify(payload) } - // NOTE: So-as to inline into one-or-more call, which makes a significant - // performance difference + // Returns whether it matched at least once + // + // NOTE: inline-always so-as to inline into one-or-more call, which makes a + // significant performance difference @inline(__always) - mutating func _doRunEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) { - guard let next = _doQuantifyMatch(payload) else { - // Consumed no input, no point saved - return - } - + private mutating func doRunEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) -> Bool { // Create a quantified save point for every part of the input matched up // to the final position. + let isScalarSemantics = payload.isScalarSemantics let rangeStart = currentPosition var rangeEnd = currentPosition - currentPosition = next - while true { - guard let next = _doQuantifyMatch(payload) else { break } - rangeEnd = currentPosition - currentPosition = next + var matchedOnce = false + + switch payload.type { + case .asciiBitset: + let bitset = registers[payload.bitset] + while true { + guard let next = input.matchASCIIBitset( + bitset, + at: currentPosition, + limitedBy: end, + isScalarSemantics: isScalarSemantics) + else { + break + } + matchedOnce = true + rangeEnd = currentPosition + currentPosition = next + assert(currentPosition > rangeEnd) + } + case .asciiChar: + let asciiScalar = UnicodeScalar.init(_value: UInt32(payload.asciiChar)) + while true { + guard let next = input.matchScalar( + asciiScalar, + at: currentPosition, + limitedBy: end, + boundaryCheck: !isScalarSemantics, + isCaseInsensitive: false) + else { + break + } + matchedOnce = true + rangeEnd = currentPosition + currentPosition = next + assert(currentPosition > rangeEnd) + } + case .builtin: + let builtin = payload.builtin + let isInverted = payload.builtinIsInverted + let isStrictASCII = payload.builtinIsStrict + while true { + guard let next = input.matchBuiltinCC( + builtin, + at: currentPosition, + limitedBy: end, + isInverted: isInverted, + isStrictASCII: isStrictASCII, + isScalarSemantics: isScalarSemantics) + else { + break + } + matchedOnce = true + rangeEnd = currentPosition + currentPosition = next + assert(currentPosition > rangeEnd) + } + case .any: + while true { + guard currentPosition < end else { break } + let next: String.Index? + if payload.anyMatchesNewline { + next = input.index( + after: currentPosition, isScalarSemantics: isScalarSemantics) + } else { + next = input.matchAnyNonNewline( + at: currentPosition, + limitedBy: end, + isScalarSemantics: isScalarSemantics) + } + + guard let next else { break } + matchedOnce = true + rangeEnd = currentPosition + currentPosition = next + assert(currentPosition > rangeEnd) + } + } + + guard matchedOnce else { + // Consumed no input, no point saved + return false } - savePoints.append(makeQuantifiedSavePoint(rangeStart.. Bool { + private mutating func runEagerOneOrMoreQuantify(_ payload: QuantifyPayload) -> Bool { assert(payload.quantKind == .eager && payload.minTrips == 1 && payload.maxExtraTrips == nil) // Match at least once - guard let next = _doQuantifyMatch(payload) else { + guard let next = doQuantifyMatch(payload) else { signalFailure() return false } // Run `a+` as `aa*` currentPosition = next - _doRunEagerZeroOrMoreQuantify(payload) + doRunEagerZeroOrMoreQuantify(payload) return true } /// Specialized quantify instruction interpreter for ? - mutating func runZeroOrOneQuantify(_ payload: QuantifyPayload) -> Bool { + private mutating func runZeroOrOneQuantify(_ payload: QuantifyPayload) { assert(payload.minTrips == 0 && payload.maxExtraTrips == 1) - let next = _doQuantifyMatch(payload) + let next = doQuantifyMatch(payload) guard let idx = next else { - return true // matched zero times + return // matched zero times } if payload.quantKind != .possessive { // Save the zero match savePoints.append(makeSavePoint(resumingAt: currentPC+1)) } currentPosition = idx - return true + return } } diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index 86365322b..eccbcff64 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -515,26 +515,12 @@ extension Processor { controller.step() } case .quantify: - let quantPayload = payload.quantify - let matched: Bool - switch (quantPayload.quantKind, quantPayload.minTrips, quantPayload.maxExtraTrips) { - case (.reluctant, _, _): - assertionFailure(".reluctant is not supported by .quantify") - return - case (.eager, 0, nil): - runEagerZeroOrMoreQuantify(quantPayload) - matched = true - case (.eager, 1, nil): - matched = runEagerOneOrMoreQuantify(quantPayload) - case (_, 0, 1): - matched = runZeroOrOneQuantify(quantPayload) - default: - matched = runQuantify(quantPayload) - } - if matched { + if runQuantify(payload.quantify) { controller.step() } + + case .consumeBy: let reg = payload.consumer let consumer = registers[reg] From b48f09e1130f9315695e1f177c6a2f8751503ead Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Mon, 11 Dec 2023 16:09:19 -0700 Subject: [PATCH 2/4] squash me: cleanup --- .../_StringProcessing/Engine/MEBuiltins.swift | 19 +++++++ .../_StringProcessing/Engine/MEQuantify.swift | 53 ++++--------------- Sources/_StringProcessing/Utility/Misc.swift | 12 +++++ 3 files changed, 41 insertions(+), 43 deletions(-) diff --git a/Sources/_StringProcessing/Engine/MEBuiltins.swift b/Sources/_StringProcessing/Engine/MEBuiltins.swift index 0dafd6720..33b13178b 100644 --- a/Sources/_StringProcessing/Engine/MEBuiltins.swift +++ b/Sources/_StringProcessing/Engine/MEBuiltins.swift @@ -223,6 +223,25 @@ extension String { else { return nil } return next } + + internal func matchRegexDot( + at currentPosition: Index, + limitedBy end: Index, + anyMatchesNewline: Bool, + isScalarSemantics: Bool + ) -> Index? { + guard currentPosition < end else { return nil } + + if anyMatchesNewline { + return index( + after: currentPosition, isScalarSemantics: isScalarSemantics) + } + + return matchAnyNonNewline( + at: currentPosition, + limitedBy: end, + isScalarSemantics: isScalarSemantics) + } } // MARK: - Built-in character class matching diff --git a/Sources/_StringProcessing/Engine/MEQuantify.swift b/Sources/_StringProcessing/Engine/MEQuantify.swift index 7e2a1097a..dfab9e17b 100644 --- a/Sources/_StringProcessing/Engine/MEQuantify.swift +++ b/Sources/_StringProcessing/Engine/MEQuantify.swift @@ -1,27 +1,6 @@ private typealias ASCIIBitset = DSLTree.CustomCharacterClass.AsciiBitset extension Processor { - func _doASCIIBitsetMatch( - _: AsciiBitsetRegister - ) -> Input.Index? { - fatalError() - } -} - - -extension String { - func index(after idx: Index, isScalarSemantics: Bool) -> Index { - if isScalarSemantics { - return unicodeScalars.index(after: idx) - } else { - return index(after: idx) - } - } -} - - -extension Processor { - internal mutating func runQuantify(_ payload: QuantifyPayload) -> Bool { let matched: Bool switch (payload.quantKind, payload.minTrips, payload.maxExtraTrips) { @@ -61,8 +40,6 @@ extension Processor { boundaryCheck: !isScalarSemantics, isCaseInsensitive: false) case .builtin: - guard currentPosition < end else { return nil } - // We only emit .quantify if it consumes a single character return input.matchBuiltinCC( payload.builtin, @@ -72,16 +49,10 @@ extension Processor { isStrictASCII: payload.builtinIsStrict, isScalarSemantics: isScalarSemantics) case .any: - guard currentPosition < end else { return nil } - - if payload.anyMatchesNewline { - return input.index( - after: currentPosition, isScalarSemantics: isScalarSemantics) - } - - return input.matchAnyNonNewline( + return input.matchRegexDot( at: currentPosition, limitedBy: end, + anyMatchesNewline: payload.anyMatchesNewline, isScalarSemantics: isScalarSemantics) } } @@ -217,20 +188,16 @@ extension Processor { assert(currentPosition > rangeEnd) } case .any: + let anyMatchesNewline = payload.anyMatchesNewline while true { - guard currentPosition < end else { break } - let next: String.Index? - if payload.anyMatchesNewline { - next = input.index( - after: currentPosition, isScalarSemantics: isScalarSemantics) - } else { - next = input.matchAnyNonNewline( - at: currentPosition, - limitedBy: end, - isScalarSemantics: isScalarSemantics) + guard let next = input.matchRegexDot( + at: currentPosition, + limitedBy: end, + anyMatchesNewline: anyMatchesNewline, + isScalarSemantics: isScalarSemantics) + else { + break } - - guard let next else { break } matchedOnce = true rangeEnd = currentPosition currentPosition = next diff --git a/Sources/_StringProcessing/Utility/Misc.swift b/Sources/_StringProcessing/Utility/Misc.swift index 8555ec85c..d63370b55 100644 --- a/Sources/_StringProcessing/Utility/Misc.swift +++ b/Sources/_StringProcessing/Utility/Misc.swift @@ -65,3 +65,15 @@ enum QuickResult { case unknown } +extension String { + /// Index after in either grapheme or scalar view + func index(after idx: Index, isScalarSemantics: Bool) -> Index { + if isScalarSemantics { + return unicodeScalars.index(after: idx) + } else { + return index(after: idx) + } + } +} + + From efd4a1dee774dd90462cbec72ffaa93a54962f97 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Mon, 11 Dec 2023 16:12:35 -0700 Subject: [PATCH 3/4] comments --- Sources/_StringProcessing/Engine/MEQuantify.swift | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Sources/_StringProcessing/Engine/MEQuantify.swift b/Sources/_StringProcessing/Engine/MEQuantify.swift index dfab9e17b..2dcdc2ecb 100644 --- a/Sources/_StringProcessing/Engine/MEQuantify.swift +++ b/Sources/_StringProcessing/Engine/MEQuantify.swift @@ -226,6 +226,11 @@ extension Processor { && payload.maxExtraTrips == nil) // Match at least once + // + // NOTE: Due to newline-sequence in scalar-semantic mode advancing two + // positions, we can't just have doRunEagerZeroOrMoreQuantify return the + // range-end and advance the range-start ourselves. Instead, we do one + // call before looping. guard let next = doQuantifyMatch(payload) else { signalFailure() return false From 1fd7391b6a7a91ca15a8a16900248a2d1d090c98 Mon Sep 17 00:00:00 2001 From: Michael Ilseman Date: Mon, 11 Dec 2023 17:38:05 -0700 Subject: [PATCH 4/4] Refactor off of mutating methods Refactors mutating methods into string methods for easier unit testing and parity-checking via assertions. Prepares for more efficient implementations. Doing so creates many regressions, unfortunately, so this should only be done in conjunction with more refactorings and improvements. --- .../_StringProcessing/Engine/MEQuantify.swift | 253 ++++++++++++++---- .../_StringProcessing/Engine/Processor.swift | 2 - 2 files changed, 198 insertions(+), 57 deletions(-) diff --git a/Sources/_StringProcessing/Engine/MEQuantify.swift b/Sources/_StringProcessing/Engine/MEQuantify.swift index 2dcdc2ecb..09702f7b4 100644 --- a/Sources/_StringProcessing/Engine/MEQuantify.swift +++ b/Sources/_StringProcessing/Engine/MEQuantify.swift @@ -1,8 +1,20 @@ private typealias ASCIIBitset = DSLTree.CustomCharacterClass.AsciiBitset extension Processor { + private func maybeASCIIBitset( + _ payload: QuantifyPayload + ) -> ASCIIBitset? { + guard payload.type == .asciiBitset else { return nil } + return registers[payload.bitset] + } + internal mutating func runQuantify(_ payload: QuantifyPayload) -> Bool { - let matched: Bool + let asciiBitset = maybeASCIIBitset(payload) + + // TODO: Refactor below called functions to be non-mutating. + // They might need to communicate save-point info upwards in addition to + // a new (optional) currentPosition. Then, we can assert in testing that the + // specialized functions produce the same answer as `runGeneralQuantify`. switch (payload.quantKind, payload.minTrips, payload.maxExtraTrips) { case (.reluctant, _, _): assertionFailure(".reluctant is not supported by .quantify") @@ -10,30 +22,111 @@ extension Processor { // instead? return false case (.eager, 0, nil): - runEagerZeroOrMoreQuantify(payload) + let (next, savePointRange) = input.runEagerZeroOrMoreQuantify( + payload, + asciiBitset: asciiBitset, + at: currentPosition, + limitedBy: end) + assert((next, savePointRange) == input.runGeneralQuantify( + payload, + asciiBitset: asciiBitset, + at: currentPosition, + limitedBy: end)!) + if let savePointRange { + savePoints.append(makeQuantifiedSavePoint( + savePointRange, isScalarSemantics: payload.isScalarSemantics)) + } + currentPosition = next return true case (.eager, 1, nil): - return runEagerOneOrMoreQuantify(payload) + guard let (next, savePointRange) = input.runEagerOneOrMoreQuantify( + payload, + asciiBitset: asciiBitset, + at: currentPosition, + limitedBy: end + ) else { + assert(nil == input.runGeneralQuantify( + payload, + asciiBitset: asciiBitset, + at: currentPosition, + limitedBy: end)) + signalFailure() + return false + } + assert((next, savePointRange) == input.runGeneralQuantify( + payload, + asciiBitset: asciiBitset, + at: currentPosition, + limitedBy: end)!) + if let savePointRange { + savePoints.append(makeQuantifiedSavePoint( + savePointRange, isScalarSemantics: payload.isScalarSemantics)) + } + currentPosition = next + return true case (_, 0, 1): - runZeroOrOneQuantify(payload) + // FIXME: Is this correct for lazy zero-or-one? + let (next, save) = input.runZeroOrOneQuantify( + payload, + asciiBitset: asciiBitset, + at: currentPosition, + limitedBy: end) + // Also, we should assert same answer as runGeneralQuantify... + if save { + savePoints.append(makeSavePoint(resumingAt: currentPC+1)) + } + currentPosition = next return true default: - return runGeneralQuantify(payload) + guard let (next, savePointRange) = input.runGeneralQuantify( + payload, + asciiBitset: asciiBitset, + at: currentPosition, + limitedBy: end + ) else { + assert(nil == input.runGeneralQuantify( + payload, + asciiBitset: asciiBitset, + at: currentPosition, + limitedBy: end)) + signalFailure() + return false + } + assert((next, savePointRange) == input.runGeneralQuantify( + payload, + asciiBitset: asciiBitset, + at: currentPosition, + limitedBy: end)!) + if let savePointRange { + savePoints.append(makeQuantifiedSavePoint( + savePointRange, isScalarSemantics: payload.isScalarSemantics)) + } + currentPosition = next + + return true } } +} - private func doQuantifyMatch(_ payload: QuantifyPayload) -> Input.Index? { +extension String { + fileprivate func doQuantifyMatch( + _ payload: QuantifyPayload, + asciiBitset: ASCIIBitset?, // Necessary ugliness... + at currentPosition: Index, + limitedBy end: Index + ) -> Index? { let isScalarSemantics = payload.isScalarSemantics switch payload.type { case .asciiBitset: - return input.matchASCIIBitset( - registers[payload.bitset], + assert(asciiBitset != nil, "Invariant: needs to be passed in") + return matchASCIIBitset( + asciiBitset!, at: currentPosition, limitedBy: end, isScalarSemantics: isScalarSemantics) case .asciiChar: - return input.matchScalar( + return matchScalar( UnicodeScalar.init(_value: UInt32(payload.asciiChar)), at: currentPosition, limitedBy: end, @@ -41,7 +134,7 @@ extension Processor { isCaseInsensitive: false) case .builtin: // We only emit .quantify if it consumes a single character - return input.matchBuiltinCC( + return matchBuiltinCC( payload.builtin, at: currentPosition, limitedBy: end, @@ -49,7 +142,7 @@ extension Processor { isStrictASCII: payload.builtinIsStrict, isScalarSemantics: isScalarSemantics) case .any: - return input.matchRegexDot( + return matchRegexDot( at: currentPosition, limitedBy: end, anyMatchesNewline: payload.anyMatchesNewline, @@ -60,16 +153,29 @@ extension Processor { /// Generic quantify instruction interpreter /// - Handles .eager and .posessive /// - Handles arbitrary minTrips and maxExtraTrips - private mutating func runGeneralQuantify(_ payload: QuantifyPayload) -> Bool { + fileprivate func runGeneralQuantify( + _ payload: QuantifyPayload, + asciiBitset: ASCIIBitset?, + at currentPosition: Index, + limitedBy end: Index + ) -> ( + nextPosition: Index, + savePointRange: Range? + )? { assert(payload.quantKind != .reluctant) var trips = 0 var maxExtraTrips = payload.maxExtraTrips + var currentPosition = currentPosition while trips < payload.minTrips { - guard let next = doQuantifyMatch(payload) else { - signalFailure() - return false + guard let next = doQuantifyMatch( + payload, + asciiBitset: asciiBitset, + at: currentPosition, + limitedBy: end + ) else { + return nil } currentPosition = next trips += 1 @@ -77,11 +183,16 @@ extension Processor { if maxExtraTrips == 0 { // We're done - return true + return (currentPosition, nil) } - guard let next = doQuantifyMatch(payload) else { - return true + guard let next = doQuantifyMatch( + payload, + asciiBitset: asciiBitset, + at: currentPosition, + limitedBy: end + ) else { + return (currentPosition, nil) } maxExtraTrips = maxExtraTrips.map { $0 - 1 } @@ -94,7 +205,12 @@ extension Processor { while true { if maxExtraTrips == 0 { break } - guard let next = doQuantifyMatch(payload) else { + guard let next = doQuantifyMatch( + payload, + asciiBitset: asciiBitset, + at: currentPosition, + limitedBy: end + ) else { break } maxExtraTrips = maxExtraTrips.map({$0 - 1}) @@ -103,31 +219,43 @@ extension Processor { } if payload.quantKind == .eager { - savePoints.append(makeQuantifiedSavePoint( - rangeStart.. (Index, savePointRange: Range?) { assert(payload.quantKind == .eager && payload.minTrips == 0 && payload.maxExtraTrips == nil) - _ = doRunEagerZeroOrMoreQuantify(payload) + return doRunEagerZeroOrMoreQuantify( + payload, + asciiBitset: asciiBitset, + at: currentPosition, + limitedBy: end) } - // Returns whether it matched at least once - // // NOTE: inline-always so-as to inline into one-or-more call, which makes a // significant performance difference @inline(__always) - private mutating func doRunEagerZeroOrMoreQuantify(_ payload: QuantifyPayload) -> Bool { + private func doRunEagerZeroOrMoreQuantify( + _ payload: QuantifyPayload, + asciiBitset: ASCIIBitset?, // Necessary ugliness... + at currentPosition: Index, + limitedBy end: Index + ) -> (Index, savePointRange: Range?) { // Create a quantified save point for every part of the input matched up // to the final position. + var currentPosition = currentPosition let isScalarSemantics = payload.isScalarSemantics let rangeStart = currentPosition var rangeEnd = currentPosition @@ -135,10 +263,10 @@ extension Processor { switch payload.type { case .asciiBitset: - let bitset = registers[payload.bitset] while true { - guard let next = input.matchASCIIBitset( - bitset, + assert(asciiBitset != nil, "Invariant: needs to be passed in") + guard let next = matchASCIIBitset( + asciiBitset!, at: currentPosition, limitedBy: end, isScalarSemantics: isScalarSemantics) @@ -153,7 +281,7 @@ extension Processor { case .asciiChar: let asciiScalar = UnicodeScalar.init(_value: UInt32(payload.asciiChar)) while true { - guard let next = input.matchScalar( + guard let next = matchScalar( asciiScalar, at: currentPosition, limitedBy: end, @@ -172,7 +300,7 @@ extension Processor { let isInverted = payload.builtinIsInverted let isStrictASCII = payload.builtinIsStrict while true { - guard let next = input.matchBuiltinCC( + guard let next = matchBuiltinCC( builtin, at: currentPosition, limitedBy: end, @@ -190,7 +318,7 @@ extension Processor { case .any: let anyMatchesNewline = payload.anyMatchesNewline while true { - guard let next = input.matchRegexDot( + guard let next = matchRegexDot( at: currentPosition, limitedBy: end, anyMatchesNewline: anyMatchesNewline, @@ -207,20 +335,23 @@ extension Processor { guard matchedOnce else { // Consumed no input, no point saved - return false + return (currentPosition, nil) } // NOTE: We can't assert that rangeEnd trails currentPosition by one // position, because newline-sequence in scalar semantic mode still // matches two scalars - savePoints.append(makeQuantifiedSavePoint( - rangeStart.. Bool { + fileprivate func runEagerOneOrMoreQuantify( + _ payload: QuantifyPayload, + asciiBitset: ASCIIBitset?, // Necessary ugliness... + at currentPosition: Index, + limitedBy end: Index + ) -> (Index, savePointRange: Range?)? { assert(payload.quantKind == .eager && payload.minTrips == 1 && payload.maxExtraTrips == nil) @@ -231,30 +362,42 @@ extension Processor { // positions, we can't just have doRunEagerZeroOrMoreQuantify return the // range-end and advance the range-start ourselves. Instead, we do one // call before looping. - guard let next = doQuantifyMatch(payload) else { - signalFailure() - return false + guard let next = doQuantifyMatch( + payload, + asciiBitset: asciiBitset, + at: currentPosition, + limitedBy: end + ) else { + return nil } // Run `a+` as `aa*` - currentPosition = next - doRunEagerZeroOrMoreQuantify(payload) - return true + return doRunEagerZeroOrMoreQuantify( + payload, + asciiBitset: asciiBitset, + at: next, + limitedBy: end) } /// Specialized quantify instruction interpreter for ? - private mutating func runZeroOrOneQuantify(_ payload: QuantifyPayload) { + fileprivate func runZeroOrOneQuantify( + _ payload: QuantifyPayload, + asciiBitset: ASCIIBitset?, // Necessary ugliness... + at currentPosition: Index, + limitedBy end: Index + ) -> (Index, makeSavePoint: Bool) { assert(payload.minTrips == 0 && payload.maxExtraTrips == 1) - let next = doQuantifyMatch(payload) - guard let idx = next else { - return // matched zero times - } - if payload.quantKind != .possessive { - // Save the zero match - savePoints.append(makeSavePoint(resumingAt: currentPC+1)) + guard let next = doQuantifyMatch( + payload, + asciiBitset: asciiBitset, + at: currentPosition, + limitedBy: end + ) else { + return (currentPosition, false) } - currentPosition = idx - return - } + return (next, payload.quantKind != .possessive) + } } + + diff --git a/Sources/_StringProcessing/Engine/Processor.swift b/Sources/_StringProcessing/Engine/Processor.swift index eccbcff64..310b5d932 100644 --- a/Sources/_StringProcessing/Engine/Processor.swift +++ b/Sources/_StringProcessing/Engine/Processor.swift @@ -519,8 +519,6 @@ extension Processor { controller.step() } - - case .consumeBy: let reg = payload.consumer let consumer = registers[reg]