Skip to content

Commit 1777121

Browse files
authored
✨ add key-aware decoding to the query string parser (#14)
* ✨ add DecodeKind enum to distinguish decoding context for keys and values * 🐛 protect encoded dots in key decoding to prevent premature conversion to '.' and ensure correct parsing * 🐛 handle lowercase '%2e' in key decoding and improve bracketed key parsing for accurate dot conversion * ✅ add comprehensive tests for encoded dot handling in keys with allowDots and decodeDotInKeys options * 🗑️ deprecate getDecoder in favor of context-aware decode methods for value decoding * 💡 update Decoder interface documentation to use code formatting for parameter names * 🚸 add LegacyDecoder typealias and deprecate legacy decoder support in DecodeOptions for backward compatibility * 💡 update deprecation annotation for indices option in EncodeOptions with message, replacement, and level * 🐛 fix key segment handling for depth 0 to preserve original key with encoded dots * 🐛 optimize protectEncodedDotsForKeys to skip processing when no encoded dots are present; update deprecation message for getDecoder to clarify removal timeline * 🐛 replace regex-based dot-to-bracket conversion with top-level parser to correctly handle encoded dots in key segments * ✅ add tests for key coercion and depth=0 behavior with allowDots in decode * ✅ update decoder tests to handle DecodeKind for selective key/value decoding * 🎨 remove explicit Decoder type annotations in custom decoder test cases for improved readability * ✅ add tests for defaultDecode to verify encoded dot handling in keys with allowDots and decodeDotInKeys options * 💡 clarify deprecation message for legacy decoder adapter and document bracket handling in protectEncodedDotsForKeys * 🎨 reformat deprecation and documentation comments for improved readability in DecodeOptions * 🐛 fix allowDots logic to ensure decodeDotInKeys requires allowDots not explicitly false * 🎨 rename local variable for custom decoder in encoding test for clarity * ✅ add tests for dot-to-bracket conversion guardrails in decode with allowDots option
1 parent 44e14d3 commit 1777121

File tree

8 files changed

+426
-55
lines changed

8 files changed

+426
-55
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
package io.github.techouse.qskotlin.enums
2+
3+
import io.github.techouse.qskotlin.enums.DecodeKind.KEY
4+
import io.github.techouse.qskotlin.enums.DecodeKind.VALUE
5+
6+
/**
 * Tells a decoder which part of a query-string pair a scalar token came from.
 *
 * - [KEY]: the token is a key or a key segment. Callers may want to keep percent-encoded dots
 *   (`%2E` / `%2e`) intact until after the key has been split.
 * - [VALUE]: the token is a value; it is typically percent-decoded in full.
 */
enum class DecodeKind {
    /** The token is a key or a key segment. */
    KEY,

    /** The token is a value. */
    VALUE,
}

qs-kotlin/src/main/kotlin/io/github/techouse/qskotlin/internal/Decoder.kt

Lines changed: 63 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -110,10 +110,12 @@ internal object Decoder {
110110
var value: Any?
111111

112112
if (pos == -1) {
113-
key = options.getDecoder(part, charset).toString()
113+
// Decode a bare key (no '=') using key-aware decoding
114+
key = options.decodeKey(part, charset).orEmpty()
114115
value = if (options.strictNullHandling) null else ""
115116
} else {
116-
key = options.getDecoder(part.take(pos), charset).toString()
117+
// Decode the key slice as a key; values decode as values
118+
key = options.decodeKey(part.take(pos), charset).orEmpty()
117119
value =
118120
Utils.apply<Any?>(
119121
parseListValue(
@@ -124,7 +126,7 @@ internal object Decoder {
124126
} else 0,
125127
)
126128
) { v: Any? ->
127-
options.getDecoder((v as String?), charset)
129+
options.decodeValue(v as String?, charset)
128130
}
129131
}
130132

@@ -202,12 +204,15 @@ internal object Decoder {
202204
val mutableObj = LinkedHashMap<String, Any?>(1)
203205

204206
val cleanRoot =
205-
if (root.startsWith("[") && root.endsWith("]")) {
206-
root.substring(1, root.length - 1)
207+
if (root.startsWith("[")) {
208+
val last = root.lastIndexOf(']')
209+
if (last > 0) root.substring(1, last) else root.substring(1)
207210
} else root
208211

209212
val decodedRoot =
210-
if (options.getDecodeDotInKeys) cleanRoot.replace("%2E", ".") else cleanRoot
213+
if (options.getDecodeDotInKeys)
214+
cleanRoot.replace("%2E", ".").replace("%2e", ".")
215+
else cleanRoot
211216

212217
val isPureNumeric = decodedRoot.isNotEmpty() && decodedRoot.all { it.isDigit() }
213218
val idx: Int? = if (isPureNumeric) decodedRoot.toInt() else null
@@ -232,8 +237,7 @@ internal object Decoder {
232237

233238
// Otherwise, treat it as a map with *string* key (even if numeric)
234239
else -> {
235-
val keyForMap = decodedRoot
236-
mutableObj[keyForMap] = leaf
240+
mutableObj[decodedRoot] = leaf
237241
obj = mutableObj
238242
}
239243
}
@@ -274,10 +278,53 @@ internal object Decoder {
274278
}
275279

276280
/**
277-
* Regular expression to match dots followed by non-dot and non-bracket characters. This is used
278-
* to replace dots in keys with brackets for parsing.
281+
* Converts a dot notation key to bracket notation at the top level.
282+
*
283+
* @param s The string to convert, which may contain dot notation.
284+
* @return The converted string with brackets replacing dots at the top level.
279285
*/
280-
private val DOT_TO_BRACKET = Regex("""\.([^.\[]+)""")
286+
private fun dotToBracketTopLevel(s: String): String {
    val out = StringBuilder(s.length)
    // Number of currently open literal '[' brackets; dots are only rewritten when this is 0.
    var nesting = 0
    var pos = 0
    while (pos < s.length) {
        val c = s[pos]

        if (c == '.' && nesting == 0) {
            // The segment name runs from just after the dot up to the next '.' or '['.
            val segStart = pos + 1
            var segEnd = segStart
            while (segEnd < s.length && s[segEnd] != '.' && s[segEnd] != '[') segEnd++

            if (segEnd > segStart) {
                out.append('[').append(s, segStart, segEnd).append(']')
            } else {
                // Empty segment (trailing dot, "..", or ".["): keep the dot literally.
                out.append('.')
            }
            // Either way, resume scanning right after the consumed segment (or the lone dot).
            pos = segEnd
            continue
        }

        // Track bracket nesting; an unmatched ']' never drives the counter below zero.
        if (c == '[') {
            nesting++
        } else if (c == ']' && nesting > 0) {
            nesting--
        }
        out.append(c)
        pos++
    }
    return out.toString()
}
281328

282329
/**
283330
* Splits a key into segments based on brackets and dots, handling depth and strictness.
@@ -295,17 +342,15 @@ internal object Decoder {
295342
maxDepth: Int,
296343
strictDepth: Boolean,
297344
): List<String> {
298-
// Apply dot→bracket *before* splitting, but when depth == 0, we do NOT split at all and do
299-
// NOT throw.
300-
val key: String =
301-
if (allowDots) originalKey.replace(DOT_TO_BRACKET) { "[${it.groupValues[1]}]" }
302-
else originalKey
303-
304345
// Depth 0 semantics: use the original key as a single segment; never throw.
305346
if (maxDepth <= 0) {
306-
return listOf(key)
347+
return listOf(originalKey)
307348
}
308349

350+
// Apply dot→bracket *before* splitting. The depth == 0 case has already returned above
351+
// (no splitting, no throwing), so the conversion only runs for keys that will be split.
352+
val key: String = if (allowDots) dotToBracketTopLevel(originalKey) else originalKey
353+
309354
val segments = ArrayList<String>(key.count { it == '[' } + 1)
310355

311356
val first = key.indexOf('[')

qs-kotlin/src/main/kotlin/io/github/techouse/qskotlin/models/DecodeOptions.kt

Lines changed: 155 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,24 @@
11
package io.github.techouse.qskotlin.models
22

3+
import io.github.techouse.qskotlin.enums.DecodeKind
34
import io.github.techouse.qskotlin.enums.Duplicates
45
import io.github.techouse.qskotlin.internal.Utils
56
import java.nio.charset.Charset
67
import java.nio.charset.StandardCharsets
78

8-
/**
9-
* A function that decodes a value from a query string or form data. It takes a value and an
10-
* optional charset, returning the decoded value.
11-
*
12-
* @param value The encoded value to decode.
13-
* @param charset The character set to use for decoding, if any.
14-
* @return The decoded value, or null if the value is not present.
15-
*/
16-
typealias Decoder = (value: String?, charset: Charset?) -> Any?
9+
/**
 * Unified scalar decoder, declared as a `fun interface` so it can be written as a lambda.
 *
 * Implementations may ignore `charset` and/or `kind`.
 */
fun interface Decoder {
    /**
     * Decodes a single scalar token.
     *
     * @param value the token to decode, or `null`
     * @param charset the charset to decode with, if any
     * @param kind whether the token is a key or a value, if known
     * @return the decoded result, which may be `null`
     */
    fun decode(value: String?, charset: Charset?, kind: DecodeKind?): Any?
}
13+
14+
/** Back‑compat adapter for `(value, charset) -> Any?` decoders. */
15+
@Deprecated(
16+
message =
17+
"Use Decoder fun interface; wrap your two‑arg lambda: Decoder { v, c, _ -> legacy(v, c) }",
18+
replaceWith = ReplaceWith("Decoder { value, charset, _ -> legacyDecoder(value, charset) }"),
19+
level = DeprecationLevel.WARNING,
20+
)
21+
typealias LegacyDecoder = (String?, Charset?) -> Any?
1722

1823
/** Options that configure the output of Qs.decode. */
1924
data class DecodeOptions(
@@ -22,6 +27,13 @@ data class DecodeOptions(
2227

2328
/** Set a Decoder to affect the decoding of the input. */
2429
private val decoder: Decoder? = null,
30+
@Deprecated(
31+
message = "Use `decoder` fun interface; this will be removed in a future major release",
32+
replaceWith = ReplaceWith("decoder"),
33+
level = DeprecationLevel.WARNING,
34+
)
35+
@Suppress("DEPRECATION")
36+
private val legacyDecoder: LegacyDecoder? = null,
2537

2638
/**
2739
* Set to `true` to decode dots in keys.
@@ -107,8 +119,11 @@ data class DecodeOptions(
107119
val parseLists: Boolean = true,
108120

109121
/**
110-
* Set to `true` to add a layer of protection by throwing an error when the limit is exceeded,
111-
* allowing you to catch and handle such cases.
122+
* Enforce the [depth] limit when parsing nested keys.
123+
*
124+
* When `true`, exceeding [depth] throws an `IndexOutOfBoundsException` during key splitting.
125+
* When `false` (default), any remainder beyond [depth] is treated as a single trailing segment
126+
* (matching the reference `qs` behavior).
112127
*/
113128
val strictDepth: Boolean = false,
114129

@@ -118,11 +133,21 @@ data class DecodeOptions(
118133
/** Set to `true` to throw an error when the limit is exceeded. */
119134
val throwOnLimitExceeded: Boolean = false,
120135
) {
121-
/** The List encoding format to use. */
136+
/**
137+
* Effective `allowDots` value.
138+
*
139+
* Returns the explicit `allowDots` value when one was given. When `allowDots` is unspecified
140+
* (`null`), returns `true` only if `decodeDotInKeys == true` (decoding dots in keys implies dot-splitting).
141+
*/
122142
val getAllowDots: Boolean
123143
get() = allowDots ?: (decodeDotInKeys == true)
124144

125-
/** The List encoding format to use. */
145+
/**
146+
* Effective `decodeDotInKeys` value.
147+
*
148+
* Defaults to `false` when unspecified. When `true`, encoded dots (`%2E`/`%2e`) inside key
149+
* segments are mapped to `.` **after** splitting, without introducing extra dot‑splits.
150+
*/
126151
val getDecodeDotInKeys: Boolean
127152
get() = decodeDotInKeys ?: false
128153

@@ -131,13 +156,126 @@ data class DecodeOptions(
131156
"Invalid charset"
132157
}
133158
require(parameterLimit > 0) { "Parameter limit must be positive" }
134-
require(!getDecodeDotInKeys || getAllowDots) {
159+
// If decodeDotInKeys is enabled, allowDots must not be explicitly false.
160+
require(!getDecodeDotInKeys || allowDots != false) {
135161
"decodeDotInKeys requires allowDots to be true"
136162
}
137163
}
138164

139-
/** Decode the input using the specified Decoder. */
165+
/**
 * Decodes a scalar token, telling the decoder whether it is a key or a value.
 *
 * Resolution order:
 * 1. the [decoder], when one is set (a `null` result from it is returned as-is);
 * 2. the deprecated two-argument [legacyDecoder], which receives only `value` and `charset`;
 * 3. the library default, [defaultDecode].
 *
 * @param value the token to decode, or `null`
 * @param charset the charset to decode with, if any
 * @param kind [DecodeKind.KEY] for keys and key segments, [DecodeKind.VALUE] (default) for values
 * @return whatever the selected decoder produced
 */
internal fun decode(
    value: String?,
    charset: Charset? = null,
    kind: DecodeKind = DecodeKind.VALUE,
): Any? {
    val custom = decoder
    @Suppress("DEPRECATION") val legacy = legacyDecoder
    return when {
        // A user decoder always wins, and its nulls are honored.
        custom != null -> custom.decode(value, charset, kind)
        // The legacy shape has no notion of kind, so it is simply not forwarded.
        legacy != null -> legacy(value, charset)
        else -> defaultDecode(value, charset, kind)
    }
}
186+
187+
/**
188+
* Default library decode.
189+
*
190+
* For [DecodeKind.KEY], protects encoded dots (`%2E`/`%2e`) **before** percent‑decoding so key
191+
* splitting and post‑split mapping run on the intended tokens.
192+
*/
193+
private fun defaultDecode(value: String?, charset: Charset?, kind: DecodeKind): Any? {
194+
if (value == null) return null
195+
if (kind == DecodeKind.KEY) {
196+
val protected =
197+
protectEncodedDotsForKeys(value, includeOutsideBrackets = (allowDots == true))
198+
return Utils.decode(protected, charset)
199+
}
200+
return Utils.decode(value, charset)
201+
}
202+
203+
/**
 * Rewrites `%2E`/`%2e` in a KEY string as `%252E`/`%252e` so that a later percent-decode
 * yields `%2E`/`%2e` again instead of a literal '.'.
 *
 * Occurrences inside `[...]` are always rewritten; occurrences outside brackets are rewritten
 * only when [includeOutsideBrackets] is true. Bracket depth is driven by literal `[`/`]` only;
 * percent-encoded brackets (`%5B`/`%5D`) are copied through as ordinary content.
 *
 * @param input the raw (still percent-encoded) key
 * @param includeOutsideBrackets whether encoded dots outside brackets are protected as well
 * @return the key with the selected encoded dots double-encoded; [input] itself when it has no
 *   '%' or no "2E"/"2e" after the first '%'
 */
private fun protectEncodedDotsForKeys(input: String, includeOutsideBrackets: Boolean): String {
    // Fast path: without a '%' followed somewhere by "2E"/"2e" there is nothing to protect.
    val firstPercent = input.indexOf('%')
    if (firstPercent < 0) return input
    val mayHaveUpper = input.indexOf("2E", firstPercent) >= 0
    val mayHaveLower = input.indexOf("2e", firstPercent) >= 0
    if (!mayHaveUpper && !mayHaveLower) return input

    val length = input.length
    val out = StringBuilder(length + 8)
    var nesting = 0
    var pos = 0
    while (pos < length) {
        val c = input[pos]

        val isEncodedDot =
            c == '%' &&
                pos + 2 < length &&
                input[pos + 1] == '2' &&
                (input[pos + 2] == 'E' || input[pos + 2] == 'e')
        if (isEncodedDot) {
            // Double-encode the '%' when protecting; the hex digits keep their original case.
            if (nesting > 0 || includeOutsideBrackets) out.append("%25") else out.append('%')
            out.append('2').append(input[pos + 2])
            pos += 3
            continue
        }

        // Track literal bracket nesting; a stray ']' never takes the depth below zero.
        if (c == '[') {
            nesting++
        } else if (c == ']' && nesting > 0) {
            nesting--
        }
        out.append(c)
        pos++
    }
    return out.toString()
}
258+
259+
/**
260+
* Back‑compat helper: decode a value without key/value kind context.
261+
*
262+
* Prefer calling [decode] directly (or [decodeKey]/[decodeValue] for explicit context).
263+
*/
264+
@Deprecated(
265+
message =
266+
"Deprecated: use decodeKey/decodeValue (or decode(value, charset, kind)) to honor key/value context. This will be removed in the next major.",
267+
replaceWith = ReplaceWith("decode(value, charset)"),
268+
level = DeprecationLevel.WARNING,
269+
)
270+
@Suppress("unused")
140271
@JvmOverloads
141-
fun getDecoder(value: String?, charset: Charset? = null): Any? =
142-
if (decoder != null) decoder.invoke(value, charset) else Utils.decode(value, charset)
272+
fun getDecoder(value: String?, charset: Charset? = null): Any? = decode(value, charset)
273+
274+
/**
 * Convenience wrapper: decodes [value] with [DecodeKind.KEY] context.
 *
 * Keys are always coerced to `String`; a `null` decode result stays `null`.
 */
internal fun decodeKey(value: String?, charset: Charset?): String? {
    val decoded = decode(value, charset, DecodeKind.KEY)
    return decoded?.toString()
}
277+
278+
/** Convenience wrapper: decodes [value] with [DecodeKind.VALUE] context. */
internal fun decodeValue(value: String?, charset: Charset?): Any? {
    return decode(value, charset, DecodeKind.VALUE)
}
143281
}

qs-kotlin/src/main/kotlin/io/github/techouse/qskotlin/models/EncodeOptions.kt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,12 @@ data class EncodeOptions(
5151

5252
/** The List encoding format to use. */
5353
private val listFormat: ListFormat? = null,
54-
@Deprecated("Use listFormat instead") val indices: Boolean? = null,
54+
@Deprecated(
55+
message = "Use listFormat instead",
56+
replaceWith = ReplaceWith("listFormat"),
57+
level = DeprecationLevel.WARNING,
58+
)
59+
val indices: Boolean? = null,
5560

5661
/** Set to `true` to use dot Map notation in the encoded output. */
5762
private val allowDots: Boolean? = null,

0 commit comments

Comments
 (0)