Skip to content

Commit 69b6c74

Browse files
author
Adrian Tosca
committed
feat(env): underscore field nesting for single _ separators
Allow nesting with a single `_` even when field names contain underscores, via `.underscore_nesting(true)`. This can be used when field names also contain `_` and are set through environment overrides, and serves as an alternative to the double-underscore separator `__` for nesting. Supports deep nesting. It is off by default.
1 parent bf6e256 commit 69b6c74

File tree

2 files changed

+240
-18
lines changed

2 files changed

+240
-18
lines changed

src/env.rs

Lines changed: 129 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
use std::env;
2+
use std::collections::BTreeSet;
23
use std::ffi::OsString;
34

45
#[cfg(feature = "convert-case")]
@@ -35,6 +36,12 @@ pub struct Environment {
3536
/// Optional character sequence that separates each key segment in an environment key pattern.
3637
/// Consider a nested configuration such as `redis.password`, a separator of `_` would allow
3738
/// an environment key of `REDIS_PASSWORD` to match.
39+
/// When using `_` as separator, and field names contain underscores,
40+
/// there are some different strategies for resolving the ambiguity, for example:
41+
/// 1. Use double underscores as separator to denote nesting with `__`,
42+
/// e.g. `PREFIX__INNER_CONFIG__ANOTHER_MULTIPART_NAME`
43+
/// 2. Use a single underscore as separator and enable underscore nesting with
44+
/// [`underscore_nesting`](Environment::underscore_nesting())
3845
separator: Option<String>,
3946

4047
/// Optional directive to translate collected keys into a form that matches what serializers
@@ -43,10 +50,14 @@ pub struct Environment {
4350
#[cfg(feature = "convert-case")]
4451
convert_case: Option<Case>,
4552

46-
/// Optional character sequence that separates each env value into a vector. only works when `try_parsing` is set to true
47-
/// Once set, you cannot have type String on the same environment, unless you set `list_parse_keys`.
53+
/// Optional character sequence that separates each env value into a vector.
54+
/// Only works when `try_parsing` is set to true.
55+
/// Once set, you cannot have type String on the same environment,
56+
/// unless you set `list_parse_keys`.
4857
list_separator: Option<String>,
49-
/// A list of keys which should always be parsed as a list. If not set you can have only `Vec<String>` or `String` (not both) in one environment.
58+
59+
/// A list of keys which should always be parsed as a list.
60+
/// If not set you can have only `Vec<String>` or `String` (not both) in one environment.
5061
list_parse_keys: Option<Vec<String>>,
5162

5263
/// Ignore empty env values (treat as unset).
@@ -58,6 +69,13 @@ pub struct Environment {
5869
// Preserve the prefix while parsing
5970
keep_prefix: bool,
6071

72+
/// When enabled in combination with `separator("_")`, environment keys with underscores
73+
/// will be interpreted with all possible underscore groupings as nested segments. This allows
74+
/// single-underscore separation to coexist with field names that themselves contain underscores.
75+
/// For example, `PREFIX_INNER_CONFIG_ANOTHER_MULTIPART_NAME` can match
76+
/// `inner_config.another_multipart_name`.
77+
underscore_nesting: bool,
78+
6179
/// Alternate source for the environment. This can be used when you want to test your own code
6280
/// using this source, without the need to change the actual system environment variables.
6381
///
@@ -154,8 +172,24 @@ impl Environment {
154172
}
155173

156174
/// Add a key which should be parsed as a list when collecting [`Value`]s from the environment.
157-
/// Once `list_separator` is set, the type for string is [`Vec<String>`].
158-
/// To switch the default type back to type Strings you need to provide the keys which should be [`Vec<String>`] using this function.
175+
/// Once `list_separator` is set, the type for any string is [`Vec<String>`]
176+
/// unless `list_parse_keys` is set.
177+
/// If you want to use [`Vec<String>`] in combination with [`String`] you need to provide
178+
/// the keys which should be [`Vec<String>`] using this function.
179+
/// All other keys will remain [`String`] when using `list_separator` with `list_parse_keys`.
180+
/// Example:
181+
/// ```rust
182+
/// # use config::Environment;
183+
/// # use serde::Deserialize;
184+
/// #[derive(Clone, Debug, Deserialize)]
185+
/// struct MyConfig {
186+
/// pub my_string: String, // will be parsed as String
187+
/// pub my_list: Vec<String>, // will be parsed as Vec<String>
188+
/// }
189+
/// let source = Environment::default()
190+
/// .list_separator(",")
191+
/// .with_list_parse_key("my_list");
192+
/// ```
159193
pub fn with_list_parse_key(mut self, key: &str) -> Self {
160194
let keys = self.list_parse_keys.get_or_insert_with(Vec::new);
161195
keys.push(key.into());
@@ -181,6 +215,88 @@ impl Environment {
181215
self
182216
}
183217

218+
/// Enable alternative underscore-based nesting when `separator("_")` is used.
219+
///
220+
/// When enabled, each environment key (after prefix removal) is split on `_` and all
221+
/// groupings of tokens are generated into dotted keys by joining grouped tokens with `_`
222+
/// (preserving underscores within field names) and groups with `.` (denoting nesting).
223+
/// This makes it possible to use a single underscore both as a nesting separator and as
224+
/// part of field names.
225+
///
226+
/// Note: The number of key variants grows as 2^(n-1) for n underscore-separated tokens
227+
/// in a key. Typical env keys are short; however, consider leaving this disabled for
228+
/// very long keys if performance is a concern and use a double-underscore strategy
229+
/// for nesting.
230+
pub fn underscore_nesting(mut self, enable: bool) -> Self {
231+
self.underscore_nesting = enable;
232+
self
233+
}
234+
235+
// Generate all candidate key variants for a given base (lowercased, post-prefix) env key.
236+
// Returns the complete set of dotted key variants and the primary variant (separator replaced
237+
// by `.` and case-converted if enabled) which should be used for list parsing decisions.
238+
fn generate_key_variants(&self, base_key: &str, separator: &str) -> (BTreeSet<String>, String) {
239+
// Primary variant: separator replaced with '.'
240+
let mut primary_key = if !separator.is_empty() {
241+
base_key.replace(separator, ".")
242+
} else {
243+
base_key.to_owned()
244+
};
245+
246+
// Generate variants. When underscore_nesting is enabled with "_" separator,
247+
// generate all possible ways to group tokens (preserving underscores within field names).
248+
let mut variants_vec: Vec<String> = if separator == "_" && self.underscore_nesting {
249+
let tokens: Vec<&str> = base_key.split('_').filter(|s| !s.is_empty()).collect();
250+
251+
if tokens.is_empty() {
252+
vec![primary_key.clone()]
253+
} else {
254+
// Generate all 2^(n-1) ways to partition n tokens.
255+
// Each bit position represents whether to split after that token.
256+
let num_partitions = 1usize << tokens.len().saturating_sub(1);
257+
let mut variants = Vec::with_capacity(num_partitions + 1);
258+
259+
for partition in 0..num_partitions {
260+
let mut groups = Vec::new();
261+
let mut current_group = vec![tokens[0]];
262+
263+
for i in 1..tokens.len() {
264+
if (partition >> (i - 1)) & 1 == 1 {
265+
// Split here: join current group and start a new one
266+
groups.push(current_group.join("_"));
267+
current_group = vec![tokens[i]];
268+
} else {
269+
// Continue current group
270+
current_group.push(tokens[i]);
271+
}
272+
}
273+
// Add the final group
274+
groups.push(current_group.join("_"));
275+
variants.push(groups.join("."));
276+
}
277+
278+
variants.push(primary_key.clone());
279+
variants
280+
}
281+
} else {
282+
vec![primary_key.clone()]
283+
};
284+
285+
// Apply convert_case to all variants and primary if requested
286+
#[cfg(feature = "convert-case")]
287+
if let Some(convert_case) = &self.convert_case {
288+
for variant in &mut variants_vec {
289+
*variant = variant.to_case(*convert_case);
290+
}
291+
primary_key = primary_key.to_case(*convert_case);
292+
}
293+
294+
// Build the final set, deduplicating in the process
295+
let variants: BTreeSet<String> = variants_vec.into_iter().collect();
296+
297+
(variants, primary_key)
298+
}
299+
184300
/// Alternate source for the environment. This can be used when you want to test your own code
185301
/// using this source, without the need to change the actual system environment variables.
186302
///
@@ -231,8 +347,6 @@ impl Source for Environment {
231347
let uri: String = "the environment".into();
232348

233349
let separator = self.separator.as_deref().unwrap_or("");
234-
#[cfg(feature = "convert-case")]
235-
let convert_case = &self.convert_case;
236350
let prefix_separator = match (self.prefix_separator.as_deref(), self.separator.as_deref()) {
237351
(Some(pre), _) => pre,
238352
(None, Some(sep)) => sep,
@@ -280,16 +394,11 @@ impl Source for Environment {
280394
))
281395
})?;
282396

283-
// If separator is given replace with `.`
284-
if !separator.is_empty() {
285-
key = key.replace(separator, ".");
286-
}
287-
288-
#[cfg(feature = "convert-case")]
289-
if let Some(convert_case) = convert_case {
290-
key = key.to_case(*convert_case);
291-
}
397+
// Prepare key variants using helper
398+
let base_key = key.clone();
399+
let (variants, primary_key) = self.generate_key_variants(&base_key, separator);
292400

401+
// Use the primary, possibly case-converted, key for list parsing decisions
293402
let value = if self.try_parsing {
294403
// convert to lowercase because bool parsing expects all lowercase
295404
if let Ok(parsed) = value.to_lowercase().parse::<bool>() {
@@ -300,7 +409,7 @@ impl Source for Environment {
300409
ValueKind::Float(parsed)
301410
} else if let Some(separator) = &self.list_separator {
302411
if let Some(keys) = &self.list_parse_keys {
303-
if keys.contains(&key) {
412+
if keys.contains(&primary_key) {
304413
let v: Vec<Value> = value
305414
.split(separator)
306415
.map(|s| Value::new(Some(&uri), ValueKind::String(s.to_owned())))
@@ -323,7 +432,9 @@ impl Source for Environment {
323432
ValueKind::String(value)
324433
};
325434

326-
m.insert(key, Value::new(Some(&uri), value));
435+
for k in variants.into_iter() {
436+
m.insert(k, Value::new(Some(&uri), value.clone()));
437+
}
327438

328439
Ok(())
329440
};

tests/testsuite/env.rs

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,117 @@ fn test_parse_string_and_list_ignore_list_parse_key_case() {
518518
);
519519
}
520520

521+
#[test]
522+
fn test_parse_nested_double_separator() {
523+
#[derive(Deserialize, Debug)]
524+
struct TestConfig {
525+
single: String,
526+
plain: SimpleInner,
527+
value_with_multipart_name: String,
528+
inner_config: ComplexInner,
529+
}
530+
531+
#[derive(Deserialize, Debug)]
532+
struct SimpleInner {
533+
val: String,
534+
}
535+
536+
#[derive(Deserialize, Debug)]
537+
struct ComplexInner {
538+
another_multipart_name: String,
539+
}
540+
541+
temp_env::with_vars(
542+
vec![
543+
("PREFIX__SINGLE", Some("test")),
544+
("PREFIX__PLAIN__VAL", Some("simple")),
545+
("PREFIX__VALUE_WITH_MULTIPART_NAME", Some("value1")),
546+
(
547+
"PREFIX__INNER_CONFIG__ANOTHER_MULTIPART_NAME",
548+
Some("value2"),
549+
),
550+
],
551+
|| {
552+
let environment = Environment::default()
553+
.prefix("PREFIX")
554+
.separator("__");
555+
556+
let config = Config::builder().add_source(environment).build().unwrap();
557+
558+
// println!("{config:#?}");
559+
560+
let config: TestConfig = config.try_deserialize().unwrap();
561+
562+
assert_eq!(config.single, "test");
563+
assert_eq!(config.plain.val, "simple");
564+
assert_eq!(config.value_with_multipart_name, "value1");
565+
assert_eq!(config.inner_config.another_multipart_name, "value2");
566+
},
567+
);
568+
}
569+
570+
#[test]
571+
fn test_parse_nested_single_separator() {
572+
#[derive(Deserialize, Debug)]
573+
struct TestConfig {
574+
single: String,
575+
plain: SimpleInner,
576+
value_with_multipart_name: String,
577+
inner_config: ComplexInner,
578+
}
579+
580+
#[derive(Deserialize, Debug)]
581+
struct SimpleInner {
582+
val: String,
583+
}
584+
585+
#[derive(Deserialize, Debug)]
586+
struct ComplexInner {
587+
another_multipart_value: String, // value vs name, earlier, to test proper sorting of keys
588+
another_multipart_name: String,
589+
another_1_multipart_value_2: String, // with numbers
590+
}
591+
592+
temp_env::with_vars(
593+
vec![
594+
("PREFIX_SINGLE", Some("test")),
595+
("PREFIX_PLAIN_VAL", Some("simple")),
596+
("PREFIX_VALUE_WITH_MULTIPART_NAME", Some("value1")),
597+
(
598+
"PREFIX_INNER_CONFIG_ANOTHER_MULTIPART_VALUE",
599+
Some("value2"),
600+
),
601+
(
602+
"PREFIX_INNER_CONFIG_ANOTHER_MULTIPART_NAME",
603+
Some("value3"),
604+
),
605+
(
606+
"PREFIX_INNER_CONFIG_ANOTHER_1_MULTIPART_VALUE_2",
607+
Some("value4"),
608+
),
609+
],
610+
|| {
611+
let environment = Environment::default()
612+
.prefix("PREFIX")
613+
.separator("_")
614+
.underscore_nesting(true);
615+
616+
let config = Config::builder().add_source(environment).build().unwrap();
617+
618+
// println!("{config:#?}");
619+
620+
let config: TestConfig = config.try_deserialize().unwrap();
621+
622+
assert_eq!(config.single, "test");
623+
assert_eq!(config.plain.val, "simple");
624+
assert_eq!(config.value_with_multipart_name, "value1");
625+
assert_eq!(config.inner_config.another_multipart_value, "value2");
626+
assert_eq!(config.inner_config.another_multipart_name, "value3");
627+
assert_eq!(config.inner_config.another_1_multipart_value_2, "value4");
628+
},
629+
);
630+
}
631+
521632
#[test]
522633
#[cfg(feature = "convert-case")]
523634
fn test_parse_nested_kebab() {

0 commit comments

Comments
 (0)