From 5ff268a62d1a51902b891ba54e0becb85e90c5be Mon Sep 17 00:00:00 2001 From: ia0 Date: Fri, 25 Jul 2025 23:18:07 +0200 Subject: [PATCH 1/2] Document maximum input length Fixes #145 --- bin/CHANGELOG.md | 6 ++++ bin/Cargo.toml | 4 +-- lib/CHANGELOG.md | 7 ++++ lib/Cargo.toml | 2 +- lib/fuzz/Cargo.toml | 8 +++++ lib/fuzz/fuzz_targets/impl_decode_len.rs | 1 + lib/fuzz/fuzz_targets/impl_encode_len.rs | 1 + lib/fuzz/run.sh | 8 +++-- lib/fuzz/src/cmd.rs | 8 +++++ lib/macro/Cargo.toml | 6 ++-- lib/macro/internal/Cargo.toml | 4 +-- lib/src/lib.rs | 43 ++++++++++++++++++++++++ 12 files changed, 88 insertions(+), 10 deletions(-) create mode 120000 lib/fuzz/fuzz_targets/impl_decode_len.rs create mode 120000 lib/fuzz/fuzz_targets/impl_encode_len.rs diff --git a/bin/CHANGELOG.md b/bin/CHANGELOG.md index a433734..39ad59c 100644 --- a/bin/CHANGELOG.md +++ b/bin/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## 0.3.8-git + +### Patch + +- Update `data-encoding` version + ## 0.3.7 ### Patch diff --git a/bin/Cargo.toml b/bin/Cargo.toml index b642080..a4ea477 100644 --- a/bin/Cargo.toml +++ b/bin/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "data-encoding-bin" -version = "0.3.7" +version = "0.3.8-git" authors = ["Julien Cretin "] license = "MIT" edition = "2021" @@ -17,5 +17,5 @@ name = "data-encoding" path = "src/main.rs" [dependencies] -data-encoding = { version = "2.9.0", path = "../lib" } +data-encoding = { version = "2.10.0-git", path = "../lib" } getopts = "0.2" diff --git a/lib/CHANGELOG.md b/lib/CHANGELOG.md index 775ff74..f8a02ac 100644 --- a/lib/CHANGELOG.md +++ b/lib/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## 2.10.0-git + +### Minor + +- Document maximum input length for `Encoding::{decode,encode}_len()` (fixes #145) +- Add `Encoding::encode_align()` to decide where to split long inputs + ## 2.9.0 ### Minor diff --git a/lib/Cargo.toml b/lib/Cargo.toml index d3827c3..7688a63 100644 --- a/lib/Cargo.toml +++ b/lib/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "data-encoding" -version = "2.9.0" +version = "2.10.0-git" authors = ["Julien Cretin "] license = "MIT" edition = "2018" diff --git a/lib/fuzz/Cargo.toml b/lib/fuzz/Cargo.toml index a168956..3f734b7 100644 --- a/lib/fuzz/Cargo.toml +++ b/lib/fuzz/Cargo.toml @@ -21,6 +21,14 @@ libfuzzer-sys = "0.4.3" name = "fuzz_any_spec" path = "fuzz_targets/fuzz_any_spec.rs" +[[bin]] +name = "impl_encode_len" +path = "fuzz_targets/impl_encode_len.rs" + +[[bin]] +name = "impl_decode_len" +path = "fuzz_targets/impl_decode_len.rs" + [[bin]] name = "impl_encode" path = "fuzz_targets/impl_encode.rs" diff --git a/lib/fuzz/fuzz_targets/impl_decode_len.rs b/lib/fuzz/fuzz_targets/impl_decode_len.rs new file mode 120000 index 0000000..ba589d7 --- /dev/null +++ b/lib/fuzz/fuzz_targets/impl_decode_len.rs @@ -0,0 +1 @@ +template.rs \ No newline at end of file diff --git a/lib/fuzz/fuzz_targets/impl_encode_len.rs b/lib/fuzz/fuzz_targets/impl_encode_len.rs new file mode 120000 index 0000000..ba589d7 --- /dev/null +++ b/lib/fuzz/fuzz_targets/impl_encode_len.rs @@ -0,0 +1 @@ +template.rs \ No newline at end of file diff --git a/lib/fuzz/run.sh b/lib/fuzz/run.sh index 30ddc4f..86ef2e7 100755 --- a/lib/fuzz/run.sh +++ b/lib/fuzz/run.sh @@ -1,8 +1,12 @@ #!/bin/sh -N="$(cargo fuzz list | wc -l)" +LIST="$*" +[ -n "$LIST" ] || LIST=$(echo $(cargo fuzz list)) +list() { for x in $LIST; do echo $x; done; } + +N="$(list | wc -l)" i=1 -next() { cargo fuzz list | head -n$i | tail -n1; } +next() { list | head -n$i | tail -n1; } while cargo fuzz run "$(next)" -- -max_total_time=600; do i=$(( i % N + 1 )) done diff --git a/lib/fuzz/src/cmd.rs b/lib/fuzz/src/cmd.rs index 8313259..5d99098 100644 --- a/lib/fuzz/src/cmd.rs +++ b/lib/fuzz/src/cmd.rs @@ -40,6 +40,14 @@ pub fn execute(target: &str, mut input: &[u8]) -> Output { let input = gen::rev_spec(&spec); assert_eq!(gen::spec(&mut input.as_slice()).encoding().unwrap(), base); } + "impl_encode_len" => { + let (_, base) = gen_spec_base(&mut input, &mut output); + let _ = base.encode_len(usize::MAX / 512); + } + "impl_decode_len" => { + let (_, base) = gen_spec_base(&mut input, &mut output); + let _ = base.decode_len(usize::MAX / 8); + } "impl_encode" => { let (spec, base) = gen_spec_base(&mut input, &mut output); assert_eq!(base.encode(input), spec::encode(&spec, input)); diff --git a/lib/macro/Cargo.toml b/lib/macro/Cargo.toml index dafd738..bc26d0e 100644 --- a/lib/macro/Cargo.toml +++ b/lib/macro/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "data-encoding-macro" -version = "0.1.18" +version = "0.1.19-git" authors = ["Julien Cretin "] license = "MIT" edition = "2018" @@ -14,5 +14,5 @@ description = "Macros for data-encoding" include = ["Cargo.toml", "LICENSE", "README.md", "src/lib.rs"] [dependencies] -data-encoding = { version = "2.9.0", path = "..", default-features = false } -data-encoding-macro-internal = { version = "0.1.16", path = "internal" } +data-encoding = { version = "2.10.0-git", path = "..", default-features = false } +data-encoding-macro-internal = { version = "0.1.17-git", path = "internal" } diff --git a/lib/macro/internal/Cargo.toml b/lib/macro/internal/Cargo.toml index 864ded6..c3a65eb 100644 --- a/lib/macro/internal/Cargo.toml +++ b/lib/macro/internal/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "data-encoding-macro-internal" -version = "0.1.16" +version = "0.1.17-git" authors = ["Julien Cretin "] license = "MIT" edition = "2018" @@ -14,7 +14,7 @@ include = ["Cargo.toml", "LICENSE", "README.md", "src/lib.rs"] proc-macro = true [dependencies.data-encoding] -version = "2.9.0" +version = "2.10.0-git" path = "../.." default-features = false features = ["alloc"] diff --git a/lib/src/lib.rs b/lib/src/lib.rs index c531730..40c37d4 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -1256,9 +1256,21 @@ impl Encoding { /// /// See [`encode_mut`] for when to use it. /// + /// # Panics + /// + /// Panics if `len` is greater than `usize::MAX / 512`: + /// - `len <= 8_388_607` when `target_pointer_width = "32"` + /// - `len <= 36028_797018_963967` when `target_pointer_width = "64"` + /// + /// If you need to encode an input of length greater than this limit (possibly of infinite + /// length), then you must chunk your input, encode each chunk, and concatenate to obtain the + /// output. The length of each input chunk must be a multiple of [`encode_align`]. + /// + /// [`encode_align`]: struct.Encoding.html#method.encode_align /// [`encode_mut`]: struct.Encoding.html#method.encode_mut #[must_use] pub fn encode_len(&self, len: usize) -> usize { + assert!(len <= usize::MAX / 512); dispatch! { let bit: usize = self.bit(); let pad: Option = self.pad(); @@ -1267,6 +1279,20 @@ impl Encoding { } } + /// Returns the minimum alignment when chunking a long input + /// + /// See [`encode_len`] for context. + /// + /// [`encode_len`]: struct.Encoding.html#method.encode_len + #[must_use] + pub fn encode_align(&self) -> usize { + let bit = self.bit(); + match self.wrap() { + None => enc(bit), + Some((col, _)) => col * bit / 8, + } + } + /// Encodes `input` in `output` /// /// # Panics @@ -1433,6 +1459,22 @@ impl Encoding { /// See [`decode_mut`] for when to use it. In particular, the actual decoded length might be /// smaller if the actual input contains padding or ignored characters. /// + /// # Panics + /// + /// Panics if `len` is greater than `usize::MAX / 8`: + /// - `len <= 536_870_911` when `target_pointer_width = "32"` + /// - `len <= 2_305843_009213_693951` when `target_pointer_width = "64"` + /// + /// If you need to decode an input of length greater than this limit (possibly of infinite + /// length), then you must decode your input chunk by chunk with [`decode_mut`], making sure + /// that you take into account how many bytes have been read from the input and how many bytes + /// have been written to the output: + /// - `Ok(written)` means all bytes have been read and `written` bytes have been written + /// - `Err(DecodePartial { error, .. })` means an error occurred if `error.kind != + /// DecodeKind::Length` or this was the last input chunk + /// - `Err(DecodePartial { read, written, .. })` means that `read` bytes have been read and + /// `written` bytes written (the error can be ignored) + /// /// # Errors /// /// Returns an error if `len` is invalid. The error kind is [`Length`] and the [position] is the @@ -1442,6 +1484,7 @@ impl Encoding { /// [`Length`]: enum.DecodeKind.html#variant.Length /// [position]: struct.DecodeError.html#structfield.position pub fn decode_len(&self, len: usize) -> Result { + assert!(len <= usize::MAX / 8); let (ilen, olen) = dispatch! { let bit: usize = self.bit(); let pad: bool = self.pad().is_some(); From 94583f8cc9743021b546669c41945dd67644f77f Mon Sep 17 00:00:00 2001 From: ia0 Date: Thu, 31 Jul 2025 21:03:56 +0200 Subject: [PATCH 2/2] Improve panic wording regarding what is guaranteed --- lib/src/lib.rs | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/lib/src/lib.rs b/lib/src/lib.rs index 40c37d4..ccef77a 100644 --- a/lib/src/lib.rs +++ b/lib/src/lib.rs @@ -1258,7 +1258,7 @@ impl Encoding { /// /// # Panics /// - /// Panics if `len` is greater than `usize::MAX / 512`: + /// May panic if `len` is greater than `usize::MAX / 512`: /// - `len <= 8_388_607` when `target_pointer_width = "32"` /// - `len <= 36028_797018_963967` when `target_pointer_width = "64"` /// @@ -1266,6 +1266,11 @@ impl Encoding { /// length), then you must chunk your input, encode each chunk, and concatenate to obtain the /// output. The length of each input chunk must be a multiple of [`encode_align`]. /// + /// Note that this function only _may_ panic in those cases. The function may also return the + /// correct value in some cases depending on the implementation. In other words, those limits + /// are the guarantee below which the function will not panic, and not the guarantee above which + /// the function will panic. + /// /// [`encode_align`]: struct.Encoding.html#method.encode_align /// [`encode_mut`]: struct.Encoding.html#method.encode_mut #[must_use] @@ -1461,7 +1466,7 @@ impl Encoding { /// /// # Panics /// - /// Panics if `len` is greater than `usize::MAX / 8`: + /// May panic if `len` is greater than `usize::MAX / 8`: /// - `len <= 536_870_911` when `target_pointer_width = "32"` /// - `len <= 2_305843_009213_693951` when `target_pointer_width = "64"` /// @@ -1475,6 +1480,11 @@ impl Encoding { /// - `Err(DecodePartial { read, written, .. })` means that `read` bytes have been read and /// `written` bytes written (the error can be ignored) /// + /// Note that this function only _may_ panic in those cases. The function may also return the + /// correct value in some cases depending on the implementation. In other words, those limits + /// are the guarantee below which the function will not panic, and not the guarantee above which + /// the function will panic. + /// /// # Errors /// /// Returns an error if `len` is invalid. The error kind is [`Length`] and the [position] is the