From 5ff268a62d1a51902b891ba54e0becb85e90c5be Mon Sep 17 00:00:00 2001
From: ia0 <git@ia0.eu>
Date: Fri, 25 Jul 2025 23:18:07 +0200
Subject: [PATCH 1/2] Document maximum input length

Fixes #145
---
 bin/CHANGELOG.md                         |  6 ++++
 bin/Cargo.toml                           |  4 +--
 lib/CHANGELOG.md                         |  7 ++++
 lib/Cargo.toml                           |  2 +-
 lib/fuzz/Cargo.toml                      |  8 +++++
 lib/fuzz/fuzz_targets/impl_decode_len.rs |  1 +
 lib/fuzz/fuzz_targets/impl_encode_len.rs |  1 +
 lib/fuzz/run.sh                          |  8 +++--
 lib/fuzz/src/cmd.rs                      |  8 +++++
 lib/macro/Cargo.toml                     |  6 ++--
 lib/macro/internal/Cargo.toml            |  4 +--
 lib/src/lib.rs                           | 43 ++++++++++++++++++++++++
 12 files changed, 88 insertions(+), 10 deletions(-)
 create mode 120000 lib/fuzz/fuzz_targets/impl_decode_len.rs
 create mode 120000 lib/fuzz/fuzz_targets/impl_encode_len.rs

diff --git a/bin/CHANGELOG.md b/bin/CHANGELOG.md
index a433734..39ad59c 100644
--- a/bin/CHANGELOG.md
+++ b/bin/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## 0.3.8-git
+
+### Patch
+
+- Update `data-encoding` version
+
 ## 0.3.7
 
 ### Patch
diff --git a/bin/Cargo.toml b/bin/Cargo.toml
index b642080..a4ea477 100644
--- a/bin/Cargo.toml
+++ b/bin/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "data-encoding-bin"
-version = "0.3.7"
+version = "0.3.8-git"
 authors = ["Julien Cretin <git@ia0.eu>"]
 license = "MIT"
 edition = "2021"
@@ -17,5 +17,5 @@ name = "data-encoding"
 path = "src/main.rs"
 
 [dependencies]
-data-encoding = { version = "2.9.0", path = "../lib" }
+data-encoding = { version = "2.10.0-git", path = "../lib" }
 getopts = "0.2"
diff --git a/lib/CHANGELOG.md b/lib/CHANGELOG.md
index 775ff74..f8a02ac 100644
--- a/lib/CHANGELOG.md
+++ b/lib/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## 2.10.0-git
+
+### Minor
+
+- Document maximum input length for `Encoding::{decode,encode}_len()` (fixes #145)
+- Add `Encoding::encode_align()` to decide where to split long inputs
+
 ## 2.9.0
 
 ### Minor
diff --git a/lib/Cargo.toml b/lib/Cargo.toml
index d3827c3..7688a63 100644
--- a/lib/Cargo.toml
+++ b/lib/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "data-encoding"
-version = "2.9.0"
+version = "2.10.0-git"
 authors = ["Julien Cretin <git@ia0.eu>"]
 license = "MIT"
 edition = "2018"
diff --git a/lib/fuzz/Cargo.toml b/lib/fuzz/Cargo.toml
index a168956..3f734b7 100644
--- a/lib/fuzz/Cargo.toml
+++ b/lib/fuzz/Cargo.toml
@@ -21,6 +21,14 @@ libfuzzer-sys = "0.4.3"
 name = "fuzz_any_spec"
 path = "fuzz_targets/fuzz_any_spec.rs"
 
+[[bin]]
+name = "impl_encode_len"
+path = "fuzz_targets/impl_encode_len.rs"
+
+[[bin]]
+name = "impl_decode_len"
+path = "fuzz_targets/impl_decode_len.rs"
+
 [[bin]]
 name = "impl_encode"
 path = "fuzz_targets/impl_encode.rs"
diff --git a/lib/fuzz/fuzz_targets/impl_decode_len.rs b/lib/fuzz/fuzz_targets/impl_decode_len.rs
new file mode 120000
index 0000000..ba589d7
--- /dev/null
+++ b/lib/fuzz/fuzz_targets/impl_decode_len.rs
@@ -0,0 +1 @@
+template.rs
\ No newline at end of file
diff --git a/lib/fuzz/fuzz_targets/impl_encode_len.rs b/lib/fuzz/fuzz_targets/impl_encode_len.rs
new file mode 120000
index 0000000..ba589d7
--- /dev/null
+++ b/lib/fuzz/fuzz_targets/impl_encode_len.rs
@@ -0,0 +1 @@
+template.rs
\ No newline at end of file
diff --git a/lib/fuzz/run.sh b/lib/fuzz/run.sh
index 30ddc4f..86ef2e7 100755
--- a/lib/fuzz/run.sh
+++ b/lib/fuzz/run.sh
@@ -1,8 +1,12 @@
 #!/bin/sh
 
-N="$(cargo fuzz list | wc -l)"
+LIST="$*"
+[ -n "$LIST" ] || LIST=$(echo $(cargo fuzz list))
+list() { for x in $LIST; do echo $x; done; }
+
+N="$(list | wc -l)"
 i=1
-next() { cargo fuzz list | head -n$i | tail -n1; }
+next() { list | head -n$i | tail -n1; }
 while cargo fuzz run "$(next)" -- -max_total_time=600; do
   i=$(( i % N + 1 ))
 done
diff --git a/lib/fuzz/src/cmd.rs b/lib/fuzz/src/cmd.rs
index 8313259..5d99098 100644
--- a/lib/fuzz/src/cmd.rs
+++ b/lib/fuzz/src/cmd.rs
@@ -40,6 +40,14 @@ pub fn execute(target: &str, mut input: &[u8]) -> Output {
             let input = gen::rev_spec(&spec);
             assert_eq!(gen::spec(&mut input.as_slice()).encoding().unwrap(), base);
         }
+        "impl_encode_len" => {
+            let (_, base) = gen_spec_base(&mut input, &mut output);
+            let _ = base.encode_len(usize::MAX / 512);
+        }
+        "impl_decode_len" => {
+            let (_, base) = gen_spec_base(&mut input, &mut output);
+            let _ = base.decode_len(usize::MAX / 8);
+        }
         "impl_encode" => {
             let (spec, base) = gen_spec_base(&mut input, &mut output);
             assert_eq!(base.encode(input), spec::encode(&spec, input));
diff --git a/lib/macro/Cargo.toml b/lib/macro/Cargo.toml
index dafd738..bc26d0e 100644
--- a/lib/macro/Cargo.toml
+++ b/lib/macro/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "data-encoding-macro"
-version = "0.1.18"
+version = "0.1.19-git"
 authors = ["Julien Cretin <cretin@google.com>"]
 license = "MIT"
 edition = "2018"
@@ -14,5 +14,5 @@ description = "Macros for data-encoding"
 include = ["Cargo.toml", "LICENSE", "README.md", "src/lib.rs"]
 
 [dependencies]
-data-encoding = { version = "2.9.0", path = "..", default-features = false }
-data-encoding-macro-internal = { version = "0.1.16", path = "internal" }
+data-encoding = { version = "2.10.0-git", path = "..", default-features = false }
+data-encoding-macro-internal = { version = "0.1.17-git", path = "internal" }
diff --git a/lib/macro/internal/Cargo.toml b/lib/macro/internal/Cargo.toml
index 864ded6..c3a65eb 100644
--- a/lib/macro/internal/Cargo.toml
+++ b/lib/macro/internal/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "data-encoding-macro-internal"
-version = "0.1.16"
+version = "0.1.17-git"
 authors = ["Julien Cretin <cretin@google.com>"]
 license = "MIT"
 edition = "2018"
@@ -14,7 +14,7 @@ include = ["Cargo.toml", "LICENSE", "README.md", "src/lib.rs"]
 proc-macro = true
 
 [dependencies.data-encoding]
-version = "2.9.0"
+version = "2.10.0-git"
 path = "../.."
 default-features = false
 features = ["alloc"]
diff --git a/lib/src/lib.rs b/lib/src/lib.rs
index c531730..40c37d4 100644
--- a/lib/src/lib.rs
+++ b/lib/src/lib.rs
@@ -1256,9 +1256,21 @@ impl Encoding {
     ///
     /// See [`encode_mut`] for when to use it.
     ///
+    /// # Panics
+    ///
+    /// Panics if `len` is greater than `usize::MAX / 512`:
+    /// - `len <= 8_388_607` when `target_pointer_width = "32"`
+    /// - `len <= 36028_797018_963967` when `target_pointer_width = "64"`
+    ///
+    /// If you need to encode an input of length greater than this limit (possibly of infinite
+    /// length), then you must chunk your input, encode each chunk, and concatenate to obtain the
+    /// output. The length of each input chunk must be a multiple of [`encode_align`].
+    ///
+    /// [`encode_align`]: struct.Encoding.html#method.encode_align
     /// [`encode_mut`]: struct.Encoding.html#method.encode_mut
     #[must_use]
     pub fn encode_len(&self, len: usize) -> usize {
+        assert!(len <= usize::MAX / 512);
         dispatch! {
             let bit: usize = self.bit();
             let pad: Option<u8> = self.pad();
@@ -1267,6 +1279,20 @@ impl Encoding {
         }
     }
 
+    /// Returns the minimum input alignment (in bytes) when chunking a long input
+    ///
+    /// Each input chunk length must be a multiple of this value. See [`encode_len`] for context.
+    ///
+    /// [`encode_len`]: struct.Encoding.html#method.encode_len
+    #[must_use]
+    pub fn encode_align(&self) -> usize {
+        let bit = self.bit();
+        match self.wrap() {
+            None => enc(bit),
+            Some((col, _)) => col * bit / 8,
+        }
+    }
+
     /// Encodes `input` in `output`
     ///
     /// # Panics
@@ -1433,6 +1459,22 @@ impl Encoding {
     /// See [`decode_mut`] for when to use it. In particular, the actual decoded length might be
     /// smaller if the actual input contains padding or ignored characters.
     ///
+    /// # Panics
+    ///
+    /// Panics if `len` is greater than `usize::MAX / 8`:
+    /// - `len <= 536_870_911` when `target_pointer_width = "32"`
+    /// - `len <= 2_305843_009213_693951` when `target_pointer_width = "64"`
+    ///
+    /// If you need to decode an input of length greater than this limit (possibly of infinite
+    /// length), then you must decode your input chunk by chunk with [`decode_mut`], making sure
+    /// that you take into account how many bytes have been read from the input and how many bytes
+    /// have been written to the output:
+    /// - `Ok(written)` means all bytes have been read and `written` bytes have been written
+    /// - `Err(DecodePartial { error, .. })` means an error occurred if `error.kind !=
+    ///   DecodeKind::Length` or this was the last input chunk
+    /// - `Err(DecodePartial { read, written, .. })` means that `read` bytes have been read and
+    ///   `written` bytes written (the error can be ignored)
+    ///
     /// # Errors
     ///
     /// Returns an error if `len` is invalid. The error kind is [`Length`] and the [position] is the
@@ -1442,6 +1484,7 @@ impl Encoding {
     /// [`Length`]: enum.DecodeKind.html#variant.Length
     /// [position]: struct.DecodeError.html#structfield.position
     pub fn decode_len(&self, len: usize) -> Result<usize, DecodeError> {
+        assert!(len <= usize::MAX / 8);
         let (ilen, olen) = dispatch! {
             let bit: usize = self.bit();
             let pad: bool = self.pad().is_some();

From 94583f8cc9743021b546669c41945dd67644f77f Mon Sep 17 00:00:00 2001
From: ia0 <git@ia0.eu>
Date: Thu, 31 Jul 2025 21:03:56 +0200
Subject: [PATCH 2/2] Improve panic wording regarding what is guaranteed

---
 lib/src/lib.rs | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/lib/src/lib.rs b/lib/src/lib.rs
index 40c37d4..ccef77a 100644
--- a/lib/src/lib.rs
+++ b/lib/src/lib.rs
@@ -1258,7 +1258,7 @@ impl Encoding {
     ///
     /// # Panics
     ///
-    /// Panics if `len` is greater than `usize::MAX / 512`:
+    /// May panic unless `len` is at most `usize::MAX / 512`:
     /// - `len <= 8_388_607` when `target_pointer_width = "32"`
     /// - `len <= 36028_797018_963967` when `target_pointer_width = "64"`
     ///
@@ -1266,6 +1266,11 @@ impl Encoding {
     /// length), then you must chunk your input, encode each chunk, and concatenate to obtain the
     /// output. The length of each input chunk must be a multiple of [`encode_align`].
     ///
+    /// Note that this function only _may_ panic in those cases. Depending on the implementation,
+    /// it may instead return the correct value. In other words, the function is guaranteed not to
+    /// panic when `len` is within those limits, but it is not guaranteed to panic when `len`
+    /// exceeds them.
+    ///
     /// [`encode_align`]: struct.Encoding.html#method.encode_align
     /// [`encode_mut`]: struct.Encoding.html#method.encode_mut
     #[must_use]
@@ -1461,7 +1466,7 @@ impl Encoding {
     ///
     /// # Panics
     ///
-    /// Panics if `len` is greater than `usize::MAX / 8`:
+    /// May panic unless `len` is at most `usize::MAX / 8`:
     /// - `len <= 536_870_911` when `target_pointer_width = "32"`
     /// - `len <= 2_305843_009213_693951` when `target_pointer_width = "64"`
     ///
@@ -1475,6 +1480,11 @@ impl Encoding {
     /// - `Err(DecodePartial { read, written, .. })` means that `read` bytes have been read and
     ///   `written` bytes written (the error can be ignored)
     ///
+    /// Note that this function only _may_ panic in those cases. Depending on the implementation,
+    /// it may instead return the correct value. In other words, the function is guaranteed not to
+    /// panic when `len` is within those limits, but it is not guaranteed to panic when `len`
+    /// exceeds them.
+    ///
     /// # Errors
     ///
     /// Returns an error if `len` is invalid. The error kind is [`Length`] and the [position] is the