From 75d428bcfaa91f696793a69df0e6cd11d5769d6a Mon Sep 17 00:00:00 2001 From: catskhi Date: Sun, 3 May 2026 23:32:39 -0300 Subject: [PATCH 1/2] feat: add RG (Registro Geral) module for Brazilian identity validation - Implemented RG parsing, validation, formatting, and generation in core and wasm bindings. - Added support for RG check digit computation and symbol removal. - Created tests for RG functionality, including strict and lenient validation cases. - Updated parity generation tool to include RG cases. --- bindings/ffi-c/src/lib.rs | 1 + bindings/ffi-c/src/rg.rs | 212 +++++++++++ bindings/nodejs/src/lib.rs | 1 + bindings/nodejs/src/rg.rs | 102 +++++ bindings/python/src/lib.rs | 2 + bindings/python/src/rg.rs | 100 +++++ bindings/wasm/src/lib.rs | 1 + bindings/wasm/src/rg.rs | 84 +++++ core/src/lib.rs | 1 + core/src/rg.rs | 697 +++++++++++++++++++++++++++++++++++ core/tests/parity.rs | 125 ++++++- tools/parity_gen/src/main.rs | 118 +++++- 12 files changed, 1442 insertions(+), 2 deletions(-) create mode 100644 bindings/ffi-c/src/rg.rs create mode 100644 bindings/nodejs/src/rg.rs create mode 100644 bindings/python/src/rg.rs create mode 100644 bindings/wasm/src/rg.rs create mode 100644 core/src/rg.rs diff --git a/bindings/ffi-c/src/lib.rs b/bindings/ffi-c/src/lib.rs index d799aec..969ee23 100644 --- a/bindings/ffi-c/src/lib.rs +++ b/bindings/ffi-c/src/lib.rs @@ -2,6 +2,7 @@ mod cep; mod cnpj; mod cpf; mod municipio; +mod rg; pub(crate) mod uf; use core::ffi::{CStr, c_char}; diff --git a/bindings/ffi-c/src/rg.rs b/bindings/ffi-c/src/rg.rs new file mode 100644 index 0000000..4847f64 --- /dev/null +++ b/bindings/ffi-c/src/rg.rs @@ -0,0 +1,212 @@ +use core::ffi::c_char; +use core::ptr; + +use stdbr_core::rg::{self, RgError}; + +use crate::uf::StdbrState; +use crate::{cstr_to_str, to_c_string}; + +/// Error codes for RG validation. `STDBR_RG_ERROR_OK` (0) = success. 
+#[repr(u8)]
+pub enum StdbrRgError {
+    Ok = 0,
+    InvalidLength = 1,
+    InvalidCharacter = 2,
+    InvalidFormat = 3,
+    InvalidCheckDigit = 4,
+    UnsupportedUfForGeneration = 5,
+}
+
+impl StdbrRgError {
+    fn from_core(e: &RgError) -> Self { // one-to-one mapping of the core error onto its C code
+        match e {
+            RgError::InvalidLength => Self::InvalidLength,
+            RgError::InvalidCharacter => Self::InvalidCharacter,
+            RgError::InvalidFormat => Self::InvalidFormat,
+            RgError::InvalidCheckDigit => Self::InvalidCheckDigit,
+            RgError::UnsupportedUfForGeneration => Self::UnsupportedUfForGeneration,
+        }
+    }
+}
+
+pub struct StdbrRg(rg::Rg); // opaque handle wrapping a validated core `Rg`
+
+/// Parses an RG string for the given UF. Returns `NULL` on failure; `*err` gets the code.
+///
+/// # Safety
+/// `raw` must be a valid null-terminated UTF-8 string; `err` must be NULL or writable.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn stdbr_rg_parse(
+    raw: *const c_char,
+    uf: StdbrState,
+    err: *mut StdbrRgError,
+) -> *mut StdbrRg {
+    let Some(s) = (unsafe { cstr_to_str(raw) }) else {
+        if !err.is_null() {
+            // NOTE(review): input that `cstr_to_str` rejects is reported as a length error.
+            unsafe { *err = StdbrRgError::InvalidLength };
+        }
+        return ptr::null_mut();
+    };
+
+    match rg::parse_strict(s, uf.into_core()) {
+        Result::Ok(r) => {
+            if !err.is_null() {
+                unsafe { *err = StdbrRgError::Ok };
+            }
+            Box::into_raw(Box::new(StdbrRg(r))) // ownership moves to the caller
+        }
+        Err(e) => {
+            if !err.is_null() {
+                unsafe { *err = StdbrRgError::from_core(&e) };
+            }
+            ptr::null_mut()
+        }
+    }
+}
+
+/// Generates a random valid RG for `uf` (SP only), else `NULL`. Safety: `err` is NULL or writable.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn stdbr_rg_create_for_uf(
+    uf: StdbrState,
+    err: *mut StdbrRgError,
+) -> *mut StdbrRg {
+    match rg::generate_for_uf(uf.into_core()) {
+        Result::Ok(r) => {
+            if !err.is_null() {
+                unsafe { *err = StdbrRgError::Ok };
+            }
+            Box::into_raw(Box::new(StdbrRg(r))) // ownership moves to the caller
+        }
+        Err(e) => {
+            if !err.is_null() {
+                unsafe { *err = StdbrRgError::from_core(&e) };
+            }
+            ptr::null_mut()
+        }
+    }
+}
+
+/// Destroys an RG handle. `NULL`-safe.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn stdbr_rg_destroy(rg: *mut StdbrRg) {
+    if !rg.is_null() {
+        unsafe { drop(Box::from_raw(rg)) }; // rebuilds the Box so the handle is freed exactly once
+    }
+}
+
+/// Unformatted body, or `NULL` for a `NULL` handle. Caller frees with `stdbr_free`.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn stdbr_rg_as_str(rg: *const StdbrRg) -> *mut c_char {
+    if rg.is_null() {
+        return ptr::null_mut();
+    }
+    to_c_string(unsafe { &*rg }.0.as_str().into())
+}
+
+/// Formatted per the UF mask, or `NULL` for a `NULL` handle. Caller frees with `stdbr_free`.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn stdbr_rg_formatted(rg: *const StdbrRg) -> *mut c_char {
+    if rg.is_null() {
+        return ptr::null_mut();
+    }
+    to_c_string(unsafe { &*rg }.0.formatted())
+}
+
+/// Returns the issuing UF. A `NULL` handle yields `AC`, indistinguishable from a real AC.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn stdbr_rg_uf(rg: *const StdbrRg) -> StdbrState {
+    if rg.is_null() {
+        return StdbrState::AC;
+    }
+    StdbrState::from_core(unsafe { &*rg }.0.uf())
+}
+
+/// Writes the check digit to `*out` (10 stands for `'X'`). Returns `true` if a digit exists
+/// (SP only). Returns `false` for other UFs or when `rg`/`out` is `NULL`; `*out` is untouched.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn stdbr_rg_check_digit(rg: *const StdbrRg, out: *mut u8) -> bool {
+    if rg.is_null() || out.is_null() {
+        return false;
+    }
+    match unsafe { &*rg }.0.check_digit() {
+        Some(d) => {
+            unsafe { *out = d };
+            true
+        }
+        None => false,
+    }
+}
+
+/// Lenient validation strips separators before checking. Unreadable input is invalid.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn stdbr_rg_is_valid(raw: *const c_char, uf: StdbrState) -> bool {
+    let Some(s) = (unsafe { cstr_to_str(raw) }) else {
+        return false;
+    };
+    rg::is_valid(s, uf.into_core())
+}
+
+/// Strict validation. Returns a `StdbrRgError` code.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn stdbr_rg_is_valid_strict(
+    raw: *const c_char,
+    uf: StdbrState,
+) -> StdbrRgError {
+    let Some(s) = (unsafe { cstr_to_str(raw) }) else {
+        return StdbrRgError::InvalidLength; // same code `stdbr_rg_parse` uses for unreadable input
+    };
+    match rg::is_valid_strict(s, uf.into_core()) {
+        Result::Ok(()) => StdbrRgError::Ok,
+        Err(ref e) => StdbrRgError::from_core(e),
+    }
+}
+
+/// Formats per the UF mask. Returns `NULL` if length is wrong. Caller frees with `stdbr_free`.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn stdbr_rg_format(raw: *const c_char, uf: StdbrState) -> *mut c_char {
+    let Some(s) = (unsafe { cstr_to_str(raw) }) else {
+        return ptr::null_mut();
+    };
+    rg::format_rg(s, uf.into_core()).map_or(ptr::null_mut(), to_c_string)
+}
+
+/// Strips separators per UF rules (`NULL` if `raw` is unreadable). Caller frees with `stdbr_free`.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn stdbr_rg_remove_symbols(
+    raw: *const c_char,
+    uf: StdbrState,
+) -> *mut c_char {
+    let Some(s) = (unsafe { cstr_to_str(raw) }) else {
+        return ptr::null_mut();
+    };
+    to_c_string(rg::remove_symbols(s, uf.into_core()))
+}
+
+/// SP-only: compute the check digit of an 8-digit base. Returns `true` and writes to `*out`
+/// (10 = `'X'` terminator). Returns `false` for non-SP, wrong length, or a `NULL` `out`.
+#[unsafe(no_mangle)]
+pub unsafe extern "C" fn stdbr_rg_compute_check_digit(
+    base: *const c_char,
+    uf: StdbrState,
+    out: *mut u8,
+) -> bool {
+    if out.is_null() {
+        return false;
+    }
+    let Some(s) = (unsafe { cstr_to_str(base) }) else {
+        return false;
+    };
+    match rg::compute_check_digit(s, uf.into_core()) {
+        Some(d) => {
+            unsafe { *out = d };
+            true
+        }
+        None => false,
+    }
+}
+
+/// Random valid SP RG. Caller frees with `stdbr_free`.
+#[unsafe(no_mangle)]
+pub extern "C" fn stdbr_rg_generate_sp() -> *mut c_char {
+    to_c_string(rg::generate_sp().as_str().into())
+}
diff --git a/bindings/nodejs/src/lib.rs b/bindings/nodejs/src/lib.rs
index 71461b2..61e2cc4 100644
--- a/bindings/nodejs/src/lib.rs
+++ b/bindings/nodejs/src/lib.rs
@@ -6,4 +6,5 @@ mod cep;
 mod cnpj;
 mod cpf;
 mod municipio;
+mod rg;
 pub(crate) mod uf;
diff --git a/bindings/nodejs/src/rg.rs b/bindings/nodejs/src/rg.rs
new file mode 100644
index 0000000..1534254
--- /dev/null
+++ b/bindings/nodejs/src/rg.rs
@@ -0,0 +1,102 @@
+use napi::bindgen_prelude::*;
+use napi_derive::napi;
+
+use stdbr_core::rg as core_rg;
+
+use crate::uf::State;
+
+fn rg_err(e: &core_rg::RgError) -> Error {
+    Error::new(Status::InvalidArg, e.to_string())
+}
+
+#[napi]
+pub struct Rg {
+    inner: core_rg::Rg,
+}
+
+#[napi]
+impl Rg {
+    /// Parse an RG string for the given UF (accepts the canonical mask or
+    /// unformatted body). Any other shape is rejected with an `InvalidArg` error.
+    #[napi(factory)]
+    #[allow(clippy::needless_pass_by_value)]
+    pub fn parse(raw: String, uf: State) -> Result<Rg> {
+        let inner = core_rg::parse_strict(&raw, uf.into()).map_err(|e| rg_err(&e))?;
+        Ok(Rg { inner })
+    }
+
+    /// Generate a random valid RG. Currently SP only; other UFs return an error.
+    #[napi(factory)]
+    pub fn generate_for_uf(uf: State) -> Result<Rg> {
+        core_rg::generate_for_uf(uf.into())
+            .map(|inner| Rg { inner })
+            .map_err(|e| rg_err(&e))
+    }
+
+    /// Unformatted body (digits, with optional trailing 'X' for SP).
+    #[napi]
+    pub fn as_str(&self) -> String {
+        self.inner.as_str().to_owned()
+    }
+
+    /// Formatted per the UF mask (or raw body if no mask is defined).
+    #[napi]
+    pub fn formatted(&self) -> String {
+        self.inner.formatted()
+    }
+
+    /// Issuing UF.
+    #[napi(getter)]
+    pub fn uf(&self) -> State {
+        self.inner.uf().into()
+    }
+
+    /// Check digit: `0..=9` for digits, `10` for the SP `'X'` terminator,
+    /// `null` for UFs without a verified algorithm.
+    #[napi(getter)]
+    pub fn check_digit(&self) -> Option<u8> {
+        self.inner.check_digit()
+    }
+}
+
+/// Lenient RG validation (strips symbols first).
+#[napi]
+#[allow(clippy::needless_pass_by_value)]
+pub fn rg_is_valid(rg: String, uf: State) -> bool {
+    core_rg::is_valid(&rg, uf.into())
+}
+
+/// Strict RG validation (canonical mask or unformatted body only).
+#[napi]
+#[allow(clippy::needless_pass_by_value)]
+pub fn rg_is_valid_strict(rg: String, uf: State) -> Result<()> {
+    core_rg::is_valid_strict(&rg, uf.into()).map_err(|e| rg_err(&e))
+}
+
+/// Format an RG with the per-UF mask. Returns `null` if length is wrong.
+#[napi]
+#[allow(clippy::needless_pass_by_value)]
+pub fn rg_format(rg: String, uf: State) -> Option<String> {
+    core_rg::format_rg(&rg, uf.into())
+}
+
+/// Strip everything but digits. For SP, `X`/`x` is kept too, normalized to 'X'.
+#[napi]
+#[allow(clippy::needless_pass_by_value)]
+pub fn rg_remove_symbols(rg: String, uf: State) -> String {
+    core_rg::remove_symbols(&rg, uf.into())
+}
+
+/// SP-only: compute the check digit for an 8-digit base. Returns 10 for the
+/// `'X'` terminator. `null` for non-SP UFs or wrong length.
+#[napi]
+#[allow(clippy::needless_pass_by_value)]
+pub fn rg_compute_check_digit(base: String, uf: State) -> Option<u8> {
+    core_rg::compute_check_digit(&base, uf.into())
+}
+
+/// Generate a random valid SP RG (unformatted body).
+#[napi]
+pub fn rg_generate_sp() -> String {
+    core_rg::generate_sp().as_str().to_owned()
+}
diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs
index a9713ea..594af86 100644
--- a/bindings/python/src/lib.rs
+++ b/bindings/python/src/lib.rs
@@ -2,6 +2,7 @@ mod cep;
 mod cnpj;
 mod cpf;
 mod municipio;
+mod rg;
 pub(crate) mod uf;
 
 use pyo3::prelude::*;
@@ -12,5 +13,6 @@ fn stdbr(m: &Bound<'_, PyModule>) -> PyResult<()> {
     cpf::register(m)?;
     cnpj::register(m)?;
     cep::register(m)?;
+    rg::register(m)?;
     municipio::register(m)
 }
diff --git a/bindings/python/src/rg.rs b/bindings/python/src/rg.rs
new file mode 100644
index 0000000..ab728f1
--- /dev/null
+++ b/bindings/python/src/rg.rs
@@ -0,0 +1,100 @@
+use pyo3::prelude::*;
+use stdbr_core::rg as core_rg;
+
+use crate::uf::State;
+
+fn rg_err(e: &core_rg::RgError) -> PyErr {
+    pyo3::exceptions::PyValueError::new_err(e.to_string())
+}
+
+#[pyclass]
+pub struct Rg {
+    inner: core_rg::Rg,
+}
+
+#[pymethods]
+impl Rg {
+    /// Parse an RG string for the given UF (strict); raises ValueError when rejected.
+    #[staticmethod]
+    fn parse(raw: &str, uf: State) -> PyResult<Self> {
+        let inner = core_rg::parse_strict(raw, uf.into()).map_err(|e| rg_err(&e))?;
+        Ok(Self { inner })
+    }
+
+    /// Generate a random valid RG. Currently SP only; other UFs raise ValueError.
+    #[staticmethod]
+    fn generate_for_uf(uf: State) -> PyResult<Self> {
+        core_rg::generate_for_uf(uf.into())
+            .map(|inner| Self { inner })
+            .map_err(|e| rg_err(&e))
+    }
+
+    /// Unformatted body.
+    fn as_str(&self) -> &str {
+        self.inner.as_str()
+    }
+
+    /// Formatted per the UF mask.
+    fn formatted(&self) -> String {
+        self.inner.formatted()
+    }
+
+    #[getter]
+    fn uf(&self) -> State {
+        self.inner.uf().into()
+    }
+
+    #[getter]
+    fn check_digit(&self) -> Option<u8> { // 0..=9, 10 for the SP 'X', None without an algorithm
+        self.inner.check_digit()
+    }
+
+    fn __str__(&self) -> String {
+        self.inner.formatted()
+    }
+
+    fn __repr__(&self) -> String {
+        format!("Rg('{}', {})", self.inner.formatted(), self.inner.uf().abbreviation())
+    }
+}
+
+#[pyfunction]
+fn rg_is_valid(rg: &str, uf: State) -> bool {
+    core_rg::is_valid(rg, uf.into())
+}
+
+#[pyfunction]
+fn rg_is_valid_strict(rg: &str, uf: State) -> PyResult<()> {
+    core_rg::is_valid_strict(rg, uf.into()).map_err(|e| rg_err(&e))
+}
+
+#[pyfunction]
+fn rg_format(rg: &str, uf: State) -> Option<String> {
+    core_rg::format_rg(rg, uf.into())
+}
+
+#[pyfunction]
+fn rg_remove_symbols(rg: &str, uf: State) -> String {
+    core_rg::remove_symbols(rg, uf.into())
+}
+
+#[pyfunction]
+fn rg_compute_check_digit(base: &str, uf: State) -> Option<u8> {
+    core_rg::compute_check_digit(base, uf.into())
+}
+
+#[pyfunction]
+fn rg_generate_sp() -> String {
+    core_rg::generate_sp().as_str().to_owned()
+}
+
+pub fn register(m: &Bound<'_, PyModule>) -> PyResult<()> {
+    m.add_class::<Rg>()?;
+    m.add_function(wrap_pyfunction!(rg_is_valid, m)?)?;
+    m.add_function(wrap_pyfunction!(rg_is_valid_strict, m)?)?;
+    m.add_function(wrap_pyfunction!(rg_format, m)?)?;
+    m.add_function(wrap_pyfunction!(rg_remove_symbols, m)?)?;
+    m.add_function(wrap_pyfunction!(rg_compute_check_digit, m)?)?;
+    m.add_function(wrap_pyfunction!(rg_generate_sp, m)?)?;
+    Ok(())
+}
diff --git a/bindings/wasm/src/lib.rs b/bindings/wasm/src/lib.rs
index c813dda..87239e5 100644
--- a/bindings/wasm/src/lib.rs
+++ b/bindings/wasm/src/lib.rs
@@ -2,4 +2,5 @@ mod cep;
 mod cnpj;
 mod cpf;
 mod municipio;
+mod rg;
 pub(crate) mod uf;
diff --git a/bindings/wasm/src/rg.rs b/bindings/wasm/src/rg.rs
new file mode 100644
index 0000000..1835a56
--- /dev/null
+++ b/bindings/wasm/src/rg.rs
@@ -0,0 +1,84 @@
+use stdbr_core::rg as core_rg;
+use
wasm_bindgen::prelude::*; + +use crate::uf::State; + +fn rg_err(e: &core_rg::RgError) -> JsError { + JsError::new(&e.to_string()) +} + +#[wasm_bindgen] +pub struct Rg { + inner: core_rg::Rg, +} + +#[wasm_bindgen] +impl Rg { + /// Parse an RG string for the given UF. + #[wasm_bindgen] + pub fn parse(raw: &str, uf: State) -> Result { + let inner = core_rg::parse_strict(raw, uf.into()).map_err(|e| rg_err(&e))?; + Ok(Self { inner }) + } + + /// Generate a random valid RG. Currently SP only. + #[wasm_bindgen(js_name = "generateForUf")] + pub fn generate_for_uf(uf: State) -> Result { + core_rg::generate_for_uf(uf.into()) + .map(|inner| Self { inner }) + .map_err(|e| rg_err(&e)) + } + + /// Unformatted body. + #[wasm_bindgen(js_name = "asStr")] + pub fn as_str(&self) -> String { + self.inner.as_str().to_owned() + } + + /// Formatted per the UF mask. + #[wasm_bindgen] + pub fn formatted(&self) -> String { + self.inner.formatted() + } + + #[wasm_bindgen(getter)] + pub fn uf(&self) -> State { + self.inner.uf().into() + } + + /// Check digit (`0..=9`, `10` for SP `'X'`, `undefined` otherwise). 
+    #[wasm_bindgen(getter, js_name = "checkDigit")]
+    pub fn check_digit(&self) -> Option<u8> {
+        self.inner.check_digit()
+    }
+}
+
+#[wasm_bindgen(js_name = "rgIsValid")]
+pub fn rg_is_valid(rg: &str, uf: State) -> bool {
+    core_rg::is_valid(rg, uf.into())
+}
+
+#[wasm_bindgen(js_name = "rgIsValidStrict")]
+pub fn rg_is_valid_strict(rg: &str, uf: State) -> Result<(), JsError> {
+    core_rg::is_valid_strict(rg, uf.into()).map_err(|e| rg_err(&e))
+}
+
+#[wasm_bindgen(js_name = "rgFormat")]
+pub fn rg_format(rg: &str, uf: State) -> Option<String> {
+    core_rg::format_rg(rg, uf.into())
+}
+
+#[wasm_bindgen(js_name = "rgRemoveSymbols")]
+pub fn rg_remove_symbols(rg: &str, uf: State) -> String {
+    core_rg::remove_symbols(rg, uf.into())
+}
+
+#[wasm_bindgen(js_name = "rgComputeCheckDigit")]
+pub fn rg_compute_check_digit(base: &str, uf: State) -> Option<u8> {
+    core_rg::compute_check_digit(base, uf.into())
+}
+
+#[wasm_bindgen(js_name = "rgGenerateSp")]
+pub fn rg_generate_sp() -> String {
+    core_rg::generate_sp().as_str().to_owned()
+}
diff --git a/core/src/lib.rs b/core/src/lib.rs
index 84be9e7..4f9df8b 100644
--- a/core/src/lib.rs
+++ b/core/src/lib.rs
@@ -13,4 +13,5 @@ pub mod cep;
 pub mod cnpj;
 pub mod cpf;
 pub mod municipio;
+pub mod rg;
 pub mod uf;
diff --git a/core/src/rg.rs b/core/src/rg.rs
new file mode 100644
index 0000000..67e7641
--- /dev/null
+++ b/core/src/rg.rs
@@ -0,0 +1,697 @@
+//! RG (Registro Geral) - per-UF identity card validation, formatting and generation.
+//!
+//! RG is issued independently by each Brazilian state and there is no single
+//! national algorithm. Only **SP** has a widely-adopted, documented mod-11
+//! check digit algorithm — implemented here in full. For every other UF this
+//! module performs **structural validation only** (length range + digit
+//! charset) and treats the input as opaque digits.
+//!
+//! # SP algorithm
+//!
+//! 8-digit base `d1..d8`. Weights `2,3,4,5,6,7,8,9` applied left-to-right.
+//! `sum = Σ d_i * (i+1)` for `i = 1..=8`.
Check digit = `sum mod 11`; remainder
+//! `10` is rendered as the ASCII character `'X'`. Canonical formatted form is
+//! `XX.XXX.XXX-X`.
+//!
+//! # Other UFs
+//!
+//! `is_valid`/`is_valid_strict` enforce only length (5..=14) and an all-digit
+//! charset. Generation returns `RgError::UnsupportedUfForGeneration`. Promote a
+//! UF from structural to full validation by extending `uf_spec` once an
+//! authoritative algorithm is verified.
+
+use alloc::string::String;
+use alloc::vec::Vec;
+use core::fmt;
+
+use crate::rand::{simple_seed, xorshift64};
+use crate::uf::State;
+
+const RG_MAX_LEN: usize = 14; // capacity of the inline byte buffer inside `Rg`
+const SP_BODY_LEN: u8 = 9; // 8 base digits + 1 check character
+const SP_FORMATTED_LEN: u8 = 12; // byte length of `XX.XXX.XXX-X`
+const SP_BASE_LEN: usize = 8; // digits covered by the weighted sum
+const SP_WEIGHTS: [u32; SP_BASE_LEN] = [2, 3, 4, 5, 6, 7, 8, 9];
+const SP_FORMATTED_DIGIT_POS: [usize; 9] = [0, 1, 3, 4, 5, 7, 8, 9, 11]; // non-separator offsets
+
+const STRUCTURAL_MIN_LEN: u8 = 5; // shortest body accepted for UFs without a fixed length
+const STRUCTURAL_MAX_LEN: u8 = 14; // longest body accepted for UFs without a fixed length
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum RgError {
+    InvalidLength,
+    InvalidCharacter,
+    InvalidFormat,
+    InvalidCheckDigit,
+    UnsupportedUfForGeneration,
+}
+
+impl fmt::Display for RgError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(match self {
+            Self::InvalidLength => "RG length is outside the accepted range for this UF",
+            Self::InvalidCharacter => "RG contains invalid characters",
+            Self::InvalidFormat => "RG format does not match the canonical mask for this UF",
+            Self::InvalidCheckDigit => "RG check digit is invalid",
+            Self::UnsupportedUfForGeneration => {
+                "RG generation is not supported for this UF (no verified algorithm)"
+            }
+        })
+    }
+}
+
+/// Per-UF formatting and validation spec.
+#[derive(Clone, Copy)] +struct UfSpec { + body_len: Option, + has_check_digit: bool, + allow_x_terminator: bool, + separators: &'static [(u8, char)], + formatted_len: Option, +} + +const STRUCTURAL_DEFAULT: UfSpec = UfSpec { + body_len: None, + has_check_digit: false, + allow_x_terminator: false, + separators: &[], + formatted_len: None, +}; + +const SP_SPEC: UfSpec = UfSpec { + body_len: Some(SP_BODY_LEN), + has_check_digit: true, + allow_x_terminator: true, + separators: &[(2, '.'), (5, '.'), (8, '-')], + formatted_len: Some(SP_FORMATTED_LEN), +}; + +const fn uf_spec(uf: State) -> UfSpec { + match uf { + State::SP => SP_SPEC, + _ => STRUCTURAL_DEFAULT, + } +} + +/// A validated RG stored as ASCII bytes (digits, plus optional trailing `'X'` +/// for SP), tagged with its issuing UF. +#[derive(Clone, Copy, PartialEq, Eq, Hash)] +pub struct Rg { + bytes: [u8; RG_MAX_LEN], + len: u8, + uf: State, +} + +impl Rg { + /// Unformatted body as `&str` (digits, optionally trailing `'X'`). + pub fn as_str(&self) -> &str { + // SAFETY: constructors guarantee ASCII digits/`X` only. + unsafe { core::str::from_utf8_unchecked(&self.bytes[..self.len as usize]) } + } + + /// Issuing state. + pub fn uf(&self) -> State { + self.uf + } + + /// Formatted per the UF mask. For UFs without a known mask, returns the + /// unformatted body. + pub fn formatted(&self) -> String { + format_with_spec(self.as_str(), uf_spec(self.uf)) + .unwrap_or_else(|| self.as_str().into()) + } + + /// Check digit when the UF has a verified algorithm. `Some(0..=9)` for + /// digits, `Some(10)` for the SP `'X'` terminator, `None` otherwise. 
+    pub fn check_digit(&self) -> Option<u8> {
+        let spec = uf_spec(self.uf);
+        if !spec.has_check_digit {
+            return None;
+        }
+        let last = *self.bytes[..usize::from(self.len)].last()?; // `None`, not a panic, if empty
+        if last == b'X' {
+            Some(10)
+        } else {
+            Some(last - b'0')
+        }
+    }
+}
+
+impl AsRef<str> for Rg {
+    fn as_ref(&self) -> &str {
+        self.as_str()
+    }
+}
+
+impl fmt::Display for Rg {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(&self.formatted())
+    }
+}
+
+impl fmt::Debug for Rg {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "Rg({}, {})", self.uf.abbreviation(), self.formatted())
+    }
+}
+
+/// Drop every character that is not an ASCII digit. For SP, `X`/`x` is also
+/// kept wherever it appears (normalized to `'X'`); whether it is a valid
+/// terminator is decided later by the charset check.
+pub fn remove_symbols(rg: &str, uf: State) -> String {
+    let spec = uf_spec(uf);
+    let mut out = String::with_capacity(rg.len());
+    for c in rg.chars() {
+        if c.is_ascii_digit() {
+            out.push(c);
+        } else if spec.allow_x_terminator && (c == 'X' || c == 'x') {
+            out.push('X');
+        }
+    }
+    out
+}
+
+/// Lenient validation - strips symbols, then checks length, charset and (for
+/// SP) the check digit.
+pub fn is_valid(rg: &str, uf: State) -> bool {
+    let spec = uf_spec(uf);
+    let raw = remove_symbols(rg, uf);
+    if !validate_body_length(&raw, spec) {
+        return false;
+    }
+    if !validate_charset(&raw, spec) {
+        return false;
+    }
+    if spec.has_check_digit {
+        return sp_check_digit_ok(&raw);
+    }
+    true
+}
+
+/// Strict validation - input must be either the canonical formatted mask or
+/// the unformatted body. No leading/trailing whitespace, no extra symbols.
+pub fn is_valid_strict(rg: &str, uf: State) -> Result<(), RgError> {
+    parse_strict(rg, uf).map(|_| ())
+}
+
+/// Insert per-UF separators. Returns `None` if the input length doesn't match
+/// the UF body length (or, for variable-length UFs, the structural range).
+pub fn format_rg(rg: &str, uf: State) -> Option<String> {
+    let spec = uf_spec(uf);
+    let raw = remove_symbols(rg, uf);
+    if !validate_body_length(&raw, spec) {
+        return None;
+    }
+    Some(format_with_spec(&raw, spec).unwrap_or(raw)) // no mask for this UF: return the stripped body
+}
+
+/// SP-only: compute the check digit for an 8-digit base. Returns `Some(0..=9)`
+/// or `Some(10)` (caller renders as `'X'`); `None` for non-SP or wrong length.
+pub fn compute_check_digit(base: &str, uf: State) -> Option<u8> {
+    if !matches!(uf, State::SP) {
+        return None;
+    }
+    let raw: Vec<u8> = base.bytes().filter(u8::is_ascii_digit).map(|b| b - b'0').collect();
+    if raw.len() != SP_BASE_LEN {
+        return None;
+    }
+    Some(sp_check_digit(&raw)) // digit values (0..=9), as `sp_check_digit` expects
+}
+
+/// Parse `raw` (canonical mask or bare body, nothing else) into a validated [`Rg`].
+pub fn parse_strict(raw: &str, uf: State) -> Result<Rg, RgError> {
+    let spec = uf_spec(uf);
+    let bytes = raw.as_bytes();
+
+    let body = if matches!(spec.formatted_len, Some(l) if bytes.len() == usize::from(l)) {
+        // Exactly as long as the masked form, so it must follow the mask.
+        parse_formatted_sp(bytes)?
+    } else if let Some(body_len) = spec.body_len {
+        if bytes.len() != body_len as usize {
+            return Err(RgError::InvalidLength);
+        }
+        parse_unformatted(bytes, spec)?
+    } else {
+        if bytes.len() < STRUCTURAL_MIN_LEN as usize
+            || bytes.len() > STRUCTURAL_MAX_LEN as usize
+        {
+            return Err(RgError::InvalidLength);
+        }
+        parse_unformatted(bytes, spec)?
+    };
+
+    if spec.has_check_digit && !sp_check_digit_ok_bytes(&body) {
+        return Err(RgError::InvalidCheckDigit);
+    }
+
+    Ok(Rg::from_body(&body, uf))
+}
+
+/// Generate a random valid RG. SP only; other UFs return
+/// `RgError::UnsupportedUfForGeneration`.
+pub fn generate_for_uf(uf: State) -> Result<Rg, RgError> {
+    if !matches!(uf, State::SP) {
+        return Err(RgError::UnsupportedUfForGeneration);
+    }
+    Ok(generate_sp())
+}
+
+/// Convenience: generate a random valid SP RG.
+pub fn generate_sp() -> Rg { + let mut seed = simple_seed(); + let mut digits = [0u8; SP_BASE_LEN]; + for d in &mut digits { + seed = xorshift64(seed); + *d = (seed % 10) as u8; + } + let mut body = [0u8; 9]; + for (i, &d) in digits.iter().enumerate() { + body[i] = d + b'0'; + } + let dv = sp_check_digit(&digits); + body[8] = if dv == 10 { b'X' } else { b'0' + dv }; + Rg::from_body(&body, State::SP) +} + +// ─────────────────────────── helpers ─────────────────────────── + +impl Rg { + fn from_body(body: &[u8], uf: State) -> Self { + let mut bytes = [0u8; RG_MAX_LEN]; + bytes[..body.len()].copy_from_slice(body); + Self { + bytes, + len: body.len() as u8, + uf, + } + } +} + +fn validate_body_length(raw: &str, spec: UfSpec) -> bool { + match spec.body_len { + Some(n) => raw.len() == n as usize, + None => { + let n = raw.len(); + n >= STRUCTURAL_MIN_LEN as usize && n <= STRUCTURAL_MAX_LEN as usize + } + } +} + +fn validate_charset(raw: &str, spec: UfSpec) -> bool { + let bytes = raw.as_bytes(); + if bytes.is_empty() { + return false; + } + if spec.allow_x_terminator { + let (last_idx, rest) = (bytes.len() - 1, &bytes[..bytes.len() - 1]); + if !rest.iter().all(u8::is_ascii_digit) { + return false; + } + let last = bytes[last_idx]; + last.is_ascii_digit() || last == b'X' + } else { + bytes.iter().all(u8::is_ascii_digit) + } +} + +fn parse_unformatted(bytes: &[u8], spec: UfSpec) -> Result, RgError> { + if !validate_charset( + // SAFETY: caller already verified ASCII boundaries via spec body_len/range checks. + unsafe { core::str::from_utf8_unchecked(bytes) }, + spec, + ) { + return Err(RgError::InvalidCharacter); + } + Ok(bytes.to_vec()) +} + +fn parse_formatted_sp(bytes: &[u8]) -> Result, RgError> { + // Format mask: `XX.XXX.XXX-X` — separators at offsets 2, 6, 10. + if bytes[2] != b'.' || bytes[6] != b'.' 
|| bytes[10] != b'-' {
+        return Err(RgError::InvalidFormat);
+    }
+    let mut out = Vec::with_capacity(SP_BODY_LEN as usize);
+    for (i, &idx) in SP_FORMATTED_DIGIT_POS.iter().enumerate() {
+        let b = bytes[idx];
+        let last = i == SP_FORMATTED_DIGIT_POS.len() - 1;
+        if b.is_ascii_digit() || (last && b == b'X') {
+            out.push(b);
+        } else if last && b == b'x' {
+            out.push(b'X'); // the masked form also accepts a lowercase terminator
+        } else {
+            return Err(RgError::InvalidCharacter);
+        }
+    }
+    Ok(out)
+}
+
+fn format_with_spec(body: &str, spec: UfSpec) -> Option<String> {
+    if spec.separators.is_empty() {
+        return None;
+    }
+    let body_len = spec.body_len? as usize;
+    if body.len() != body_len {
+        return None;
+    }
+    let total = body_len + spec.separators.len();
+    let mut out = String::with_capacity(total);
+    let mut sep_iter = spec.separators.iter().peekable();
+    for (i, ch) in body.chars().enumerate() {
+        while let Some(&&(pos, sep_ch)) = sep_iter.peek() {
+            if pos as usize == i && i != 0 {
+                out.push(sep_ch); // emitted before the character at body index `pos`
+                sep_iter.next();
+            } else {
+                break;
+            }
+        }
+        out.push(ch);
+    }
+    Some(out)
+}
+
+fn sp_check_digit(digits: &[u8]) -> u8 {
+    let sum: u32 = digits
+        .iter()
+        .zip(SP_WEIGHTS.iter())
+        .map(|(&d, &w)| u32::from(d) * w)
+        .sum();
+    (sum % 11) as u8 // 0..=10; callers render 10 as 'X'
+}
+
+fn sp_check_digit_ok(body: &str) -> bool {
+    sp_check_digit_ok_bytes(body.as_bytes())
+}
+
+fn sp_check_digit_ok_bytes(bytes: &[u8]) -> bool {
+    if bytes.len() != SP_BODY_LEN as usize {
+        return false;
+    }
+    let mut digits = [0u8; SP_BASE_LEN];
+    for (i, &b) in bytes[..SP_BASE_LEN].iter().enumerate() {
+        if !b.is_ascii_digit() {
+            return false;
+        }
+        digits[i] = b - b'0';
+    }
+    let expected = sp_check_digit(&digits);
+    let last = bytes[SP_BASE_LEN];
+    let actual = if last == b'X' {
+        10
+    } else if last.is_ascii_digit() {
+        last - b'0'
+    } else {
+        return false;
+    };
+    actual == expected
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use alloc::string::ToString;
+
+    fn sp_rg(base: [u8; SP_BASE_LEN]) -> Rg {
+        let dv = sp_check_digit(&base);
+        let mut body = [0u8; 9];
+        for
(i, &d) in base.iter().enumerate() {
+            body[i] = d + b'0';
+        }
+        body[8] = if dv == 10 { b'X' } else { b'0' + dv };
+        Rg::from_body(&body, State::SP)
+    }
+
+    #[test]
+    fn sp_check_digit_known_values() {
+        // 12345678: sum = 2+6+12+20+30+42+56+72 = 240; 240 mod 11 = 9.
+        assert_eq!(sp_check_digit(&[1, 2, 3, 4, 5, 6, 7, 8]), 9);
+        // 44444444: sum = 4*44 = 176; 176 mod 11 = 0.
+        assert_eq!(sp_check_digit(&[4, 4, 4, 4, 4, 4, 4, 4]), 0);
+        // 11111111: sum = 1*44 = 44; 44 mod 11 = 0.
+        assert_eq!(sp_check_digit(&[1, 1, 1, 1, 1, 1, 1, 1]), 0);
+        // A base whose check digit is 10 (rendered as 'X'):
+        // the leading digit carries weight 2, so 5,0,0,0,0,0,0,0 sums to
+        // 5 * 2 = 10, and 10 mod 11 = 10.
+        // Near misses that do NOT yield 10, for reference:
+        //   0,0,0,0,0,0,0,5 -> 45 mod 11 = 1
+        //   0,0,0,0,0,0,0,9 -> 81 mod 11 = 4
+        //   1,0,0,0,0,0,0,1 -> 11 mod 11 = 0
+        //   1,0,0,0,0,0,0,0 ->  2 mod 11 = 2
+        assert_eq!(sp_check_digit(&[5, 0, 0, 0, 0, 0, 0, 0]), 10);
+    }
+
+    #[test]
+    fn is_valid_sp_accepts_valid() {
+        let rg = sp_rg([1, 2, 3, 4, 5, 6, 7, 8]);
+        assert!(is_valid(rg.as_str(), State::SP)); // unformatted body
+        assert!(is_valid(&rg.to_string(), State::SP)); // `Display` yields the masked form
+    }
+
+    #[test]
+    fn is_valid_sp_rejects_wrong_dv() {
+        // 12345678 should give DV=9. Use 8 instead.
+        assert!(!is_valid("123456788", State::SP));
+        assert!(!is_valid("12.345.678-8", State::SP));
+    }
+
+    #[test]
+    fn is_valid_sp_x_terminator() {
+        let rg = sp_rg([5, 0, 0, 0, 0, 0, 0, 0]);
+        assert_eq!(rg.as_str(), "50000000X");
+        assert!(is_valid(rg.as_str(), State::SP));
+        assert!(is_valid("50.000.000-X", State::SP));
+        assert!(is_valid("50.000.000-x", State::SP)); // lenient path normalizes 'x' to 'X'
+    }
+
+    #[test]
+    fn is_valid_sp_lenient_strips_garbage() {
+        let rg = sp_rg([1, 2, 3, 4, 5, 6, 7, 8]);
+        let s = rg.as_str();
+        let garbage = alloc::format!("{}!{}@{}#{}", &s[0..2], &s[2..5], &s[5..8], &s[8..9]);
+        assert!(is_valid(&garbage, State::SP)); // "12!345@678#9" strips down to the valid body
+    }
+
+    #[test]
+    fn is_valid_sp_rejects_wrong_length() {
+        assert!(!is_valid("", State::SP));
+        assert!(!is_valid("12345", State::SP)); // SP requires exactly 9 characters
+        assert!(!is_valid("1234567890", State::SP));
+    }
+
+    #[test]
+    fn structural_other_uf_accepts_any_digits_in_range() {
+        assert!(is_valid("12345", State::RJ)); // 5 digits: lower bound
+        assert!(is_valid("1234567890", State::MG));
+        assert!(is_valid("12345678901234", State::PR)); // 14 digits: upper bound
+    }
+
+    #[test]
+    fn structural_other_uf_rejects_too_short_or_long() {
+        assert!(!is_valid("1234", State::RJ)); // 4 digits
+        assert!(!is_valid("123456789012345", State::RJ)); // 15 digits
+    }
+
+    #[test]
+    fn structural_other_uf_rejects_letters() {
+        // Lenient strips non-digits (X is not allowed for non-SP UFs); a letter-only
+        // input collapses to an empty string and fails the length check.
+        assert!(!is_valid("abcdef", State::RJ));
+        assert!(!is_valid("", State::RJ));
+    }
+
+    #[test]
+    fn strict_sp_accepts_unformatted() {
+        let rg = sp_rg([1, 2, 3, 4, 5, 6, 7, 8]);
+        assert!(is_valid_strict(rg.as_str(), State::SP).is_ok());
+    }
+
+    #[test]
+    fn strict_sp_accepts_formatted() {
+        let rg = sp_rg([1, 2, 3, 4, 5, 6, 7, 8]);
+        assert!(is_valid_strict(&rg.to_string(), State::SP).is_ok());
+    }
+
+    #[test]
+    fn strict_sp_rejects_misplaced_separators() {
+        assert_eq!(
+            is_valid_strict("123.45.678-9", State::SP), // offset 2 holds '3', not '.'
+            Err(RgError::InvalidFormat)
+        );
+        assert_eq!(
+            is_valid_strict("12.345.6789-", State::SP), // offset 10 holds '9', not '-'
+            Err(RgError::InvalidFormat)
+        );
+    }
+
+    #[test]
+    fn strict_sp_rejects_garbage() {
+        assert_eq!(
+            is_valid_strict("12.345.678!9", State::SP), // '!' sits in the '-' slot (offset 10)
+            Err(RgError::InvalidFormat)
+        );
+    }
+
+    #[test]
+    fn strict_sp_rejects_bad_dv() {
+        assert_eq!(
+            is_valid_strict("123456788", State::SP), // expected check digit is 9
+            Err(RgError::InvalidCheckDigit)
+        );
+    }
+
+    #[test]
+    fn strict_sp_rejects_x_in_middle() {
+        assert_eq!(
+            is_valid_strict("1234X6789", State::SP), // 'X' is only allowed as the last character
+            Err(RgError::InvalidCharacter)
+        );
+    }
+
+    #[test]
+    fn strict_other_uf_accepts_digits_only() {
+        assert!(is_valid_strict("1234567", State::RJ).is_ok());
+    }
+
+    #[test]
+    fn strict_other_uf_rejects_separators() {
+        assert!(is_valid_strict("12.345.67", State::RJ).is_err()); // strict mode strips nothing
+    }
+
+    #[test]
+    fn parse_sp_roundtrip() {
+        let rg = sp_rg([1, 2, 3, 4, 5, 6, 7, 8]);
+        let parsed = parse_strict(&rg.to_string(), State::SP).unwrap();
+        assert_eq!(parsed, rg);
+        let parsed_raw = parse_strict(rg.as_str(), State::SP).unwrap();
+        assert_eq!(parsed_raw, rg);
+    }
+
+    #[test]
+    fn parse_sp_x_terminator() {
+        let parsed = parse_strict("50.000.000-X", State::SP).unwrap();
+        assert_eq!(parsed.as_str(), "50000000X");
+        assert_eq!(parsed.check_digit(), Some(10));
+    }
+
+    #[test]
+    fn parse_other_uf_returns_digits() {
+        let parsed = parse_strict("1234567", State::RJ).unwrap();
+        assert_eq!(parsed.as_str(), "1234567");
+        assert_eq!(parsed.uf(),
State::RJ); + assert_eq!(parsed.check_digit(), None); + } + + #[test] + fn format_sp_inserts_separators() { + assert_eq!( + format_rg("123456789", State::SP), + Some("12.345.678-9".into()) + ); + assert_eq!( + format_rg("50000000X", State::SP), + Some("50.000.000-X".into()) + ); + } + + #[test] + fn format_sp_passes_through_already_formatted() { + assert_eq!( + format_rg("12.345.678-9", State::SP), + Some("12.345.678-9".into()) + ); + } + + #[test] + fn format_other_uf_returns_digits_unchanged() { + assert_eq!(format_rg("1234567", State::RJ), Some("1234567".into())); + } + + #[test] + fn format_returns_none_on_bad_length() { + assert_eq!(format_rg("12", State::SP), None); + assert_eq!(format_rg("12", State::RJ), None); + } + + #[test] + fn remove_symbols_sp_keeps_x() { + assert_eq!(remove_symbols("50.000.000-X", State::SP), "50000000X"); + assert_eq!(remove_symbols("50.000.000-x", State::SP), "50000000X"); + } + + #[test] + fn remove_symbols_other_uf_drops_letters() { + assert_eq!(remove_symbols("12.345-67", State::RJ), "1234567"); + assert_eq!(remove_symbols("X1234567", State::RJ), "1234567"); + } + + #[test] + fn compute_check_digit_sp() { + assert_eq!(compute_check_digit("12345678", State::SP), Some(9)); + assert_eq!(compute_check_digit("44444444", State::SP), Some(0)); + assert_eq!(compute_check_digit("50000000", State::SP), Some(10)); + } + + #[test] + fn compute_check_digit_rejects_other_uf() { + assert_eq!(compute_check_digit("1234567", State::RJ), None); + } + + #[test] + fn compute_check_digit_rejects_bad_length() { + assert_eq!(compute_check_digit("1234567", State::SP), None); + assert_eq!(compute_check_digit("123456789", State::SP), None); + } + + #[test] + fn generate_sp_produces_valid() { + for _ in 0..100 { + let rg = generate_sp(); + assert!(is_valid(rg.as_str(), State::SP)); + let parsed = parse_strict(rg.as_str(), State::SP).unwrap(); + assert_eq!(parsed, rg); + } + } + + #[test] + fn generate_for_uf_sp_ok_others_err() { + 
assert!(generate_for_uf(State::SP).is_ok()); + assert_eq!( + generate_for_uf(State::RJ), + Err(RgError::UnsupportedUfForGeneration) + ); + } + + #[test] + fn rg_is_copy() { + let rg = sp_rg([1, 2, 3, 4, 5, 6, 7, 8]); + let copy = rg; + assert_eq!(rg, copy); + } + + #[test] + fn rg_as_ref_str() { + let rg = sp_rg([1, 2, 3, 4, 5, 6, 7, 8]); + let r: &str = rg.as_ref(); + assert_eq!(r, rg.as_str()); + } + + #[test] + fn debug_format_includes_uf() { + let rg = sp_rg([1, 2, 3, 4, 5, 6, 7, 8]); + let dbg = alloc::format!("{rg:?}"); + assert!(dbg.starts_with("Rg(SP, ")); + assert!(dbg.ends_with(')')); + } + + #[test] + fn display_uses_formatted() { + let rg = sp_rg([1, 2, 3, 4, 5, 6, 7, 8]); + assert_eq!(rg.to_string(), "12.345.678-9"); + } + + #[test] + fn other_uf_display_passes_through() { + let rg = parse_strict("1234567", State::RJ).unwrap(); + assert_eq!(rg.to_string(), "1234567"); + } +} diff --git a/core/tests/parity.rs b/core/tests/parity.rs index 513045b..790cd5b 100644 --- a/core/tests/parity.rs +++ b/core/tests/parity.rs @@ -5,7 +5,7 @@ use serde_json::Value; use std::collections::HashMap; -use stdbr_core::{cep, cnpj, cpf, municipio, uf}; +use stdbr_core::{cep, cnpj, cpf, municipio, rg, uf}; fn golden() -> Value { let path = @@ -331,6 +331,129 @@ fn cep_generate_roundtrip() { assert_eq!(parsed.as_str(), raw); } +// ── RG ─────────────────────────────────────────────────────────────── + +fn parse_uf(s: &str) -> uf::State { + uf::State::from_abbreviation(s).unwrap_or_else(|| panic!("unknown UF: {s}")) +} + +#[test] +fn rg_parse() { + let cases = &golden()["rg"]["parse"]; + for case in cases.as_array().unwrap() { + let input = case["input"].as_str().unwrap(); + let uf = parse_uf(case["uf"].as_str().unwrap()); + let parsed = rg::parse_strict(input, uf).unwrap_or_else(|e| { + panic!("parse_strict({input}, {}) failed: {e}", uf.abbreviation()) + }); + assert_eq!(parsed.as_str(), case["digits_only"].as_str().unwrap()); + assert_eq!(parsed.formatted(), 
case["formatted"].as_str().unwrap()); + assert_eq!(parsed.uf().abbreviation(), case["uf_out"].as_str().unwrap()); + let expected_cd = case["check_digit"].as_u64().map(|v| v as u8); + assert_eq!(parsed.check_digit(), expected_cd); + } +} + +#[test] +fn rg_is_valid() { + let cases = &golden()["rg"]["is_valid"]; + for case in cases.as_array().unwrap() { + let input = case["input"].as_str().unwrap(); + let uf = parse_uf(case["uf"].as_str().unwrap()); + let expected = case["expected"].as_bool().unwrap(); + assert_eq!( + rg::is_valid(input, uf), + expected, + "is_valid({input}, {})", + uf.abbreviation() + ); + } +} + +#[test] +fn rg_is_valid_strict() { + let cases = &golden()["rg"]["is_valid_strict"]; + for case in cases.as_array().unwrap() { + let input = case["input"].as_str().unwrap(); + let uf = parse_uf(case["uf"].as_str().unwrap()); + let valid = case["valid"].as_bool().unwrap(); + let result = rg::is_valid_strict(input, uf); + if valid { + assert!( + result.is_ok(), + "is_valid_strict({input}, {}) expected Ok", + uf.abbreviation() + ); + } else { + let err = result.unwrap_err(); + let expected_msg = case["error"].as_str().unwrap(); + assert_eq!(err.to_string(), expected_msg); + } + } +} + +#[test] +fn rg_format() { + let cases = &golden()["rg"]["format"]; + for case in cases.as_array().unwrap() { + let input = case["input"].as_str().unwrap(); + let uf = parse_uf(case["uf"].as_str().unwrap()); + let expected = case["expected"].as_str(); + assert_eq!( + rg::format_rg(input, uf).as_deref(), + expected, + "format({input}, {})", + uf.abbreviation() + ); + } +} + +#[test] +fn rg_remove_symbols() { + let cases = &golden()["rg"]["remove_symbols"]; + for case in cases.as_array().unwrap() { + let input = case["input"].as_str().unwrap(); + let uf = parse_uf(case["uf"].as_str().unwrap()); + let expected = case["expected"].as_str().unwrap(); + assert_eq!(rg::remove_symbols(input, uf), expected); + } +} + +#[test] +fn rg_compute_check_digit() { + let cases = 
&golden()["rg"]["compute_check_digit"]; + for case in cases.as_array().unwrap() { + let base = case["base"].as_str().unwrap(); + let uf = parse_uf(case["uf"].as_str().unwrap()); + let result = rg::compute_check_digit(base, uf); + if case["expected"].is_null() { + assert!(result.is_none()); + } else { + assert_eq!(result, Some(case["expected"].as_u64().unwrap() as u8)); + } + } +} + +#[test] +fn rg_generate_roundtrip() { + let gen_uf = parse_uf(golden()["rg"]["generate"]["uf"].as_str().unwrap()); + let rg_val = rg::generate_for_uf(gen_uf).unwrap(); + assert!(rg::is_valid(rg_val.as_str(), gen_uf)); + let parsed = rg::parse_strict(rg_val.as_str(), gen_uf).unwrap(); + assert_eq!(parsed.as_str(), rg_val.as_str()); +} + +#[test] +fn rg_generate_unsupported() { + let cases = &golden()["rg"]["generate_unsupported"]; + for case in cases.as_array().unwrap() { + let uf = parse_uf(case["uf"].as_str().unwrap()); + let expected = case["error"].as_str().unwrap(); + let err = rg::generate_for_uf(uf).unwrap_err(); + assert_eq!(err.to_string(), expected); + } +} + // ── UF ─────────────────────────────────────────────────────────────── #[test] diff --git a/tools/parity_gen/src/main.rs b/tools/parity_gen/src/main.rs index aa1929c..0016d4f 100644 --- a/tools/parity_gen/src/main.rs +++ b/tools/parity_gen/src/main.rs @@ -5,7 +5,7 @@ //! 
``` use serde_json::{Value, json}; -use stdbr_core::{cep, cnpj, cpf, municipio, uf}; +use stdbr_core::{cep, cnpj, cpf, municipio, rg, uf}; const IBGE_API_URL: &str = "https://servicodados.ibge.gov.br/api/v1/localidades/municipios?orderBy=nome"; @@ -17,6 +17,7 @@ fn main() { "cpf": cpf_cases(), "cnpj": cnpj_cases(), "cep": cep_cases(), + "rg": rg_cases(), "uf": uf_cases(), "municipio": municipio_cases(&ibge), }); @@ -255,6 +256,121 @@ fn cep_cases() -> Value { }) } +fn rg_cases() -> Value { + let sp_samples: Vec<rg::Rg> = ["123456789", "50000000X"] + .iter() + .map(|s| rg::parse_strict(s, uf::State::SP).unwrap()) + .collect(); + + let parse: Vec<Value> = ["12.345.678-9", "50.000.000-X"] + .iter() + .zip(&sp_samples) + .map(|(&input, parsed)| { + json!({ + "input": input, + "uf": "SP", + "digits_only": parsed.as_str(), + "formatted": parsed.formatted(), + "uf_out": parsed.uf().abbreviation(), + "check_digit": parsed.check_digit(), + }) + }) + .collect(); + + let rj_struct = rg::parse_strict("1234567", uf::State::RJ).unwrap(); + + let parse_struct: Vec<Value> = vec![json!({ + "input": "1234567", + "uf": "RJ", + "digits_only": rj_struct.as_str(), + "formatted": rj_struct.formatted(), + "uf_out": rj_struct.uf().abbreviation(), + "check_digit": rj_struct.check_digit(), + })]; + + let mut all_parse = parse; + all_parse.extend(parse_struct); + + json!({ + "parse": all_parse, + "is_valid": [ + { "input": "12.345.678-9", "uf": "SP", "expected": true }, + { "input": "123456789", "uf": "SP", "expected": true }, + { "input": "50.000.000-X", "uf": "SP", "expected": true }, + { "input": "12.345.678-8", "uf": "SP", "expected": false }, + { "input": "1234567", "uf": "RJ", "expected": true }, + { "input": "1234", "uf": "RJ", "expected": false }, + { "input": "abc", "uf": "RJ", "expected": false }, + { "input": "", "uf": "SP", "expected": false }, + ], + "is_valid_strict": [ + sp_strict("12.345.678-9", true, None), + sp_strict("123456789", true, None), + sp_strict("50.000.000-X", true, None), +
sp_strict("12.345.678-8", false, Some("RG check digit is invalid")), + sp_strict("123.45.678-9", false, Some("RG format does not match the canonical mask for this UF")), + sp_strict("", false, Some("RG length is outside the accepted range for this UF")), + rj_strict("1234567", true, None), + rj_strict("12.345-67", false, Some("RG contains invalid characters")), + ], + "format": [ + { "input": "123456789", "uf": "SP", "expected": "12.345.678-9" }, + { "input": "50000000X", "uf": "SP", "expected": "50.000.000-X" }, + { "input": "12.345.678-9", "uf": "SP", "expected": "12.345.678-9" }, + { "input": "1234567", "uf": "RJ", "expected": "1234567" }, + { "input": "12", "uf": "SP", "expected": null }, + ], + "remove_symbols": [ + { "input": "12.345.678-9", "uf": "SP", "expected": "123456789" }, + { "input": "50.000.000-X", "uf": "SP", "expected": "50000000X" }, + { "input": "50.000.000-x", "uf": "SP", "expected": "50000000X" }, + { "input": "12.345-67", "uf": "RJ", "expected": "1234567" }, + ], + "compute_check_digit": [ + { "base": "12345678", "uf": "SP", "expected": 9 }, + { "base": "44444444", "uf": "SP", "expected": 0 }, + { "base": "50000000", "uf": "SP", "expected": 10 }, + { "base": "12345678", "uf": "RJ", "expected": null }, + { "base": "1234", "uf": "SP", "expected": null }, + ], + "generate": { "test_roundtrip": true, "uf": "SP" }, + "generate_unsupported": [ + { "uf": "RJ", "error": "RG generation is not supported for this UF (no verified algorithm)" }, + ], + }) +} + +fn sp_strict(input: &str, valid: bool, err: Option<&str>) -> Value { + let actual = rg::is_valid_strict(input, uf::State::SP); + rg_strict_assert(input, valid, err, actual, "SP") +} + +fn rj_strict(input: &str, valid: bool, err: Option<&str>) -> Value { + let actual = rg::is_valid_strict(input, uf::State::RJ); + rg_strict_assert(input, valid, err, actual, "RJ") +} + +fn rg_strict_assert( + input: &str, + valid: bool, + err: Option<&str>, + actual: Result<(), rg::RgError>, + uf_label: &str, +) -> 
Value { + match (valid, err) { + (true, None) => { + assert!(actual.is_ok(), "expected Ok for {input} ({uf_label}), got {actual:?}"); + json!({ "input": input, "uf": uf_label, "valid": true }) + } + (false, Some(msg)) => { + let got = actual.expect_err("expected error"); + assert_eq!(got.to_string(), msg, "error mismatch for {input} ({uf_label})"); + json!({ "input": input, "uf": uf_label, "valid": false, "error": msg }) + } + _ => panic!("inconsistent strict case: input={input} valid={valid} err={err:?}"), + } +} + fn uf_cases() -> Value { let states: Vec = uf::ALL .iter() From 5f3795e7c9c09f16067ef30a8b73dcfaa51affdc Mon Sep 17 00:00:00 2001 From: catskhi Date: Sun, 3 May 2026 23:32:47 -0300 Subject: [PATCH 2/2] feat: implement RG (Registro Geral) validation tests across multiple languages --- bindings/ffi-c/test_parity.c | 163 +++++++++++++++++++++++++++++++++ bindings/nodejs/test_parity.js | 89 ++++++++++++++++++ bindings/python/test_parity.py | 67 ++++++++++++++ bindings/wasm/test_parity.js | 90 ++++++++++++++++++ 4 files changed, 409 insertions(+) diff --git a/bindings/ffi-c/test_parity.c b/bindings/ffi-c/test_parity.c index 65998a4..56d466b 100644 --- a/bindings/ffi-c/test_parity.c +++ b/bindings/ffi-c/test_parity.c @@ -414,6 +414,168 @@ static void test_cep(cJSON *cep_json) { } } +static StdbrRgError rg_error_from_str(const char *s) { + if (!s) return 0; + if (strstr(s, "outside the accepted range")) return 1; + if (strstr(s, "invalid characters")) return 2; + if (strstr(s, "does not match the canonical mask")) return 3; + if (strstr(s, "check digit is invalid")) return 4; + if (strstr(s, "generation is not supported")) return 5; + fprintf(stderr, "WARNING: unknown RG error string: %s\n", s); + return 255; +} + +static bool uf_from_json(cJSON *item, const char *key, StdbrState *out) { + const char *abbr = cJSON_GetObjectItem(item, key)->valuestring; + return stdbr_state_from_abbreviation(abbr, out); +} + +static void test_rg(cJSON *rg_json) { + printf(" 
RG...\n"); + + cJSON *parse = cJSON_GetObjectItem(rg_json, "parse"); + cJSON *item; + cJSON_ArrayForEach(item, parse) { + const char *input = cJSON_GetObjectItem(item, "input")->valuestring; + const char *digits_only = cJSON_GetObjectItem(item, "digits_only")->valuestring; + const char *formatted = cJSON_GetObjectItem(item, "formatted")->valuestring; + const char *uf_out = cJSON_GetObjectItem(item, "uf_out")->valuestring; + cJSON *cd_json = cJSON_GetObjectItem(item, "check_digit"); + + StdbrState uf; + if (!uf_from_json(item, "uf", &uf)) { + ASSERT_BOOL_EQ(false, true, "rg parse uf lookup"); + continue; + } + + StdbrRgError err; + StdbrRg *rg = stdbr_rg_parse(input, uf, &err); + ASSERT_INT_EQ(err, 0, "rg_parse err"); + ASSERT_NOT_NULL(rg, "rg_parse result"); + if (!rg) continue; + + assert_str_eq_free(stdbr_rg_as_str(rg), digits_only, "rg as_str"); + assert_str_eq_free(stdbr_rg_formatted(rg), formatted, "rg formatted"); + + StdbrState got_uf = stdbr_rg_uf(rg); + char *got_uf_abbr = stdbr_state_abbreviation(got_uf); + ASSERT_STR_EQ(got_uf_abbr, uf_out, "rg uf abbr"); + stdbr_free(got_uf_abbr); + + uint8_t cd = 0; + bool has_cd = stdbr_rg_check_digit(rg, &cd); + if (cJSON_IsNull(cd_json)) { + ASSERT_BOOL_EQ(has_cd, false, "rg check_digit absent"); + } else { + ASSERT_BOOL_EQ(has_cd, true, "rg check_digit present"); + ASSERT_INT_EQ(cd, cd_json->valueint, "rg check_digit value"); + } + + stdbr_rg_destroy(rg); + } + + cJSON *is_valid = cJSON_GetObjectItem(rg_json, "is_valid"); + cJSON_ArrayForEach(item, is_valid) { + const char *input = cJSON_GetObjectItem(item, "input")->valuestring; + bool expected = cJSON_IsTrue(cJSON_GetObjectItem(item, "expected")); + StdbrState uf; + if (!uf_from_json(item, "uf", &uf)) continue; + ASSERT_BOOL_EQ(stdbr_rg_is_valid(input, uf), expected, "rg is_valid"); + } + + cJSON *strict = cJSON_GetObjectItem(rg_json, "is_valid_strict"); + cJSON_ArrayForEach(item, strict) { + const char *input = cJSON_GetObjectItem(item, "input")->valuestring; + 
bool valid = cJSON_IsTrue(cJSON_GetObjectItem(item, "valid")); + StdbrState uf; + if (!uf_from_json(item, "uf", &uf)) continue; + StdbrRgError err = stdbr_rg_is_valid_strict(input, uf); + if (valid) { + ASSERT_INT_EQ(err, 0, "rg strict valid"); + } else { + const char *error_str = cJSON_GetObjectItem(item, "error")->valuestring; + StdbrRgError expected_err = rg_error_from_str(error_str); + ASSERT_INT_EQ(err, expected_err, "rg strict error"); + } + } + + cJSON *fmt = cJSON_GetObjectItem(rg_json, "format"); + cJSON_ArrayForEach(item, fmt) { + const char *input = cJSON_GetObjectItem(item, "input")->valuestring; + cJSON *exp_json = cJSON_GetObjectItem(item, "expected"); + StdbrState uf; + if (!uf_from_json(item, "uf", &uf)) continue; + char *got = stdbr_rg_format(input, uf); + if (cJSON_IsNull(exp_json)) { + ASSERT_NULL(got, "rg format null"); + } else { + assert_str_eq_free(got, exp_json->valuestring, "rg format"); + got = NULL; + } + if (got) stdbr_free(got); + } + + cJSON *rs = cJSON_GetObjectItem(rg_json, "remove_symbols"); + cJSON_ArrayForEach(item, rs) { + const char *input = cJSON_GetObjectItem(item, "input")->valuestring; + const char *expected = cJSON_GetObjectItem(item, "expected")->valuestring; + StdbrState uf; + if (!uf_from_json(item, "uf", &uf)) continue; + assert_str_eq_free(stdbr_rg_remove_symbols(input, uf), expected, "rg remove_symbols"); + } + + cJSON *ccd = cJSON_GetObjectItem(rg_json, "compute_check_digit"); + cJSON_ArrayForEach(item, ccd) { + const char *base = cJSON_GetObjectItem(item, "base")->valuestring; + cJSON *exp_json = cJSON_GetObjectItem(item, "expected"); + StdbrState uf; + if (!uf_from_json(item, "uf", &uf)) continue; + uint8_t out = 0; + bool ok = stdbr_rg_compute_check_digit(base, uf, &out); + if (cJSON_IsNull(exp_json)) { + ASSERT_BOOL_EQ(ok, false, "rg compute_check_digit null"); + } else { + ASSERT_BOOL_EQ(ok, true, "rg compute_check_digit ok"); + ASSERT_INT_EQ(out, exp_json->valueint, "rg compute_check_digit value"); + } + } + + 
StdbrState gen_uf; + cJSON *gen = cJSON_GetObjectItem(rg_json, "generate"); + const char *gen_uf_abbr = cJSON_GetObjectItem(gen, "uf")->valuestring; + if (stdbr_state_from_abbreviation(gen_uf_abbr, &gen_uf)) { + char *generated = stdbr_rg_generate_sp(); + ASSERT_NOT_NULL(generated, "rg generate_sp"); + if (generated) { + ASSERT_BOOL_EQ(stdbr_rg_is_valid(generated, gen_uf), true, "rg generate valid"); + StdbrRgError err; + StdbrRg *rg = stdbr_rg_parse(generated, gen_uf, &err); + ASSERT_NOT_NULL(rg, "rg generate parse"); + if (rg) { + char *s = stdbr_rg_as_str(rg); + ASSERT_STR_EQ(s, generated, "rg generate roundtrip"); + stdbr_free(s); + stdbr_rg_destroy(rg); + } + stdbr_free(generated); + } + } + + cJSON *gen_unsup = cJSON_GetObjectItem(rg_json, "generate_unsupported"); + cJSON_ArrayForEach(item, gen_unsup) { + const char *uf_abbr = cJSON_GetObjectItem(item, "uf")->valuestring; + const char *err_str = cJSON_GetObjectItem(item, "error")->valuestring; + StdbrState uf; + if (!stdbr_state_from_abbreviation(uf_abbr, &uf)) continue; + StdbrRgError err; + StdbrRg *rg = stdbr_rg_create_for_uf(uf, &err); + ASSERT_NULL(rg, "rg create_for_uf unsupported null"); + if (rg) stdbr_rg_destroy(rg); + StdbrRgError expected_err = rg_error_from_str(err_str); + ASSERT_INT_EQ(err, expected_err, "rg create_for_uf err"); + } +} + static void test_uf(cJSON *uf_json) { printf(" UF...\n"); @@ -607,6 +769,7 @@ int main(void) { test_cpf(cJSON_GetObjectItem(golden, "cpf")); test_cnpj(cJSON_GetObjectItem(golden, "cnpj")); test_cep(cJSON_GetObjectItem(golden, "cep")); + test_rg(cJSON_GetObjectItem(golden, "rg")); test_uf(cJSON_GetObjectItem(golden, "uf")); test_municipio(cJSON_GetObjectItem(golden, "municipio")); diff --git a/bindings/nodejs/test_parity.js b/bindings/nodejs/test_parity.js index 94b9485..9dacc23 100644 --- a/bindings/nodejs/test_parity.js +++ b/bindings/nodejs/test_parity.js @@ -18,6 +18,8 @@ const { cnpjGenerate, cnpjComputeCheckDigits, CnpjKind, Cep, cepIsValid, 
cepIsValidStrict, cepFormat, cepRemoveSymbols, cepGenerate, + Rg, rgIsValid, rgIsValidStrict, rgFormat, rgRemoveSymbols, + rgComputeCheckDigit, rgGenerateSp, stateAbbreviation, stateName, stateFromAbbreviation, allStates, municipioFromIbgeCode, municipioCapitalOf, municipiosByState, municipioSearchByName, municipioCount, @@ -178,6 +180,93 @@ describe("CEP", () => { }); }); +describe("RG", () => { + describe("parse", () => { + for (const c of golden.rg.parse) { + it(`${c.input} (${c.uf})`, () => { + const uf = stateFromAbbreviation(c.uf); + const rg = Rg.parse(c.input, uf); + strictEqual(rg.asStr(), c.digits_only); + strictEqual(rg.formatted(), c.formatted); + strictEqual(stateAbbreviation(rg.uf), c.uf_out); + strictEqual(rg.checkDigit ?? null, c.check_digit); + }); + } + }); + + describe("is_valid", () => { + for (const c of golden.rg.is_valid) { + it(`${c.input || "(empty)"} (${c.uf})`, () => { + strictEqual(rgIsValid(c.input, stateFromAbbreviation(c.uf)), c.expected); + }); + } + }); + + describe("is_valid_strict", () => { + for (const c of golden.rg.is_valid_strict) { + it(`${c.input || "(empty)"} (${c.uf})`, () => { + const uf = stateFromAbbreviation(c.uf); + if (c.valid) { + rgIsValidStrict(c.input, uf); + } else { + try { + rgIsValidStrict(c.input, uf); + ok(false, "should throw"); + } catch (e) { + strictEqual(e.message, c.error); + } + } + }); + } + }); + + describe("format", () => { + for (const c of golden.rg.format) { + it(`${c.input || "(empty)"} (${c.uf})`, () => { + strictEqual(rgFormat(c.input, stateFromAbbreviation(c.uf)) ?? 
null, c.expected); + }); + } + }); + + describe("remove_symbols", () => { + for (const c of golden.rg.remove_symbols) { + it(`${c.input} (${c.uf})`, () => { + strictEqual(rgRemoveSymbols(c.input, stateFromAbbreviation(c.uf)), c.expected); + }); + } + }); + + describe("compute_check_digit", () => { + for (const c of golden.rg.compute_check_digit) { + it(`${c.base} (${c.uf})`, () => { + const result = rgComputeCheckDigit(c.base, stateFromAbbreviation(c.uf)); + strictEqual(result ?? null, c.expected); + }); + } + }); + + it("generate_sp roundtrip", () => { + const uf = stateFromAbbreviation(golden.rg.generate.uf); + const raw = rgGenerateSp(); + ok(rgIsValid(raw, uf)); + strictEqual(Rg.parse(raw, uf).asStr(), raw); + }); + + describe("generate_unsupported", () => { + for (const c of golden.rg.generate_unsupported) { + it(c.uf, () => { + const uf = stateFromAbbreviation(c.uf); + try { + Rg.generateForUf(uf); + ok(false, "should throw"); + } catch (e) { + strictEqual(e.message, c.error); + } + }); + } + }); +}); + describe("UF", () => { it("states", () => { const states = allStates(); diff --git a/bindings/python/test_parity.py b/bindings/python/test_parity.py index 8e43530..2b85143 100644 --- a/bindings/python/test_parity.py +++ b/bindings/python/test_parity.py @@ -169,6 +169,73 @@ def test_generate_roundtrip(self): self.assertEqual(stdbr.Cep.parse(raw).as_str(), raw) +class TestRg(unittest.TestCase): + def _uf(self, abbr): + return stdbr.state_from_abbreviation(abbr) + + def test_parse(self): + for c in GOLDEN["rg"]["parse"]: + with self.subTest(input=c["input"], uf=c["uf"]): + rg = stdbr.Rg.parse(c["input"], self._uf(c["uf"])) + self.assertEqual(rg.as_str(), c["digits_only"]) + self.assertEqual(rg.formatted(), c["formatted"]) + self.assertEqual(stdbr.state_abbreviation(rg.uf), c["uf_out"]) + self.assertEqual(rg.check_digit, c["check_digit"]) + + def test_is_valid(self): + for c in GOLDEN["rg"]["is_valid"]: + with self.subTest(input=c["input"], uf=c["uf"]): + 
self.assertEqual( + stdbr.rg_is_valid(c["input"], self._uf(c["uf"])), c["expected"] + ) + + def test_is_valid_strict(self): + for c in GOLDEN["rg"]["is_valid_strict"]: + with self.subTest(input=c["input"], uf=c["uf"]): + if c["valid"]: + stdbr.rg_is_valid_strict(c["input"], self._uf(c["uf"])) + else: + with self.assertRaises(ValueError) as ctx: + stdbr.rg_is_valid_strict(c["input"], self._uf(c["uf"])) + self.assertEqual(str(ctx.exception), c["error"]) + + def test_format(self): + for c in GOLDEN["rg"]["format"]: + with self.subTest(input=c["input"], uf=c["uf"]): + self.assertEqual( + stdbr.rg_format(c["input"], self._uf(c["uf"])), c["expected"] + ) + + def test_remove_symbols(self): + for c in GOLDEN["rg"]["remove_symbols"]: + with self.subTest(input=c["input"], uf=c["uf"]): + self.assertEqual( + stdbr.rg_remove_symbols(c["input"], self._uf(c["uf"])), + c["expected"], + ) + + def test_compute_check_digit(self): + for c in GOLDEN["rg"]["compute_check_digit"]: + with self.subTest(base=c["base"], uf=c["uf"]): + self.assertEqual( + stdbr.rg_compute_check_digit(c["base"], self._uf(c["uf"])), + c["expected"], + ) + + def test_generate_sp_roundtrip(self): + uf = self._uf(GOLDEN["rg"]["generate"]["uf"]) + raw = stdbr.rg_generate_sp() + self.assertTrue(stdbr.rg_is_valid(raw, uf)) + self.assertEqual(stdbr.Rg.parse(raw, uf).as_str(), raw) + + def test_generate_unsupported(self): + for c in GOLDEN["rg"]["generate_unsupported"]: + with self.subTest(uf=c["uf"]): + with self.assertRaises(ValueError) as ctx: + stdbr.Rg.generate_for_uf(self._uf(c["uf"])) + self.assertEqual(str(ctx.exception), c["error"]) + + class TestUf(unittest.TestCase): def test_states(self): states = stdbr.all_states() diff --git a/bindings/wasm/test_parity.js b/bindings/wasm/test_parity.js index a0ce2be..a7ef180 100644 --- a/bindings/wasm/test_parity.js +++ b/bindings/wasm/test_parity.js @@ -16,6 +16,8 @@ const { cnpjGenerate, cnpjComputeCheckDigits, CnpjKind, Cep, cepIsValid, cepIsValidStrict, cepFormat, 
cepRemoveSymbols, cepGenerate, + Rg, rgIsValid, rgIsValidStrict, rgFormat, rgRemoveSymbols, + rgComputeCheckDigit, rgGenerateSp, stateAbbreviation, stateName, stateFromAbbreviation, allStates, municipioFromIbgeCode, municipioCapitalOf, municipiosByState, municipioSearchByName, municipioCount, @@ -185,6 +187,94 @@ describe("CEP", () => { }); }); +describe("RG", () => { + describe("parse", () => { + for (const c of golden.rg.parse) { + it(`${c.input} (${c.uf})`, () => { + const uf = stateFromAbbreviation(c.uf); + const rg = Rg.parse(c.input, uf); + strictEqual(rg.asStr(), c.digits_only); + strictEqual(rg.formatted(), c.formatted); + strictEqual(stateAbbreviation(rg.uf), c.uf_out); + const cd = rg.checkDigit; + strictEqual(cd === undefined ? null : cd, c.check_digit); + }); + } + }); + + describe("is_valid", () => { + for (const c of golden.rg.is_valid) + it(`${c.input || "(empty)"} (${c.uf})`, () => + strictEqual(rgIsValid(c.input, stateFromAbbreviation(c.uf)), c.expected)); + }); + + describe("is_valid_strict", () => { + for (const c of golden.rg.is_valid_strict) { + it(`${c.input || "(empty)"} (${c.uf})`, () => { + const uf = stateFromAbbreviation(c.uf); + if (c.valid) { + rgIsValidStrict(c.input, uf); + } else { + try { + rgIsValidStrict(c.input, uf); + ok(false, "should throw"); + } catch (e) { + strictEqual(e.message, c.error); + } + } + }); + } + }); + + describe("format", () => { + for (const c of golden.rg.format) { + it(`${c.input || "(empty)"} (${c.uf})`, () => { + const got = rgFormat(c.input, stateFromAbbreviation(c.uf)); + strictEqual(got !== undefined ? 
got : null, c.expected); + }); + } + }); + + describe("remove_symbols", () => { + for (const c of golden.rg.remove_symbols) + it(`${c.input} (${c.uf})`, () => + strictEqual( + rgRemoveSymbols(c.input, stateFromAbbreviation(c.uf)), + c.expected, + )); + }); + + describe("compute_check_digit", () => { + for (const c of golden.rg.compute_check_digit) { + it(`${c.base} (${c.uf})`, () => { + const got = rgComputeCheckDigit(c.base, stateFromAbbreviation(c.uf)); + strictEqual(got === undefined ? null : got, c.expected); + }); + } + }); + + it("generate_sp roundtrip", () => { + const uf = stateFromAbbreviation(golden.rg.generate.uf); + const raw = rgGenerateSp(); + ok(rgIsValid(raw, uf)); + strictEqual(Rg.parse(raw, uf).asStr(), raw); + }); + + describe("generate_unsupported", () => { + for (const c of golden.rg.generate_unsupported) { + it(c.uf, () => { + const uf = stateFromAbbreviation(c.uf); + try { + Rg.generateForUf(uf); + ok(false, "should throw"); + } catch (e) { + strictEqual(e.message, c.error); + } + }); + } + }); +}); + describe("UF", () => { it("states", () => { const states = allStates();