6 changes: 3 additions & 3 deletions flax/nnx/nn/attention.py
@@ -334,7 +334,7 @@ class MultiHeadAttention(Module):
ln_promote_dtype: function to promote the dtype of all input array arguments
(including Variables accessed through ``self``) to the desired dtype for the
LayerNorm submodules (query_ln and key_ln) when normalize_qk=True.
-  rngs: rng key.
+  rngs: rngs object.
keep_rngs: whether to store the input rngs as attribute (i.e. `self.rngs = rngs`)
(default: True). If rngs is stored, we should split the module as
`graphdef, params, nondiff = nnx.split(module, nnx.Param, ...)` where `nondiff`
@@ -534,8 +534,8 @@ def __call__(
dropout, whereas if true, the attention weights are deterministic. The
``deterministic`` flag passed into the call method will take precedence
over the ``deterministic`` flag passed into the constructor.
-  rngs: rng key. The rng key passed into the call method will take
-  precedence over the rng key passed into the constructor.
+  rngs: rngs object. The rngs object passed into the call method will take
+  precedence over the rngs object passed into the constructor.
sow_weights: if ``True``, the attention weights are sowed into the
'intermediates' collection.
decode: whether to prepare and use an autoregressive cache. The ``decode``
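For context, a minimal sketch of the pattern these docstrings describe: the module is constructed with an ``nnx.Rngs`` object rather than a raw PRNG key, and when ``keep_rngs=True`` the stored rngs are split out separately from the parameters. Feature sizes and seeds here are arbitrary.

```python
from flax import nnx
import jax.numpy as jnp

# Construct with an nnx.Rngs object (not a raw jax.random key).
attn = nnx.MultiHeadAttention(
    num_heads=2, in_features=8, qkv_features=16, decode=False, rngs=nnx.Rngs(0)
)
y = attn(jnp.ones((1, 4, 8)))  # self-attention over a (batch, seq, features) input

# With keep_rngs=True (the default), the rngs live on the module, so split
# them out of the differentiable state as the docstring suggests:
graphdef, params, nondiff = nnx.split(attn, nnx.Param, ...)
```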
12 changes: 6 additions & 6 deletions flax/nnx/nn/linear.py
@@ -142,7 +142,7 @@ class LinearGeneral(Module):
preferred_element_type: Optional parameter controls the data type output by
the dot product. This argument is passed to ``dot_general`` function.
See ``jax.lax.dot`` for details.
-  rngs: rng key.
+  rngs: rngs object.
kernel_metadata: Optional metadata dictionary to set when initializing
the weight matrix.
bias_metadata: Optional metadata dictionary to set when initializing
@@ -343,7 +343,7 @@ class Linear(Module):
preferred_element_type: Optional parameter controls the data type output by
the dot product. This argument is passed to ``dot_general`` function.
See ``jax.lax.dot`` for details.
-  rngs: rng key.
+  rngs: rngs object.
kernel_metadata: Optional metadata dictionary to set when initializing
the weight matrix.
bias_metadata: Optional metadata dictionary to set when initializing
@@ -468,7 +468,7 @@ class Einsum(Module):
preferred_element_type: Optional parameter controls the data type output by
the dot product. This argument is passed to ``dot_general`` function.
See ``jax.lax.dot`` for details.
-  rngs: rng key.
+  rngs: rngs object.
kernel_metadata: Optional metadata dictionary to set when initializing
the weight matrix.
bias_metadata: Optional metadata dictionary to set when initializing
@@ -699,7 +699,7 @@ class Conv(Module):
preferred_element_type: Optional parameter controls the data type output by
the convolution. This argument is passed to ``conv_general_dilated``
function. See ``jax.lax.conv_general_dilated`` for details.
-  rngs: rng key.
+  rngs: rngs object.
kernel_metadata: Optional metadata dictionary to set when initializing
the weight matrix.
bias_metadata: Optional metadata dictionary to set when initializing
@@ -1002,7 +1002,7 @@ class ConvTranspose(Module):
the transposed convolution. This argument is passed to
``jax.lax.conv_transpose`` function. See ``jax.lax.conv_transpose``
for details.
-  rngs: rng key.
+  rngs: rngs object.
kernel_metadata: Optional metadata dictionary to set when initializing
the weight matrix.
bias_metadata: Optional metadata dictionary to set when initializing
@@ -1257,7 +1257,7 @@ class Embed(Module):
dtype. The function should accept a tuple of ``(embedding,)`` during ``__call__``
or ``(query, embedding)`` during ``attend``, and a ``dtype`` keyword argument,
and return a tuple of arrays with the promoted dtype.
-  rngs: rng key.
+  rngs: rngs object.
embedding_metadata: Optional metadata dictionary to set when initializing
the embedding matrix.
"""
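As a usage sketch for the linear-family layers above, all of them take the same ``rngs`` argument at construction, and a single ``nnx.Rngs`` object can be shared across layers; the sizes below are arbitrary.

```python
from flax import nnx
import jax.numpy as jnp

rngs = nnx.Rngs(params=0)  # one rngs object reused for every layer's init

dense = nnx.Linear(in_features=4, out_features=8, rngs=rngs)
conv = nnx.Conv(in_features=3, out_features=8, kernel_size=(3, 3), rngs=rngs)
embed = nnx.Embed(num_embeddings=100, features=16, rngs=rngs)

y = dense(jnp.ones((2, 4)))          # (2, 8)
z = conv(jnp.ones((1, 28, 28, 3)))   # NHWC input -> (1, 28, 28, 8)
e = embed(jnp.array([[1, 5, 7]]))    # token ids -> (1, 3, 16)
```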
2 changes: 1 addition & 1 deletion flax/nnx/nn/lora.py
@@ -79,7 +79,7 @@ class LoRA(Module):
(including Variables accessed through ``self``) to the desired dtype. The
function should accept a tuple of ``(inputs, lora_a, lora_b)`` and a ``dtype``
keyword argument, and return a tuple of arrays with the promoted dtype.
-  rngs: rng key.
+  rngs: rngs object.
a_metadata: Optional metadata dictionary to set when initializing
the fan-in matrices.
b_metadata: Optional metadata dictionary to set when initializing
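The same pattern applies to ``LoRA``; a minimal construction sketch, assuming the ``in_features`` / ``lora_rank`` / ``out_features`` keyword names and arbitrary sizes:

```python
from flax import nnx
import jax.numpy as jnp

# Standalone low-rank adapter: in_features -> lora_rank -> out_features.
lora = nnx.LoRA(in_features=4, lora_rank=2, out_features=8, rngs=nnx.Rngs(0))
y = lora(jnp.ones((1, 4)))  # (1, 8)
```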
10 changes: 5 additions & 5 deletions flax/nnx/nn/normalization.py
@@ -279,7 +279,7 @@ class BatchNorm(Module):
function should accept a tuple of ``(inputs, mean, var, scale, bias)`` and
a ``dtype`` keyword argument, and return a tuple of arrays with the promoted
dtype.
-  rngs: rng key.
+  rngs: rngs object.
bias_metadata: Optional metadata dictionary to set when initializing
the bias.
scale_metadata: Optional metadata dictionary to set when initializing
@@ -483,7 +483,7 @@ class LayerNorm(Module):
(including Variables accessed through ``self``) to the desired dtype. The
function should accept a tuple of ``(inputs, scale, bias)`` and a ``dtype``
keyword argument, and return a tuple of arrays with the promoted dtype.
-  rngs: rng key.
+  rngs: rngs object.
bias_metadata: Optional metadata dictionary to set when initializing
the bias.
scale_metadata: Optional metadata dictionary to set when initializing
@@ -630,7 +630,7 @@ class RMSNorm(Module):
(including Variables accessed through ``self``) to the desired dtype. The
function should accept a tuple of ``(inputs, scale)`` and a ``dtype``
keyword argument, and return a tuple of arrays with the promoted dtype.
-  rngs: rng key.
+  rngs: rngs object.
scale_metadata: Optional metadata dictionary to set when initializing
the scale.
"""
@@ -785,7 +785,7 @@ class GroupNorm(Module):
(including Variables accessed through ``self``) to the desired dtype. The
function should accept a tuple of ``(inputs, scale, bias)`` and a ``dtype``
keyword argument, and return a tuple of arrays with the promoted dtype.
-  rngs: rng key.
+  rngs: rngs object.
bias_metadata: Optional metadata dictionary to set when initializing
the bias.
scale_metadata: Optional metadata dictionary to set when initializing
@@ -1300,7 +1300,7 @@ class SpectralNorm(Module):
than 2 is used by the layer.
update_stats: if True, the stored batch statistics will be
used instead of computing the batch statistics on the input.
-  rngs: rng key.
+  rngs: rngs object.
"""

def __init__(
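Likewise for the normalization layers, the ``rngs`` object is passed at construction; the feature size below is arbitrary.

```python
from flax import nnx
import jax.numpy as jnp

rngs = nnx.Rngs(params=0)
ln = nnx.LayerNorm(num_features=8, rngs=rngs)
bn = nnx.BatchNorm(num_features=8, use_running_average=False, rngs=rngs)

x = jnp.ones((2, 8))
y = ln(x)
z = bn(x)  # batch statistics are updated in place on the module
```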
4 changes: 2 additions & 2 deletions flax/nnx/nn/recurrent.py
@@ -291,7 +291,7 @@ class OptimizedLSTMCell(RNNCellBase):
(default: True). If rngs is stored, we should split the module as
`graphdef, params, nondiff = nnx.split(module, nnx.Param, ...)` where `nondiff`
contains RNG object associated with stored `self.rngs`.
-  rngs: rng key.
+  rngs: rngs object.
kernel_metadata: Optional metadata dictionary to set when initializing
the kernels that transform the input.
recurrent_kernel_metadata: Optional metadata dictionary to set when initializing
@@ -612,7 +612,7 @@ class GRUCell(RNNCellBase):
(default: True). If rngs is stored, we should split the module as
`graphdef, params, nondiff = nnx.split(module, nnx.Param, ...)` where `nondiff`
contains RNG object associated with stored `self.rngs`.
-  rngs: rng key.
+  rngs: rngs object.
kernel_metadata: Optional metadata dictionary to set when initializing
the kernels that transform the input.
recurrent_kernel_metadata: Optional metadata dictionary to set when initializing
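For the recurrent cells, a construction-and-step sketch; the zero carry assumes the GRU carry is simply the hidden state of shape ``(batch, hidden_features)``, and the sizes are arbitrary.

```python
from flax import nnx
import jax.numpy as jnp

cell = nnx.GRUCell(in_features=4, hidden_features=8, rngs=nnx.Rngs(0))
carry = jnp.zeros((2, 8))                  # (batch, hidden_features) zero carry
carry, y = cell(carry, jnp.ones((2, 4)))   # one recurrent step
```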
2 changes: 1 addition & 1 deletion flax/nnx/nn/stochastic.py
@@ -65,7 +65,7 @@ class Dropout(Module):
masked, whereas if true, no mask is applied and the inputs are returned
as is.
rng_collection: the rng collection name to use when requesting an rng key.
-  rngs: rng key.
+  rngs: rngs object.
"""

def __init__(
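Finally, for ``Dropout`` the stored ``rngs`` object supplies the mask keys via the ``rng_collection`` stream (here ``'dropout'``), and an ``rngs`` passed at call time takes precedence over the stored one; the rate and seeds below are arbitrary.

```python
from flax import nnx
import jax.numpy as jnp

drop = nnx.Dropout(rate=0.5, rngs=nnx.Rngs(dropout=0))
x = jnp.ones((2, 8))

y_train = drop(x, deterministic=False)                          # mask from the stored rngs
y_eval = drop(x, deterministic=True)                            # identity, no rng consumed
y_alt = drop(x, deterministic=False, rngs=nnx.Rngs(dropout=1))  # call-time rngs wins
```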