Commit 7132ab7

Merge pull request #11 from skutaada/main
Adding torch.nn.PReLU
2 parents: dd705ed + 2b29ce3

File tree: 2 files changed, +47 −0

tests/test_torch_nn.py

Lines changed: 27 additions & 0 deletions
@@ -297,6 +297,28 @@ def test_torch_nn_MaxPool2d():
     aac(jax_grad, x.grad)
 
 
+def test_torch_nn_PReLU():
+    model = torch.nn.PReLU(3)
+    input_batch = random.normal(random.PRNGKey(123), (1, 3, 112, 112))
+    params = {k: 0.1 * random.normal(random.PRNGKey(123), v.shape) for k, v in model.named_parameters()}
+    model.load_state_dict({k: j2t(v) for k, v in params.items()})
+    res_torch = model(j2t(input_batch))
+
+    jaxified_module = t2j(model)
+    res_jax = jaxified_module(input_batch, state_dict=params)
+    res_jax_jit = jit(jaxified_module)(input_batch, state_dict=params)
+
+    # Test forward pass without and with jit
+    aac(res_jax, res_torch.numpy(force=True), atol=1e-5)
+    aac(res_jax_jit, res_torch.numpy(force=True), atol=1e-5)
+
+    # Test gradients
+    jax_grad = grad(lambda p: (jaxified_module(input_batch, state_dict=p) ** 2).sum())(params)
+
+    res_torch.pow(2).sum().backward()
+    aac(jax_grad["weight"], model.weight.grad, atol=1e-3)
+
+
 ################################################################################
 # torch.nn.functional
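
For context on what this test checks: torch.nn.PReLU(3) learns one slope per channel and computes prelu(x, a) = max(0, x) + a * min(0, x). A minimal sketch of that computation, and of the weight gradient the test compares, in plain JAX (the names below are illustrative and not part of the diff):

    import jax.numpy as jnp
    from jax import grad, random

    def prelu_ref(x, a):
        # One learnable slope per channel, broadcast over an NCHW input.
        return jnp.where(x > 0, x, x * a[None, :, None, None])

    x = random.normal(random.PRNGKey(123), (1, 3, 8, 8))
    a = 0.1 * jnp.ones((3,))
    # Analogue of the test's gradient check: d/da of sum(prelu(x, a) ** 2).
    g = grad(lambda a: (prelu_ref(x, a) ** 2).sum())(a)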

@@ -323,6 +345,11 @@ def f(input, running_mean, running_var, weight, bias):
     t2j_function_test(f, [(2, 3, 5, 7), (3,), (3,), (3,), (3,)], atol=1e-6)
 
 
+def test_torch_nn_functional_prelu():
+    t2j_function_test(torch.nn.functional.prelu, [(6, 6), (1,)], atol=1e-6)
+    t2j_function_test(torch.nn.functional.prelu, [(5, 3, 112, 122), (3,)], atol=1e-6)
+
+
 def test_torch_nn_functional_scaled_dot_product_attention():
     t2j_function_test(lambda x, y: x @ y, [(2, 3, 5), (5, 7)], atol=1e-6)
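
The two cases above exercise a single shared slope (weight shape (1,)) and one slope per channel of an NCHW input (weight shape (3,)). A small standalone sketch of the torch/JAX parity being tested, mirroring what t2j_function_test automates (arrays here are made up for illustration):

    import numpy as np
    import torch
    import jax.numpy as jnp

    x = np.random.randn(5, 3, 4, 4).astype(np.float32)
    w = np.array([0.1, 0.2, 0.3], dtype=np.float32)

    res_torch = torch.nn.functional.prelu(torch.from_numpy(x), torch.from_numpy(w)).numpy()
    res_jax = jnp.where(x > 0, x, x * w[None, :, None, None])  # JAX reference
    np.testing.assert_allclose(res_jax, res_torch, rtol=1e-6, atol=1e-6)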

torch2jax/__init__.py

Lines changed: 20 additions & 0 deletions
@@ -915,6 +915,26 @@ def relu(x, inplace=False):
     return Torchish(jax.nn.relu(_v(x)))
 
 
+@implements(torch.nn.functional.prelu)
+def prelu(input: Torchish, weight: Torchish):
+    if weight.numel() != 1:
+        assert input.ndim > 0, "Zero-dim input tensors are not allowed."
+        channel_size = input.shape[1] if input.ndim >= 2 else 1
+        assert weight.numel() == channel_size, (
+            f"Mismatch of parameter numbers and input channel size. Found parameter numbers = {weight.numel()} and channel size = {channel_size}."
+        )
+    assert weight.ndim == 0 or weight.ndim == 1, (
+        f"prelu: Expected `weight` to be a scalar or 1D tensor, but got: ndim = {weight.ndim}"
+    )
+    if input.ndim == 0:
+        weight = weight[0] if weight.ndim == 1 else weight
+    else:
+        weight = Torchish(
+            jax.lax.broadcast_in_dim(_v(weight), input.shape, () if weight.ndim == 0 else (0 if input.ndim == 1 else 1,))
+        )
+    return Torchish(jnp.where(_v(input) > 0, _v(input), _v(input) * _v(weight)))
+
+
 @implements(torch.nn.functional.scaled_dot_product_attention)
 def scaled_dot_product_attention(query, key, value, attn_mask=None, dropout_p=0.0, is_causal=False):
     assert attn_mask is None, "TODO: implement attn_mask"
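
The broadcast_in_dim call above is what maps the 1D per-channel weight onto the channel axis (dim 1) of the input shape. A small standalone sketch of that primitive's behavior under these arguments (shapes chosen for illustration, not taken from the diff):

    import jax
    import jax.numpy as jnp

    w = jnp.array([0.1, 0.2, 0.3])  # per-channel slopes, shape (3,)
    target_shape = (1, 3, 4, 4)     # an NCHW input shape
    # broadcast_dimensions=(1,) places w's only axis at dim 1 of the target;
    # every other output dimension is broadcast.
    w_full = jax.lax.broadcast_in_dim(w, target_shape, (1,))
    assert w_full.shape == target_shape
    assert bool((w_full[0, 1] == 0.2).all())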
