keras-team
diff --git a/‎keras/src/dtype_policies/dtype_policy.py
Lines changed: 5 additions & 1 deletion b/‎keras/src/dtype_policies/dtype_policy.py
Lines changed: 5 additions & 1 deletion
diff --git a/‎keras/src/layers/core/dense.py
Lines changed: 36 additions & 1 deletion b/‎keras/src/layers/core/dense.py
Lines changed: 36 additions & 1 deletion
diff --git a/‎keras/src/layers/layer.py
Lines changed: 5 additions & 0 deletions b/‎keras/src/layers/layer.py
Lines changed: 5 additions & 0 deletions
@@ -350,7 +350,11 @@ def _get_quantized_dtype_policy_by_str(policy):
             f"Received: policy={policy}"
         )
     mode, source_name = split_name
-    if policy.startswith("int8") or policy.startswith("int4"):
+    if (
+        policy.startswith("int8")
+        or policy.startswith("int4")
+        or policy.startswith("gptq")
+    ):
         return QuantizedDTypePolicy(mode, source_name)
     elif policy.startswith("float8"):
         return QuantizedFloat8DTypePolicy(mode, source_name)
 
@@ -109,7 +109,7 @@ def build(self, input_shape):
         kernel_shape = (input_shape[-1], self.units)
         if self.quantization_mode:
             self.quantized_build(kernel_shape, mode=self.quantization_mode)
-        if self.quantization_mode not in ("int8", "int4"):
+        if self.quantization_mode not in ("int8", "int4", "gptq"):
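-            # If the layer is quantized to int8 or int4, `self._kernel` will be
-            # added in `self._int8_build` or `_int4_build`. Therefore, we skip
-            # it here.
+            # If the layer is quantized to int8, int4 or gptq, `self._kernel`
+            # will be added in `self._int8_build`, `_int4_build` or
+            # `_gptq_build`. Therefore, we skip it here.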
@@ -332,10 +332,36 @@ def quantized_build(self, kernel_shape, mode):
             self._int4_build(kernel_shape)
         elif mode == "float8":
             self._float8_build()
+        elif mode == "gptq":
+            self._gptq_build(kernel_shape)
         else:
             raise self._quantization_mode_error(mode)
         self._is_quantized = True
 
+    def _gptq_build(self, kernel_shape):
+        """Create the non-trainable variables backing GPTQ quantization.
+
+        Adds three zero-initialized weights, in this order: the int8
+        `kernel`, then a float32 `scale` and a float32 `zero_point`. All
+        three are created with shape `kernel_shape`.
+
+        Args:
+            kernel_shape: Shape used for each of the three variables.
+        """
+        # Settings shared by every variable created below.
+        frozen_zeros = {"initializer": "zeros", "trainable": False}
+        self._kernel = self.add_weight(
+            name="kernel",
+            shape=kernel_shape,
+            # TODO: choose this based on weight bits
+            dtype="int8",
+            **frozen_zeros,
+        )
+        self.kernel_scale = self.add_weight(
+            name="scale", shape=kernel_shape, dtype="float32", **frozen_zeros
+        )
+        self.zero_point = self.add_weight(
+            name="zero_point",
+            shape=kernel_shape,
+            dtype="float32",
+            **frozen_zeros,
+        )
+
     def _int8_build(self, kernel_shape):
         self.inputs_quantizer = quantizers.AbsMaxQuantizer(axis=-1)
         self._kernel = self.add_weight(
@@ -526,6 +552,12 @@ def grad_fn(*args, upstream=None):
             x = self.activation(x)
         return x
 
+    def _gptq_call(self, inputs, training=None):
+        """Forward pass for a GPTQ-quantized layer.
+
+        Dequantizes the stored kernel element-wise as
+        `(kernel - zero_point) * kernel_scale`, then applies the regular
+        dense transformation: matmul, optional bias, optional activation.
+
+        Args:
+            inputs: Input tensor; its last axis must match the first axis
+                of the kernel.
+            training: Unused. Accepted so the signature matches the other
+                `_*_call` variants dispatched by `quantized_call`.
+
+        Returns:
+            The output tensor of the layer.
+        """
+        del training
+        # `_gptq_build` stores the kernel as int8 while `zero_point` and
+        # `kernel_scale` are float32; cast explicitly so the dequantization
+        # arithmetic runs in floating point.
+        kernel = ops.cast(self._kernel, self.kernel_scale.dtype)
+        # `kernel_scale` has the same shape as the kernel, so it has to be
+        # applied element-wise. A matmul against it would treat the scale
+        # as a second weight matrix and fail on non-square kernels.
+        kernel = ops.multiply(
+            ops.subtract(kernel, self.zero_point), self.kernel_scale
+        )
+        x = ops.matmul(inputs, kernel)
+        # Keep parity with the other call paths: bias, then activation.
+        if self.bias is not None:
+            x = ops.add(x, self.bias)
+        if self.activation is not None:
+            x = self.activation(x)
+        return x
+
     def _float8_call(self, inputs, training=None):
         if self.lora_enabled:
             raise NotImplementedError(
@@ -654,6 +686,9 @@ def quantize(self, mode, type_check=True):
             self.kernel_scale.assign(kernel_scale)
         elif mode == "float8":
             self.quantized_build(kernel_shape, mode)
+        elif mode == "gptq":
+            del self._kernel
+            self.quantized_build(kernel_shape, mode)
         else:
             raise self._quantization_mode_error(mode)
 
 
@@ -1318,6 +1318,8 @@ def quantized_call(self, *args, **kwargs):
             return self._float8_call(*args, **kwargs)
         elif self.quantization_mode == "int4":
             return self._int4_call(*args, **kwargs)
+        elif self.quantization_mode == "gptq":
+            return self._gptq_call(*args, **kwargs)
         else:
             raise self._quantization_mode_error(self.quantization_mode)
 
@@ -1330,6 +1332,9 @@ def _int8_call(self, *args, **kwargs):
     def _float8_call(self, *args, **kwargs):
         raise self._not_implemented_error(self._float8_call)
 
+    def _gptq_call(self, *args, **kwargs):
+        """Default GPTQ forward pass; always raises.
+
+        `quantized_call` dispatches here when `self.quantization_mode` is
+        `"gptq"`. This base implementation only raises the error built by
+        `self._not_implemented_error` for this method.
+        """
+        error = self._not_implemented_error(self._gptq_call)
+        raise error
+
     def _not_implemented_error(self, attr, msg=None):
         if callable(attr):
             attr_name = attr.__name__