Add: (Sparse)Top K Categorical Accuracy Metric (keras-team#61)

ariG23498 · web-flow · commit b5f705e203ef · 2023-05-01T11:17:26.000-07:00
* chore: adding top k categorical accuracy

* chore: adding top k and in top k

* chore: fixing tests

* chore: y true argmax

* chore: adding sparse top k cat metric

* review comments
diff --git a/keras_core/backend/jax/math.py b/keras_core/backend/jax/math.py
@@ -13,3 +13,10 @@ def top_k(x, k, sorted=True):
             "Jax backend does not support `sorted=False` for `ops.top_k`"
         )
     return jax.lax.top_k(x, k)
+
+
+def in_top_k(targets, predictions, k):
+    topk_indices = top_k(predictions, k)[1]
+    targets = targets[..., None]
+    mask = targets == topk_indices
+    return jax.numpy.any(mask, axis=1)
diff --git a/keras_core/backend/tensorflow/math.py b/keras_core/backend/tensorflow/math.py
@@ -10,3 +10,7 @@ def segment_sum(data, segment_ids, num_segments=None, sorted=False):
 
 def top_k(x, k, sorted=True):
     return tf.math.top_k(x, k, sorted=sorted)
+
+
+def in_top_k(targets, predictions, k):
+    return tf.math.in_top_k(targets, predictions, k)
diff --git a/keras_core/metrics/accuracy_metrics.py b/keras_core/metrics/accuracy_metrics.py
@@ -279,3 +279,154 @@ def __init__(self, name="sparse_categorical_accuracy", dtype=None):
 
     def get_config(self):
         return {"name": self.name, "dtype": self.dtype}
+
+
+def top_k_categorical_accuracy(y_true, y_pred, k=5):
+    reshape_matches = False
+    y_pred = ops.convert_to_tensor(y_pred)
+    y_true = ops.convert_to_tensor(y_true, dtype=y_true.dtype)
+    y_true = ops.argmax(y_true, axis=-1)
+    y_true_rank = len(y_true.shape)
+    y_pred_rank = len(y_pred.shape)
+    y_true_org_shape = ops.shape(y_true)
+
+    # Flatten y_pred to (num_samples, num_classes) and y_true to (num_samples,)
+    if (y_true_rank is not None) and (y_pred_rank is not None):
+        if y_pred_rank > 2:
+            y_pred = ops.reshape(y_pred, [-1, y_pred.shape[-1]])
+        if y_true_rank > 1:
+            reshape_matches = True
+            y_true = ops.reshape(y_true, [-1])
+
+    matches = ops.cast(
+        ops.in_top_k(ops.cast(y_true, "int32"), y_pred, k=k),
+        dtype=backend.floatx(),
+    )
+
+    # Reshape matches back to the shape y_true had before it was flattened.
+    if reshape_matches:
+        matches = ops.reshape(matches, new_shape=y_true_org_shape)
+
+    return matches
+
+
+@keras_core_export("keras_core.metrics.TopKCategoricalAccuracy")
+class TopKCategoricalAccuracy(reduction_metrics.MeanMetricWrapper):
+    """Computes how often targets are in the top `K` predictions.
+
+    Args:
+        k: (Optional) Number of top elements to look at for computing accuracy.
+            Defaults to 5.
+        name: (Optional) string name of the metric instance.
+        dtype: (Optional) data type of the metric result.
+
+    Standalone usage:
+
+    >>> m = keras_core.metrics.TopKCategoricalAccuracy(k=1)
+    >>> m.update_state([[0, 0, 1], [0, 1, 0]],
+    ...                [[0.1, 0.9, 0.8], [0.05, 0.95, 0]])
+    >>> m.result()
+    0.5
+
+    >>> m.reset_state()
+    >>> m.update_state([[0, 0, 1], [0, 1, 0]],
+    ...                [[0.1, 0.9, 0.8], [0.05, 0.95, 0]],
+    ...                sample_weight=[0.7, 0.3])
+    >>> m.result()
+    0.3
+
+    Usage with `compile()` API:
+
+    ```python
+    model.compile(optimizer='sgd',
+                  loss='mse',
+                  metrics=[keras_core.metrics.TopKCategoricalAccuracy()])
+    ```
+    """
+
+    def __init__(self, k=5, name="top_k_categorical_accuracy", dtype=None):
+        super().__init__(
+            fn=top_k_categorical_accuracy,
+            name=name,
+            dtype=dtype,
+            k=k,
+        )
+        self.k = k
+
+    def get_config(self):
+        return {"name": self.name, "dtype": self.dtype, "k": self.k}
+
+
+def sparse_top_k_categorical_accuracy(y_true, y_pred, k=5):
+    reshape_matches = False
+    y_pred = ops.convert_to_tensor(y_pred)
+    y_true = ops.convert_to_tensor(y_true, dtype=y_true.dtype)
+    y_true_rank = len(y_true.shape)
+    y_pred_rank = len(y_pred.shape)
+    y_true_org_shape = ops.shape(y_true)
+
+    # Flatten y_pred to (num_samples, num_classes) and y_true to (num_samples,)
+    if (y_true_rank is not None) and (y_pred_rank is not None):
+        if y_pred_rank > 2:
+            y_pred = ops.reshape(y_pred, [-1, y_pred.shape[-1]])
+        if y_true_rank > 1:
+            reshape_matches = True
+            y_true = ops.reshape(y_true, [-1])
+
+    matches = ops.cast(
+        ops.in_top_k(ops.cast(y_true, "int32"), y_pred, k=k),
+        dtype=backend.floatx(),
+    )
+
+    # Reshape matches back to the shape y_true had before it was flattened.
+    if reshape_matches:
+        matches = ops.reshape(matches, new_shape=y_true_org_shape)
+
+    return matches
+
+
+@keras_core_export("keras_core.metrics.SparseTopKCategoricalAccuracy")
+class SparseTopKCategoricalAccuracy(reduction_metrics.MeanMetricWrapper):
+    """Computes how often integer targets are in the top `K` predictions.
+
+    Args:
+        k: (Optional) Number of top elements to look at for computing accuracy.
+            Defaults to 5.
+        name: (Optional) string name of the metric instance.
+        dtype: (Optional) data type of the metric result.
+
+    Standalone usage:
+
+    >>> m = keras_core.metrics.SparseTopKCategoricalAccuracy(k=1)
+    >>> m.update_state([2, 1], [[0.1, 0.9, 0.8], [0.05, 0.95, 0]])
+    >>> m.result()
+    0.5
+
+    >>> m.reset_state()
+    >>> m.update_state([2, 1], [[0.1, 0.9, 0.8], [0.05, 0.95, 0]],
+    ...                sample_weight=[0.7, 0.3])
+    >>> m.result()
+    0.3
+
+    Usage with `compile()` API:
+
+    ```python
+    model.compile(optimizer='sgd',
+                  loss='mse',
+                  metrics=[keras_core.metrics.SparseTopKCategoricalAccuracy()])
+    ```
+    """
+
+    def __init__(
+        self, k=5, name="sparse_top_k_categorical_accuracy", dtype=None
+    ):
+        super().__init__(
+            fn=sparse_top_k_categorical_accuracy,
+            name=name,
+            dtype=dtype,
+            k=k,
+        )
+        self.k = k
+
+    def get_config(self):
+        return {"name": self.name, "dtype": self.dtype, "k": self.k}
diff --git a/keras_core/metrics/accuracy_metrics_test.py b/keras_core/metrics/accuracy_metrics_test.py
@@ -10,6 +10,11 @@ def test_config(self):
         self.assertEqual(acc_obj.name, "accuracy")
         self.assertEqual(len(acc_obj.variables), 2)
         self.assertEqual(acc_obj._dtype, "float32")
+
+        # Test get_config
+        acc_obj_config = acc_obj.get_config()
+        self.assertEqual(acc_obj_config["name"], "accuracy")
+        self.assertEqual(acc_obj_config["dtype"], "float32")
         # TODO: Check save and restore config
 
     def test_unweighted(self):
@@ -38,6 +43,11 @@ def test_config(self):
         self.assertEqual(bin_acc_obj.name, "binary_accuracy")
         self.assertEqual(len(bin_acc_obj.variables), 2)
         self.assertEqual(bin_acc_obj._dtype, "float32")
+
+        # Test get_config
+        bin_acc_obj_config = bin_acc_obj.get_config()
+        self.assertEqual(bin_acc_obj_config["name"], "binary_accuracy")
+        self.assertEqual(bin_acc_obj_config["dtype"], "float32")
         # TODO: Check save and restore config
 
     def test_unweighted(self):
@@ -70,6 +80,11 @@ def test_config(self):
         self.assertEqual(cat_acc_obj.name, "categorical_accuracy")
         self.assertEqual(len(cat_acc_obj.variables), 2)
         self.assertEqual(cat_acc_obj._dtype, "float32")
+
+        # Test get_config
+        cat_acc_obj_config = cat_acc_obj.get_config()
+        self.assertEqual(cat_acc_obj_config["name"], "categorical_accuracy")
+        self.assertEqual(cat_acc_obj_config["dtype"], "float32")
         # TODO: Check save and restore config
 
     def test_unweighted(self):
@@ -102,6 +117,13 @@ def test_config(self):
         self.assertEqual(sp_cat_acc_obj.name, "sparse_categorical_accuracy")
         self.assertEqual(len(sp_cat_acc_obj.variables), 2)
         self.assertEqual(sp_cat_acc_obj._dtype, "float32")
+
+        # Test get_config
+        sp_cat_acc_obj_config = sp_cat_acc_obj.get_config()
+        self.assertEqual(
+            sp_cat_acc_obj_config["name"], "sparse_categorical_accuracy"
+        )
+        self.assertEqual(sp_cat_acc_obj_config["dtype"], "float32")
         # TODO: Check save and restore config
 
     def test_unweighted(self):
@@ -124,3 +146,90 @@ def test_weighted(self):
         sp_cat_acc_obj.update_state(y_true, y_pred, sample_weight=sample_weight)
         result = sp_cat_acc_obj.result()
         self.assertAllClose(result, 0.3, atol=1e-3)
+
+
+class TopKCategoricalAccuracyTest(testing.TestCase):
+    def test_config(self):
+        top_k_cat_acc_obj = accuracy_metrics.TopKCategoricalAccuracy(
+            k=1, name="top_k_categorical_accuracy", dtype="float32"
+        )
+        self.assertEqual(top_k_cat_acc_obj.name, "top_k_categorical_accuracy")
+        self.assertEqual(len(top_k_cat_acc_obj.variables), 2)
+        self.assertEqual(top_k_cat_acc_obj._dtype, "float32")
+
+        # Test get_config
+        top_k_cat_acc_obj_config = top_k_cat_acc_obj.get_config()
+        self.assertEqual(
+            top_k_cat_acc_obj_config["name"], "top_k_categorical_accuracy"
+        )
+        self.assertEqual(top_k_cat_acc_obj_config["dtype"], "float32")
+        self.assertEqual(top_k_cat_acc_obj_config["k"], 1)
+        # TODO: Check save and restore config
+
+    def test_unweighted(self):
+        top_k_cat_acc_obj = accuracy_metrics.TopKCategoricalAccuracy(
+            k=1, name="top_k_categorical_accuracy", dtype="float32"
+        )
+        y_true = np.array([[0, 0, 1], [0, 1, 0]])
+        y_pred = np.array([[0.1, 0.9, 0.8], [0.05, 0.95, 0]], dtype="float32")
+        top_k_cat_acc_obj.update_state(y_true, y_pred)
+        result = top_k_cat_acc_obj.result()
+        self.assertAllClose(result, 0.5, atol=1e-3)
+
+    def test_weighted(self):
+        top_k_cat_acc_obj = accuracy_metrics.TopKCategoricalAccuracy(
+            k=1, name="top_k_categorical_accuracy", dtype="float32"
+        )
+        y_true = np.array([[0, 0, 1], [0, 1, 0]])
+        y_pred = np.array([[0.1, 0.9, 0.8], [0.05, 0.95, 0]], dtype="float32")
+        sample_weight = np.array([0.7, 0.3])
+        top_k_cat_acc_obj.update_state(
+            y_true, y_pred, sample_weight=sample_weight
+        )
+        result = top_k_cat_acc_obj.result()
+        self.assertAllClose(result, 0.3, atol=1e-3)
+
+
+class SparseTopKCategoricalAccuracyTest(testing.TestCase):
+    def test_config(self):
+        sp_top_k_cat_acc_obj = accuracy_metrics.SparseTopKCategoricalAccuracy(
+            k=1, name="sparse_top_k_categorical_accuracy", dtype="float32"
+        )
+        self.assertEqual(
+            sp_top_k_cat_acc_obj.name, "sparse_top_k_categorical_accuracy"
+        )
+        self.assertEqual(len(sp_top_k_cat_acc_obj.variables), 2)
+        self.assertEqual(sp_top_k_cat_acc_obj._dtype, "float32")
+
+        # Test get_config
+        sp_top_k_cat_acc_obj_config = sp_top_k_cat_acc_obj.get_config()
+        self.assertEqual(
+            sp_top_k_cat_acc_obj_config["name"],
+            "sparse_top_k_categorical_accuracy",
+        )
+        self.assertEqual(sp_top_k_cat_acc_obj_config["dtype"], "float32")
+        self.assertEqual(sp_top_k_cat_acc_obj_config["k"], 1)
+        # TODO: Check save and restore config
+
+    def test_unweighted(self):
+        sp_top_k_cat_acc_obj = accuracy_metrics.SparseTopKCategoricalAccuracy(
+            k=1, name="sparse_top_k_categorical_accuracy", dtype="float32"
+        )
+        y_true = np.array([2, 1])
+        y_pred = np.array([[0.1, 0.9, 0.8], [0.05, 0.95, 0]], dtype="float32")
+        sp_top_k_cat_acc_obj.update_state(y_true, y_pred)
+        result = sp_top_k_cat_acc_obj.result()
+        self.assertAllClose(result, 0.5, atol=1e-3)
+
+    def test_weighted(self):
+        sp_top_k_cat_acc_obj = accuracy_metrics.SparseTopKCategoricalAccuracy(
+            k=1, name="sparse_top_k_categorical_accuracy", dtype="float32"
+        )
+        y_true = np.array([2, 1])
+        y_pred = np.array([[0.1, 0.9, 0.8], [0.05, 0.95, 0]], dtype="float32")
+        sample_weight = np.array([0.7, 0.3])
+        sp_top_k_cat_acc_obj.update_state(
+            y_true, y_pred, sample_weight=sample_weight
+        )
+        result = sp_top_k_cat_acc_obj.result()
+        self.assertAllClose(result, 0.3, atol=1e-3)
diff --git a/keras_core/operations/math.py b/keras_core/operations/math.py
@@ -1,6 +1,7 @@
 """
 segment_sum
 top_k
+in_top_k
 """
 
 from keras_core import backend
@@ -28,3 +29,14 @@ def top_k(x, k, sorted=True):
     if any_symbolic_tensors((x,)):
         return TopK().symbolic_call(x, k, sorted)
     return backend.math.top_k(x, k, sorted)
+
+
+class InTopK(Operation):
+    def call(self, targets, predictions, k):
+        return backend.math.in_top_k(targets, predictions, k)
+
+
+def in_top_k(targets, predictions, k):
+    if any_symbolic_tensors((targets, predictions)):
+        return InTopK().symbolic_call(targets, predictions, k)
+    return backend.math.in_top_k(targets, predictions, k)
diff --git a/keras_core/operations/nn.py b/keras_core/operations/nn.py
@@ -23,6 +23,8 @@
 conv_transpose
 
 one_hot
+top_k
+in_top_k
 
 ctc ??
 """