26 changes: 26 additions & 0 deletions namedtensor/core.py
@@ -207,6 +207,32 @@ def _broadcast_order(self, other):
             order.append(d)
         return order

+    def _broadcast_order_shape(self, other, indim, outdim):
+        """
+        Outputs two orders (lists) for self and other,
+        as well as the shapes necessary to expand both to a shared size.
+        Assumes an update from indim to outdim.
+        Moves indim and outdim to the front so the gather dim lines up in both.
+        """
+        self_order = [indim]
+        other_order = [outdim]
+        self_shape = [self.shape[indim]]
+        other_shape = [other.shape[outdim]]
+        exclude = {indim, outdim}
+        for d, s in other.shape.items():
+            if d not in self._schema._names and d not in exclude:
+                self_order.append(d)
+                other_order.append(d)
+                self_shape.append(s)
+                other_shape.append(s)
+        for d, s in self.shape.items():
+            if d not in exclude:
+                self_order.append(d)
+                other_order.append(d)
+                self_shape.append(s)
+                other_shape.append(s)
+        return self_order, other_order, self_shape, other_shape
+
     def _mask_broadcast_order(self, main):
         """
         If broadcasting possible from self (mask) to main, outputs a shared order.
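
For concreteness, here is a small hand trace of what the new helper returns for tensors shaped like the ones in test_gather below. This is an illustrative sketch, not part of the diff: the import path is assumed from the package layout, the private helper is called directly only to illustrate it, and the expected tuple is traced by hand from the code above (it relies on .shape iterating in schema order, as the OrderedDict assertion in test_core.py suggests).

import torch
from namedtensor import ntorch  # assumed import path

t = ntorch.tensor(torch.rand(2, 3), ("a", "b"))
index = ntorch.tensor(torch.zeros(2, 4).long(), ("a", "c"))

# With indim="b" and outdim="c": "b" and "c" go first, "a" is shared (taken
# from t in the second loop), and index has no extra dims, so the hand-traced
# result is (["b", "a"], ["c", "a"], [3, 2], [4, 2]).
print(t._broadcast_order_shape(index, "b", "c"))
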
5 changes: 4 additions & 1 deletion namedtensor/test_core.py
@@ -284,7 +284,10 @@ def test_gather():

     t = ntorch.tensor(torch.Tensor([[1, 2], [3, 4]]), ("a", "b"))
     index = ntorch.tensor(torch.LongTensor([[0, 0], [1, 0]]), ("a", "c"))
-    ntensor = ntorch.gather(t, "b", index, "c")
+    # Gather will move "b" and "c" to the front for t and index
+    # respectively, so we must force the order to compare against the
+    # original torch.gather.
+    ntensor = ntorch.gather(t, "b", index, "c")._force_order(("a", "c"))
Contributor

This isn't a good unit test. It shouldn't call any _ functions.

Contributor Author (@justinchiu, Feb 25, 2019)

High level question: Since we don't assume any ordering, is the right approach to try all permutations of the output ntensor and pass if any of them succeed (equal base)? Or should we try to keep the underlying order the same as torch.* (although this may be unclear for ntorch.gather, since broadcasting isn't defined in torch.gather)?

Contributor

I think the ntorch.equal function will now do this automatically. But either way, isn't the function you want just .transpose? More importantly, does this test prove to me that your change works?

Contributor Author

Great, thanks. I agree that the unit test doesn't test anything, but I wanted to ask about how to compare first. I can write a better test.

     assert (ntensor.values == base).all()
     assert ntensor.shape == OrderedDict([("a", 2), ("c", 2)])

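
Following the thread above, one possible shape for an improved test is sketched here. It is only a sketch: test_gather_broadcast is a hypothetical name, the import path is assumed, and it assumes .transpose accepts dimension names (as suggested in the thread) so the comparison goes through the public API instead of _force_order.

from collections import OrderedDict

import torch
from namedtensor import ntorch  # assumed import path


def test_gather_broadcast():
    t = ntorch.tensor(torch.Tensor([[1, 2], [3, 4]]), ("a", "b"))
    index = ntorch.tensor(torch.LongTensor([[0, 0], [1, 0]]), ("a", "c"))
    # Plain torch reference: gather along t's second dim ("b").
    base = t.values.gather(1, index.values)

    ntensor = ntorch.gather(t, "b", index, "c")
    # Fix the name order through the public API, then compare values.
    ntensor = ntensor.transpose("a", "c")
    assert ntensor.shape == OrderedDict([("a", 2), ("c", 2)])
    assert (ntensor.values == base).all()
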
19 changes: 12 additions & 7 deletions namedtensor/torch_base.py
@@ -160,15 +160,20 @@ def unique(input, dim=None, names=("unique", "Indices"), **kwargs):

     @staticmethod
     def gather(input, dim, index, index_dim):
         "Gathers elements using `index` from `input`."
         outdim = index_dim
         indim = dim
-        index_order = [
-            (n if n != indim else outdim) for n in input._schema._names
-        ]
-        b1 = index._force_order(index_order)
-        dim = input._schema.get(indim)
-        return input._new(
-            input.values.gather(dim, b1.values), updates={indim: outdim}
+        input_order, index_order, input_shape, index_shape = (
+            input._broadcast_order_shape(index, indim, outdim)
+        )
+        input1 = input._force_order(input_order)
+        index1 = index._force_order(index_order)
+        dim = input1._schema.get(indim)
+        return input1._new(
+            input1.values.expand(input_shape).gather(
+                dim, index1.values.expand(index_shape)
+            ),
+            updates={indim: outdim}
         )

     @staticmethod
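
To make the effect of the new expand calls concrete, here is a hypothetical usage sketch. The names and sizes are made up, the import path is assumed, and the expected output names are read off from _broadcast_order_shape and the rename in _new above; it also assumes _force_order inserts missing dims as size 1 so that expand can broadcast them, which is the behavior this change relies on.

import torch
from namedtensor import ntorch  # assumed import path

t = ntorch.tensor(torch.rand(2, 5), ("a", "b"))
# The index carries an extra "k" dim that t does not have; the old gather
# required the dims to line up, while the version above expands t over "k".
index = ntorch.tensor(torch.zeros(2, 3, 4).long(), ("a", "c", "k"))

out = ntorch.gather(t, "b", index, "c")
# Expected names (in some order): "c", "k", "a",
# with out[c, k, a] == t[a, index[a, c, k]].
print(out.shape)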