fps cpu version

rusty1s · rusty1s · commit b0f9f81b3394 · 2018-12-18T07:11:47.000+01:00
diff --git a/cuda/fps_kernel.cu b/cuda/fps_kernel.cu
@@ -169,7 +169,7 @@ at::Tensor fps_cuda(at::Tensor x, at::Tensor batch, float ratio, bool random) {
 
   auto deg = degree(batch, batch_size);
   auto cum_deg = at::cat({at::zeros(1, deg.options()), deg.cumsum(0)}, 0);
-  auto k = (deg.toType(at::kFloat) * ratio).round().toType(at::kLong);
+  auto k = (deg.toType(at::kFloat) * ratio).ceil().toType(at::kLong);
   auto cum_k = at::cat({at::zeros(1, k.options()), k.cumsum(0)}, 0);
 
   at::Tensor start;
diff --git a/test/test_fps.py b/test/test_fps.py
@@ -4,12 +4,9 @@
 import torch
 from torch_cluster import fps
 
-from .utils import tensor, grad_dtypes
+from .utils import grad_dtypes, devices, tensor
 
-devices = [torch.device('cuda')]
 
-
-@pytest.mark.skipif(not torch.cuda.is_available(), reason='CUDA not available')
 @pytest.mark.parametrize('dtype,device', product(grad_dtypes, devices))
 def test_fps(dtype, device):
     x = tensor([
@@ -26,25 +23,3 @@ def test_fps(dtype, device):
 
     out = fps(x, batch, ratio=0.5, random_start=False)
     assert out.tolist() == [0, 2, 4, 6]
-
-
-@pytest.mark.skipif(not torch.cuda.is_available(), reason='CUDA not available')
-@pytest.mark.parametrize('dtype,device', product(grad_dtypes, devices))
-def test_fps_speed(dtype, device):
-    return
-    batch_size, num_nodes = 100, 10000
-    x = torch.randn((batch_size * num_nodes, 3), dtype=dtype, device=device)
-    batch = torch.arange(batch_size, dtype=torch.long, device=device)
-    batch = batch.view(-1, 1).repeat(1, num_nodes).view(-1)
-
-    out = fps(x, batch, ratio=0.5, random_start=True)
-    assert out.size(0) == batch_size * num_nodes * 0.5
-    assert out.min().item() >= 0 and out.max().item() < batch_size * num_nodes
-
-    batch_size, num_nodes, dim = 100, 300, 128
-    x = torch.randn((batch_size * num_nodes, dim), dtype=dtype, device=device)
-    batch = torch.arange(batch_size, dtype=torch.long, device=device)
-    batch = batch.view(-1, 1).repeat(1, num_nodes).view(-1)
-    out = fps(x, batch, ratio=0.5, random_start=True)
-    assert out.size(0) == batch_size * num_nodes * 0.5
-    assert out.min().item() >= 0 and out.max().item() < batch_size * num_nodes
diff --git a/torch_cluster/fps.py b/torch_cluster/fps.py
@@ -1,4 +1,5 @@
 import torch
+import fps_cpu
 
 if torch.cuda.is_available():
     import fps_cuda
@@ -39,12 +40,11 @@ def fps(x, batch=None, ratio=0.5, random_start=True):
 
     x = x.view(-1, 1) if x.dim() == 1 else x
 
-    assert x.is_cuda
     assert x.dim() == 2 and batch.dim() == 1
     assert x.size(0) == batch.size(0)
     assert ratio > 0 and ratio < 1
 
-    op = fps_cuda.fps if x.is_cuda else None
-    out = op(x, batch, ratio, random_start)
-
-    return out
+    if x.is_cuda:
+        return fps_cuda.fps(x, batch, ratio, random_start)
+    else:
+        return fps_cpu.fps(x, batch, ratio, random_start)