Auto detect low vram (#956)

oulgen · web-flow · commit ff8b21333b0c · 2025-10-15T17:10:42.000-07:00
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -131,7 +131,6 @@ jobs:
           if [[ "${{ matrix.dtype-asserts }}" == "true" ]]; then export HELION_DEBUG_DTYPE_ASSERTS=1; fi
           if [[ "${{ matrix.expecttest-accept }}" == "true" ]]; then export EXPECTTEST_ACCEPT=1; fi
           if [[ "${{ matrix.ref-eager }}" == "true" ]]; then export HELION_INTERPRET=1; fi
-          if [[ "${{ matrix.alias }}" == *"a10g"* ]]; then export HELION_DEV_LOW_VRAM=1; fi
           # -rf: print failed tests
           # --timeout: max allowed time for each test
           pytest -rf --timeout=60
diff --git a/benchmarks/run.py b/benchmarks/run.py
@@ -183,9 +183,7 @@ class RunResult:
         "tritonbench.operators.jagged_mean.operator",
         "examples.jagged_mean",
         "jagged_mean_tritonbench",
-        {"B": 32, "M": 8, "seqlen": 64}
-        if os.environ.get("HELION_DEV_LOW_VRAM", "0") == "1"
-        else {},
+        {},
     ),
     "fp8_gemm": (
         "tritonbench.operators.fp8_gemm.fp8_gemm",
@@ -208,9 +206,7 @@ class RunResult:
         "tritonbench.operators.cross_entropy.operator",
         "examples.cross_entropy",
         "cross_entropy",
-        {"B": 4, "T": 512, "v_range": "10,15"}
-        if os.environ.get("HELION_DEV_LOW_VRAM", "0") == "1"
-        else {},
+        {},
     ),
     "fp8_attention": (
         "tritonbench.operators.fp8_attention.operator",
diff --git a/helion/_testing.py b/helion/_testing.py
@@ -76,8 +76,19 @@ def skipIfNotCUDA() -> Callable[[Callable], Callable]:
 def skipIfLowVRAM(
     reason: str = "Test requires high VRAM",
 ) -> Callable[[Callable], Callable]:
-    """Skip test if HELION_DEV_LOW_VRAM=1 is set"""
-    return unittest.skipIf(os.environ.get("HELION_DEV_LOW_VRAM", "0") == "1", reason)
+    """Skip test when the current CUDA device reports under 30 GiB of total VRAM."""
+
+    threshold_bytes = int(30.0 * (1024**3))
+    total_memory: int | None = None
+    try:
+        if torch.cuda.is_available():
+            props = torch.cuda.get_device_properties(torch.cuda.current_device())
+            total_memory = int(getattr(props, "total_memory", 0))
+    except Exception:
+        total_memory = None
+
+    low_vram = total_memory is not None and total_memory < threshold_bytes
+    return unittest.skipIf(low_vram, reason)
 
 
 def skipIfPy314(reason: str) -> Callable[[Callable], Callable]: