pytorch · jerryzh168 · Aug 6, 2025 · Jun 30, 2025
diff --git a/.github/workflows/1xH100_tests.yml b/.github/workflows/1xH100_tests.yml
@@ -25,15 +25,15 @@ jobs:
         include:
           - name: H100
             runs-on: linux.aws.h100
-            torch-spec: '--pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu126'
+            torch-spec: '--pre torch torchvision torchaudio fbgemm-gpu-genai --index-url https://download.pytorch.org/whl/nightly/cu126'
             gpu-arch-type: "cuda"
             gpu-arch-version: "12.4"
     permissions:
       id-token: write
       contents: read
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
-      timeout: 60
+      timeout: 90
       runner: ${{ matrix.runs-on }}
       gpu-arch-type: ${{ matrix.gpu-arch-type }}
       gpu-arch-version: ${{ matrix.gpu-arch-version }}
@@ -46,8 +46,8 @@ jobs:
         pip install uv
         pip install ${{ matrix.torch-spec }}
         uv pip install -r dev-requirements.txt
-        uv pip install vllm
         pip install .
         pytest test/integration --verbose -s
         pytest test/dtypes/test_affine_quantized_float.py --verbose -s
+        python test/quantization/quantize_/workflows/float8/test_float8_tensor.py
         ./test/float8/test_everything_single_gpu.sh
diff --git a/.github/workflows/1xL4_tests.yml b/.github/workflows/1xL4_tests.yml
@@ -46,8 +46,8 @@ jobs:
         pip install uv
         pip install ${{ matrix.torch-spec }}
         uv pip install -r dev-requirements.txt
-        uv pip install vllm
         pip install .
         pytest test/integration --verbose -s
         pytest test/dtypes/test_affine_quantized_float.py --verbose -s
         ./test/float8/test_everything_single_gpu.sh
+        python test/quantization/quantize_/workflows/float8/test_float8_tensor.py
diff --git a/test/dtypes/test_affine_quantized_float.py b/test/dtypes/test_affine_quantized_float.py
@@ -737,6 +737,7 @@ def test_expected_kernels_on_gpu(self, granularity, torch_compile_mode):
         Verify that float8 quantization + torch.compile results in the
         expected number of kernels in the GPU trace.
         """
+        torch.compiler.reset()
 
         M, K, N = 128, 256, 512
         m = torch.nn.Sequential(

diff --git a/test/dtypes/test_fbgemm_fp8.py b/test/dtypes/test_fbgemm_fp8.py