@@ -434,6 +434,48 @@ def arange_block_size_mul(x: torch.Tensor, *, _launcher=_default_launcher):
434434 _launcher(_helion_arange_block_size_mul, (triton.cdiv(64, _BLOCK_SIZE_0),), ones, out, _BLOCK_SIZE_0, 2 * _BLOCK_SIZE_0, num_warps=4, num_stages=2)
435435 return out
436436
437+ --- assertExpectedJournal(TestIndexing.test_tile_count_top_level)
438+ from __future__ import annotations
439+
440+ import torch
441+ import triton
442+ import triton.language as tl
443+ from helion.runtime import default_launcher as _default_launcher
444+
@triton.jit
def _helion_fn(out, n, _BLOCK_SIZE_0: tl.constexpr):
    # Each program instance covers one _BLOCK_SIZE_0-wide tile of `out`,
    # selected by its position along grid axis 0.
    tile_start = tl.program_id(0) * _BLOCK_SIZE_0
    lane_idx = (tile_start + tl.arange(0, _BLOCK_SIZE_0)).to(tl.int32)
    # Ceiling division: how many tiles of this width are needed to span n.
    num_tiles = tl.cdiv(n, _BLOCK_SIZE_0)
    # Lanes whose index is >= n are masked out of the store.
    in_range = lane_idx < n
    tl.store(out + lane_idx * 1, num_tiles, in_range)
453+
def fn(n: int, device: torch.device, *, _launcher=_default_launcher):
    """Allocate a zeroed int32 vector of length ``n`` and run ``_helion_fn`` on it.

    The kernel is launched through ``_launcher`` on a 1-D grid of
    ``triton.cdiv(n, 64)`` programs with ``num_warps=4`` and ``num_stages=2``.
    The tensor passed to the kernel is returned.
    """
    result = torch.zeros([n], dtype=torch.int32, device=device)
    block_size = 64
    # One program per block_size-wide tile of the output.
    grid = (triton.cdiv(n, block_size),)
    _launcher(_helion_fn, grid, result, n, block_size, num_warps=4, num_stages=2)
    return result
459+
460+ --- assertExpectedJournal(TestIndexing.test_tile_count_with_begin_end)
461+ from __future__ import annotations
462+
463+ import torch
464+ import triton
465+ import triton.language as tl
466+ from helion.runtime import default_launcher as _default_launcher
467+
@triton.jit
def _helion_fn(out, begin, end, _BLOCK_SIZE_0: tl.constexpr):
    # Length of the span, written as end + (-1 * begin), i.e. end - begin.
    span = end + -1 * begin
    # Ceiling division: number of _BLOCK_SIZE_0-wide tiles needed for the span.
    num_tiles = tl.cdiv(span, _BLOCK_SIZE_0)
    # A 0-d zero offset addresses the first element of `out`; the store is
    # unmasked and does not depend on the program id.
    first_slot = out + tl.zeros([], tl.int32)
    tl.store(first_slot, num_tiles, None)
472+
def fn(begin: int, end: int, device: torch.device, *, _launcher=_default_launcher):
    """Allocate a one-element zeroed int32 tensor and run ``_helion_fn`` on it.

    The kernel is launched through ``_launcher`` on a 1-D grid of
    ``triton.cdiv(end - begin, 32)`` programs with ``num_warps=4`` and
    ``num_stages=2``. The tensor passed to the kernel is returned.
    """
    result = torch.zeros([1], dtype=torch.int32, device=device)
    block_size = 32
    # One program per block_size-wide tile of the begin..end span.
    grid = (triton.cdiv(end + -1 * begin, block_size),)
    _launcher(_helion_fn, grid, result, begin, end, block_size, num_warps=4, num_stages=2)
    return result
478+
437479--- assertExpectedJournal(TestIndexing.test_tile_with_offset_block_ptr)
438480from __future__ import annotations
439481
0 commit comments