File tree: 2 files changed, +23 −1 lines changed
18
19
19
from vllm .attention .backends .abstract import (AttentionBackend , AttentionImpl ,
20
20
AttentionLayer ,
21
- AttentionMetadata , AttentionType )
21
+ AttentionMetadata , AttentionType , AttentionMetadataBuilder )
22
22
from vllm .attention .backends .mla .common import MLACommonImpl
23
23
from vllm .attention .backends .utils import CommonAttentionState
24
24
from vllm_gaudi .attention .ops .hpu_paged_attn import (HPUPagedAttention ,
@@ -47,6 +47,10 @@ def get_metadata_cls() -> type["AttentionMetadata"]:
47
47
def get_state_cls() -> type["CommonAttentionState"]:
    """Return the attention-state class used by this backend."""
    state_cls = CommonAttentionState
    return state_cls
49
49
50
@staticmethod
def get_builder_cls() -> type["AttentionMetadataBuilder"]:
    """Return the metadata-builder class for this backend.

    Annotated with the builtin ``type`` for consistency with the sibling
    ``get_state_cls`` accessor (``typing.Type`` is deprecated since
    Python 3.9 and is not imported in this module).
    """
    # NOTE(review): the builder class added in hpu_paged_attn.py is named
    # HPUPagedAttentionMetadataBuilder — confirm that
    # HPUAttentionMetadataBuilder is the intended name and that it resolves
    # at import time.
    return HPUAttentionMetadataBuilder
53
+
50
54
@staticmethod
51
55
def get_kv_cache_shape (
52
56
num_blocks : int ,
Original file line number Diff line number Diff line change 8
8
from typing import Optional
9
9
10
10
import torch
11
+ from vllm .attention .backends .abstract import AttentionMetadataBuilder
11
12
from vllm_gaudi .extension import cache_ops , ops
12
13
13
14
# Should be the same as PARTITION_SIZE in `paged_attention_v2_launcher`.
@@ -24,6 +25,23 @@ class HPUPagedAttentionMetadata:
24
25
alibi_blocks : Optional [torch .Tensor ]
25
26
26
27
28
class HPUPagedAttentionMetadataBuilder(
        AttentionMetadataBuilder[HPUPagedAttentionMetadata]):
    """Builder that produces :class:`HPUPagedAttentionMetadata` for a batch.

    Fix: dropped the spurious ``@dataclass`` decorator — the class declares
    no dataclass fields and defines its own ``__init__``, so the decorator
    generated nothing and only obscured intent.
    """

    def __init__(self, input_builder: "ModelRunnerInputBuilderBase") -> None:
        """Create the builder, remember some configuration and parameters."""
        # Kept for later use when building per-batch metadata.
        self.input_builder = input_builder

    def prepare(self) -> None:
        """Prepare for one batch (no per-batch state is kept yet)."""

    def build(self, seq_lens: list[int], query_lens: list[int],
              cuda_graph_pad_size: int,
              batch_size: int) -> HPUPagedAttentionMetadata:
        """Build attention metadata with on-device tensors.

        NOTE(review): this currently returns the HPUPagedAttentionMetadata
        *class* rather than an instance, while the annotation promises an
        instance. The placeholder behavior is preserved here because the
        metadata fields needed to construct one are not visible at this
        point; replace with a populated instance when the builder is wired
        into the model runner.
        """
        return HPUPagedAttentionMetadata
43
+
44
+
27
45
class HPUPagedAttention :
28
46
29
47
@staticmethod
You can’t perform that action at this time.
0 commit comments