Commit 30ddd56

fix: rename op_offloat to op_offload in llama.py (#2046)
1 parent: af63792

1 file changed

llama_cpp/llama.py

Lines changed: 5 additions & 5 deletions
@@ -92,7 +92,7 @@ def __init__(
         embedding: bool = False,
         offload_kqv: bool = True,
         flash_attn: bool = False,
-        op_offloat: Optional[bool] = None,
+        op_offload: Optional[bool] = None,
         swa_full: Optional[bool] = None,
         # Sampling Params
         no_perf: bool = False,
@@ -174,7 +174,7 @@ def __init__(
             embedding: Embedding mode only.
             offload_kqv: Offload K, Q, V to GPU.
             flash_attn: Use flash attention.
-            op_offloat: offload host tensor operations to device
+            op_offload: offload host tensor operations to device
             swa_full: use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
             no_perf: Measure performance timings.
             last_n_tokens_size: Maximum number of tokens to keep in the last_n_tokens deque.
@@ -343,8 +343,8 @@ def __init__(
         self.context_params.offload_kqv = offload_kqv
         self.context_params.flash_attn = flash_attn

-        if op_offloat is not None:
-            self.context_params.op_offloat = op_offloat
+        if op_offload is not None:
+            self.context_params.op_offload = op_offload

         if swa_full is not None:
             self.context_params.swa_full = swa_full
@@ -2097,7 +2097,7 @@ def __getstate__(self):
             embedding=self.context_params.embeddings,
             offload_kqv=self.context_params.offload_kqv,
             flash_attn=self.context_params.flash_attn,
-            op_offloat=self.context_params.op_offloat,
+            op_offload=self.context_params.op_offload,
             swa_full=self.context_params.swa_full,
             # Sampling Params
             no_perf=self.context_params.no_perf,
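After this rename, callers pass op_offload (not the misspelled op_offloat) when constructing a Llama instance; the docstring above describes it as offloading host tensor operations to the device. A minimal usage sketch, assuming a placeholder GGUF model path that is not part of the commit:

    # Hypothetical usage after this fix; "models/model.gguf" is a placeholder.
    from llama_cpp import Llama

    llm = Llama(
        model_path="models/model.gguf",  # placeholder path, not from the commit
        op_offload=True,  # offload host tensor operations to the device
    )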
