Commit eef9527

fix: add compatibility with v0.3.14 for Gemma3ChatHandler
1 parent 6639d3e
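Gemma3ChatHandler subclasses Llava15ChatHandler (see the diff below), so it plugs into the usual multimodal chat flow. A minimal usage sketch — the paths, file names, and message content are illustrative, not part of this commit:

from llama_cpp import Llama
from llama_cpp.llama_chat_format import Gemma3ChatHandler

# Hypothetical local paths; substitute your own GGUF weights + mmproj file.
chat_handler = Gemma3ChatHandler(clip_model_path="mmproj-gemma3.gguf")
llm = Llama(
    model_path="gemma-3-4b-it.gguf",
    chat_handler=chat_handler,
    n_ctx=4096,  # leave headroom for image tokens
)

response = llm.create_chat_completion(
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Describe this image."},
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
            ],
        }
    ],
)
print(response["choices"][0]["message"]["content"])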

File tree: 2 files changed (+67 −120 lines)


llama_cpp/llama_chat_format.py

Lines changed: 67 additions & 35 deletions
@@ -2752,10 +2752,10 @@ def _create_bitmap_from_bytes(self, image_bytes: bytes):
             (ctypes.c_uint8 * len(image_bytes)).from_buffer(bytearray(image_bytes)),
             len(image_bytes)
         )
-
+
         if bitmap is None:
             raise ValueError("Failed to create bitmap from image bytes")
-
+
         return bitmap

     def __call__(
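Aside: the from_buffer pattern in this hunk is a standard zero-copy way to hand Python bytes to a C API via ctypes. A standalone sketch of the same idiom (the payload is illustrative):

import ctypes

image_bytes = b"\x89PNG\r\n\x1a\n"  # illustrative payload, not a full image

# Build a C uint8 array that shares memory with a mutable copy of the bytes.
# from_buffer() requires a writable buffer, hence the intermediate bytearray().
buf = (ctypes.c_uint8 * len(image_bytes)).from_buffer(bytearray(image_bytes))
assert len(buf) == len(image_bytes)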
@@ -2814,18 +2814,18 @@ def __call__(
             trim_blocks=True,
             lstrip_blocks=True,
         ).from_string(self.CHAT_FORMAT)
-
+
         # Get the default media marker
         media_marker = self._mtmd_cpp.mtmd_default_marker().decode('utf-8')
-
+
         # Replace image URLs with media markers in the template
         text = template.render(
             messages=messages,
             add_generation_prompt=True,
             eos_token=llama.detokenize([llama.token_eos()]),
             bos_token=llama.detokenize([llama.token_bos()]),
         )
-
+
         # Replace image URLs in text with media markers
         for image_url in image_urls:
             text = text.replace(image_url, media_marker)
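The URL-to-marker substitution above is plain string replacement: every image URL left in the rendered prompt is swapped for the marker that mtmd uses to locate media. Conceptually — the marker value shown here is an assumption about what mtmd_default_marker() returns; the real code always queries it at runtime:

media_marker = "<__media__>"  # assumed return value of mtmd_default_marker()
text = "Describe this: https://example.com/cat.png"
for image_url in ["https://example.com/cat.png"]:
    text = text.replace(image_url, media_marker)
# text == "Describe this: <__media__>"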
@@ -2875,40 +2875,40 @@ def __call__(
         # Process each chunk
         n_past = llama_cpp.llama_pos(0)
         n_chunks = self._mtmd_cpp.mtmd_input_chunks_size(chunks)
-
+
         for i in range(n_chunks):
             chunk = self._mtmd_cpp.mtmd_input_chunks_get(chunks, i)
             if chunk is None:
                 continue

             chunk_type = self._mtmd_cpp.mtmd_input_chunk_get_type(chunk)
-
+
             if chunk_type == self._mtmd_cpp.MTMD_INPUT_CHUNK_TYPE_TEXT:
                 # Handle text chunk
                 n_tokens_out = ctypes.c_size_t()
                 tokens_ptr = self._mtmd_cpp.mtmd_input_chunk_get_tokens_text(
                     chunk, ctypes.byref(n_tokens_out)
                 )
-
+
                 if tokens_ptr and n_tokens_out.value > 0:
                     # Convert ctypes array to Python list
                     tokens = [tokens_ptr[j] for j in range(n_tokens_out.value)]
-
+
                     if llama.n_tokens + len(tokens) > llama.n_ctx():
                         raise ValueError(
                             f"Prompt exceeds n_ctx: {llama.n_tokens + len(tokens)} > {llama.n_ctx()}"
                         )
                     llama.eval(tokens)
-
+
             elif chunk_type in [self._mtmd_cpp.MTMD_INPUT_CHUNK_TYPE_IMAGE, self._mtmd_cpp.MTMD_INPUT_CHUNK_TYPE_AUDIO]:
                 # Handle image/audio chunk using helper
                 chunk_n_tokens = self._mtmd_cpp.mtmd_input_chunk_get_n_tokens(chunk)
-
+
                 if llama.n_tokens + chunk_n_tokens > llama.n_ctx():
                     raise ValueError(
                         f"Prompt exceeds n_ctx: {llama.n_tokens + chunk_n_tokens} > {llama.n_ctx()}"
                     )
-
+
                 new_n_past = llama_cpp.llama_pos(0)
                 result = self._mtmd_cpp.mtmd_helper_eval_chunk_single(
                     self.mtmd_ctx,
@@ -2920,10 +2920,10 @@ def __call__(
                     False, # logits_last
                     ctypes.byref(new_n_past)
                 )
-
+
                 if result != 0:
                     raise ValueError(f"Failed to evaluate chunk: error code {result}")
-
+
                 # Update llama's token count
                 llama.n_tokens = new_n_past.value

@@ -3013,34 +3013,14 @@ def __call__(
             grammar=grammar,
             logit_bias=logit_bias,
         )
-
+
         if tool is not None:
             tool_name = tool["function"]["name"]
             return _convert_completion_to_chat_function(
                 tool_name, completion_or_chunks, stream
             )
         return _convert_completion_to_chat(completion_or_chunks, stream=stream)

-    def eval_image(self, llama: llama.Llama, image_url: str):
-        image_bytes = self.load_image(image_url)
-        embed = self._embed_image_bytes(image_bytes, llama.context_params.n_threads_batch)
-        if llama.n_tokens + embed.contents.n_image_pos > llama.n_ctx():
-            raise ValueError(
-                f"Prompt exceeds n_ctx: {llama.n_tokens + embed.contents.n_image_pos} > {llama.n_ctx()}"
-            )
-        n_past = ctypes.c_int(llama.n_tokens)
-        n_past_p = ctypes.pointer(n_past)
-        with suppress_stdout_stderr(disable=self.verbose):
-            self._llava_cpp.llava_eval_image_embed(
-                llama.ctx,
-                embed,
-                llama.n_batch,
-                n_past_p,
-            )
-        # Required to avoid issues with hf tokenizer
-        llama.input_ids[llama.n_tokens : n_past.value] = -1
-        llama.n_tokens = n_past.value
-
     @staticmethod
     def _load_image(image_url: str) -> bytes:
         # TODO: Add Pillow support for other image formats beyond (jpg, png)
@@ -3533,6 +3513,58 @@ def __call__(self, **kwargs):
         return super().__call__(**kwargs)


+class Gemma3ChatHandler(Llava15ChatHandler):
+    # Chat Format:
+    # '<bos><start_of_turn>user\n{system_prompt}\n\n{prompt}<end_of_turn>\n<start_of_turn>model\n'
+
+    DEFAULT_SYSTEM_MESSAGE = None
+
+    CHAT_FORMAT = (
+        "{{ '<bos>' }}"
+        "{% if messages[0]['role'] == 'system' %}"
+        "{% if messages[0]['content'] is string %}"
+        "{% set first_user_prefix = messages[0]['content'] + '\n\n' %}"
+        "{% else %}"
+        "{% set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' %}"
+        "{% endif %}"
+        "{% set loop_messages = messages[1:] %}"
+        "{% else %}"
+        "{% set first_user_prefix = \"\" %}"
+        "{% set loop_messages = messages %}"
+        "{% endif %}"
+        "{% for message in loop_messages %}"
+        "{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}"
+        "{{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}"
+        "{% endif %}"
+        "{% if (message['role'] == 'assistant') %}"
+        "{% set role = \"model\" %}"
+        "{% else %}"
+        "{% set role = message['role'] %}"
+        "{% endif %}"
+        "{{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}"
+        "{% if message['content'] is string %}"
+        "{{ message['content'] | trim }}"
+        "{% elif message['content'] is iterable %}"
+        "{% for item in message['content'] %}"
+        "{% if item['type'] == 'image_url' and item['image_url'] is string %}"
+        "{{ '\n\n' + item['image_url'] + '\n\n' }}"
+        "{% elif item['type'] == 'image_url' and item['image_url'] is mapping %}"
+        "{{ '\n\n' + item['image_url']['url'] + '\n\n' }}"
+        "{% elif item['type'] == 'text' %}"
+        "{{ item['text'] | trim }}"
+        "{% endif %}"
+        "{% endfor %}"
+        "{% else %}"
+        "{{ raise_exception(\"Invalid content type\") }}"
+        "{% endif %}"
+        "{{ '<end_of_turn>\n' }}"
+        "{% endfor %}"
+        "{% if add_generation_prompt %}"
+        "{{ '<start_of_turn>model\n' }}"
+        "{% endif %}"
+    )
+
+
 @register_chat_completion_handler("chatml-function-calling")
 def chatml_function_calling(
     llama: llama.Llama,
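Since CHAT_FORMAT is a plain Jinja2 template, it can be rendered outside the handler to inspect the prompt it produces. A minimal sketch — the messages and the raise_exception shim are illustrative; the handler's own render also passes eos_token/bos_token and substitutes media markers, which this Gemma template does not need for plain text:

import jinja2
from llama_cpp.llama_chat_format import Gemma3ChatHandler

def raise_exception(message: str):
    raise ValueError(message)  # stand-in for the helper the handler provides

template = jinja2.Environment(
    trim_blocks=True,
    lstrip_blocks=True,
).from_string(Gemma3ChatHandler.CHAT_FORMAT)

prompt = template.render(
    messages=[
        {"role": "system", "content": "You are concise."},
        {"role": "user", "content": "Hello"},
    ],
    add_generation_prompt=True,
    raise_exception=raise_exception,
)
print(prompt)
# <bos><start_of_turn>user
# You are concise.
#
# Hello<end_of_turn>
# <start_of_turn>model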

llama_cpp/llava_cpp.py

Lines changed: 0 additions & 85 deletions
@@ -7,7 +7,6 @@
     c_int,
     c_uint8,
     c_float,
-    c_size_t,
     c_void_p,
     POINTER,
     _Pointer,  # type: ignore
@@ -157,87 +156,3 @@ def clip_model_load(
 def clip_free(ctx: clip_ctx_p, /):
     ...

-
-# CLIP_API struct clip_image_u8 * clip_image_u8_init ();
-@ctypes_function("clip_image_u8_init", [], c_void_p)
-def clip_image_u8_init() -> Optional[c_void_p]:
-    ...
-
-
-# CLIP_API void clip_image_u8_free (struct clip_image_u8 * img);
-@ctypes_function("clip_image_u8_free", [c_void_p], None)
-def clip_image_u8_free(img: c_void_p, /):
-    ...
-
-
-# CLIP_API struct clip_image_f32_batch * clip_image_f32_batch_init();
-@ctypes_function("clip_image_f32_batch_init", [], c_void_p)
-def clip_image_f32_batch_init() -> Optional[c_void_p]:
-    ...
-
-
-# CLIP_API void clip_image_f32_batch_free(struct clip_image_f32_batch * batch);
-@ctypes_function("clip_image_f32_batch_free", [c_void_p], None)
-def clip_image_f32_batch_free(batch: c_void_p, /):
-    ...
-
-
-# /** preprocess img and store the result in res_imgs, pad_to_square may be overridden to false depending on model configuration */
-# CLIP_API bool clip_image_preprocess(struct clip_ctx * ctx, const struct clip_image_u8 * img, struct clip_image_f32_batch * res_imgs );
-@ctypes_function(
-    "clip_image_preprocess",
-    [
-        clip_ctx_p_ctypes,
-        c_void_p,
-        c_void_p,
-    ],
-    c_bool,
-)
-def clip_image_preprocess(
-    ctx: clip_ctx_p,
-    img: c_void_p,
-    res_imgs: c_void_p,
-    /,
-) -> bool:
-    ...
-
-
-# CLIP_API bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, const struct clip_image_f32_batch * imgs, float * vec);
-@ctypes_function(
-    "clip_image_batch_encode",
-    [
-        clip_ctx_p_ctypes,
-        c_int,
-        c_void_p,
-        POINTER(c_float),
-    ],
-    c_bool,
-)
-def clip_image_batch_encode(
-    ctx: clip_ctx_p,
-    n_threads: c_int,
-    imgs: c_void_p,
-    vec: c_void_p,
-    /,
-) -> bool:
-    ...
-
-
-# /** interpret bytes as an image file with length bytes_length, and use the result to populate img */
-# CLIP_API bool clip_image_load_from_bytes(const unsigned char * bytes, size_t bytes_length, struct clip_image_u8 * img);
-@ctypes_function(
-    "clip_image_load_from_bytes",
-    [
-        c_void_p,
-        c_size_t,
-        c_void_p,
-    ],
-    c_bool,
-)
-def clip_image_load_from_bytes(
-    bytes: c_void_p,
-    bytes_length: c_size_t,
-    img: c_void_p,
-    /,
-) -> bool:
-    ...