Adding ccl_enabled flag during model loading and passing CCL lists during the compilation process

vjanfaza · vjanfaza · commit a259effc9e59 · 2025-12-02T14:50:40.000-08:00
Signed-off-by: Vahid Janfaza <vjanfaza@qti.qualcomm.com>
diff --git a/examples/performance/compute_context_length/gemma3.py b/examples/performance/compute_context_length/gemma3.py
@@ -38,7 +38,7 @@
     model_id,
     config=config,
     attn_implementation="eager",
-    kv_offload=False,
+    kv_offload=True,
     qaic_config={
         "ccl_enabled":True,
     },
diff --git a/examples/performance/compute_context_length/molmo.py b/examples/performance/compute_context_length/molmo.py
@@ -33,7 +33,7 @@
 
 qeff_model = QEFFAutoModelForCausalLM.from_pretrained(
     model_id,
-    kv_offload=False,
+    kv_offload=True,
     trust_remote_code=True,
     config=config,
     qaic_config={