Skip to content

Commit 40af171

Browse files
committed
Remove gated Llama-3.3-70B model from tests + lint/format
Signed-off-by: Vaibhav Verma <[email protected]>
1 parent a1915c9 commit 40af171

File tree

5 files changed

+22
-14
lines changed

5 files changed

+22
-14
lines changed

QEfficient/customop/__init__.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,16 @@
55
#
66
# -----------------------------------------------------------------------------
77

8-
from QEfficient.customop.ctx_scatter_gather import CtxGatherFunc, CtxGatherFuncBlockedKV, CtxGatherFunc3D, CtxScatterFunc, CtxScatterFunc3D
8+
from QEfficient.customop.ctx_scatter_gather import (
9+
CtxGatherFunc,
10+
CtxGatherFunc3D,
11+
CtxGatherFuncBlockedKV,
12+
CtxScatterFunc,
13+
CtxScatterFunc3D,
14+
)
915
from QEfficient.customop.ctx_scatter_gather_cb import (
10-
CtxGatherFuncCB,
1116
CtxGatherFuncBlockedKVCB,
17+
CtxGatherFuncCB,
1218
CtxGatherFuncCB3D,
1319
CtxScatterFuncCB,
1420
CtxScatterFuncCB3D,

QEfficient/customop/ctx_scatter_gather.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ def setup_context(ctx, inputs, outputs):
146146
def symbolic(g: torch.Graph, data: torch.Value, ctx_indices: torch.Value, comp_ctx_len: int) -> torch.Value:
147147
return g.onnxscript_op(CtxGather, data, ctx_indices, comp_ctx_len).setTypeAs(data)
148148

149+
149150
@onnxscript.script(onnxscript.values.Opset("com.qualcomm.cloud", 1))
150151
def CtxGatherBlockedKV(data: onnxscript.FLOAT, ctx_indices: onnxscript.INT32) -> onnxscript.FLOAT:
151152
ctx_indices = ops.Unsqueeze(ctx_indices, [-1])

QEfficient/transformers/cache_utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,10 @@
1414

1515
from QEfficient.customop import (
1616
CtxGatherFunc,
17-
CtxGatherFuncBlockedKV,
1817
CtxGatherFunc3D,
19-
CtxGatherFuncCB,
18+
CtxGatherFuncBlockedKV,
2019
CtxGatherFuncBlockedKVCB,
20+
CtxGatherFuncCB,
2121
CtxGatherFuncCB3D,
2222
CtxScatterFunc,
2323
CtxScatterFunc3D,

QEfficient/transformers/models/pytorch_transforms.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
#
66
# -----------------------------------------------------------------------------
77

8-
from functools import partial
98
import warnings
9+
from functools import partial
1010
from types import MethodType
1111
from typing import Callable, Optional, Tuple, Union
1212

tests/transformers/models/test_causal_lm_models.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,8 @@
6868
]
6969

7070
test_models_blockedKV = [
71-
"meta-llama/Llama-3.3-70B-Instruct",
71+
# "meta-llama/Llama-3.3-70B-Instruct",
72+
"meta-llama/Llama-3.2-1B",
7273
]
7374

7475

@@ -248,7 +249,11 @@ def check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(
248249
pytorch_hf_tokens = [pytorch_hf_tokens for _ in range(full_batch_size)]
249250

250251
qeff_model = QEFFAutoModelForCausalLM(
251-
model_hf, continuous_batching=True, is_tlm=is_tlm, pretrained_model_name_or_path=model_name, qaic_config=qaic_config
252+
model_hf,
253+
continuous_batching=True,
254+
is_tlm=is_tlm,
255+
pretrained_model_name_or_path=model_name,
256+
qaic_config=qaic_config,
252257
)
253258
onnx_model_path = qeff_model.export()
254259

@@ -505,9 +510,8 @@ def test_causal_blockedKV_pytorch_vs_kv_vs_ort_vs_ai100(model_name):
505510
n_layer = get_custom_n_layers(model_name)
506511

507512
qaic_config = dict(num_kv_blocks=Constants.NUM_KV_BLOCKS)
508-
check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(
509-
model_name=model_name, n_layer=n_layer, qaic_config=qaic_config
510-
)
513+
check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(model_name=model_name, n_layer=n_layer, qaic_config=qaic_config)
514+
511515

512516
@pytest.mark.parametrize("model_name", test_models_blockedKV)
513517
def test_causal_nonBlockedKV_pytorch_vs_kv_vs_ort_vs_ai100(model_name):
@@ -518,7 +522,4 @@ def test_causal_nonBlockedKV_pytorch_vs_kv_vs_ort_vs_ai100(model_name):
518522
"""
519523
n_layer = get_custom_n_layers(model_name)
520524

521-
check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(
522-
model_name=model_name, n_layer=n_layer
523-
)
524-
525+
check_causal_lm_pytorch_vs_kv_vs_ort_vs_ai100(model_name=model_name, n_layer=n_layer)

0 commit comments

Comments (0)