Skip to content

Commit 7199205

Browse files
committed
Update base for Update on "make operator name consistent before and after serde"
Differential Revision: [D78380855](https://our.internmc.facebook.com/intern/diff/D78380855/) [ghstack-poisoned]
2 parents edfed4d + 7e82d00 commit 7199205

File tree

36 files changed

+1300
-817
lines changed

36 files changed

+1300
-817
lines changed

.ci/docker/ci_commit_pins/pytorch.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
2dccff7dcf56b0d168ebfd7ca08bdeca37273c56
1+
ab43fe4bdf5ccd82897f0e982c451a0127bd175e

.github/workflows/trunk.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ jobs:
269269
if [[ ${{ matrix.os}} == "bare_metal" ]]; then
270270
bash test/build_size_test.sh "-DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON"
271271
elif [[ ${{ matrix.os}} == "zephyr-preset" ]]; then
272-
CXXFLAGS=${cxx_flags} cmake --preset zephyr -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_OPTIMIZE_SIZE=ON -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out .
272+
CXXFLAGS=${cxx_flags} cmake --preset zephyr -DCMAKE_BUILD_TYPE=Release -DEXECUTORCH_OPTIMIZE_SIZE=ON -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out .
273273
cmake --build cmake-out -j9 --target install --config Release
274274
CXXFLAGS=${cxx_flags} cmake -DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out -Bcmake-out/test test
275275
cmake --build cmake-out/test -j9 --config Release

backends/qualcomm/_passes/layout_transform.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,8 +103,8 @@ class LayoutTransform(ExportPass):
103103
exir_ops.edge.aten.pow.Tensor_Scalar,
104104
exir_ops.edge.aten.prelu.default,
105105
exir_ops.edge.aten.repeat.default,
106-
exir_ops.edge.aten.round.default,
107106
exir_ops.edge.aten.relu.default,
107+
exir_ops.edge.aten.round.default,
108108
exir_ops.edge.aten.sigmoid.default,
109109
exir_ops.edge.aten.split_with_sizes.default,
110110
exir_ops.edge.aten.split_with_sizes_copy.default,

backends/qualcomm/quantizer/annotators.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,9 @@ def annotate_masked_fill(node: Node, quantization_config: QuantizationConfig) ->
278278
)
279279

280280

281-
@register_annotator([torch.ops.aten.mul, torch.ops.aten.mul.Tensor])
281+
@register_annotator(
282+
[torch.ops.aten.mul, torch.ops.aten.mul.Tensor, torch.ops.aten.mul_.Tensor]
283+
)
282284
def annotate_mul(node: Node, quantization_config: QuantizationConfig) -> None:
283285
annotate_binary(node, quantization_config)
284286

@@ -1311,7 +1313,7 @@ def annotate_where(node: Node, quantization_config: QuantizationConfig) -> None:
13111313
)
13121314

13131315

1314-
@register_annotator([torch.ops.aten.zeros.default])
1316+
@register_annotator([torch.ops.aten.zeros.default, torch.ops.aten.zeros_like.default])
13151317
def annotate_zeros(node: Node, quantization_config: QuantizationConfig) -> None:
13161318
if _is_annotated([node]) or not _is_float_tensor(node):
13171319
return

backends/qualcomm/quantizer/custom_annotation.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,9 @@ def annotate_prefill_kv_output(gm: torch.fx.GraphModule, kv_quant_attrs: dict):
153153
)
154154

155155

156-
def annotate_matmul_16a8w(gm: torch.fx.GraphModule) -> None: # noqa: C901
156+
def annotate_matmul_16a8w( # noqa: C901
157+
gm: torch.fx.GraphModule, annotate_conv=True
158+
) -> None:
157159
"""
158160
This function is specific for matmul op 16a8w.
159161
For k, we will tag such as the below, and
@@ -317,9 +319,10 @@ def annotate_matmul_input1(node: Node):
317319
# The arguments of cat op: (the past kv cache, the new kv cache)
318320
node = node.args[0][1]
319321
elif node.target == torch.ops.aten.conv2d.default:
320-
annotate_conv2d(
321-
node, quantization_config=quantization_config_8a4w_per_channel
322-
)
322+
if annotate_conv:
323+
annotate_conv2d(
324+
node, quantization_config=quantization_config_8a4w_per_channel
325+
)
323326
break
324327
elif node.target in [torch.ops.aten.add.Tensor, torch.ops.aten.sub.Tensor]:
325328
break

backends/qualcomm/scripts/build.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ if [ "$BUILD_AARCH64" = true ]; then
8585
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \
8686
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
8787
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
88+
-DEXECUTORCH_ENABLE_LOGGING=ON \
8889
-DQNN_SDK_ROOT=$QNN_SDK_ROOT \
8990
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK_ROOT/build/cmake/android.toolchain.cmake \
9091
-DANDROID_ABI='arm64-v8a' \
@@ -104,6 +105,9 @@ if [ "$BUILD_AARCH64" = true ]; then
104105
-DANDROID_ABI='arm64-v8a' \
105106
-DANDROID_PLATFORM=android-30 \
106107
-DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH \
108+
-DSUPPORT_REGEX_LOOKAHEAD=ON \
109+
-DBUILD_TESTING=OFF \
110+
-DEXECUTORCH_ENABLE_LOGGING=ON \
107111
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
108112
-DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
109113
-DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
@@ -134,6 +138,7 @@ if [ "$BUILD_X86_64" = true ]; then
134138
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
135139
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
136140
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \
141+
-DEXECUTORCH_ENABLE_LOGGING=ON \
137142
-DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
138143
-S $PRJ_ROOT \
139144
-B $BUILD_ROOT \
@@ -157,6 +162,9 @@ if [ "$BUILD_X86_64" = true ]; then
157162
-DCMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH \
158163
-DCMAKE_FIND_ROOT_PATH_MODE_PACKAGE=BOTH \
159164
-DPYTHON_EXECUTABLE=$PYTHON_EXECUTABLE \
165+
-DSUPPORT_REGEX_LOOKAHEAD=ON \
166+
-DBUILD_TESTING=OFF \
167+
-DEXECUTORCH_ENABLE_LOGGING=ON \
160168
-B$EXAMPLE_ROOT
161169

162170
cmake --build $EXAMPLE_ROOT -j$BUILD_JOB_NUMBER

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 61 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4049,7 +4049,7 @@ def test_llama3_2_1b(self):
40494049
"16a4w",
40504050
"--temperature",
40514051
"0",
4052-
"--llama_model",
4052+
"--decoder_model",
40534053
"llama3_2",
40544054
"--model_mode",
40554055
"hybrid",
@@ -4129,7 +4129,7 @@ def test_llama_stories_110m(self):
41294129
"16a4w",
41304130
"--temperature",
41314131
"0",
4132-
"--llama_model",
4132+
"--decoder_model",
41334133
"stories110m",
41344134
"--model_mode",
41354135
"hybrid",
@@ -4171,6 +4171,65 @@ def test_llama_stories_110m(self):
41714171
if not self.compile_only and not self.enable_x86_64:
41724172
self.assertGreaterEqual(msg["inference_speed"], 220) # Lanai
41734173

4174+
def test_qwen2_5(self):
4175+
if not self.required_envs():
4176+
self.skipTest("missing required envs")
4177+
4178+
prompt = "My favourite condiment is "
4179+
cmds = [
4180+
"python",
4181+
f"{self.executorch_root}/examples/qualcomm/oss_scripts/llama/llama.py",
4182+
"--artifact",
4183+
self.artifact_dir,
4184+
"--build_folder",
4185+
self.build_folder,
4186+
"--model",
4187+
self.model,
4188+
"--ip",
4189+
self.ip,
4190+
"--port",
4191+
str(self.port),
4192+
"--prompt",
4193+
f"{prompt}",
4194+
"--ptq",
4195+
"16a8w",
4196+
"--decoder_model",
4197+
"qwen2_5",
4198+
"--model_mode",
4199+
"hybrid",
4200+
"--prefill_ar_len",
4201+
"32",
4202+
"--max_seq_len",
4203+
"128",
4204+
]
4205+
if self.compile_only:
4206+
cmds.extend(["--compile_only"])
4207+
elif self.device:
4208+
cmds.extend(["--device", self.device])
4209+
if self.host:
4210+
cmds.extend(["--host", self.host])
4211+
elif self.enable_x86_64:
4212+
cmds.extend(["--enable_x86_64"])
4213+
if self.pre_gen_pte:
4214+
cmds.extend(["--pre_gen_pte", self.pre_gen_pte])
4215+
4216+
# Accuracy is bad for now. Just check user's prompt is returned.
4217+
golden_start_with = "My favourite condiment is "
4218+
p = subprocess.Popen(cmds, stdout=subprocess.DEVNULL)
4219+
with Listener((self.ip, self.port)) as listener:
4220+
conn = listener.accept()
4221+
p.communicate()
4222+
msg = json.loads(conn.recv())
4223+
if "Error" in msg:
4224+
self.fail(msg["Error"])
4225+
else:
4226+
model_out = msg["result"][0]
4227+
self.assertTrue(
4228+
model_out.startswith(golden_start_with),
4229+
f"Expected Output: {golden_start_with}. Actual Output: {model_out}",
4230+
)
4231+
self.assertGreaterEqual(msg["inference_speed"], 95) # Lanai
4232+
41744233

41754234
class TestExampleOssScript(TestQNN):
41764235
def test_albert(self):

backends/test/suite/flow.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,4 +62,13 @@ def all_flows() -> dict[str, TestFlow]:
6262
except Exception as e:
6363
logger.info(f"Skipping Core ML flow registration: {e}")
6464

65+
try:
66+
from executorch.backends.test.suite.flows.vulkan import VULKAN_TEST_FLOW
67+
68+
flows += [
69+
VULKAN_TEST_FLOW,
70+
]
71+
except Exception as e:
72+
logger.info(f"Skipping Vulkan flow registration: {e}")
73+
6574
return {f.name: f for f in flows if f is not None}

backends/test/suite/flows/vulkan.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from executorch.backends.test.suite.flow import TestFlow
2+
from executorch.backends.vulkan.test.tester import VulkanTester
3+
4+
5+
def _create_vulkan_flow(
6+
name: str,
7+
quantize: bool = False,
8+
) -> TestFlow:
9+
return TestFlow(
10+
name,
11+
backend="vulkan",
12+
tester_factory=VulkanTester,
13+
quantize=quantize,
14+
)
15+
16+
17+
VULKAN_TEST_FLOW = _create_vulkan_flow("vulkan")

backends/vulkan/test/TARGETS

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
load("@fbcode_macros//build_defs:python_unittest.bzl", "python_unittest")
2+
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")
23

34
oncall("executorch")
45

@@ -57,3 +58,12 @@ python_unittest(
5758
"//executorch/backends/vulkan:vulkan_preprocess",
5859
],
5960
)
61+
62+
runtime.python_library(
63+
name = "tester",
64+
srcs = ["tester.py"],
65+
deps = [
66+
"//executorch/backends/vulkan/partitioner:vulkan_partitioner",
67+
"//executorch/backends/vulkan:vulkan_preprocess",
68+
]
69+
)

0 commit comments

Comments (0)