
Commit e61e47a

Authored by riverlijunjie, chenhu-wang, zhaixuejun1993, zaixing-wang, and peterchen-intel
[GPU]qwen3 moe fused compressed (#32536)
### Details:
- Qwen3 MoE model support for weight fusion compression.
- MoE transformation pipeline: FuseVectorizedMOE3GEMM -> ConvertMOEToMOECompressed -> FuseMOECompressed.
- ov::intel_gpu::op::MOEFusedCompressed fuses softmax_topk/onehot into the MoE computation for performance optimization.
- The prefill stage uses a GEMM kernel to compute each expert's output one by one.
- The decode stage uses OCL kernels to compute the expert outputs in parallel.
- MoE exec graph: <img width="194" height="432" alt="image" src="https://github.com/user-attachments/assets/fb5fd9b9-3b56-43cc-a71c-27cd4b9cd0d2" />

### Tickets:
- CVS-169299

---------

Co-authored-by: chenhu-wang <[email protected]>
Co-authored-by: Xuejun,Zhai <[email protected]>
Co-authored-by: Zaixing,Wang <[email protected]>
Co-authored-by: Chen Peter <[email protected]>
1 parent cad9e8e commit e61e47a

27 files changed, +5572 −106 lines
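Since the PR description above only sketches the computation being fused, here is a plain-C++ reference of a single token passing through a top-k-routed MoE layer with 3-GEMM SWiGLU experts. This is purely illustrative and is not the plugin's code: the function names, shapes, and the exact routing normalization are simplifying assumptions.

```cpp
// Illustrative only: a scalar reference of one token going through a top-k
// routed MoE layer with 3-GEMM SWiGLU experts. This is NOT the plugin code;
// names, shapes, and the routing normalization are simplifying assumptions.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <numeric>
#include <vector>

using Matrix = std::vector<std::vector<float>>;  // [rows][cols]

// y = W * x for a [rows x cols] weight matrix and a [cols] input vector.
static std::vector<float> matvec(const Matrix& w, const std::vector<float>& x) {
    std::vector<float> y(w.size(), 0.0f);
    for (size_t r = 0; r < w.size(); ++r)
        for (size_t c = 0; c < x.size(); ++c)
            y[r] += w[r][c] * x[c];
    return y;
}

// expert(x) = w2 * (SiLU(w0 * x) * (w1 * x)); output = sum_k prob_k * expert_k(x)
std::vector<float> moe_token(const std::vector<float>& x,              // [hidden_size]
                             const std::vector<float>& router_logits,  // [num_experts]
                             const std::vector<Matrix>& w0,            // gate proj per expert
                             const std::vector<Matrix>& w1,            // up proj per expert
                             const std::vector<Matrix>& w2,            // down proj per expert
                             size_t top_k) {                           // assumes top_k <= num_experts
    // Softmax over the router logits (the softmax_topk routing mentioned above).
    std::vector<float> probs(router_logits.size());
    const float max_logit = *std::max_element(router_logits.begin(), router_logits.end());
    float denom = 0.0f;
    for (size_t e = 0; e < probs.size(); ++e) {
        probs[e] = std::exp(router_logits[e] - max_logit);
        denom += probs[e];
    }
    for (float& p : probs)
        p /= denom;

    // Pick the top_k experts by routing probability.
    std::vector<size_t> order(probs.size());
    std::iota(order.begin(), order.end(), 0);
    std::partial_sort(order.begin(), order.begin() + top_k, order.end(),
                      [&](size_t a, size_t b) { return probs[a] > probs[b]; });

    // Weighted sum of the selected experts' SWiGLU MLP outputs.
    std::vector<float> out(x.size(), 0.0f);
    for (size_t k = 0; k < top_k; ++k) {
        const size_t e = order[k];
        auto gate = matvec(w0[e], x);  // first projection  -> [inter_size]
        auto up = matvec(w1[e], x);    // second projection -> [inter_size]
        for (size_t i = 0; i < gate.size(); ++i)
            gate[i] = gate[i] / (1.0f + std::exp(-gate[i])) * up[i];  // SiLU(gate) * up
        auto down = matvec(w2[e], gate);  // final projection -> [hidden_size]
        for (size_t i = 0; i < out.size(); ++i)
            out[i] += probs[e] * down[i];
    }
    return out;
}
```

The fused GPU op introduced by this commit performs the same computation on compressed expert weights, computing experts one by one with a GEMM kernel at prefill and in parallel with OCL kernels at decode, as described in the details above.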

src/common/transformations/src/transformations/common_optimizations/moc_transformations.cpp

Lines changed: 2 additions & 3 deletions
@@ -291,10 +291,9 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr<ov::Model>
     REGISTER_PASS(manager, ConstantFolding)
     REGISTER_PASS(manager, SymbolicOptimizations)
     REGISTER_PASS(manager, ResolveNameCollisions, true);
-    // todo: enable after plugin support for MoE
-    // Remove pytestmark to enable e2e test:
+    // TODO: Remove pytestmark to enable e2e test:
     // tests/model_hub_tests/transformation_tests/test_moe_transformation.py
-    // REGISTER_PASS(manager, FuseMOE)
+    REGISTER_PASS(manager, FuseMOE)
     REGISTER_PASS(manager, VectorizedMOE2GEMMTransposeWeights)
 
     manager.run_passes(f);
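With the pass re-enabled above, MoE fusion now runs as part of MOCTransformations. For reference, a minimal sketch of running it standalone through the standard ov::pass::Manager API is shown below; the include path for the FuseMOE header is an assumption, since it is not visible in this diff.

```cpp
#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/pass/manager.hpp"
// Assumed location of the FuseMOE pass header; not shown in this diff.
#include "transformations/common_optimizations/fuse_moe.hpp"

// Run only the MoE fusion pass on an already loaded ov::Model.
void fuse_moe_standalone(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::FuseMOE>();
    manager.run_passes(model);
}
```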
src/plugins/intel_gpu/include/intel_gpu/op/moe_3gemm_fused_compressed.hpp

Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "intel_gpu/op/moe_compressed.hpp"

namespace ov::intel_gpu::op {

/// \brief MOE3GemmFusedCompressed that supports compressed and fused MOE for GEMM3_SWIGLU.
class MOE3GemmFusedCompressed : public MOECompressed {
public:
    OPENVINO_OP("MOE3GemmFusedCompressed", "gpu_opset", MOECompressed);

    MOE3GemmFusedCompressed() = default;

    /// \brief Constructs a MOE3GemmFusedCompressed operation with config only
    /// \param args The input tensors, in the following order:
    ///        0:  hidden_states - input tensor with hidden representations
    ///        1:  routing_weights - [num_seq, num_experts] routing weights for all experts
    ///        2:  w0_weight - expert weights for first projection,
    ///            shape [num_experts, inter_size, group_num, group_size]
    ///        3:  w0_scale - expert scale for first projection for compressed experts,
    ///            shape [num_experts, inter_size, group_num, 1]
    ///        4:  w0_zp - expert zp for first projection for compressed experts,
    ///            shape [num_experts, inter_size, group_num, 1]
    ///        5:  w1_weight - expert weights for second projection,
    ///            shape [num_experts, inter_size, group_num, group_size]
    ///        6:  w1_scale - expert scale for second projection for compressed experts,
    ///            shape [num_experts, inter_size, group_num, 1]
    ///        7:  w1_zp - expert zp for second projection for compressed experts,
    ///            shape [num_experts, inter_size, group_num, 1]
    ///        8:  w2_weight - expert weights for final projection,
    ///            shape [num_experts, hidden_size, group_num, group_size]
    ///        9:  w2_scale - expert scale for final projection for compressed experts,
    ///            shape [num_experts, hidden_size, group_num, 1]
    ///        10: w2_zp - expert zp for final projection for compressed experts,
    ///            shape [num_experts, hidden_size, group_num, 1]
    /// \param config Configuration for the MOE 3GEMM SWIGLU fused operation
    MOE3GemmFusedCompressed(const OutputVector& args, const MOECompressed::Config config);

    std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;
};

}  // namespace ov::intel_gpu::op
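The scale/zp operands documented above describe group-wise asymmetric quantization of the expert weights: each group of group_size values shares one scale and one zero point, which is why the scale/zp shapes end in [..., group_num, 1]. As a rough illustration (not part of this header), below is a minimal sketch of expanding one quantized [group_num, group_size] row back to full precision; the dequantize_row helper and the uint8_t storage type are assumptions for the example.

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// w_fp[g*group_size + i] = (w_q[g*group_size + i] - zp[g]) * scale[g]
// One scale/zero-point per group, matching the trailing "1" in the
// [..., group_num, 1] scale/zp shapes documented above.
std::vector<float> dequantize_row(const std::vector<uint8_t>& w_q,  // [group_num * group_size]
                                  const std::vector<float>& scale,  // [group_num]
                                  const std::vector<float>& zp,     // [group_num]
                                  size_t group_num,
                                  size_t group_size) {
    std::vector<float> w_fp(group_num * group_size);
    for (size_t g = 0; g < group_num; ++g) {
        for (size_t i = 0; i < group_size; ++i) {
            const size_t idx = g * group_size + i;
            w_fp[idx] = (static_cast<float>(w_q[idx]) - zp[g]) * scale[g];
        }
    }
    return w_fp;
}
```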

src/plugins/intel_gpu/include/intel_gpu/op/moe_compressed.hpp

Lines changed: 1 addition & 0 deletions
@@ -15,6 +15,7 @@ class MOECompressed : public ov::op::internal::MOE {
     OPENVINO_OP("MOECompressed", "gpu_opset", ov::op::internal::MOE);
 
     MOECompressed() = default;
+    MOECompressed(const OutputVector& args) : MOE(args) {}
 
     struct Config : public MOE::Config {
         size_t hidden_size = 0;

src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp

Lines changed: 1 addition & 0 deletions
@@ -312,4 +312,5 @@ REGISTER_FACTORY(internal, PagedAttentionExtension);
 REGISTER_FACTORY(internal, LoraSubgraph);
 REGISTER_FACTORY(internal, LoraSubgraphFused);
 REGISTER_FACTORY(internal, VLSDPA);
+REGISTER_FACTORY(internal, MOE3GemmFusedCompressed);
 REGISTER_FACTORY(internal, MOECompressed);
src/plugins/intel_gpu/include/intel_gpu/primitives/moe_3gemm_fused_compressed.hpp

Lines changed: 72 additions & 0 deletions
@@ -0,0 +1,72 @@
// Copyright (C) 2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once
#include <vector>

#include "intel_gpu/op/moe_3gemm_fused_compressed.hpp"
#include "intel_gpu/runtime/engine.hpp"
#include "primitive.hpp"

namespace cldnn {
using MOE3GemmFusedCompressed = ov::intel_gpu::op::MOE3GemmFusedCompressed;

/// @brief moe compressed primitive
/// @details Performs moe compressed
struct moe_3gemm_fused_compressed : public primitive_base<moe_3gemm_fused_compressed> {
    CLDNN_DECLARE_PRIMITIVE(moe_3gemm_fused_compressed)

    moe_3gemm_fused_compressed() : primitive_base("", {}) {}

    // @brief Constructs moe primitive / layer.
    //
    // @param id An identifier of new primitive.
    // @param inputs A list of input primitive ids (inputs):
    //        0:  hidden_states - input tensor with hidden representations
    //        1:  routing_weights - [num_seq, num_experts] routing weights for all experts
    //        2:  w0_weight - expert weights for first projection,
    //            shape [num_experts, inter_size, group_num, group_size]
    //        3:  w0_scale - expert scale for first projection for compressed experts,
    //            shape [num_experts, inter_size, group_num, 1]
    //        4:  w0_zp - expert zp for first projection for compressed experts,
    //            shape [num_experts, inter_size, group_num, 1]
    //        5:  w1_weight - expert weights for second projection,
    //            shape [num_experts, inter_size, group_num, group_size]
    //        6:  w1_scale - expert scale for second projection for compressed experts,
    //            shape [num_experts, inter_size, group_num, 1]
    //        7:  w1_zp - expert zp for second projection for compressed experts,
    //            shape [num_experts, inter_size, group_num, 1]
    //        8:  w2_weight - expert weights for final projection,
    //            shape [num_experts, hidden_size, group_num, group_size]
    //        9:  w2_scale - expert scale for final projection for compressed experts,
    //            shape [num_experts, hidden_size, group_num, 1]
    //        10: w2_zp - expert zp for final projection for compressed experts,
    //            shape [num_experts, hidden_size, group_num, 1]
    moe_3gemm_fused_compressed(const primitive_id& id, const std::vector<input_info>& inputs, const MOE3GemmFusedCompressed::Config& config)
        : primitive_base(id, inputs, 1, {optional_data_type()}),
          _config(config) {}

    MOE3GemmFusedCompressed::Config _config;

    bool operator==(const primitive& rhs) const override {
        if (!compare_common_params(rhs))
            return false;

        auto rhs_casted = downcast<const moe_3gemm_fused_compressed>(rhs);

        return std::memcmp(&_config, &rhs_casted._config, sizeof(_config)) == 0;
    }

    void save(BinaryOutputBuffer& ob) const override {
        primitive_base<moe_3gemm_fused_compressed>::save(ob);
        ob << make_data(&_config, sizeof(_config));
    }

    void load(BinaryInputBuffer& ib) override {
        primitive_base<moe_3gemm_fused_compressed>::load(ib);
        ib >> make_data(&_config, sizeof(_config));
    }
};

}  // namespace cldnn
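For completeness, a rough sketch of how this primitive could be instantiated and added to a cldnn::topology, with the eleven inputs in the documented order. This is not code from the commit: the input primitive ids are placeholders, the Config is left default-constructed, the header paths are assumptions, and the surrounding network setup is omitted.

```cpp
#include <vector>

#include "intel_gpu/graph/topology.hpp"
// Assumed location of the primitive header defined above.
#include "intel_gpu/primitives/moe_3gemm_fused_compressed.hpp"

// Adds a moe_3gemm_fused_compressed primitive whose inputs follow the
// documented order: hidden_states, routing_weights, then weight/scale/zp
// triplets for the three projections.
void add_moe_3gemm_sketch(cldnn::topology& topology) {
    std::vector<cldnn::input_info> inputs = {
        cldnn::input_info("hidden_states"),
        cldnn::input_info("routing_weights"),
        cldnn::input_info("w0_weight"), cldnn::input_info("w0_scale"), cldnn::input_info("w0_zp"),
        cldnn::input_info("w1_weight"), cldnn::input_info("w1_scale"), cldnn::input_info("w1_zp"),
        cldnn::input_info("w2_weight"), cldnn::input_info("w2_scale"), cldnn::input_info("w2_zp"),
    };

    // Placeholder config: real values (hidden_size, expert counts, ...) come
    // from the fused ov::intel_gpu::op::MOE3GemmFusedCompressed node.
    cldnn::MOE3GemmFusedCompressed::Config config{};
    topology.add(cldnn::moe_3gemm_fused_compressed("moe", inputs, config));
}
```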
