[ONNX] Support reshaping LSTM input (#34657)

mvafin · web-flow · commit 44967ef88c06 · 2026-03-13T08:28:00.000Z
### Details:
- *Support reshaping LSTM inputs that are missing the `num_directions` dimension*

### Tickets:
- *CVS-182578*

### AI Assistance:
- *AI assistance used: no / yes*
- *If yes, summarize how AI was used and what human validation was performed (build/tests/manual checks).*
diff --git a/src/frontends/onnx/frontend/src/op/lstm.cpp b/src/frontends/onnx/frontend/src/op/lstm.cpp
@@ -14,6 +14,7 @@
 #include "openvino/op/multiply.hpp"
 #include "openvino/op/shape_of.hpp"
 #include "openvino/op/squeeze.hpp"
+#include "openvino/op/unsqueeze.hpp"
 #include "openvino/util/common_util.hpp"
 #include "utils/reshape.hpp"
 #include "utils/split.hpp"
@@ -38,10 +39,12 @@ enum class LSTMInput {
     LSTM_INPUT_P
 };
 
-// Normalize tensor rank to target_rank by squeezing leading dimensions of size 1.
-// This handles models where upstream Unsqueeze operations add extra leading dimensions.
-// Static validation: if leading dims are statically known and != 1, emit clear error at conversion time.
-// If extra dimensions are != 1 at runtime, Squeeze will fail with a clear error.
+// Normalize tensor rank to target_rank:
+// - If rank > target: squeeze leading dimensions of size 1 (handles upstream Unsqueeze ops).
+//   Static validation rejects leading dims that are statically != 1.
+// - If rank < target by exactly 1: unsqueeze a leading dimension of size 1
+//   (handles models that omit the num_directions=1 dimension for unidirectional LSTMs).
+// - If rank < target by more than 1: throw an error, since the input is malformed.
 ov::Output<ov::Node> normalize_tensor_rank(const ov::Output<ov::Node>& input,
                                            int64_t target_rank,
                                            const std::string& input_name) {
@@ -86,14 +89,23 @@ ov::Output<ov::Node> normalize_tensor_rank(const ov::Output<ov::Node>& input,
         return std::make_shared<v0::Squeeze>(input, axes_const);
     }
 
-    // input_rank < target_rank: reject non-conformant models with missing num_directions dimension
-    OPENVINO_THROW("LSTM input '",
-                   input_name,
-                   "' has rank ",
-                   input_rank.get_length(),
-                   " but expected ",
-                   target_rank,
-                   ". Missing num_directions dimension cannot be automatically inferred.");
+    // input_rank < target_rank: only allow exactly 1 missing dimension (the num_directions dim).
+    // For unidirectional LSTM (forward/reverse), num_directions=1, so some models omit it.
+    // A larger rank deficiency indicates a genuinely malformed model.
+    const auto dims_to_unsqueeze = target_rank - input_rank.get_length();
+    if (dims_to_unsqueeze != 1) {
+        OPENVINO_THROW("LSTM input '",
+                       input_name,
+                       "' has rank ",
+                       input_rank.get_length(),
+                       " but expected ",
+                       target_rank,
+                       ". Rank difference is ",
+                       dims_to_unsqueeze,
+                       " but only 1 (missing num_directions) is supported.");
+    }
+    auto axes_const = v0::Constant::create(ov::element::i64, Shape{1}, std::vector<int64_t>{0});
+    return std::make_shared<v0::Unsqueeze>(input, axes_const);
 }
 
 struct LSTMNgInputMap {
@@ -116,6 +128,24 @@ struct LSTMNgInputMap {
 
         m_input_map[LSTMInput::LSTM_INPUT_X] = input_x;
 
+        // Detect if num_directions dimension is missing from W.
+        // Some models omit the leading num_directions dimension when it equals 1.
+        // normalize_tensor_rank will unsqueeze it, but we need to squeeze the
+        // corresponding dimension from outputs to match the original model's expectations.
+        const auto& w_rank = ng_inputs.at(1).get_partial_shape().rank();
+        if (w_rank.is_static() && w_rank.get_length() < 3) {
+            // Unsqueezing adds num_directions=1, which is only valid for forward/reverse.
+            // For bidirectional LSTMs, num_directions=2 and cannot be inferred.
+            const std::string direction =
+                ov::util::to_lower(node.get_attribute_value<std::string>("direction", "forward"));
+            OPENVINO_ASSERT(direction != "bidirectional",
+                            "LSTM input 'W' has rank ",
+                            w_rank.get_length(),
+                            " but expected 3. Cannot add num_directions dimension for bidirectional LSTM "
+                            "because num_directions=2 cannot be inferred from the data.");
+            m_num_directions_unsqueezed = true;
+        }
+
         // Weight tensor for the gates.
         // ONNX Shape: [num_directions, 4*hidden_size, input_size]
         auto input_w = normalize_tensor_rank(ng_inputs.at(1), 3, "W");
@@ -252,6 +282,9 @@ struct LSTMNgInputMap {
         return m_input_map.at(key);
     }
     std::map<LSTMInput, ov::Output<ov::Node>> m_input_map;
+    // True when the num_directions dimension was missing from W (rank < 3) and is added via Unsqueeze.
+    // In this case, outputs need the num_directions dimension squeezed to match the original model.
+    bool m_num_directions_unsqueezed = false;
 };
 
 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ATTRIBUTES PARSING ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -308,6 +341,21 @@ ov::OutputVector lstm(const ov::frontend::onnx::Node& node) {
     const auto Y_h = lstm_sequence->output(1);
     const auto Y_c = lstm_sequence->output(2);
 
+    if (input_map.m_num_directions_unsqueezed) {
+        // The num_directions dimension was added to inputs via Unsqueeze.
+        // Squeeze it from outputs so downstream consumers see the original ranks.
+        // Y: OV [batch_size, num_directions(1), seq_length, hidden_size] -> ONNX [seq_length, batch_size, hidden_size]
+        // Y_h: OV [batch_size, num_directions(1), hidden_size] -> ONNX [batch_size, hidden_size]
+        // Y_c: OV [batch_size, num_directions(1), hidden_size] -> ONNX [batch_size, hidden_size]
+        auto num_dir_axis = v0::Constant::create(ov::element::i64, Shape{1}, {1});
+        auto Y_squeezed = std::make_shared<v0::Squeeze>(Y, num_dir_axis);
+        auto Y_h_squeezed = std::make_shared<v0::Squeeze>(Y_h, num_dir_axis);
+        auto Y_c_squeezed = std::make_shared<v0::Squeeze>(Y_c, num_dir_axis);
+
+        // Y: [batch_size, seq_length, hidden_size] -> [seq_length, batch_size, hidden_size]
+        return {ov::op::util::reorder_axes(Y_squeezed, {1, 0, 2}), Y_h_squeezed, Y_c_squeezed};
+    }
+
     return {ov::op::util::reorder_axes(Y, {2, 1, 0, 3}),
             ov::op::util::reorder_axes(Y_h, {1, 0, 2}),
             ov::op::util::reorder_axes(Y_c, {1, 0, 2})};
diff --git a/src/frontends/onnx/tests/models/lstm_missing_num_directions.prototxt b/src/frontends/onnx/tests/models/lstm_missing_num_directions.prototxt
@@ -0,0 +1,101 @@
+ir_version: 7
+producer_name: "OpenVINO Test"
+graph {
+  node {
+    input: "X"
+    input: "W"
+    input: "R"
+    input: "B"
+    output: "Y"
+    output: "Y_h"
+    output: "Y_c"
+    op_type: "LSTM"
+    attribute {
+      name: "direction"
+      s: "forward"
+      type: STRING
+    }
+    attribute {
+      name: "hidden_size"
+      i: 2
+      type: INT
+    }
+  }
+  name: "lstm_missing_num_directions"
+  input {
+    name: "X"
+    type {
+      tensor_type {
+        elem_type: 1
+        shape {
+          dim { dim_value: 3 }
+          dim { dim_value: 2 }
+          dim { dim_value: 4 }
+        }
+      }
+    }
+  }
+  input {
+    name: "W"
+    type {
+      tensor_type {
+        elem_type: 1
+        shape {
+          dim { dim_value: 8 }
+          dim { dim_value: 4 }
+        }
+      }
+    }
+  }
+  input {
+    name: "R"
+    type {
+      tensor_type {
+        elem_type: 1
+        shape {
+          dim { dim_value: 8 }
+          dim { dim_value: 2 }
+        }
+      }
+    }
+  }
+  input {
+    name: "B"
+    type {
+      tensor_type {
+        elem_type: 1
+        shape {
+          dim { dim_value: 16 }
+        }
+      }
+    }
+  }
+  output {
+    name: "Y"
+    type {
+      tensor_type {
+        elem_type: 1
+      }
+    }
+  }
+  output {
+    name: "Y_h"
+    type {
+      tensor_type {
+        elem_type: 1
+      }
+    }
+  }
+  output {
+    name: "Y_c"
+    type {
+      tensor_type {
+        elem_type: 1
+      }
+    }
+  }
+}
+opset_import {
+  domain: ""
+  version: 7
+}
diff --git a/src/frontends/onnx/tests/onnx_import_rnn.in.cpp b/src/frontends/onnx/tests/onnx_import_rnn.in.cpp
@@ -609,6 +609,109 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_model_lstm_rank4_with_unsqueeze) {
     test_case.run_with_tolerance_as_fp(1.0e-4f);
 }
 
+// Test for LSTM with inputs missing the num_directions dimension.
+// Some models omit the leading num_directions=1 dimension from W, R, B.
+// The converter should unsqueeze it automatically and squeeze it from outputs.
+// W shape: [8, 4] instead of [1, 8, 4], R: [8, 2] instead of [1, 8, 2], B: [16] instead of [1, 16]
+// Expected output Y shape: [3, 2, 2] (no num_directions), not [3, 1, 2, 2]
+OPENVINO_TEST(${BACKEND_NAME}, onnx_model_lstm_missing_num_directions) {
+    auto model = convert_model("lstm_missing_num_directions.onnx");
+    auto test_case = ov::test::TestCase(model, s_device);
+
+    // X: [seq_length=3, batch_size=2, input_size=4]
+    std::vector<float> X = {0.49671414494514465f, -0.13826429843902588f, 0.6476885676383972f,  1.5230298042297363f,
+                            -0.2341533750295639f, -0.23413695394992828f, 1.5792127847671509f,  0.7674347162246704f,
+                            -0.4694743752479553f, 0.5425600409507751f,   -0.4634176790714264f, -0.4657297432422638f,
+                            0.241962268948555f,   -1.9132802486419678f,  -1.7249178886413574f, -0.5622875094413757f,
+                            -1.0128310918807983f, 0.31424733996391296f,  -0.9080240726470947f, -1.4123036861419678f,
+                            1.4656487703323364f,  -0.2257762998342514f,  0.06752820312976837f, -1.424748182296753f};
+
+    // W: [4*hidden_size=8, input_size=4] - missing num_directions dimension
+    std::vector<float> W = {-0.5443827509880066f,   0.11092258989810944f, -1.1509935855865479f,  0.3756980299949646f,
+                            -0.6006386876106262f,   -0.2916937470436096f, -0.6017066240310669f,  1.852278232574463f,
+                            -0.013497225008904934f, -1.057710886001587f,  0.8225449323654175f,   -1.2208436727523804f,
+                            0.20886360108852386f,   -1.959670066833496f,  -1.32818603515625f,    0.19686123728752136f,
+                            0.7384665608406067f,    0.1713682860136032f,  -0.1156482845544815f,  -0.3011036813259125f,
+                            -1.4785219430923462f,   -0.7198442220687866f, -0.46063876152038574f, 1.0571222305297852f,
+                            0.3436183035373688f,    -1.7630401849746704f, 0.32408398389816284f,  -0.38508227467536926f,
+                            -0.6769220232963562f,   0.6116762757301331f,  1.0309995412826538f,   0.9312801361083984f};
+
+    // R: [4*hidden_size=8, hidden_size=2] - missing num_directions dimension
+    std::vector<float> R = {-0.8392175436019897f,
+                            -0.3092123866081238f,
+                            0.3312634229660034f,
+                            0.9755451083183289f,
+                            -0.4791742265224457f,
+                            -0.18565897643566132f,
+                            -1.106334924697876f,
+                            -1.1962065696716309f,
+                            0.8125258088111877f,
+                            1.3562400341033936f,
+                            -0.07201012223958969f,
+                            1.003532886505127f,
+                            0.3616360127925873f,
+                            -0.6451197266578674f,
+                            0.36139559745788574f,
+                            1.538036584854126f};
+
+    // B: [8*hidden_size=16] - missing num_directions dimension
+    std::vector<float> B = {-0.03582603856921196f,
+                            1.5646436214447021f,
+                            -2.6197450160980225f,
+                            0.8219025135040283f,
+                            0.08704707026481628f,
+                            -0.2990073561668396f,
+                            0.0917607769370079f,
+                            -1.9875688552856445f,
+                            -0.21967189013957977f,
+                            0.3571125566959381f,
+                            1.4778940677642822f,
+                            -0.5182701945304871f,
+                            -0.8084936141967773f,
+                            -0.501757025718689f,
+                            0.9154021143913269f,
+                            0.3287511169910431f};
+
+    // Expected outputs - verified against reference model with proper [1, ...] shapes
+    // Y: [seq_length=3, batch_size=2, hidden_size=2] - no num_directions dim
+    std::vector<float> expected_Y = {0.022259337827563286f,
+                                     0.003388261189684272f,
+                                     0.05276688188314438f,
+                                     0.11502056568861008f,
+                                     0.00648046750575304f,
+                                     -0.2669661343097687f,
+                                     0.35137197375297546f,
+                                     -0.5121785998344421f,
+                                     0.13379308581352234f,
+                                     -0.4288314878940582f,
+                                     0.37510907649993896f,
+                                     -0.0907968208193779f};
+
+    // Y_h: [batch_size=2, hidden_size=2] - no num_directions dim
+    std::vector<float> expected_Y_h = {0.13379308581352234f,
+                                       -0.4288314878940582f,
+                                       0.37510907649993896f,
+                                       -0.0907968208193779f};
+
+    // Y_c: [batch_size=2, hidden_size=2] - no num_directions dim
+    std::vector<float> expected_Y_c = {0.35507577657699585f,
+                                       -0.7553168535232544f,
+                                       0.6096105575561523f,
+                                       -0.12818995118141174f};
+
+    test_case.add_input<float>(Shape{3, 2, 4}, X);
+    test_case.add_input<float>(Shape{8, 4}, W);
+    test_case.add_input<float>(Shape{8, 2}, R);
+    test_case.add_input<float>(Shape{16}, B);
+
+    // Output shapes lack the num_directions dimension
+    test_case.add_expected_output<float>(Shape{3, 2, 2}, expected_Y);
+    test_case.add_expected_output<float>(Shape{2, 2}, expected_Y_h);
+    test_case.add_expected_output<float>(Shape{2, 2}, expected_Y_c);
+
+    test_case.run_with_tolerance_as_fp(1.0e-4f);
+}
+
 // RNNLikeSequenceOp test fixture for test setup reuse
 class GRUSequenceOp : public testing::Test {
 public: