Commit a303604

Fixed review comments
1 parent 04c89a2 commit a303604

3 files changed (+47, -10 lines)

src/cpp/src/whisper/models/with_past_decoder.cpp

Lines changed: 4 additions & 6 deletions
@@ -112,12 +112,10 @@ void WhisperWithPastDecoder::start_async(const Tensor& encoder_hidden_state,
     _set_encoder_hidden_states_tensor(encoder_hidden_state, batch_size, request);
     request.set_tensor("input_ids", input_ids);
 
-    if (!is_initial_step) {
-        if (m_past_decoder_has_cache_position) {
-            ov::Tensor cache_position_tensor = request.get_tensor("cache_position");
-            cache_position_tensor.set_shape({1});
-            cache_position_tensor.data<int64_t>()[0] = m_cache_position;
-        }
+    if (!is_initial_step && m_past_decoder_has_cache_position) {
+        ov::Tensor cache_position_tensor = request.get_tensor("cache_position");
+        cache_position_tensor.set_shape({1});
+        cache_position_tensor.data<int64_t>()[0] = m_cache_position;
     }
 
     _set_past_key_value(beam_idx);
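Note on the change above: `&&` short-circuits, so `m_past_decoder_has_cache_position` is still only checked on non-initial steps, and the merged guard is behaviorally identical to the old nested ifs. A minimal Python sketch of the same stepping contract against a plain OpenVINO `InferRequest` (the tensor names come from the diff; the helper and its arguments are hypothetical):

```python
import numpy as np
import openvino as ov

def run_decoder_step(request, input_ids, cache_position,
                     is_initial_step, has_cache_position):
    """Hypothetical analogue of WhisperWithPastDecoder::start_async."""
    request.set_tensor("input_ids", ov.Tensor(input_ids))
    # Same merged guard as the C++ change: short-circuit evaluation keeps
    # the cache_position update off the initial decode step.
    if not is_initial_step and has_cache_position:
        position = np.array([cache_position], dtype=np.int64)  # shape {1}
        request.set_tensor("cache_position", ov.Tensor(position))
    request.infer()  # the C++ method is start_async(); infer() keeps the sketch simple
```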

src/cpp/src/whisper/pipeline_static.cpp

Lines changed: 1 addition & 1 deletion
@@ -580,7 +580,7 @@ void add_cache_position_input(std::shared_ptr<ov::Model> model) {
     cache_position->set_friendly_name("cache_position");
     model->add_parameters({cache_position});
     std::shared_ptr<ov::Node> cache_pos_unsqueeze_arg;
-    if (unsqueeze_node->input(0).get_element_type() == ov::element::f32) {
+    if (matched_unsqueeze->input(0).get_element_type() == ov::element::f32) {
         cache_pos_unsqueeze_arg = std::make_shared<v0::Convert>(cache_position, ov::element::f32);
     } else {
         cache_pos_unsqueeze_arg = cache_position;
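The one-line fix makes the element-type check read from `matched_unsqueeze`, the node the pattern matcher found, instead of `unsqueeze_node`. For orientation, a rough Python analogue of the fixed transformation, assuming the OpenVINO graph-building API; `matched_unsqueeze` stands in for the matched node, and the opset import path varies across OpenVINO releases:

```python
import openvino as ov
from openvino.runtime import opset13 as ops  # import path varies by OV version

def add_cache_position_input(model, matched_unsqueeze):
    # Expose a new 1-D i64 "cache_position" input on the model.
    cache_position = ops.parameter([1], ov.Type.i64, name="cache_position")
    cache_position.set_friendly_name("cache_position")
    model.add_parameters([cache_position])
    # Mirror the fixed C++ branch: query the *matched* node's input type,
    # inserting a Convert only when the consumer expects f32.
    if matched_unsqueeze.input(0).get_element_type() == ov.Type.f32:
        arg = ops.convert(cache_position, ov.Type.f32)
    else:
        arg = cache_position
    matched_unsqueeze.input(0).replace_source_output(arg.output(0))
    model.validate_nodes_and_infer_types()
```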

tests/python_tests/test_whisper_pipeline.py

Lines changed: 42 additions & 3 deletions
@@ -55,11 +55,13 @@ def get_whisper_models_list(tiny_only=False):
 # used whisper models are relatively small
 # cache them in memory to speedup tests
 @functools.lru_cache()
-def read_whisper_model(params):
+def read_whisper_model(params, stateful=True):
     model_id, path = params
+    if not stateful:
+        path = pathlib.Path(f"{path}_with_past")
 
     if not (path / "openvino_encoder_model.xml").exists():
-        save_model(model_id=model_id, tmp_path=path)
+        save_model(model_id=model_id, tmp_path=path, stateful=stateful)
 
     opt_model = retry_request(lambda: OVModelForSpeechSeq2Seq.from_pretrained(
         path,
@@ -91,7 +93,7 @@ def read_whisper_model(params):
     )
 
 
-def save_model(model_id: str, tmp_path: pathlib.Path):
+def save_model(model_id: str, tmp_path: pathlib.Path, stateful=True):
     tokenizer = retry_request(lambda: AutoTokenizer.from_pretrained(model_id, trust_remote_code=True))
     ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(
         tokenizer,
@@ -109,6 +111,7 @@ def save_model(model_id: str, tmp_path: pathlib.Path):
         model_id,
         export=True,
         trust_remote_code=True,
+        stateful=stateful,
         compile=False,
         device="CPU",
         load_in_8bit=False,
@@ -223,6 +226,9 @@ def run_pipeline_with_ref(
     streamer: typing.Callable[[str], bool] | None = None,
 ):
     _, _, hf_pipe, genai_pipe = read_whisper_model((model_id, tmp_path))
+    _, _, _, genai_with_past_pipe = read_whisper_model(
+        (model_id, tmp_path), stateful=False
+    )
 
     if type(sample) is np.ndarray and len(sample.shape) == 1:
         sample = np.expand_dims(sample, 0)
@@ -233,6 +239,12 @@
 
     compare_results(hf_result, genai_result)
 
+    genai_with_past_result = run_genai(
+        genai_with_past_pipe, _sample, generation_config, streamer
+    )
+
+    compare_results(hf_result, genai_with_past_result)
+
 
 def compare_results(hf_result, genai_result):
     assert genai_result.texts[0] == hf_result["text"]
@@ -498,6 +510,33 @@ def test_longform_audio(model_descr, sample_from_dataset):
     assert "".join(streamer_result) == hf_result["text"]
 
 
+@pytest.mark.parametrize("model_descr", get_whisper_models_list())
+@pytest.mark.parametrize("sample_from_dataset", [*get_fixture_params_for_n_whisper_dataset_samples(n=2, long_form=True)], indirect=True)
+@pytest.mark.precommit
+@pytest.mark.xfail(condition=(sys.platform == "darwin"), reason="Ticket - 173169")
+def test_longform_audio_with_past(model_descr, sample_from_dataset):
+    _, _, hf_pipe, genai_pipe = read_whisper_model(model_descr, stateful=True)
+
+    streamer_result = []
+
+    genai_result = run_genai(
+        genai_pipe,
+        sample_from_dataset,
+        config=ov_genai.WhisperGenerationConfig(return_timestamps=True),
+        streamer=lambda x: streamer_result.append(x),
+    )
+
+    hf_result = run_huggingface(
+        hf_pipe,
+        sample_from_dataset,
+        config=ov_genai.WhisperGenerationConfig(return_timestamps=True),
+    )
+
+    compare_results(hf_result, genai_result)
+
+    assert "".join(streamer_result) == hf_result["text"]
+
+
 @pytest.mark.parametrize("model_descr", get_whisper_models_list())
 @pytest.mark.precommit
 @pytest.mark.xfail(condition=(sys.platform == "darwin"), reason="Ticket - 173169")
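The `stateful` flag threaded through these helpers maps directly onto optimum-intel's export option. A hedged sketch of the two exports the fixtures now produce, assuming optimum-intel is installed; the model id and output paths are illustrative:

```python
from optimum.intel.openvino import OVModelForSpeechSeq2Seq

model_id = "openai/whisper-tiny"
for stateful, path in [(True, "whisper-tiny"),
                       (False, "whisper-tiny_with_past")]:
    # stateful=True exports the default stateful decoder; stateful=False
    # exports the legacy "with past" decoder pair exercised by the new tests.
    model = OVModelForSpeechSeq2Seq.from_pretrained(
        model_id,
        export=True,
        stateful=stateful,
        compile=False,
        load_in_8bit=False,
    )
    model.save_pretrained(path)
```

Both layouts should transcribe identically; the updated `run_pipeline_with_ref` asserts exactly that by comparing each against the Hugging Face reference.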

0 commit comments
