55#include " openvino/genai/whisper_pipeline.hpp"
66
77int main (int argc, char * argv[]) try {
8- if (argc < 3 || argc > 4 ) {
8+ if (argc < 3 || argc > 5 ) {
99 throw std::runtime_error (std::string{" Usage: " } + argv[0 ] + " <MODEL_DIR> \" <WAV_FILE_PATH>\" <DEVICE>" );
1010 }
1111
1212 std::filesystem::path models_path = argv[1 ];
1313 std::string wav_file_path = argv[2 ];
14- std::string device = (argc = = 4 ) ? argv[3 ] : " CPU" ; // Default to CPU if no device is provided
14+ std::string device = (argc > = 4 ) ? argv[3 ] : " CPU" ; // Default to CPU if no device is provided
1515
16- ov::genai::WhisperPipeline pipeline (models_path, device);
16+ ov::AnyMap npu_config = {};
17+ if (device == " NPU" )
18+ npu_config = {{" NPU_USE_NPUW" , " YES" }, {" NPUW_DEVICES" , " CPU" }, {" NPUW_ONLINE_PIPELINE" , " NONE" }};
19+ if (device == " NPU" && argc == 5 && std::string (argv[4 ]) == " STATIC" ) // compare the argument's text; `argv[4] == "..."` would compare pointer addresses and never match
20+ npu_config = {{" STATIC_PIPELINE" , " YES" }};
21+ ov::genai::WhisperPipeline pipeline (models_path, device, npu_config);
1722
1823 ov::genai::WhisperGenerationConfig config = pipeline.get_generation_config ();
1924 // 'task' and 'language' parameters are supported for multilingual models only
@@ -32,6 +37,15 @@ int main(int argc, char* argv[]) try {
3237 std::cout << " timestamps: [" << chunk.start_ts << " , " << chunk.end_ts << " ] text: " << chunk.text << " \n " ;
3338 }
3439
40+ std::cout << std::fixed << std::setprecision (2 ) << " \n\n Load time: " << result.perf_metrics .get_load_time () << " ms" << std::endl;
41+ std::cout << std::fixed << std::setprecision (2 ) << " Generate time: " << result.perf_metrics .get_generate_duration ().mean << " +/- " << result.perf_metrics .get_generate_duration ().std << " ms" << std::endl;
42+ std::cout << std::fixed << std::setprecision (2 ) << " Tokenization time: " << result.perf_metrics .get_tokenization_duration ().mean << " +/- " << result.perf_metrics .get_tokenization_duration ().std << " ms" << std::endl;
43+ std::cout << std::fixed << std::setprecision (2 ) << " Detokenization time: " << result.perf_metrics .get_detokenization_duration ().mean << " +/- " << result.perf_metrics .get_detokenization_duration ().std << " ms" << std::endl;
44+ std::cout << std::fixed << std::setprecision (2 ) << " TTFT: " << result.perf_metrics .get_ttft ().mean << " +/- " << result.perf_metrics .get_ttft ().std << " ms" << std::endl;
45+ std::cout << std::fixed << std::setprecision (2 ) << " TPOT: " << result.perf_metrics .get_tpot ().mean << " +/- " << result.perf_metrics .get_tpot ().std << " ms" << std::endl;
46+ std::cout << std::fixed << std::setprecision (2 ) << " Throughput : " << result.perf_metrics .get_throughput ().mean << " +/- " << result.perf_metrics .get_throughput ().std << " tokens/s" << std::endl;
47+
48+
3549} catch (const std::exception& error) {
3650 try {
3751 std::cerr << error.what () << ' \n ' ;
0 commit comments