diff --git a/demos/embeddings/README.md b/demos/embeddings/README.md index 63aed82ba4..802e976883 100644 --- a/demos/embeddings/README.md +++ b/demos/embeddings/README.md @@ -510,7 +510,7 @@ mteb run -m thenlper/gte-small -t Banking77Classification --output_folder result # Usage of tokenize endpoint (release 2025.4 or weekly) -The `tokenize` endpoint provides a simple API for tokenizing input text using the same tokenizer as the deployed embeddings model. This allows you to see how your text will be split into tokens before feature extraction or inference. The endpoint accepts a string or list of strings and returns the corresponding token IDs and tokenized text. +The `tokenize` endpoint provides a simple API for tokenizing input text using the same tokenizer as the deployed embeddings model. This allows you to see how your text will be split into tokens before feature extraction or inference. The endpoint accepts a string or list of strings and returns the corresponding token IDs. Example usage: ```console @@ -524,10 +524,10 @@ Response: ``` It's possible to use additional parameters: - - pad_to_max_length - whether to pad the sequence to the maximum length. Default is False. - - max_length - maximum length of the sequence. If None (default), the value will be taken from the IR (where default value from original HF/GGUF model is stored). - - padding_side - side to pad the sequence, can be ‘left’ or ‘right’. Default is None. - - add_special_tokens - whether to add special tokens like BOS, EOS, PAD. Default is True. + - `pad_to_max_length` - whether to pad the sequence to the maximum length. Default is False. + - `max_length` - maximum length of the sequence. If None (default), unlimited. + - `padding_side` - side to pad the sequence, can be ‘left’ or ‘right’. Default is None. + - `add_special_tokens` - whether to add special tokens like BOS, EOS, PAD. Default is True. Example usage: ```console diff --git a/src/embeddings/embeddings_calculator_ov.cc b/src/embeddings/embeddings_calculator_ov.cc index c800af1528..0888eb7ecb 100644 --- a/src/embeddings/embeddings_calculator_ov.cc +++ b/src/embeddings/embeddings_calculator_ov.cc @@ -63,7 +63,7 @@ class EmbeddingsCalculatorOV : public CalculatorBase { mediapipe::Timestamp timestamp{0}; - absl::Status tokenizeStrings(ov::genai::Tokenizer& tokenizer, const std::vector& inputStrings, const ov::AnyMap& parameters, ov::genai::TokenizedInputs& tokens, const size_t& max_context_length) { + absl::Status tokenizeStrings(ov::genai::Tokenizer& tokenizer, const std::vector& inputStrings, const ov::AnyMap& parameters, ov::genai::TokenizedInputs& tokens) { tokens = tokenizer.encode(inputStrings, parameters); RET_CHECK(tokens.input_ids.get_shape().size() == 2); @@ -134,7 +134,7 @@ class EmbeddingsCalculatorOV : public CalculatorBase { } auto input = tokenizeRequest.input; if (auto strings = std::get_if>(&input)) { - auto tokenizationStatus = this->tokenizeStrings(embeddings_session->getTokenizer(), *strings, tokenizeRequest.parameters, tokens, max_context_length); + auto tokenizationStatus = this->tokenizeStrings(embeddings_session->getTokenizer(), *strings, tokenizeRequest.parameters, tokens); if (!tokenizationStatus.ok()) { return tokenizationStatus; } @@ -172,7 +172,7 @@ class EmbeddingsCalculatorOV : public CalculatorBase { params["max_length"] = max_context_length; } - absl::Status tokenizationStatus = this->tokenizeStrings(embeddings_session->getTokenizer(), *strings, params, tokens, max_context_length); + absl::Status tokenizationStatus = this->tokenizeStrings(embeddings_session->getTokenizer(), *strings, params, tokens); if (!tokenizationStatus.ok()) { return tokenizationStatus; }