Skip to content

Commit f0c7929

Browse files
1. Ignore SentencePiece::BYTE during encoding instead of throwing error
2. Early exit from DecodePrecompiledCharsmap when precompiled_charsmap is empty
PiperOrigin-RevId: 828019077
1 parent aa839b1 commit f0c7929

File tree

2 files changed

+6
-3
lines changed

2 files changed

+6
-3
lines changed

tensorflow_text/core/kernels/sentencepiece/BUILD

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,9 +9,7 @@ load("//tensorflow_text:tftext.bzl", "tf_cc_library", "tflite_cc_library")
99
licenses(["notice"])
1010

1111
# Visibility rules
12-
package(default_visibility = [
13-
"//visibility:public",
14-
])
12+
package(default_visibility = ["//visibility:public"])
1513

1614
filegroup(
1715
name = "testdata",

tensorflow_text/core/kernels/sentencepiece/model_converter.cc

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,7 @@ limitations under the License.
2828
==============================================================================*/
2929

3030
#include "tensorflow_text/core/kernels/sentencepiece/model_converter.h"
31+
#include <tuple>
3132

3233
#include "absl/status/status.h"
3334
#include "absl/strings/str_replace.h"
@@ -46,6 +47,9 @@ DecodePrecompiledCharsmap(
4647
const ::sentencepiece::NormalizerSpec& normalizer_spec) {
4748
// This function "undoes" encoding done by
4849
// sentencepiece::normalizer::Normalizer::EncodePrecompiledCharsMap.
50+
if (normalizer_spec.precompiled_charsmap().empty()) {
51+
return std::make_tuple(std::vector<uint32_t>(), std::vector<int8_t>());
52+
}
4953
const char* precompiled_map = normalizer_spec.precompiled_charsmap().data();
5054
const uint32_t trie_size =
5155
*reinterpret_cast<const uint32_t*>(precompiled_map);
@@ -89,6 +93,7 @@ absl::StatusOr<std::string> ConvertSentencepieceModelToFlatBuffer(
8993
break;
9094
case ::sentencepiece::ModelProto::SentencePiece::UNKNOWN:
9195
case ::sentencepiece::ModelProto::SentencePiece::CONTROL:
96+
case ::sentencepiece::ModelProto::SentencePiece::BYTE:
9297
// Ignore unknown, control, and byte pieces.
9398
break;
9499
default:

0 commit comments

Comments
 (0)