@@ -14,7 +14,7 @@ def define_common_targets():
1414 name = "headers" ,
1515 exported_headers = subdir_glob ([
1616 ("include" , "pytorch/tokenizers/*.h" ),
17- ], exclude = [ "pcre2_regex.h" , "std_regex.h" ] ),
17+ ]),
1818 visibility = [
1919 "@EXECUTORCH_CLIENTS" ,
2020 "//pytorch/tokenizers/..." ,
@@ -23,9 +23,6 @@ def define_common_targets():
2323 platforms = PLATFORMS ,
2424 )
2525
26- # TODO: add target for regex which does lookahed with pcre2
27- # by adding "-DSUPPORT_REGEX_LOOKAHEAD" as a compiler flag
28- # and including pcre2 dependencies.
2926 runtime .cxx_library (
3027 name = "regex" ,
3128 srcs = [
@@ -43,6 +40,27 @@ def define_common_targets():
4340 platforms = PLATFORMS ,
4441 )
4542
43+ runtime .cxx_library (
44+ name = "regex_lookahead" ,
45+ srcs = [
46+ "src/pcre2_regex.cpp" ,
47+ "src/regex.cpp" ,
48+ "src/re2_regex.cpp" ,
49+ "src/std_regex.cpp" ,
50+ ],
51+ exported_deps = [
52+ ":headers" ,
53+ ],
54+ exported_external_deps = [
55+ "pcre2" ,
56+ "re2" ,
57+ ],
58+ preprocessor_flags = ["-DSUPPORT_REGEX_LOOKAHEAD=ON" ],
59+ visibility = ["//pytorch/tokenizers/..." ],
60+ header_namespace = "" ,
61+ platforms = PLATFORMS ,
62+ )
63+
4664 runtime .cxx_library (
4765 name = "bpe_tokenizer_base" ,
4866 srcs = [
@@ -101,6 +119,29 @@ def define_common_targets():
101119 platforms = PLATFORMS ,
102120 )
103121
122+ runtime .cxx_library (
123+ name = "tiktoken_lookahead" ,
124+ srcs = [
125+ "src/tiktoken.cpp" ,
126+ ],
127+ deps = [
128+ ":regex_lookahead" ,
129+ ],
130+ exported_deps = [
131+ ":bpe_tokenizer_base" ,
132+ ":headers" ,
133+ ],
134+ exported_external_deps = [
135+ "pcre2" ,
136+ "re2" ,
137+ ],
138+ visibility = [
139+ "@EXECUTORCH_CLIENTS" ,
140+ "//pytorch/tokenizers/..." ,
141+ ],
142+ platforms = PLATFORMS ,
143+ )
144+
104145 runtime .cxx_library (
105146 name = "hf_tokenizer" ,
106147 srcs = [
0 commit comments