From 4d16609aee2173be3978937a41789fb5a0c63453 Mon Sep 17 00:00:00 2001
From: He Wang
Date: Wed, 23 Jul 2025 11:26:53 +0800
Subject: [PATCH] Set the default value of env var 'NLTK_DATA' for local deployment

---
 rag/utils/__init__.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/rag/utils/__init__.py b/rag/utils/__init__.py
index 8468bf4c384..b4122e2f520 100644
--- a/rag/utils/__init__.py
+++ b/rag/utils/__init__.py
@@ -75,8 +75,9 @@ def findMaxTm(fnm):
     return m
 
 
-tiktoken_cache_dir = get_project_base_directory()
-os.environ["TIKTOKEN_CACHE_DIR"] = tiktoken_cache_dir
+base_dir = get_project_base_directory()
+os.environ.setdefault("NLTK_DATA", os.path.join(base_dir, "nltk_data"))
+os.environ.setdefault("TIKTOKEN_CACHE_DIR", base_dir)
 # encoder = tiktoken.encoding_for_model("gpt-3.5-turbo")
 encoder = tiktoken.get_encoding("cl100k_base")
 