6 changes: 4 additions & 2 deletions .github/workflows/ci_test.yml
@@ -29,8 +29,10 @@ jobs:
     name: Notebook lint
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/setup-python@v1
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
+      - uses: actions/setup-python@v6
+        with:
+          python-version: '3.13'
       - name: Install tensorflow-docs
         run: python3 -m pip install -U git+https://github.com/tensorflow/docs
       - name: Lint notebooks
2 changes: 1 addition & 1 deletion build_deps/tf_dependency/build_defs.bzl.tpl
@@ -2,4 +2,4 @@

 D_GLIBCXX_USE_CXX11_ABI = "%{tf_cx11_abi}"
 CPLUSPLUS_VERSION = "%{tf_cplusplus_ver}"
-DTF_VERSION_INTEGER = "%{tf_version_integer}"
+TF_VERSION_INTEGER = "%{tf_version_integer}"
2 changes: 1 addition & 1 deletion build_deps/tf_dependency/tf_configure.bzl
@@ -213,7 +213,7 @@ def _tf_pip_impl(repository_ctx):
     tf_shared_cc_library_path = "%s/%s" % (tf_shared_library_dir, tf_shared_cc_library_name)
     tf_cx11_abi = "-D_GLIBCXX_USE_CXX11_ABI=%s" % (repository_ctx.os.environ[_TF_CXX11_ABI_FLAG])
     tf_cplusplus_ver = "-std=%s" % repository_ctx.os.environ[_TF_CPLUSPLUS_VER]
-    tf_version_integer = "-DTF_VERSION_INTEGER=%s" % (repository_ctx.os.environ[_TF_VERSION_INTEGER])
+    tf_version_integer = "TF_VERSION_INTEGER=%s" % (repository_ctx.os.environ[_TF_VERSION_INTEGER])

     tf_shared_library_rule = _symlink_genrule_for_dir(
         repository_ctx,
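Why the "-D" prefix disappears here: the macro is now injected through a cc_library `defines` attribute (see deepray/custom_ops/utils/BUILD below), and Bazel prepends `-D` to every `defines` entry on its own, so keeping the prefix in the string would double it. A sketch of the effect, assuming a hypothetical TF 2.15.0 install (version integer 2150):

# Before: defines = ["-DTF_VERSION_INTEGER=2150"]  ->  gcc ... -D-DTF_VERSION_INTEGER=2150   (malformed)
# After:  defines = ["TF_VERSION_INTEGER=2150"]    ->  gcc ... -DTF_VERSION_INTEGER=2150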
4 changes: 2 additions & 2 deletions configure.py
@@ -25,7 +25,7 @@
 import shutil
 import subprocess
 import sys
-from typing import Optional
+from typing import List, Optional

 import tensorflow as tf
 from packaging.version import Version
@@ -873,7 +873,7 @@ def _find_executable_or_die(executable_name: str, executable_path: Optional[str]
   return resolved_path_to_exe


-def _get_cuda_compute_capabilities_or_die() -> list[str]:
+def _get_cuda_compute_capabilities_or_die() -> List[str]:
   """Finds compute capabilities via nvidia-smi or raises an exception.

   Returns:
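The annotation change from `list[str]` back to `typing.List[str]` keeps configure.py importable on Python versions before 3.9, where subscripting the builtin `list` raises at definition time. A minimal illustration (hypothetical functions, not from the codebase):

from typing import List

def capabilities() -> List[str]:  # fine on Python 3.5+
    return ["8.6", "9.0"]

# def capabilities() -> list[str]:  # on Python 3.8 the def itself raises:
#     return ["8.6", "9.0"]         # TypeError: 'type' object is not subscriptable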
7 changes: 7 additions & 0 deletions deepray/__init__.py
@@ -36,16 +36,23 @@
 from deepray.utils import types
 from deepray.utils.ensure_tf_install import _check_tf_version
 from deepray.utils.flags import common_flags
+from deepray.utils.keras_utils import set_random_seed
 from deepray.version import __version__

 # _check_tf_version()

 logger = logging_util.get_logger()
 common_flags.define_common_flags()

+# Parse sys.argv so flags are usable immediately after `import deepray`.
+flags.FLAGS(sys.argv, known_only=True)
+if flags.FLAGS.random_seed is not None:
+  set_random_seed(flags.FLAGS.random_seed)


 def init():
   logger.debug(f"sys.argv = {sys.argv}")  # sys.argv from Horovod
   # Parse the additional `distribution_strategy` flag.
   flags.FLAGS(sys.argv, known_only=True)

   gpus = tf.config.list_physical_devices("GPU")
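Because flags are now parsed at import time, seeding no longer depends on constructing a Trainer. A minimal sketch, assuming a hypothetical script launched as `python train.py --random_seed=42`:

import deepray  # parses known flags from sys.argv and seeds Python/NumPy/TF RNGs
import tensorflow as tf

print(tf.random.uniform([2]))  # identical output across runs with the same seed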
9 changes: 0 additions & 9 deletions deepray/core/trainer.py
@@ -62,13 +62,6 @@
 logger = logging_util.get_logger()


-def set_random_seed(random_seed):
-  random.seed(random_seed)  # set random seed for python
-  np.random.seed(random_seed)  # set random seed for numpy
-  tf.random.set_seed(random_seed)  # set random seed for tensorflow-cpu
-  os.environ["TF_DETERMINISTIC_OPS"] = "1"  # set random seed for tensorflow-gpu
-
-
 @keras_export("keras.Model", "keras.models.Model")
 class Trainer:
   """A model grouping layers into an object with training/inference features.
@@ -349,8 +342,6 @@ def __init__(
     logger.info("flags.FLAGS:")
     for key, value in sorted(flags.FLAGS.flag_values_dict().items()):
       logger.info(f"\t{key:25}= {value}")
-    if flags.FLAGS.random_seed is not None:
-      set_random_seed(flags.FLAGS.random_seed)

   def _create_counter_variable(self, init_value):
     """Helper function for counter variable creation.
3 changes: 3 additions & 0 deletions deepray/custom_ops/parquet_dataset/BUILD
@@ -14,6 +14,7 @@ custom_op_library(
     deps = [
         ":arrow_util",
         ":parquet_batch_reader",
+        "//deepray/custom_ops/utils:ok_status_util",
     ],
 )

@@ -31,6 +32,7 @@ cc_library(
         "DEEPREC_ARROW_ZEROCOPY",
     ],
     deps = [
+        "//deepray/custom_ops/utils:ok_status_util",
         "@local_config_tf//:libtensorflow_framework",
         "@local_config_tf//:tf_header_lib",
         "@org_apache_arrow//:arrow",
@@ -47,6 +49,7 @@ cc_library(
     ],
     deps = [
         ":arrow_util",
+        "//deepray/custom_ops/utils:ok_status_util",
         "@local_config_tf//:libtensorflow_framework",
         "@local_config_tf//:tf_header_lib",
         "@org_apache_arrow//:arrow",
7 changes: 4 additions & 3 deletions deepray/custom_ops/parquet_dataset/cc/kernels/arrow_util.cc
@@ -25,6 +25,7 @@ limitations under the License.

 #include "arrow/array.h"
 #include "arrow/util/thread_pool.h"
+#include "deepray/custom_ops/utils/ok_status_util.h"
 #include "eigen.h"
 #include "tensorflow/core/framework/allocation_description.pb.h"

@@ -252,7 +253,7 @@ class RaggedTensorBuilder : public ::arrow::ArrayVisitor {
 #define CASE_ARROW_ENUM_SET_DTYPE(PTR, ENUM)                       \
   case ENUM: {                                                     \
     *PTR = DataTypeToEnum<ArrowEnumToDataType<ENUM>::Type>::value; \
-    return OkStatus();                                             \
+    return TFOkStatus;                                             \
   }

 Status MakeDataTypeAndRaggedRankFromArrowDataType(
@@ -280,7 +281,7 @@ Status MakeDataTypeAndRaggedRankFromArrowDataType(
     return errors::Unimplemented("Arrow data type ", arrow_dtype->ToString(),
                                  " not supported.");
   }
-  return OkStatus();
+  return TFOkStatus;
 }

 Status MakeTensorsFromArrowArray(
@@ -297,7 +298,7 @@ Status MakeTensorsFromArrowArray(

   RaggedTensorBuilder builder(dtype, ragged_rank);
   TF_RETURN_IF_ARROW_ERROR(builder.Build(arrow_array, output_tensors));
-  return OkStatus();
+  return TFOkStatus;
 }

 int UpdateArrowCpuThreadPoolCapacityFromEnv() {
@@ -21,6 +21,7 @@ limitations under the License.

 #include "absl/strings/match.h"
 #include "arrow_util.h"
+#include "deepray/custom_ops/utils/ok_status_util.h"

 namespace tensorflow {
 namespace data {
@@ -44,7 +45,7 @@ class ParquetBatchReader::Impl {

   Status Open() {
     if (TF_PREDICT_TRUE(batch_reader_)) {
-      return OkStatus();
+      return TFOkStatus;
     }
     if (TF_PREDICT_FALSE(partition_index_ >= partition_count_)) {
       return errors::InvalidArgument("Partition index ", partition_index_,
@@ -101,7 +102,7 @@ class ParquetBatchReader::Impl {

     TF_RETURN_IF_ARROW_ERROR(reader_->GetRecordBatchReader(
         row_group_indices_, column_indices_, &batch_reader_));
-    return OkStatus();
+    return TFOkStatus;
   }

   Status Read(std::vector<Tensor>* output_tensors) {
@@ -123,7 +124,7 @@ class ParquetBatchReader::Impl {
         field_dtypes_[i], field_ragged_ranks_[i], arrays[i], output_tensors));
     }

-    return OkStatus();
+    return TFOkStatus;
   }

  private:
@@ -16,6 +16,7 @@ limitations under the License.

 #include <unordered_set>

+#include "deepray/custom_ops/utils/ok_status_util.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def_builder.h"
@@ -83,14 +84,14 @@ class ParquetTabularDatasetOp::Dataset : public DatasetBase {
     return output_shapes_;
   }

-  Status CheckExternalState() const override { return OkStatus(); }
+  Status CheckExternalState() const override { return TFOkStatus; }

   string DebugString() const override {
     return "ParquetTabularDatasetOp::Dataset";
   }

   Status InputDatasets(std::vector<const DatasetBase*>* inputs) const override {
-    return OkStatus();
+    return TFOkStatus;
   }

  protected:
@@ -122,7 +123,7 @@ class ParquetTabularDatasetOp::Dataset : public DatasetBase {
          {"partition_index", partition_index},
          {"drop_remainder", drop_remainder}},
         output));
-    return OkStatus();
+    return TFOkStatus;
   }

  private:
@@ -159,7 +160,7 @@ class ParquetTabularDatasetOp::Dataset::Iterator
       return s;
     }
     *end_of_sequence = true;
-    return OkStatus();
+    return TFOkStatus;
   }

  protected:
4 changes: 4 additions & 0 deletions deepray/custom_ops/utils/BUILD
@@ -1,4 +1,5 @@
 load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library")
+load("@local_config_tf//:build_defs.bzl", "TF_VERSION_INTEGER")
 load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_copts")

 package(default_visibility = ["//visibility:public"])
@@ -24,6 +25,9 @@ cc_library(
     srcs = [
         "ok_status_util.h",
     ],
+    defines = [
+        TF_VERSION_INTEGER,
+    ],
     visibility = ["//visibility:public"],
 )
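Listing the loaded TF_VERSION_INTEGER string under `defines` is what finally turns it into a compiler flag: Bazel prefixes each entry with -D for this target and for everything that depends on it. A sketch of the generated file this load() reads, assuming a hypothetical TF 2.15.0 install:

# Hypothetical contents of the generated @local_config_tf//:build_defs.bzl:
D_GLIBCXX_USE_CXX11_ABI = "-D_GLIBCXX_USE_CXX11_ABI=1"
CPLUSPLUS_VERSION = "-std=c++17"
TF_VERSION_INTEGER = "TF_VERSION_INTEGER=2150"
# With defines = [TF_VERSION_INTEGER], ok_status_util.h (and its dependents)
# compile with -DTF_VERSION_INTEGER=2150, which the header uses to pick the
# right status spelling, as shown in the next file.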
2 changes: 1 addition & 1 deletion deepray/custom_ops/utils/ok_status_util.h
@@ -33,7 +33,7 @@ This code is for compatibility.*/
 #else
 // #pragma message(PRINT_MACRO(TF_VERSION_INTEGER))
 // #define TFOkStatus Status::OK()
-#define TFOkStatus absl::OkStatus()
+#define TFOkStatus Status::OK()
 #endif
 }  // namespace deepray
 }  // namespace tensorflow
3 changes: 0 additions & 3 deletions deepray/optimizers/adagrad.py
@@ -18,10 +18,7 @@
 from __future__ import division
 from __future__ import print_function

-import sys
-
 import tensorflow as tf
-from absl import flags

 from deepray.custom_ops.embedding_variable import gen_kv_variable_ops
 from deepray.custom_ops.embedding_variable import kv_variable_ops
3 changes: 0 additions & 3 deletions deepray/optimizers/ftrl.py
@@ -1,7 +1,4 @@
-import sys
-
 import tensorflow as tf
-from absl import flags

 from deepray.custom_ops.embedding_variable import gen_kv_variable_ops
 from deepray.custom_ops.embedding_variable import kv_variable_ops
10 changes: 10 additions & 0 deletions deepray/utils/keras_utils.py
@@ -15,7 +15,9 @@

 import multiprocessing
 import os
+import random

+import numpy as np
 import tensorflow as tf
 from absl import logging
 from tensorflow.python import tf2
@@ -153,3 +155,11 @@ def count_params(model):
   main_print(f"{'':20} ({model_size:,})")

   return model_size


+def set_random_seed(random_seed):
+  random.seed(random_seed)  # seed Python's RNG
+  np.random.seed(random_seed)  # seed NumPy's RNG
+  tf.random.set_seed(random_seed)  # seed TensorFlow's RNG (CPU)
+  # Deterministic GPU ops significantly slow training, so this stays disabled:
+  # os.environ["TF_DETERMINISTIC_OPS"] = "1"
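The seeding helper now lives here (moved out of deepray/core/trainer.py). A quick reproducibility check, as a sketch:

import tensorflow as tf
from deepray.utils.keras_utils import set_random_seed

set_random_seed(42)
a = tf.random.uniform([3])
set_random_seed(42)
b = tf.random.uniform([3])
assert bool(tf.reduce_all(a == b))  # reseeding replays the same random sequence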
2 changes: 1 addition & 1 deletion deepray/version.py
@@ -21,7 +21,7 @@
 # We follow Semantic Versioning (https://semver.org/)
 _MAJOR_VERSION = "0"
 _MINOR_VERSION = "21"
-_PATCH_VERSION = "96"
+_PATCH_VERSION = "97"

 # When building releases, we can update this value on the release branch to
 # reflect the current release candidate ('rc0', 'rc1') or, finally, the official
45 changes: 45 additions & 0 deletions modelzoo/LanguageModeling/Word2Vec/dataset.py
@@ -0,0 +1,45 @@
import tensorflow as tf
import tqdm


# Generates skip-gram pairs with negative sampling for a list of sequences
# (int-encoded sentences) based on window size, number of negative samples
# and vocabulary size.
def generate_training_data(sequences, window_size, num_ns, vocab_size, seed):
  # Elements of each training example are appended to these lists.
  targets, contexts, labels = [], [], []

  # Build the sampling table for `vocab_size` tokens.
  sampling_table = tf.keras.preprocessing.sequence.make_sampling_table(vocab_size)

  # Iterate over all sequences (sentences) in the dataset.
  for sequence in tqdm.tqdm(sequences):
    # Generate positive skip-gram pairs for a sequence (sentence).
    positive_skip_grams, _ = tf.keras.preprocessing.sequence.skipgrams(
        sequence, vocabulary_size=vocab_size, sampling_table=sampling_table, window_size=window_size, negative_samples=0
    )

    # Iterate over each positive skip-gram pair to produce training examples
    # with a positive context word and negative samples.
    for target_word, context_word in positive_skip_grams:
      context_class = tf.expand_dims(tf.constant([context_word], dtype="int64"), 1)
      negative_sampling_candidates, _, _ = tf.random.log_uniform_candidate_sampler(
          true_classes=context_class,
          num_true=1,
          num_sampled=num_ns,
          unique=True,
          range_max=vocab_size,
          seed=seed,
          name="negative_sampling",
      )

      # Build context and label vectors (for one target word)
      context = tf.concat([tf.squeeze(context_class, 1), negative_sampling_candidates], 0)
      label = tf.constant([1] + [0] * num_ns, dtype="int64")

      # Append each element from the training example to global lists.
      targets.append(target_word)
      contexts.append(context)
      labels.append(label)

  return targets, contexts, labels
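A minimal sketch of how generate_training_data might be consumed downstream; the toy corpus and hyperparameters here are illustrative only:

import tensorflow as tf
from dataset import generate_training_data  # this new module

sequences = [[1, 5, 3, 2, 7], [4, 2, 9, 1]]  # toy int-encoded sentences
targets, contexts, labels = generate_training_data(
    sequences, window_size=2, num_ns=4, vocab_size=10, seed=42
)

# Pack ((target, context), label) triples into a batched tf.data pipeline.
dataset = tf.data.Dataset.from_tensor_slices(((targets, contexts), labels))
dataset = dataset.shuffle(1000).batch(8)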