6 changes: 4 additions & 2 deletions .github/workflows/ci_test.yml
@@ -29,8 +29,10 @@ jobs:
     name: Notebook lint
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/setup-python@v1
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v5
+      - uses: actions/setup-python@v6
+        with:
+          python-version: '3.13'
       - name: Install tensorflow-docs
         run: python3 -m pip install -U git+https://github.com/tensorflow/docs
       - name: Lint notebooks
2 changes: 1 addition & 1 deletion build_deps/tf_dependency/build_defs.bzl.tpl
@@ -2,4 +2,4 @@

 D_GLIBCXX_USE_CXX11_ABI = "%{tf_cx11_abi}"
 CPLUSPLUS_VERSION = "%{tf_cplusplus_ver}"
-DTF_VERSION_INTEGER = "%{tf_version_integer}"
+TF_VERSION_INTEGER = "%{tf_version_integer}"
2 changes: 1 addition & 1 deletion build_deps/tf_dependency/tf_configure.bzl
@@ -213,7 +213,7 @@ def _tf_pip_impl(repository_ctx):
     tf_shared_cc_library_path = "%s/%s" % (tf_shared_library_dir, tf_shared_cc_library_name)
     tf_cx11_abi = "-D_GLIBCXX_USE_CXX11_ABI=%s" % (repository_ctx.os.environ[_TF_CXX11_ABI_FLAG])
     tf_cplusplus_ver = "-std=%s" % repository_ctx.os.environ[_TF_CPLUSPLUS_VER]
-    tf_version_integer = "-DTF_VERSION_INTEGER=%s" % (repository_ctx.os.environ[_TF_VERSION_INTEGER])
+    tf_version_integer = "TF_VERSION_INTEGER=%s" % (repository_ctx.os.environ[_TF_VERSION_INTEGER])

     tf_shared_library_rule = _symlink_genrule_for_dir(
         repository_ctx,
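Why the "-D" prefix disappears here: the macro is now injected through a cc_library `defines` attribute (see deepray/custom_ops/utils/BUILD below), and Bazel prepends `-D` to every `defines` entry on its own, so keeping the prefix in the string would double it. A sketch of the effect, assuming a hypothetical TF 2.15.0 install (version integer 2150):

# Before: defines = ["-DTF_VERSION_INTEGER=2150"]  ->  gcc ... -D-DTF_VERSION_INTEGER=2150   (malformed)
# After:  defines = ["TF_VERSION_INTEGER=2150"]    ->  gcc ... -DTF_VERSION_INTEGER=2150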
4 changes: 2 additions & 2 deletions configure.py
@@ -25,7 +25,7 @@
 import shutil
 import subprocess
 import sys
-from typing import Optional
+from typing import List, Optional

 import tensorflow as tf
 from packaging.version import Version
@@ -873,7 +873,7 @@ def _find_executable_or_die(executable_name: str, executable_path: Optional[str]
   return resolved_path_to_exe


-def _get_cuda_compute_capabilities_or_die() -> list[str]:
+def _get_cuda_compute_capabilities_or_die() -> List[str]:
   """Finds compute capabilities via nvidia-smi or raises an exception.

   Returns:
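The annotation change from `list[str]` back to `typing.List[str]` keeps configure.py importable on Python versions before 3.9, where subscripting the builtin `list` raises at definition time. A minimal illustration (hypothetical functions, not from the codebase):

from typing import List

def capabilities() -> List[str]:  # fine on Python 3.5+
    return ["8.6", "9.0"]

# def capabilities() -> list[str]:  # on Python 3.8 the def itself raises:
#     return ["8.6", "9.0"]         # TypeError: 'type' object is not subscriptable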
7 changes: 7 additions & 0 deletions deepray/__init__.py
@@ -36,16 +36,23 @@
 from deepray.utils import types
 from deepray.utils.ensure_tf_install import _check_tf_version
 from deepray.utils.flags import common_flags
+from deepray.utils.keras_utils import set_random_seed
 from deepray.version import __version__

 # _check_tf_version()

 logger = logging_util.get_logger()
 common_flags.define_common_flags()

+# Parse sys.argv so flags are usable immediately after `import deepray`.
+flags.FLAGS(sys.argv, known_only=True)
+if flags.FLAGS.random_seed is not None:
+  set_random_seed(flags.FLAGS.random_seed)


 def init():
   logger.debug(f"sys.argv = {sys.argv}")  # sys.argv from Horovod
   # Parse the additional `distribution_strategy` flag.
   flags.FLAGS(sys.argv, known_only=True)

   gpus = tf.config.list_physical_devices("GPU")
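Because flags are now parsed at import time, seeding no longer depends on constructing a Trainer. A minimal sketch, assuming a hypothetical script launched as `python train.py --random_seed=42`:

import deepray  # parses known flags from sys.argv and seeds Python/NumPy/TF RNGs
import tensorflow as tf

print(tf.random.uniform([2]))  # identical output across runs with the same seed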
9 changes: 0 additions & 9 deletions deepray/core/trainer.py
@@ -62,13 +62,6 @@
 logger = logging_util.get_logger()


-def set_random_seed(random_seed):
-  random.seed(random_seed)  # set random seed for python
-  np.random.seed(random_seed)  # set random seed for numpy
-  tf.random.set_seed(random_seed)  # set random seed for tensorflow-cpu
-  os.environ["TF_DETERMINISTIC_OPS"] = "1"  # set random seed for tensorflow-gpu
-
-
 @keras_export("keras.Model", "keras.models.Model")
 class Trainer:
   """A model grouping layers into an object with training/inference features.
@@ -349,8 +342,6 @@ def __init__(
     logger.info("flags.FLAGS:")
     for key, value in sorted(flags.FLAGS.flag_values_dict().items()):
       logger.info(f"\t{key:25}= {value}")
-    if flags.FLAGS.random_seed is not None:
-      set_random_seed(flags.FLAGS.random_seed)

   def _create_counter_variable(self, init_value):
     """Helper function for counter variable creation.
3 changes: 3 additions & 0 deletions deepray/custom_ops/parquet_dataset/BUILD
@@ -14,6 +14,7 @@ custom_op_library(
     deps = [
         ":arrow_util",
         ":parquet_batch_reader",
+        "//deepray/custom_ops/utils:ok_status_util",
     ],
 )

@@ -31,6 +32,7 @@ cc_library(
         "DEEPREC_ARROW_ZEROCOPY",
     ],
     deps = [
+        "//deepray/custom_ops/utils:ok_status_util",
         "@local_config_tf//:libtensorflow_framework",
         "@local_config_tf//:tf_header_lib",
         "@org_apache_arrow//:arrow",
@@ -47,6 +49,7 @@ cc_library(
     ],
     deps = [
         ":arrow_util",
+        "//deepray/custom_ops/utils:ok_status_util",
         "@local_config_tf//:libtensorflow_framework",
         "@local_config_tf//:tf_header_lib",
         "@org_apache_arrow//:arrow",
7 changes: 4 additions & 3 deletions deepray/custom_ops/parquet_dataset/cc/kernels/arrow_util.cc
@@ -25,6 +25,7 @@ limitations under the License.

 #include "arrow/array.h"
 #include "arrow/util/thread_pool.h"
+#include "deepray/custom_ops/utils/ok_status_util.h"
 #include "eigen.h"
 #include "tensorflow/core/framework/allocation_description.pb.h"

@@ -252,7 +253,7 @@ class RaggedTensorBuilder : public ::arrow::ArrayVisitor {
 #define CASE_ARROW_ENUM_SET_DTYPE(PTR, ENUM)                       \
   case ENUM: {                                                     \
     *PTR = DataTypeToEnum<ArrowEnumToDataType<ENUM>::Type>::value; \
-    return OkStatus();                                             \
+    return TFOkStatus;                                             \
   }

 Status MakeDataTypeAndRaggedRankFromArrowDataType(
@@ -280,7 +281,7 @@ Status MakeDataTypeAndRaggedRankFromArrowDataType(
     return errors::Unimplemented("Arrow data type ", arrow_dtype->ToString(),
                                  " not supported.");
   }
-  return OkStatus();
+  return TFOkStatus;
 }

 Status MakeTensorsFromArrowArray(
@@ -297,7 +298,7 @@ Status MakeTensorsFromArrowArray(

   RaggedTensorBuilder builder(dtype, ragged_rank);
   TF_RETURN_IF_ARROW_ERROR(builder.Build(arrow_array, output_tensors));
-  return OkStatus();
+  return TFOkStatus;
 }

 int UpdateArrowCpuThreadPoolCapacityFromEnv() {
@@ -21,6 +21,7 @@ limitations under the License.

 #include "absl/strings/match.h"
 #include "arrow_util.h"
+#include "deepray/custom_ops/utils/ok_status_util.h"

 namespace tensorflow {
 namespace data {
@@ -44,7 +45,7 @@ class ParquetBatchReader::Impl {

   Status Open() {
     if (TF_PREDICT_TRUE(batch_reader_)) {
-      return OkStatus();
+      return TFOkStatus;
     }
     if (TF_PREDICT_FALSE(partition_index_ >= partition_count_)) {
       return errors::InvalidArgument("Partition index ", partition_index_,
@@ -101,7 +102,7 @@ class ParquetBatchReader::Impl {

     TF_RETURN_IF_ARROW_ERROR(reader_->GetRecordBatchReader(
         row_group_indices_, column_indices_, &batch_reader_));
-    return OkStatus();
+    return TFOkStatus;
   }

   Status Read(std::vector<Tensor>* output_tensors) {
@@ -123,7 +124,7 @@ class ParquetBatchReader::Impl {
         field_dtypes_[i], field_ragged_ranks_[i], arrays[i], output_tensors));
     }

-    return OkStatus();
+    return TFOkStatus;
   }

  private:
@@ -16,6 +16,7 @@ limitations under the License.

 #include <unordered_set>

+#include "deepray/custom_ops/utils/ok_status_util.h"
 #include "tensorflow/core/framework/common_shape_fns.h"
 #include "tensorflow/core/framework/op.h"
 #include "tensorflow/core/framework/op_def_builder.h"
@@ -83,14 +84,14 @@ class ParquetTabularDatasetOp::Dataset : public DatasetBase {
     return output_shapes_;
   }

-  Status CheckExternalState() const override { return OkStatus(); }
+  Status CheckExternalState() const override { return TFOkStatus; }

   string DebugString() const override {
     return "ParquetTabularDatasetOp::Dataset";
   }

   Status InputDatasets(std::vector<const DatasetBase*>* inputs) const override {
-    return OkStatus();
+    return TFOkStatus;
   }

  protected:
@@ -122,7 +123,7 @@ class ParquetTabularDatasetOp::Dataset : public DatasetBase {
          {"partition_index", partition_index},
          {"drop_remainder", drop_remainder}},
         output));
-    return OkStatus();
+    return TFOkStatus;
   }

  private:
@@ -159,7 +160,7 @@ class ParquetTabularDatasetOp::Dataset::Iterator
       return s;
     }
     *end_of_sequence = true;
-    return OkStatus();
+    return TFOkStatus;
   }

  protected:
4 changes: 4 additions & 0 deletions deepray/custom_ops/utils/BUILD
@@ -1,4 +1,5 @@
 load("@local_config_cuda//cuda:build_defs.bzl", "cuda_library")
+load("@local_config_tf//:build_defs.bzl", "TF_VERSION_INTEGER")
 load("@org_tensorflow//tensorflow:tensorflow.bzl", "tf_copts")

 package(default_visibility = ["//visibility:public"])
@@ -24,6 +25,9 @@ cc_library(
     srcs = [
         "ok_status_util.h",
     ],
+    defines = [
+        TF_VERSION_INTEGER,
+    ],
     visibility = ["//visibility:public"],
 )
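Listing the loaded TF_VERSION_INTEGER string under `defines` is what finally turns it into a compiler flag: Bazel prefixes each entry with -D for this target and for everything that depends on it. A sketch of the generated file this load() reads, assuming a hypothetical TF 2.15.0 install:

# Hypothetical contents of the generated @local_config_tf//:build_defs.bzl:
D_GLIBCXX_USE_CXX11_ABI = "-D_GLIBCXX_USE_CXX11_ABI=1"
CPLUSPLUS_VERSION = "-std=c++17"
TF_VERSION_INTEGER = "TF_VERSION_INTEGER=2150"
# With defines = [TF_VERSION_INTEGER], ok_status_util.h (and its dependents)
# compile with -DTF_VERSION_INTEGER=2150, which the header uses to pick the
# right status spelling, as shown in the next file.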
2 changes: 1 addition & 1 deletion deepray/custom_ops/utils/ok_status_util.h
@@ -33,7 +33,7 @@ This code is for compatibility.*/
 #else
 // #pragma message(PRINT_MACRO(TF_VERSION_INTEGER))
 // #define TFOkStatus Status::OK()
-#define TFOkStatus absl::OkStatus()
+#define TFOkStatus Status::OK()
 #endif
 }  // namespace deepray
 }  // namespace tensorflow
3 changes: 0 additions & 3 deletions deepray/optimizers/adagrad.py
@@ -18,10 +18,7 @@
 from __future__ import division
 from __future__ import print_function

-import sys
-
 import tensorflow as tf
-from absl import flags

 from deepray.custom_ops.embedding_variable import gen_kv_variable_ops
 from deepray.custom_ops.embedding_variable import kv_variable_ops
3 changes: 0 additions & 3 deletions deepray/optimizers/ftrl.py
@@ -1,7 +1,4 @@
-import sys
-
 import tensorflow as tf
-from absl import flags

 from deepray.custom_ops.embedding_variable import gen_kv_variable_ops
 from deepray.custom_ops.embedding_variable import kv_variable_ops
10 changes: 10 additions & 0 deletions deepray/utils/keras_utils.py
@@ -15,7 +15,9 @@

 import multiprocessing
 import os
+import random

+import numpy as np
 import tensorflow as tf
 from absl import logging
 from tensorflow.python import tf2
@@ -153,3 +155,11 @@ def count_params(model):
   main_print(f"{'':20} ({model_size:,})")

   return model_size


+def set_random_seed(random_seed):
+  random.seed(random_seed)  # seed Python's RNG
+  np.random.seed(random_seed)  # seed NumPy's RNG
+  tf.random.set_seed(random_seed)  # seed TensorFlow's RNG (CPU)
+  # Deterministic GPU ops significantly slow training, so this stays disabled:
+  # os.environ["TF_DETERMINISTIC_OPS"] = "1"
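The seeding helper now lives here (moved out of deepray/core/trainer.py). A quick reproducibility check, as a sketch:

import tensorflow as tf
from deepray.utils.keras_utils import set_random_seed

set_random_seed(42)
a = tf.random.uniform([3])
set_random_seed(42)
b = tf.random.uniform([3])
assert bool(tf.reduce_all(a == b))  # reseeding replays the same random sequence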
2 changes: 1 addition & 1 deletion deepray/version.py
@@ -21,7 +21,7 @@
 # We follow Semantic Versioning (https://semver.org/)
 _MAJOR_VERSION = "0"
 _MINOR_VERSION = "21"
-_PATCH_VERSION = "96"
+_PATCH_VERSION = "97"

 # When building releases, we can update this value on the release branch to
 # reflect the current release candidate ('rc0', 'rc1') or, finally, the official
45 changes: 45 additions & 0 deletions modelzoo/LanguageModeling/Word2Vec/dataset.py
@@ -0,0 +1,45 @@
import tensorflow as tf
import tqdm


# Generates skip-gram pairs with negative sampling for a list of sequences
# (int-encoded sentences) based on window size, number of negative samples
# and vocabulary size.
def generate_training_data(sequences, window_size, num_ns, vocab_size, seed):
  # Elements of each training example are appended to these lists.
  targets, contexts, labels = [], [], []

  # Build the sampling table for `vocab_size` tokens.
  sampling_table = tf.keras.preprocessing.sequence.make_sampling_table(vocab_size)

  # Iterate over all sequences (sentences) in the dataset.
  for sequence in tqdm.tqdm(sequences):
    # Generate positive skip-gram pairs for a sequence (sentence).
    positive_skip_grams, _ = tf.keras.preprocessing.sequence.skipgrams(
        sequence, vocabulary_size=vocab_size, sampling_table=sampling_table, window_size=window_size, negative_samples=0
    )

    # Iterate over each positive skip-gram pair to produce training examples
    # with a positive context word and negative samples.
    for target_word, context_word in positive_skip_grams:
      context_class = tf.expand_dims(tf.constant([context_word], dtype="int64"), 1)
      negative_sampling_candidates, _, _ = tf.random.log_uniform_candidate_sampler(
          true_classes=context_class,
          num_true=1,
          num_sampled=num_ns,
          unique=True,
          range_max=vocab_size,
          seed=seed,
          name="negative_sampling",
      )

      # Build context and label vectors (for one target word)
      context = tf.concat([tf.squeeze(context_class, 1), negative_sampling_candidates], 0)
      label = tf.constant([1] + [0] * num_ns, dtype="int64")

      # Append each element from the training example to global lists.
      targets.append(target_word)
      contexts.append(context)
      labels.append(label)

  return targets, contexts, labels
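A minimal sketch of how generate_training_data might be consumed downstream; the toy corpus and hyperparameters here are illustrative only:

import tensorflow as tf
from dataset import generate_training_data  # this new module

sequences = [[1, 5, 3, 2, 7], [4, 2, 9, 1]]  # toy int-encoded sentences
targets, contexts, labels = generate_training_data(
    sequences, window_size=2, num_ns=4, vocab_size=10, seed=42
)

# Pack ((target, context), label) triples into a batched tf.data pipeline.
dataset = tf.data.Dataset.from_tensor_slices(((targets, contexts), labels))
dataset = dataset.shuffle(1000).batch(8)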