Skip to content

Commit 5496a36

Browse files
authored
Suppress C++ stacktrace on XLA_CHECK*() calls. (#9448)
1 parent 1d848e7 commit 5496a36

File tree

11 files changed

+132
-22
lines changed

11 files changed

+132
-22
lines changed

.github/scripts/run_tests.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@ function run_torch_xla_cpp_tests() {
5959
"test_runtime"
6060
"test_status"
6161
"test_status_dont_show_cpp_error_context"
62-
"test_status_show_cpp_error_context")
62+
"test_status_show_cpp_error_context"
63+
"test_debug_macros")
6364
for name in "${test_names[@]}"; do
6465
echo "Running $name cpp test..."
6566
/tmp/test/bin/${name}

BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ test_suite(
8181
"//test/cpp:test_status",
8282
"//test/cpp:test_status_dont_show_cpp_error_context",
8383
"//test/cpp:test_status_show_cpp_error_context",
84+
"//test/cpp:test_debug_macros",
8485
"//torch_xla/csrc/runtime:pjrt_computation_client_test",
8586
# "//torch_xla/csrc/runtime:ifrt_computation_client_test",
8687
],

test/cpp/BUILD

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,3 +187,14 @@ ptxla_cc_test(
187187
"@com_google_googletest//:gtest_main",
188188
],
189189
)
190+
191+
ptxla_cc_test(
192+
name = "test_debug_macros",
193+
srcs = ["test_debug_macros.cpp"],
194+
deps = [
195+
"//torch_xla/csrc:status",
196+
"//torch_xla/csrc/runtime:debug_macros",
197+
"//torch_xla/csrc/runtime:env_vars",
198+
"@com_google_googletest//:gtest_main",
199+
],
200+
)

test/cpp/run_tests.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,8 @@ if [[ "$RUN_CPP_TESTS" == "cpp_tests" ]]; then
103103
"test_runtime"
104104
"test_status"
105105
"test_status_dont_show_cpp_error_context"
106-
"test_status_show_cpp_error_context")
106+
"test_status_show_cpp_error_context"
107+
"test_debug_macros")
107108
fi
108109
for name in "${test_names[@]}"; do
109110
echo "Running $name cpp test..."

test/cpp/test_debug_macros.cpp

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
#include <gmock/gmock.h>
2+
#include <gtest/gtest.h>
3+
4+
#include "torch_xla/csrc/runtime/debug_macros.h"
5+
#include "torch_xla/csrc/runtime/env_vars.h"
6+
7+
namespace torch_xla {
8+
namespace {
9+
10+
using absl::StrCat;
11+
12+
TEST(DebugMacrosTest, Check) {
13+
auto line = __LINE__ + 1;
14+
EXPECT_THAT([] { XLA_CHECK(false) << "Error message."; },
15+
testing::ThrowsMessage<std::runtime_error>(testing::StartsWith(
16+
StrCat("Check failed: false: Error message. (at ", __FILE__,
17+
":", line, ")\n*** Begin stack trace ***"))));
18+
}
19+
20+
#define TEST_XLA_CHECK_OP_(opstr, lhs, rhs, compstr, valstr) \
21+
TEST(DebugMacrosTest, Check##opstr) { \
22+
EXPECT_THAT( \
23+
[] { XLA_CHECK_##opstr(lhs, rhs) << " Error message."; }, \
24+
testing::ThrowsMessage<std::runtime_error>(testing::StartsWith(StrCat( \
25+
"Check failed: " compstr " (" valstr ") Error message. (at ", \
26+
__FILE__, ":", __LINE__, ")\n*** Begin stack trace ***")))); \
27+
}
28+
29+
#define TEST_XLA_CHECK_OP(opstr, op, lhs, rhs) \
30+
TEST_XLA_CHECK_OP_(opstr, lhs, rhs, #lhs " " #op " " #rhs, #lhs " vs. " #rhs)
31+
32+
TEST_XLA_CHECK_OP(EQ, ==, 5, 8)
33+
TEST_XLA_CHECK_OP(NE, !=, 5, 5)
34+
TEST_XLA_CHECK_OP(LE, <=, 5, 1)
35+
TEST_XLA_CHECK_OP(LT, <, 5, 1)
36+
37+
// Since the implementation of GE and GT checks use their corresponding
38+
// less-than LE and LT versions with their arguments swapped, we need to modify
39+
// the expected error message accordingly.
40+
//
41+
// In other words:
42+
//
43+
// XLA_CHECK_GE(5, 8)
44+
//
45+
// Errors with the following message:
46+
//
47+
// Check failed: 8 <= 5 (8 vs. 5)
48+
#define TEST_XLA_CHECK_GREATER(opstr, lessop, lhs, rhs) \
49+
TEST_XLA_CHECK_OP_(opstr, lhs, rhs, #rhs " " #lessop " " #lhs, \
50+
#rhs " vs. " #lhs)
51+
52+
TEST_XLA_CHECK_GREATER(GE, <=, 5, 8)
53+
TEST_XLA_CHECK_GREATER(GT, <, 5, 8)
54+
55+
} // namespace
56+
} // namespace torch_xla
57+
58+
static void SetUp() {
59+
setenv(torch_xla::runtime::env::kEnvShowCppErrorContext, /* value= */ "true",
60+
/* replace= */ 1);
61+
}
62+
63+
int main(int argc, char** argv) {
64+
SetUp();
65+
::testing::InitGoogleTest(&argc, argv);
66+
return RUN_ALL_TESTS();
67+
}

torch_xla/csrc/runtime/BUILD

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,6 @@ cc_library(
164164
hdrs = ["debug_macros.h"],
165165
deps = [
166166
":tf_logging",
167-
"@tsl//tsl/platform:stacktrace",
168167
"@tsl//tsl/platform:statusor",
169168
"@tsl//tsl/platform:macros",
170169
],
@@ -391,6 +390,8 @@ cc_library(
391390
srcs = ["tf_logging.cpp"],
392391
hdrs = ["tf_logging.h"],
393392
deps = [
393+
"//torch_xla/csrc:status",
394+
"@tsl//tsl/platform:stacktrace",
394395
"@tsl//tsl/platform:statusor",
395396
"@xla//xla/service:platform_util",
396397
"@com_google_absl//absl/base:log_severity",

torch_xla/csrc/runtime/debug_macros.h

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,28 @@
66
#include "tsl/platform/stacktrace.h"
77
#include "tsl/platform/statusor.h"
88

9+
// XLA_SHOW_CPP_ERROR_CONTEXT environment variable changes the behavior of the
10+
// macros below, such as XLA_CHECK(), XLA_CHECK_EQ(), etc. (except for
11+
// XLA_CHECK_OK) in the following way:
12+
//
13+
// If XLA_SHOW_CPP_ERROR_CONTEXT environment variable is set, error messages
14+
// generated by these macros will include detailed source location information
15+
// (file name and line number) and a C++ stacktrace, providing a comprehensive
16+
// context for debugging.
17+
//
18+
// Otherwise, only the concise error message will be displayed, without the
19+
// additional source location or stacktrace details. This allows for cleaner
20+
// error output in production environments where a full stacktrace might be
21+
// unnecessary or undesirable.
922
#define XLA_ERROR() TF_ERROR_STREAM()
10-
#define XLA_CHECK(c) TF_CHECK(c) << "\n" << tsl::CurrentStackTrace()
11-
#define XLA_CHECK_OK(c) TF_CHECK_OK(c) << "\n" << tsl::CurrentStackTrace()
12-
#define XLA_CHECK_EQ(a, b) TF_CHECK_EQ(a, b) << "\n" << tsl::CurrentStackTrace()
13-
#define XLA_CHECK_NE(a, b) TF_CHECK_NE(a, b) << "\n" << tsl::CurrentStackTrace()
14-
#define XLA_CHECK_LE(a, b) TF_CHECK_LE(a, b) << "\n" << tsl::CurrentStackTrace()
15-
#define XLA_CHECK_GE(a, b) TF_CHECK_GE(a, b) << "\n" << tsl::CurrentStackTrace()
16-
#define XLA_CHECK_LT(a, b) TF_CHECK_LT(a, b) << "\n" << tsl::CurrentStackTrace()
17-
#define XLA_CHECK_GT(a, b) TF_CHECK_GT(a, b) << "\n" << tsl::CurrentStackTrace()
23+
#define XLA_CHECK(c) TF_CHECK(c)
24+
#define XLA_CHECK_OK(c) TF_CHECK_OK(c)
25+
#define XLA_CHECK_EQ(a, b) TF_CHECK_EQ(a, b)
26+
#define XLA_CHECK_NE(a, b) TF_CHECK_NE(a, b)
27+
#define XLA_CHECK_LE(a, b) TF_CHECK_LE(a, b)
28+
#define XLA_CHECK_GE(a, b) TF_CHECK_GE(a, b)
29+
#define XLA_CHECK_LT(a, b) TF_CHECK_LT(a, b)
30+
#define XLA_CHECK_GT(a, b) TF_CHECK_GT(a, b)
1831

1932
template <typename T>
2033
T ConsumeValue(absl::StatusOr<T>&& status) {

torch_xla/csrc/runtime/tf_logging.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,25 @@
22

33
#include <stdexcept>
44

5+
#include "torch_xla/csrc/status.h"
6+
#include "tsl/platform/stacktrace.h"
7+
58
namespace torch_xla {
69
namespace runtime {
710
namespace internal {
811

912
void ErrorGenerator::operator&(const std::basic_ostream<char>& oss) const {
1013
const ErrorSink& sink = dynamic_cast<const ErrorSink&>(oss);
11-
auto sink_str = sink.str();
12-
TF_VLOG(1) << sink_str;
14+
1315
std::stringstream ess;
14-
ess << file_ << ":" << line_ << " : " << sink_str;
16+
ess << sink.str();
17+
18+
if (ShouldShowCppErrorContext()) {
19+
ess << " (at " << file_ << ":" << line_ << ")\n";
20+
ess << tsl::CurrentStackTrace();
21+
}
22+
23+
TF_VLOG(1) << ess.str();
1524
// We cannot use AT_ERROR() here, due to layering issues.
1625
throw std::runtime_error(ess.str());
1726
}

torch_xla/csrc/runtime/tf_logging.h

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ class ErrorGenerator {
5353

5454
#define TF_CHECK(condition) \
5555
while (TF_PREDICT_FALSE(!(condition))) \
56-
TF_ERROR_STREAM() << "Check failed: " #condition " "
56+
TF_ERROR_STREAM() << "Check failed: " #condition ": "
5757

5858
#define TF_CHECK_OP_LOG(name, op, val1, val2) \
5959
while (::tsl::internal::CheckOpString _result{::tsl::internal::name##Impl( \
@@ -68,8 +68,12 @@ class ErrorGenerator {
6868
#define TF_CHECK_NE(val1, val2) TF_CHECK_OP(Check_NE, !=, val1, val2)
6969
#define TF_CHECK_LE(val1, val2) TF_CHECK_OP(Check_LE, <=, val1, val2)
7070
#define TF_CHECK_LT(val1, val2) TF_CHECK_OP(Check_LT, <, val1, val2)
71-
#define TF_CHECK_GE(val1, val2) TF_CHECK_OP(Check_GE, >=, val1, val2)
72-
#define TF_CHECK_GT(val1, val2) TF_CHECK_OP(Check_GT, >, val1, val2)
71+
72+
// Check_GEImpl and Check_GTImpl are actually implemented in terms of their
73+
// less-than versions. So, here, we do the same so that the error message
74+
// is consistent.
75+
#define TF_CHECK_GE(val1, val2) TF_CHECK_LE(val2, val1)
76+
#define TF_CHECK_GT(val1, val2) TF_CHECK_LT(val2, val1)
7377

7478
#undef TF_CHECK_OK
7579
#define TF_CHECK_OK(val) TF_DO_CHECK_OK(val, FATAL)

torch_xla/csrc/status.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,7 @@
66

77
namespace torch_xla {
88

9-
// Returns whether we should show C++ error context.
10-
//
11-
// More specifically, whether the `XLA_SHOW_CPP_ERROR_CONTEXT` environment
12-
// variable is set or not.
13-
static bool ShouldShowCppErrorContext() {
9+
bool ShouldShowCppErrorContext() {
1410
static const bool show_cpp_error_context = runtime::sys_util::GetEnvBool(
1511
runtime::env::kEnvShowCppErrorContext, false);
1612
return show_cpp_error_context;

0 commit comments

Comments
 (0)