Skip to content

Commit 860ebba

Browse files
authored
[SYCL][NativeCPU] Update clang-linker-wrapper. (#19422)
The current support for NativeCPU in clang-linker-wrapper had not been maintained; this commit updates it to work again and adds some minimal testing.
1 parent 11e92b2 commit 860ebba

File tree

6 files changed

+84
-8
lines changed

6 files changed

+84
-8
lines changed

clang/test/Driver/Inputs/native_cpu/libsycl-nativecpu_utils.bc

Whitespace-only changes.

clang/test/Driver/sycl-linker-wrapper.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -201,3 +201,18 @@
201201
// CHK-CMDS-DEVICE-LIB-DIR-NEXT: "{{.*}}llvm-link" {{.*}} --suppress-warnings
202202
// CHK-CMDS-DEVICE-LIB-DIR-NEXT: "{{.*}}llvm-link" -only-needed {{.*}} --suppress-warnings
203203
// CHK-CMDS-DEVICE-LIB-DIR-NEXT: "{{.*}}sycl-post-link"{{.*}} --device-lib-dir={{.*}}/Inputs/SYCL/lib {{.*}} SYCL_POST_LINK_OPTIONS {{.*}}
204+
205+
/// Check that libsycl-nativecpu_utils.bc gets linked in for Native CPU.
206+
// -------
207+
// Generate .o file as linker wrapper input.
208+
//
209+
// RUN: %clang %s -fsycl -fsycl-targets=native_cpu -c --offload-new-driver -o %t6.o
210+
//
211+
// RUN: clang-linker-wrapper "--host-triple=x86_64-unknown-linux-gnu" "-sycl-device-library-location=%S/Inputs/native_cpu" "--sycl-post-link-options=SYCL_POST_LINK_OPTIONS" "--linker-path=/usr/bin/ld" "--" HOST_LINKER_FLAGS "-dynamic-linker" HOST_DYN_LIB "-o" "a.out" %t6.o --dry-run 2>&1 | FileCheck -check-prefix=CHK-CMDS-NATIVE-CPU %s
212+
// CHK-CMDS-NATIVE-CPU: "{{.*}}/spirv-to-ir-wrapper" {{.*}} --llvm-spirv-opts --spirv-preserve-auxdata --spirv-target-env=SPV-IR --spirv-builtin-format=global
213+
// CHK-CMDS-NATIVE-CPU-NEXT: "{{.*}}llvm-link" {{.*}} --suppress-warnings
214+
// CHK-CMDS-NATIVE-CPU-NEXT: "{{.*}}sycl-post-link" {{.*}} SYCL_POST_LINK_OPTIONS
215+
// CHK-CMDS-NATIVE-CPU-NEXT: "{{.*}}clang" --no-default-config -o [[OUT1:.*\.img]] --target=x86_64-unknown-linux-gnu -Wno-override-module -mllvm -sycl-native-cpu-backend -c {{.*}} -Xclang -mlink-bitcode-file -Xclang {{.*}}/libsycl-nativecpu_utils.bc
216+
// CHK-CMDS-NATIVE-CPU-NEXT: offload-wrapper: input: [[OUT1]], output: [[OUT2:.*\.bc]]
217+
// CHK-CMDS-NATIVE-CPU-NEXT: "{{.*}}clang" --target=x86_64-unknown-linux-gnu -c -o [[OUT3:.*\.o]] [[OUT2]]
218+
// CHK-CMDS-NATIVE-CPU-NEXT: "{{.*}}ld" -- HOST_LINKER_FLAGS -dynamic-linker HOST_DYN_LIB -o a.out [[OUT1]] [[OUT3]] {{.*\.o}}

clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Lines changed: 55 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -674,9 +674,8 @@ getTripleBasedSYCLPostLinkOpts(const ArgList &Args,
674674
// because it only increases amount of code for device compiler to handle,
675675
// without any actual benefits.
676676
// TODO: Try to extend this feature for non-Intel GPUs.
677-
if ((!Args.hasFlag(OPT_no_sycl_remove_unused_external_funcs,
678-
OPT_sycl_remove_unused_external_funcs, false) &&
679-
!Triple.isNativeCPU()) &&
677+
if (!Args.hasFlag(OPT_no_sycl_remove_unused_external_funcs,
678+
OPT_sycl_remove_unused_external_funcs, false) &&
680679
!Args.hasArg(OPT_sycl_allow_device_image_dependencies) &&
681680
!Triple.isNVPTX() && !Triple.isAMDGPU())
682681
PostLinkArgs.push_back("-emit-only-kernels-as-entry-points");
@@ -1567,8 +1566,7 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
15671566
return ClangPath.takeError();
15681567

15691568
llvm::Triple Triple(Args.getLastArgValue(OPT_triple_EQ));
1570-
if (Triple.isNativeCPU())
1571-
Triple = llvm::Triple(Args.getLastArgValue(OPT_host_triple_EQ));
1569+
llvm::Triple HostTriple(Args.getLastArgValue(OPT_host_triple_EQ));
15721570

15731571
StringRef Arch = Args.getLastArgValue(OPT_arch_EQ);
15741572
// Create a new file to write the linked device image to. Assume that the
@@ -1585,7 +1583,9 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
15851583
"--no-default-config",
15861584
"-o",
15871585
*TempFileOrErr,
1588-
Args.MakeArgString("--target=" + Triple.getTriple()),
1586+
Args.MakeArgString(
1587+
"--target=" +
1588+
(Triple.isNativeCPU() ? HostTriple : Triple).getTriple()),
15891589
};
15901590

15911591
if (!Arch.empty())
@@ -1602,16 +1602,24 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
16021602
{"-Xlinker",
16031603
Args.MakeArgString("--plugin-opt=" + StringRef(Arg->getValue()))});
16041604

1605-
if (!Triple.isNVPTX() && !Triple.isSPIRV())
1605+
if (!Triple.isNVPTX() && !Triple.isSPIRV() && !Triple.isNativeCPU())
16061606
CmdArgs.push_back("-Wl,--no-undefined");
16071607

16081608
if (IsSYCLKind && Triple.isNVPTX())
16091609
CmdArgs.push_back("-S");
1610+
1611+
if (IsSYCLKind && Triple.isNativeCPU()) {
1612+
CmdArgs.push_back("-Wno-override-module");
1613+
CmdArgs.push_back("-mllvm");
1614+
CmdArgs.push_back("-sycl-native-cpu-backend");
1615+
CmdArgs.push_back("-c");
1616+
}
1617+
16101618
for (StringRef InputFile : InputFiles)
16111619
CmdArgs.push_back(InputFile);
16121620

16131621
// If this is CPU offloading we copy the input libraries.
1614-
if (!Triple.isGPU()) {
1622+
if (!Triple.isGPU() && !Triple.isNativeCPU()) {
16151623
CmdArgs.push_back("-Wl,-Bsymbolic");
16161624
CmdArgs.push_back("-shared");
16171625
ArgStringList LinkerArgs;
@@ -1664,6 +1672,38 @@ Expected<StringRef> clang(ArrayRef<StringRef> InputFiles, const ArgList &Args,
16641672
Args.MakeArgString(Arg.split('=').second)});
16651673
}
16661674

1675+
// Link the NativeCPU utils library if it is found in the SYCL device library location.
1676+
if (Triple.isNativeCPU()) {
1677+
if (auto *A = Args.getLastArg(OPT_sycl_device_library_location_EQ)) {
1678+
std::string NativeCPUUtilsLib = "";
1679+
1680+
SmallVector<std::string, 8> LibraryPaths;
1681+
for (const auto &Path : A->getValues()) {
1682+
SmallString<128> LPath(Path);
1683+
if (llvm::sys::fs::exists(LPath)) {
1684+
LibraryPaths.emplace_back(LPath);
1685+
}
1686+
}
1687+
1688+
for (auto &LPath : LibraryPaths) {
1689+
// Look for the nativecpu_utils bitcode library in this directory; it is
1690+
// linked via clang's -mlink-bitcode-file below, not a separate llvm-link call.
1691+
const char LibNativeCPUUtilsName[] = "libsycl-nativecpu_utils.bc";
1692+
SmallString<128> LibNativeCPUUtilsPath(LPath);
1693+
llvm::sys::path::append(LibNativeCPUUtilsPath, LibNativeCPUUtilsName);
1694+
if (llvm::sys::fs::exists(LibNativeCPUUtilsPath)) {
1695+
NativeCPUUtilsLib = LibNativeCPUUtilsPath.str();
1696+
break;
1697+
}
1698+
}
1699+
1700+
if (NativeCPUUtilsLib != "") {
1701+
CmdArgs.append({"-Xclang", "-mlink-bitcode-file", "-Xclang",
1702+
Args.MakeArgString(NativeCPUUtilsLib)});
1703+
}
1704+
}
1705+
}
1706+
16671707
// The OpenMPOpt pass can introduce new calls and is expensive, we do
16681708
// not want this when running CodeGen through clang.
16691709
if (Args.hasArg(OPT_clang_backend) || Args.hasArg(OPT_builtin_bitcode_EQ))
@@ -2137,6 +2177,13 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
21372177
SplitModules[I].ModuleFilePath = *BundledFileOrErr;
21382178
} else {
21392179
SplitModules[I].ModuleFilePath = *ClangOutputOrErr;
2180+
if (Triple.isNativeCPU()) {
2181+
// Add to WrappedOutput directly rather than combining this with the
2182+
// below because WrappedOutput holds references and
2183+
// SplitModules[I].ModuleFilePath will go out of scope too soon.
2184+
std::scoped_lock Guard(ImageMtx);
2185+
WrappedOutput.push_back(*ClangOutputOrErr);
2186+
}
21402187
}
21412188
}
21422189

sycl/test/native_cpu/atomic-base.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
// RUN: %clangxx -fsycl -fsycl-targets=native_cpu %s -o %t
55
// RUN: env ONEAPI_DEVICE_SELECTOR="native_cpu:cpu" %t
66

7+
// RUN: %clangxx -fsycl -fsycl-targets=native_cpu %s -o %t-new --offload-new-driver
8+
// RUN: env ONEAPI_DEVICE_SELECTOR="native_cpu:cpu" %t-new
9+
710
#include <sycl/sycl.hpp>
811
#include <vector>
912

sycl/test/native_cpu/multiple_tu.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,13 @@
1212
//RUN: %clangxx -fsycl -fsycl-targets=native_cpu -g %t_plusone-debug.o %t_main-debug.o %t_init-debug.o -o %t-debug
1313
//RUN: env ONEAPI_DEVICE_SELECTOR=native_cpu:cpu %t-debug
1414

15+
// New offload driver
16+
//RUN: %clangxx -fsycl -fsycl-targets=native_cpu --offload-new-driver %s -c -o %t_main-new_offload.o
17+
//RUN: %clangxx -fsycl -fsycl-targets=native_cpu --offload-new-driver %S/Inputs/init.cpp -c -o %t_init-new_offload.o
18+
//RUN: %clangxx -fsycl -fsycl-targets=native_cpu --offload-new-driver %S/Inputs/plusone.cpp -c -o %t_plusone-new_offload.o
19+
//RUN: %clangxx -fsycl -fsycl-targets=native_cpu --offload-new-driver %t_plusone-new_offload.o %t_main-new_offload.o %t_init-new_offload.o -o %t-new_offload
20+
//RUN: env ONEAPI_DEVICE_SELECTOR=native_cpu:cpu %t-new_offload
21+
1522
#include "Inputs/common.h"
1623
#include <iostream>
1724

sycl/test/native_cpu/vector-add.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
// verify the (profiling) outputs.
2323
// RUN: %clangxx -fsycl -fsycl-targets=native_cpu %s -fprofile-instr-generate -fcoverage-mapping -mllvm -system-headers-coverage -c -o %t
2424

25+
// Use new offload driver
26+
// RUN: %clangxx -fsycl -fsycl-targets=native_cpu %s -o %t-new --offload-new-driver
27+
// RUN: env ONEAPI_DEVICE_SELECTOR="native_cpu:cpu" %t-new
28+
2529
#include <sycl/sycl.hpp>
2630

2731
#include <array>

0 commit comments

Comments
 (0)