Skip to content

Commit fb458aa

Browse files
committed
Reland "[LLVM] Add GNU make jobserver support (#145131)"
With a fix for JobServerTest, in which the default parallel scheduling strategy is now saved and restored.
1 parent 9c118aa commit fb458aa

File tree

18 files changed

+1398
-30
lines changed

18 files changed

+1398
-30
lines changed

clang/include/clang/Driver/Options.td

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1258,8 +1258,9 @@ def offload_compression_level_EQ : Joined<["--"], "offload-compression-level=">,
12581258
HelpText<"Compression level for offload device binaries (HIP only)">;
12591259

12601260
def offload_jobs_EQ : Joined<["--"], "offload-jobs=">,
1261-
HelpText<"Specify the number of threads to use for device offloading tasks"
1262-
" during compilation.">;
1261+
HelpText<"Specify the number of threads to use for device offloading tasks "
1262+
"during compilation. Can be a positive integer or the string "
1263+
"'jobserver' to use the make-style jobserver from the environment.">;
12631264

12641265
defm offload_via_llvm : BoolFOption<"offload-via-llvm",
12651266
LangOpts<"OffloadViaLLVM">, DefaultFalse,

clang/lib/Driver/ToolChains/Clang.cpp

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9224,14 +9224,20 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA,
92249224
addOffloadCompressArgs(Args, CmdArgs);
92259225

92269226
if (Arg *A = Args.getLastArg(options::OPT_offload_jobs_EQ)) {
9227-
int NumThreads;
9228-
if (StringRef(A->getValue()).getAsInteger(10, NumThreads) ||
9229-
NumThreads <= 0)
9230-
C.getDriver().Diag(diag::err_drv_invalid_int_value)
9231-
<< A->getAsString(Args) << A->getValue();
9232-
else
9233-
CmdArgs.push_back(
9234-
Args.MakeArgString("--wrapper-jobs=" + Twine(NumThreads)));
9227+
StringRef Val = A->getValue();
9228+
9229+
if (Val.equals_insensitive("jobserver"))
9230+
CmdArgs.push_back(Args.MakeArgString("--wrapper-jobs=jobserver"));
9231+
else {
9232+
int NumThreads;
9233+
if (Val.getAsInteger(10, NumThreads) || NumThreads <= 0) {
9234+
C.getDriver().Diag(diag::err_drv_invalid_int_value)
9235+
<< A->getAsString(Args) << Val;
9236+
} else {
9237+
CmdArgs.push_back(
9238+
Args.MakeArgString("--wrapper-jobs=" + Twine(NumThreads)));
9239+
}
9240+
}
92359241
}
92369242

92379243
const char *Exec =

clang/test/Driver/hip-options.hip

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,3 +254,9 @@
254254
// RUN: --offload-arch=gfx1100 --offload-new-driver --offload-jobs=0x4 %s 2>&1 | \
255255
// RUN: FileCheck -check-prefix=INVJOBS %s
256256
// INVJOBS: clang: error: invalid integral value '0x4' in '--offload-jobs=0x4'
257+
258+
// RUN: %clang -### -Werror --target=x86_64-unknown-linux-gnu -nogpuinc -nogpulib \
259+
// RUN: --offload-arch=gfx1100 --offload-new-driver --offload-jobs=jobserver %s 2>&1 | \
260+
// RUN: FileCheck -check-prefix=JOBSV %s
261+
// JOBSV: clang-linker-wrapper{{.*}} "--wrapper-jobs=jobserver"
262+

clang/test/Driver/linker-wrapper.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,8 @@ __attribute__((visibility("protected"), used)) int x;
114114
// RUN: -fembed-offload-object=%t.out
115115
// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu --wrapper-jobs=4 \
116116
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA-PAR
117+
// RUN: clang-linker-wrapper --dry-run --host-triple=x86_64-unknown-linux-gnu --wrapper-jobs=jobserver \
118+
// RUN: --linker-path=/usr/bin/ld %t.o -o a.out 2>&1 | FileCheck %s --check-prefix=CUDA-PAR
117119

118120
// CUDA-PAR: fatbinary{{.*}}-64 --create {{.*}}.fatbin
119121

clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1295,12 +1295,18 @@ int main(int Argc, char **Argv) {
12951295

12961296
parallel::strategy = hardware_concurrency(1);
12971297
if (auto *Arg = Args.getLastArg(OPT_wrapper_jobs)) {
1298-
unsigned Threads = 0;
1299-
if (!llvm::to_integer(Arg->getValue(), Threads) || Threads == 0)
1300-
reportError(createStringError("%s: expected a positive integer, got '%s'",
1301-
Arg->getSpelling().data(),
1302-
Arg->getValue()));
1303-
parallel::strategy = hardware_concurrency(Threads);
1298+
StringRef Val = Arg->getValue();
1299+
if (Val.equals_insensitive("jobserver"))
1300+
parallel::strategy = jobserver_concurrency();
1301+
else {
1302+
unsigned Threads = 0;
1303+
if (!llvm::to_integer(Val, Threads) || Threads == 0)
1304+
reportError(createStringError(
1305+
"%s: expected a positive integer or 'jobserver', got '%s'",
1306+
Arg->getSpelling().data(), Val.data()));
1307+
else
1308+
parallel::strategy = hardware_concurrency(Threads);
1309+
}
13041310
}
13051311

13061312
if (Args.hasArg(OPT_wrapper_time_trace_eq)) {

clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ def wrapper_time_trace_granularity : Joined<["--"], "wrapper-time-trace-granular
5353

5454
def wrapper_jobs : Joined<["--"], "wrapper-jobs=">,
5555
Flags<[WrapperOnlyOption]>, MetaVarName<"<number>">,
56-
HelpText<"Sets the number of parallel jobs to use for device linking">;
56+
HelpText<"Sets the number of parallel jobs for device linking. Can be a "
57+
"positive integer or 'jobserver'.">;
5758

5859
def override_image : Joined<["--"], "override-image=">,
5960
Flags<[WrapperOnlyOption]>, MetaVarName<"<kind=file>">,
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
//===- llvm/Support/Jobserver.h - Jobserver Client --------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This file defines a client for the GNU Make jobserver protocol. This allows
10+
// LLVM tools to coordinate parallel execution with a parent `make` process.
11+
//
12+
// The jobserver protocol is a mechanism for GNU Make to share its pool of
13+
// available "job slots" with the subprocesses it invokes. This is particularly
14+
// useful for tools that can perform parallel operations themselves (e.g., a
15+
// multi-threaded linker or compiler). By participating in this protocol, a
16+
// tool can ensure the total number of concurrent jobs does not exceed the
17+
// limit specified by the user (e.g., `make -j8`).
18+
//
19+
// How it works:
20+
//
21+
// 1. Establishment:
22+
// A child process discovers the jobserver by inspecting the `MAKEFLAGS`
23+
// environment variable. If a jobserver is active, this variable will
24+
// contain a `--jobserver-auth=<value>` argument. The format of `<value>`
25+
// determines how to communicate with the server.
26+
//
27+
// 2. The Implicit Slot:
28+
// Every command invoked by `make` is granted one "implicit" job slot. This
29+
// means a tool can always perform at least one unit of work without needing
30+
// to communicate with the jobserver. This implicit slot should NEVER be
31+
// released back to the jobserver.
32+
//
33+
// 3. Acquiring and Releasing Slots:
34+
// On POSIX systems, the jobserver is implemented as a pipe. The
35+
// `--jobserver-auth` value specifies either a path to a named pipe
36+
// (`fifo:PATH`) or a pair of file descriptors (`R,W`). The pipe is
37+
// pre-loaded with single-character tokens, one for each available job slot.
38+
//
39+
// - To acquire an additional slot, a client reads a single-character token
40+
// from the pipe.
41+
// - To release a slot, the client must write the *exact same* character
42+
// token back to the pipe.
43+
//
44+
// It is critical that a client releases all acquired slots before it exits,
45+
// even in cases of error, to avoid deadlocking the build.
46+
//
47+
// Example:
48+
// A multi-threaded linker invoked by `make -j8` wants to use multiple
49+
// threads. It first checks for the jobserver. It knows it has one implicit
50+
// slot, so it can use one thread. It then tries to acquire 7 more slots by
51+
// reading 7 tokens from the jobserver pipe. If it only receives 3 tokens,
52+
// it knows it can use a total of 1 (implicit) + 3 (acquired) = 4 threads.
53+
// Before exiting, it must write the 3 tokens it read back to the pipe.
54+
//
55+
// For more context, see:
56+
// - GNU Make manual on job slots:
57+
// https://www.gnu.org/software/make/manual/html_node/Job-Slots.html
58+
// - LLVM RFC discussion on jobserver support:
59+
// https://discourse.llvm.org/t/rfc-adding-gnu-make-jobserver-
60+
// support-to-llvm-for-coordinated-parallelism/87034
61+
// - Ninja's jobserver support PR:
62+
// https://github.com/ninja-build/ninja/pull/2506
63+
//
64+
//===----------------------------------------------------------------------===//
65+
66+
#ifndef LLVM_SUPPORT_JOBSERVER_H
67+
#define LLVM_SUPPORT_JOBSERVER_H
68+
69+
#include "llvm/ADT/StringRef.h"
#include <cstdint>
#include <memory>
#include <string>
72+
73+
namespace llvm {
74+
75+
/// A JobSlot represents a single job slot that can be acquired from or released
/// to a jobserver pool. This class is move-only.
///
/// A JobSlot is always in exactly one of three states:
///   - invalid:  no slot is held (default-constructed or moved-from),
///   - implicit: the slot granted at startup (no pipe I/O),
///   - explicit: a token byte (0-255) carried by the jobserver pipe.
class JobSlot {
public:
  /// Default constructor creates an invalid instance.
  JobSlot() = default;

  /// Move construction transfers the slot and leaves \p Other invalid, so a
  /// slot is never represented by two JobSlot objects at once.
  JobSlot(JobSlot &&Other) noexcept : Value(Other.Value) {
    Other.Value = kInvalidValue;
  }

  /// Move assignment transfers the slot and leaves \p Other invalid.
  /// Note: a slot already held by *this is overwritten, not released.
  JobSlot &operator=(JobSlot &&Other) noexcept {
    if (this != &Other) {
      Value = Other.Value;
      Other.Value = kInvalidValue;
    }
    return *this;
  }

  // Copy operations are disallowed.
  JobSlot(const JobSlot &) = delete;
  JobSlot &operator=(const JobSlot &) = delete;

  /// Returns true if this instance is valid (either implicit or explicit).
  bool isValid() const { return Value >= 0; }

  /// Returns true if this instance represents the implicit job slot.
  bool isImplicit() const { return Value == kImplicitValue; }

  /// Returns true if this instance holds an explicit token, i.e. it is valid
  /// and is not the implicit slot.
  bool isExplicit() const { return isValid() && !isImplicit(); }

  /// Creates a slot holding the explicit token byte \p V.
  static JobSlot createExplicit(uint8_t V) {
    return JobSlot(static_cast<int16_t>(V));
  }

  /// Creates the implicit slot.
  static JobSlot createImplicit() { return JobSlot(kImplicitValue); }

  /// Returns the token byte of an explicit slot. Only declared here; the
  /// definition is not part of this header.
  /// NOTE(review): presumably requires isExplicit() -- confirm against the
  /// out-of-line definition.
  uint8_t getExplicitValue() const;

private:
  friend class JobserverClient;
  friend class JobserverClientImpl;

  /// Wraps a raw state value. Explicit so that an integer can never convert
  /// to a JobSlot silently; construction goes through createExplicit() and
  /// createImplicit().
  explicit JobSlot(int16_t V) : Value(V) {}

  /// The jobserver pipe carries explicit tokens (bytes 0-255). We reserve two
  /// sentinels in Value for special cases:
  ///   kInvalidValue  (-1):        no slot held
  ///   kImplicitValue (INT16_MAX): implicit slot granted at startup (no pipe
  ///                               I/O)
  ///
  /// We use int16_t because it can store every explicit token (0-255) and
  /// both sentinels without overflow, has a fixed 16-bit width, and avoids
  /// unsigned/signed mix-ups.
  static constexpr int16_t kInvalidValue = -1;
  static constexpr int16_t kImplicitValue = INT16_MAX;
  static_assert(kImplicitValue > UINT8_MAX,
                "implicit sentinel must not collide with an explicit token");
  int16_t Value = kInvalidValue;
};
132+
133+
/// The public interface for a jobserver client.
134+
/// This client is a lazy-initialized singleton that is created on first use.
/// The class is abstract; instances are obtained through getInstance().
135+
class JobserverClient {
136+
public:
137+
virtual ~JobserverClient();
138+
139+
/// Tries to acquire a job slot from the pool. On failure (e.g., if the pool
140+
/// is empty), this returns an invalid JobSlot instance. The first successful
141+
/// call will always return the implicit slot.
/// Callers should test the result with JobSlot::isValid().
142+
virtual JobSlot tryAcquire() = 0;
143+
144+
/// Releases a job slot back to the pool.
/// \p Slot is taken by value; JobSlot is move-only, so callers hand the slot
/// over with std::move().
/// NOTE(review): the file header says the implicit slot must never be
/// released back to the jobserver; presumably implementations handle an
/// implicit \p Slot without writing to the pipe -- confirm.
145+
virtual void release(JobSlot Slot) = 0;
146+
147+
/// Returns the number of job slots available, as determined on first use.
148+
/// This value is cached. Returns 0 if no jobserver is active.
149+
virtual unsigned getNumJobs() const = 0;
150+
151+
/// Returns the singleton instance of the JobserverClient.
152+
/// The instance is created on the first call to this function.
153+
/// Returns nullptr if no jobserver is configured or an error occurs, so
/// callers must check the result before using it.
154+
static JobserverClient *getInstance();
155+
156+
/// Resets the singleton instance. For testing purposes only.
157+
static void resetForTesting();
158+
};
159+
160+
} // end namespace llvm
161+
162+
#endif // LLVM_SUPPORT_JOBSERVER_H

llvm/include/llvm/Support/ThreadPool.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "llvm/ADT/DenseMap.h"
1717
#include "llvm/Config/llvm-config.h"
1818
#include "llvm/Support/Compiler.h"
19+
#include "llvm/Support/Jobserver.h"
1920
#include "llvm/Support/RWMutex.h"
2021
#include "llvm/Support/Threading.h"
2122
#include "llvm/Support/thread.h"
@@ -180,6 +181,7 @@ class LLVM_ABI StdThreadPool : public ThreadPoolInterface {
180181
void grow(int requested);
181182

182183
void processTasks(ThreadPoolTaskGroup *WaitingForGroup);
184+
void processTasksWithJobserver();
183185

184186
/// Threads in flight
185187
std::vector<llvm::thread> Threads;
@@ -208,6 +210,8 @@ class LLVM_ABI StdThreadPool : public ThreadPoolInterface {
208210

209211
/// Maximum number of threads to potentially grow this pool to.
210212
const unsigned MaxThreadCount;
213+
214+
JobserverClient *TheJobserver = nullptr;
211215
};
212216
#endif // LLVM_ENABLE_THREADS
213217

llvm/include/llvm/Support/Threading.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,11 @@ constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; }
142142
/// the thread shall remain on the actual CPU socket.
143143
LLVM_ABI std::optional<unsigned>
144144
compute_cpu_socket(unsigned ThreadPoolNum) const;
145+
146+
/// If true, the thread pool will attempt to coordinate with a GNU Make
147+
/// jobserver, acquiring a job slot before processing a task. If no
148+
/// jobserver is found in the environment, this is ignored.
149+
bool UseJobserver = false;
145150
};
146151

147152
/// Build a strategy from a number of threads as a string provided in \p Num.
@@ -210,6 +215,19 @@ constexpr bool llvm_is_multithreaded() { return LLVM_ENABLE_THREADS; }
210215
return S;
211216
}
212217

218+
/// Returns a thread strategy that attempts to coordinate with a GNU Make
219+
/// jobserver. The number of active threads will be limited by the number of
220+
/// available job slots. If no jobserver is detected in the environment, this
221+
/// strategy falls back to the default hardware_concurrency() behavior.
222+
inline ThreadPoolStrategy jobserver_concurrency() {
223+
ThreadPoolStrategy S;
224+
S.UseJobserver = true;
225+
// We can still request all threads be created, as they will simply
226+
// block waiting for a job slot if the jobserver is the limiting factor.
227+
S.ThreadsRequested = 0; // 0 means 'use all available'
228+
return S;
229+
}
230+
213231
/// Return the current thread id, as used in various OS system calls.
214232
/// Note that not all platforms guarantee that the value returned will be
215233
/// unique across the entire system, so portable code should not assume

llvm/lib/Support/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ add_llvm_component_library(LLVMSupport
207207
InstructionCost.cpp
208208
IntEqClasses.cpp
209209
IntervalMap.cpp
210+
Jobserver.cpp
210211
JSON.cpp
211212
KnownBits.cpp
212213
KnownFPClass.cpp

0 commit comments

Comments
 (0)