[Arm64EC][clang] Implement varargs support in clang. #152411

Status: Open. Wants to merge 1 commit into main.
4 changes: 4 additions & 0 deletions clang/lib/CodeGen/ABIInfo.cpp
@@ -244,6 +244,10 @@ ABIInfo::getOptimalVectorMemoryType(llvm::FixedVectorType *T,
return T;
}

ABIArgInfo ABIInfo::classifyArgForArm64ECVarArg(QualType Ty) const {
llvm_unreachable("Only implemented for x86");
}

// Pin the vtable to this file.
SwiftABIInfo::~SwiftABIInfo() = default;

4 changes: 4 additions & 0 deletions clang/lib/CodeGen/ABIInfo.h
@@ -132,6 +132,10 @@ class ABIInfo {
virtual llvm::FixedVectorType *
getOptimalVectorMemoryType(llvm::FixedVectorType *T,
const LangOptions &Opt) const;

/// Used by Arm64EC calling convention code to call into x86 calling
/// convention code for varargs functions.
virtual ABIArgInfo classifyArgForArm64ECVarArg(QualType Ty) const;
};

/// Target specific hooks for defining how a type should be passed or returned
44 changes: 32 additions & 12 deletions clang/lib/CodeGen/Targets/AArch64.cpp
@@ -24,9 +24,16 @@ namespace {
class AArch64ABIInfo : public ABIInfo {
AArch64ABIKind Kind;

std::unique_ptr<TargetCodeGenInfo> WinX86_64CodegenInfo;

public:
AArch64ABIInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
: ABIInfo(CGT), Kind(Kind) {}
AArch64ABIInfo(CodeGenModule &CGM, AArch64ABIKind Kind)
: ABIInfo(CGM.getTypes()), Kind(Kind) {
if (getTarget().getTriple().isWindowsArm64EC()) {
WinX86_64CodegenInfo =
createWinX86_64TargetCodeGenInfo(CGM, X86AVXABILevel::None);
}
}

bool isSoftFloat() const { return Kind == AArch64ABIKind::AAPCSSoft; }

@@ -119,9 +126,9 @@ class AArch64SwiftABIInfo : public SwiftABIInfo {

class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
public:
AArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
: TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGT, Kind)) {
SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGT);
AArch64TargetCodeGenInfo(CodeGenModule &CGM, AArch64ABIKind Kind)
: TargetCodeGenInfo(std::make_unique<AArch64ABIInfo>(CGM, Kind)) {
SwiftInfo = std::make_unique<AArch64SwiftABIInfo>(CGM.getTypes());
}

StringRef getARCRetainAutoreleasedReturnValueMarker() const override {
@@ -200,8 +207,8 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {

class WindowsAArch64TargetCodeGenInfo : public AArch64TargetCodeGenInfo {
public:
WindowsAArch64TargetCodeGenInfo(CodeGenTypes &CGT, AArch64ABIKind K)
: AArch64TargetCodeGenInfo(CGT, K) {}
WindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM, AArch64ABIKind K)
: AArch64TargetCodeGenInfo(CGM, K) {}

void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
CodeGen::CodeGenModule &CGM) const override;
@@ -368,6 +375,12 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
unsigned &NPRN) const {
Ty = useFirstFieldIfTransparentUnion(Ty);

if (IsVariadicFn && getTarget().getTriple().isWindowsArm64EC()) {
// Arm64EC varargs functions use the x86_64 classification rules,
// not the AArch64 ABI rules.
return WinX86_64CodegenInfo->getABIInfo().classifyArgForArm64ECVarArg(Ty);
}

// Handle illegal vector types here.
if (isIllegalVectorType(Ty))
return coerceIllegalVector(Ty, NSRN, NPRN);
@@ -1151,9 +1164,16 @@ RValue AArch64ABIInfo::EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
QualType Ty, AggValueSlot Slot) const {
bool IsIndirect = false;

// Composites larger than 16 bytes are passed by reference.
if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
IsIndirect = true;
if (getTarget().getTriple().isWindowsArm64EC()) {
// MS x64 ABI requirement: "Any argument that doesn't fit in 8 bytes, or is
// not 1, 2, 4, or 8 bytes, must be passed by reference."
uint64_t Width = getContext().getTypeSize(Ty);
IsIndirect = Width > 64 || !llvm::isPowerOf2_64(Width);
} else {
// Composites larger than 16 bytes are passed by reference.
if (isAggregateTypeForABI(Ty) && getContext().getTypeSize(Ty) > 128)
IsIndirect = true;
}

return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect,
CGF.getContext().getTypeInfoInChars(Ty),
@@ -1345,11 +1365,11 @@ void AArch64ABIInfo::appendAttributeMangling(StringRef AttrStr,
std::unique_ptr<TargetCodeGenInfo>
CodeGen::createAArch64TargetCodeGenInfo(CodeGenModule &CGM,
AArch64ABIKind Kind) {
return std::make_unique<AArch64TargetCodeGenInfo>(CGM.getTypes(), Kind);
return std::make_unique<AArch64TargetCodeGenInfo>(CGM, Kind);
}

std::unique_ptr<TargetCodeGenInfo>
CodeGen::createWindowsAArch64TargetCodeGenInfo(CodeGenModule &CGM,
AArch64ABIKind K) {
return std::make_unique<WindowsAArch64TargetCodeGenInfo>(CGM.getTypes(), K);
return std::make_unique<WindowsAArch64TargetCodeGenInfo>(CGM, K);
}
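
For context on the EmitMSVAArg change above: under the quoted MS x64 rule, a va_arg slot holds the value itself only for widths of 1, 2, 4, or 8 bytes; any other size is passed by reference, so the slot holds a pointer to a caller-made copy. A minimal sketch of what that means for a varargs callee (illustrative only, not part of the patch; type and function names are made up):

#include <stdarg.h>

/* 8 bytes and a power of two: the value sits directly in the va_arg slot. */
typedef struct { char c[8]; } Direct8;
/* 12 bytes: not 1, 2, 4, or 8 bytes, so the slot holds a pointer to a copy. */
typedef struct { char c[12]; } Indirect12;

void consume(int n, ...) {
  va_list ap;
  va_start(ap, n);
  Direct8 d = va_arg(ap, Direct8);       /* read from the slot itself           */
  Indirect12 i = va_arg(ap, Indirect12); /* read through the pointer on Arm64EC */
  va_end(ap);
  (void)d;
  (void)i;
}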
6 changes: 6 additions & 0 deletions clang/lib/CodeGen/Targets/X86.cpp
@@ -1409,6 +1409,12 @@ class WinX86_64ABIInfo : public ABIInfo {
return isX86VectorCallAggregateSmallEnough(NumMembers);
}

ABIArgInfo classifyArgForArm64ECVarArg(QualType Ty) const override {
Contributor:
Why do we do Arm64EC in X86.cpp? Should it use a common name if it's shared?

Collaborator (Author):
classifyArgForArm64ECVarArg is exclusively for use by Arm64EC calling convention code... but it's implemented in the x64 calling convention code because it's exactly the algorithm we've already implemented for x64, and I don't want to copy-paste it.

I could refactor the code further to extract the underlying algorithm out of WinX86_64ABIInfo, but that doesn't seem like it would make the code more readable overall.

Contributor:
Let's add some comments to explain this for anyone else who is wondering why there's Arm in their X86. I'm not sure that factoring out this code really helps with readability or maintainability.

unsigned FreeSSERegs = 0;
return classify(Ty, FreeSSERegs, /*IsReturnType=*/false,
/*IsVectorCall=*/false, /*IsRegCall=*/false);
}

private:
ABIArgInfo classify(QualType Ty, unsigned &FreeSSERegs, bool IsReturnType,
bool IsVectorCall, bool IsRegCall) const;
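One possible shape for the explanatory comment requested above, so readers know why an Arm64EC hook lives in the x64 ABI code (wording is illustrative, not taken from the patch):

// Arm64EC varargs arguments are classified with the MS x64 rules, so the
// hook that AArch64.cpp calls for Arm64EC varargs lives here rather than
// duplicating the x64 classification algorithm in the AArch64 ABI code.
ABIArgInfo classifyArgForArm64ECVarArg(QualType Ty) const override {
  unsigned FreeSSERegs = 0;
  return classify(Ty, FreeSSERegs, /*IsReturnType=*/false,
                  /*IsVectorCall=*/false, /*IsRegCall=*/false);
}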
59 changes: 59 additions & 0 deletions clang/test/CodeGen/arm64ec-varargs.c
@@ -0,0 +1,59 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs --global-value-regex "f"
// RUN: %clang_cc1 -opaque-pointers -triple arm64ec-windows-msvc -emit-llvm -o - %s | FileCheck %s

typedef struct { float x[2]; } A;
typedef struct { float x[4]; } B;
void f(A a, ...) {
__builtin_va_list b;
__builtin_va_start(b, a);
float x = __builtin_va_arg(b, A).x[0];
float y = __builtin_va_arg(b, B).x[0];
}
void g(A a, B b) { f(a, b); }
Comment on lines +4 to +12
Contributor:
It would be nice to have a test with a size that isn’t a power of two.
Looks good to me overall.


// CHECK-LABEL: @f(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 4
// CHECK-NEXT: [[B:%.*]] = alloca ptr, align 8
// CHECK-NEXT: [[X:%.*]] = alloca float, align 4
// CHECK-NEXT: [[REF_TMP:%.*]] = alloca [[STRUCT_A]], align 4
// CHECK-NEXT: [[Y:%.*]] = alloca float, align 4
// CHECK-NEXT: [[REF_TMP2:%.*]] = alloca [[STRUCT_B:%.*]], align 4
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store i64 [[A_COERCE:%.*]], ptr [[COERCE_DIVE]], align 4
// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[B]])
// CHECK-NEXT: [[ARGP_CUR:%.*]] = load ptr, ptr [[B]], align 8
// CHECK-NEXT: [[ARGP_NEXT:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR]], i64 8
// CHECK-NEXT: store ptr [[ARGP_NEXT]], ptr [[B]], align 8
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP]], ptr align 8 [[ARGP_CUR]], i64 8, i1 false)
// CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[REF_TMP]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x float], ptr [[X1]], i64 0, i64 0
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX]], align 4
// CHECK-NEXT: store float [[TMP0]], ptr [[X]], align 4
// CHECK-NEXT: [[ARGP_CUR3:%.*]] = load ptr, ptr [[B]], align 8
// CHECK-NEXT: [[ARGP_NEXT4:%.*]] = getelementptr inbounds i8, ptr [[ARGP_CUR3]], i64 8
// CHECK-NEXT: store ptr [[ARGP_NEXT4]], ptr [[B]], align 8
// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARGP_CUR3]], align 8
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[REF_TMP2]], ptr align 4 [[TMP1]], i64 16, i1 false)
// CHECK-NEXT: [[X5:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[REF_TMP2]], i32 0, i32 0
// CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x float], ptr [[X5]], i64 0, i64 0
// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
// CHECK-NEXT: store float [[TMP2]], ptr [[Y]], align 4
// CHECK-NEXT: ret void
//
//
// CHECK-LABEL: @g(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[A:%.*]] = alloca [[STRUCT_A:%.*]], align 4
// CHECK-NEXT: [[B:%.*]] = alloca [[STRUCT_B:%.*]], align 4
// CHECK-NEXT: [[BYVAL_TEMP:%.*]] = alloca [[STRUCT_B]], align 4
// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: store [2 x float] [[A_COERCE:%.*]], ptr [[COERCE_DIVE]], align 4
// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[STRUCT_B]], ptr [[B]], i32 0, i32 0
// CHECK-NEXT: store [4 x float] [[B_COERCE:%.*]], ptr [[COERCE_DIVE1]], align 4
// CHECK-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[STRUCT_A]], ptr [[A]], i32 0, i32 0
// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[COERCE_DIVE2]], align 4
// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[BYVAL_TEMP]], ptr align 4 [[B]], i64 16, i1 false)
// CHECK-NEXT: call void (i64, ...) @f(i64 [[TMP0]], ptr dead_on_return noundef [[BYVAL_TEMP]])
// CHECK-NEXT: ret void
//
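
A case along the lines of the reviewer's suggestion could look like the sketch below: a 12-byte struct, whose size is not a power of two, should be passed by reference and read through a pointer in va_arg (illustrative only, not part of the patch; the CHECK lines would need to be regenerated with update_cc_test_checks.py):

// Hypothetical extra case: sizeof(C) == 12, which is not 1, 2, 4, or 8 bytes,
// so C should be passed by reference in Arm64EC varargs calls.
typedef struct { float x[3]; } C;
void h(A a, ...) {
  __builtin_va_list v;
  __builtin_va_start(v, a);
  float z = __builtin_va_arg(v, C).x[0];
}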