Skip to content

Commit

Permalink
[MSVC, ARM64] Add _Copy* and _Count* intrinsics (#66554)
Browse files Browse the repository at this point in the history
Implement the `_Count*` and `_Copy*` Windows ARM64 intrinsics:

```
double _CopyDoubleFromInt64(__int64)
float _CopyFloatFromInt32(__int32)
__int32 _CopyInt32FromFloat(float)
__int64 _CopyInt64FromDouble(double)
unsigned int _CountLeadingOnes(unsigned long)
unsigned int _CountLeadingOnes64(unsigned __int64)
unsigned int _CountLeadingSigns(long)
unsigned int _CountLeadingSigns64(__int64)
unsigned int _CountLeadingZeros(unsigned long)
unsigned int _CountLeadingZeros64(unsigned __int64)
unsigned int _CountOneBits(unsigned long)
unsigned int _CountOneBits64(unsigned __int64)
```

Full list of intrinsics here:
[https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics](https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics)

Bug: [65405](#65405)
  • Loading branch information
amykhuang authored Sep 21, 2023
1 parent 21e84e6 commit 03c698a
Show file tree
Hide file tree
Showing 4 changed files with 221 additions and 1 deletion.
15 changes: 14 additions & 1 deletion clang/include/clang/Basic/BuiltinsAArch64.def
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,6 @@ TARGET_HEADER_BUILTIN(__umulh, "ULLiULLiULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES,

// Entry layout used throughout this table:
//   TARGET_HEADER_BUILTIN(name, type string, attributes, header, languages, feature)
// NOTE(review): the type string lists the return type first, then the
// parameter types -- confirm against the encoding table in Builtins.def.
TARGET_HEADER_BUILTIN(__break, "vi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")


// __writex18* family: void-returning (leading "v") intrinsics taking two
// operands; presumably (offset, value) by analogy with the __readx18*
// declarations in intrin.h -- TODO confirm.
TARGET_HEADER_BUILTIN(__writex18byte, "vUNiUc", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(__writex18word, "vUNiUs", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(__writex18dword, "vUNiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
Expand All @@ -270,6 +269,20 @@ TARGET_HEADER_BUILTIN(__readx18word, "UsUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES,
// Declared in intrin.h as:
//   unsigned long    __readx18dword(unsigned long offset);
//   unsigned __int64 __readx18qword(unsigned long offset);
TARGET_HEADER_BUILTIN(__readx18dword, "UNiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(__readx18qword, "ULLiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")

// Bit-pattern copies between same-width integer and floating-point values.
// Matching intrin.h prototypes:
//   double  _CopyDoubleFromInt64(__int64);
//   float   _CopyFloatFromInt32(__int32);
//   __int32 _CopyInt32FromFloat(float);
//   __int64 _CopyInt64FromDouble(double);
TARGET_HEADER_BUILTIN(_CopyDoubleFromInt64, "dSLLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_CopyFloatFromInt32, "fSi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_CopyInt32FromFloat, "Sif", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_CopyInt64FromDouble, "SLLid", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")

// Bit-counting intrinsics. Every entry returns `unsigned int` ("Ui"); the
// plain variants take a (signed or unsigned) `long` and the *64 variants take
// a (signed or unsigned) `__int64`, mirroring the intrin.h prototypes.
// NOTE(review): "N" is the modifier that maps to MSVC's 32-bit `long`;
// confirm its exact meaning against the encoding table in Builtins.def.
TARGET_HEADER_BUILTIN(_CountLeadingOnes, "UiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_CountLeadingOnes64, "UiULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_CountLeadingSigns, "UiSNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_CountLeadingSigns64, "UiSLLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_CountLeadingZeros, "UiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_CountLeadingZeros64, "UiULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_CountOneBits, "UiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_CountOneBits64, "UiULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")

#undef BUILTIN
#undef LANGBUILTIN
#undef TARGET_BUILTIN
Expand Down
55 changes: 55 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10779,6 +10779,61 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Load;
}

if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
BuiltinID == AArch64::BI_CopyInt32FromFloat ||
BuiltinID == AArch64::BI_CopyInt64FromDouble) {
Value *Arg = EmitScalarExpr(E->getArg(0));
llvm::Type *RetTy = ConvertType(E->getType());
return Builder.CreateBitCast(Arg, RetTy);
}

if (BuiltinID == AArch64::BI_CountLeadingOnes ||
BuiltinID == AArch64::BI_CountLeadingOnes64 ||
BuiltinID == AArch64::BI_CountLeadingZeros ||
BuiltinID == AArch64::BI_CountLeadingZeros64) {
Value *Arg = EmitScalarExpr(E->getArg(0));
llvm::Type *ArgType = Arg->getType();

if (BuiltinID == AArch64::BI_CountLeadingOnes ||
BuiltinID == AArch64::BI_CountLeadingOnes64)
Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType));

Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});

if (BuiltinID == AArch64::BI_CountLeadingOnes64 ||
BuiltinID == AArch64::BI_CountLeadingZeros64)
Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
return Result;
}

// _CountLeadingSigns / _CountLeadingSigns64 lower to the AArch64 CLS
// intrinsics. Both llvm.aarch64.cls and llvm.aarch64.cls64 already yield an
// i32, which is the type the builtins return, so the call result is returned
// as-is; a truncation to i32 here would be a no-op.
if (BuiltinID == AArch64::BI_CountLeadingSigns ||
    BuiltinID == AArch64::BI_CountLeadingSigns64) {
  Value *Arg = EmitScalarExpr(E->getArg(0));

  // Pick the intrinsic that matches the operand width (i32 vs. i64).
  Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns)
                    ? CGM.getIntrinsic(Intrinsic::aarch64_cls)
                    : CGM.getIntrinsic(Intrinsic::aarch64_cls64);

  return Builder.CreateCall(F, Arg, "cls");
}

if (BuiltinID == AArch64::BI_CountOneBits ||
BuiltinID == AArch64::BI_CountOneBits64) {
Value *ArgValue = EmitScalarExpr(E->getArg(0));
llvm::Type *ArgType = ArgValue->getType();
Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);

Value *Result = Builder.CreateCall(F, ArgValue);
if (BuiltinID == AArch64::BI_CountOneBits64)
Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
return Result;
}

// Handle MSVC intrinsics before argument evaluation to prevent double
// evaluation.
if (std::optional<MSVCIntrin> MsvcIntId =
Expand Down
14 changes: 14 additions & 0 deletions clang/lib/Headers/intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,20 @@ unsigned char __readx18byte(unsigned long offset);
/* __readx18* intrinsics: each takes an offset and returns a value of the
 * width named in the suffix (word/dword/qword).
 * NOTE(review): presumably the offset is relative to the x18 register --
 * confirm against the ARM64 intrinsics documentation. */
unsigned short __readx18word(unsigned long offset);
unsigned long __readx18dword(unsigned long offset);
unsigned __int64 __readx18qword(unsigned long offset);

/* Bit-pattern copies between integer and floating-point values of the same
 * width; the bits are reinterpreted, no numeric conversion takes place. */
double _CopyDoubleFromInt64(__int64);
float _CopyFloatFromInt32(__int32);
__int32 _CopyInt32FromFloat(float);
__int64 _CopyInt64FromDouble(double);

/* Bit-counting intrinsics. All of them return the count as unsigned int;
 * the *64 variants take a 64-bit operand, the others a 32-bit (unsigned)
 * long. */
unsigned int _CountLeadingOnes(unsigned long);
unsigned int _CountLeadingOnes64(unsigned __int64);
unsigned int _CountLeadingSigns(long);
unsigned int _CountLeadingSigns64(__int64);
unsigned int _CountLeadingZeros(unsigned long);
unsigned int _CountLeadingZeros64(unsigned __int64);
unsigned int _CountOneBits(unsigned long);
unsigned int _CountOneBits64(unsigned __int64);
#endif

/*----------------------------------------------------------------------------*\
Expand Down
138 changes: 138 additions & 0 deletions clang/test/CodeGen/arm64-microsoft-intrinsics.c
Original file line number Diff line number Diff line change
Expand Up @@ -265,5 +265,143 @@ unsigned __int64 check__readx18qword(unsigned LONG offset) {
// CHECK-MSCOMPAT: %[[RETVAL:.*]] = load i64, ptr %[[PTR]], align 1
// CHECK-MSCOMPAT: ret i64 %[[RETVAL]]

// _CopyDoubleFromInt64: expects a plain i64 -> double bitcast of the loaded
// argument in MS-compatibility mode, and an undeclared-function error under
// the Linux check prefix.
double check__CopyDoubleFromInt64(__int64 arg1) {
return _CopyDoubleFromInt64(arg1);
}

// CHECK-MSCOMPAT: %[[ARG:.*]].addr = alloca i64, align 8
// CHECK-MSCOMPAT: store i64 %[[ARG]], ptr %[[ARG]].addr, align 8
// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i64, ptr %[[ARG]].addr, align 8
// CHECK-MSCOMPAT: %[[VAR1:.*]] = bitcast i64 %[[VAR0]] to double
// CHECK-MSCOMPAT: ret double %[[VAR1]]
// CHECK-LINUX: error: call to undeclared function '_CopyDoubleFromInt64'

// _CopyFloatFromInt32: expects a plain i32 -> float bitcast of the loaded
// argument in MS-compatibility mode, and an undeclared-function error under
// the Linux check prefix.
float check__CopyFloatFromInt32(__int32 arg1) {
return _CopyFloatFromInt32(arg1);
}

// CHECK-MSCOMPAT: %[[ARG:.*]].addr = alloca i32, align 4
// CHECK-MSCOMPAT: store i32 %[[ARG]], ptr %[[ARG]].addr, align 4
// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i32, ptr %[[ARG]].addr, align 4
// CHECK-MSCOMPAT: %[[VAR1:.*]] = bitcast i32 %[[VAR0]] to float
// CHECK-MSCOMPAT: ret float %[[VAR1]]
// CHECK-LINUX: error: call to undeclared function '_CopyFloatFromInt32'

// _CopyInt32FromFloat: expects a plain float -> i32 bitcast of the loaded
// argument in MS-compatibility mode, and an undeclared-function error under
// the Linux check prefix.
__int32 check__CopyInt32FromFloat(float arg1) {
return _CopyInt32FromFloat(arg1);
}

// CHECK-MSCOMPAT: %[[ARG:.*]].addr = alloca float, align 4
// CHECK-MSCOMPAT: store float %[[ARG]], ptr %[[ARG]].addr, align 4
// CHECK-MSCOMPAT: %[[VAR0:.*]] = load float, ptr %[[ARG]].addr, align 4
// CHECK-MSCOMPAT: %[[VAR1:.*]] = bitcast float %[[VAR0]] to i32
// CHECK-MSCOMPAT: ret i32 %[[VAR1]]
// CHECK-LINUX: error: call to undeclared function '_CopyInt32FromFloat'

// _CopyInt64FromDouble: expects a plain double -> i64 bitcast of the loaded
// argument in MS-compatibility mode, and an undeclared-function error under
// the Linux check prefix.
__int64 check__CopyInt64FromDouble(double arg1) {
return _CopyInt64FromDouble(arg1);
}

// CHECK-MSCOMPAT: %[[ARG:.*]].addr = alloca double, align 8
// CHECK-MSCOMPAT: store double %[[ARG]], ptr %[[ARG]].addr, align 8
// CHECK-MSCOMPAT: %[[VAR0:.*]] = load double, ptr %[[ARG]].addr, align 8
// CHECK-MSCOMPAT: %[[VAR1:.*]] = bitcast double %[[VAR0]] to i64
// CHECK-MSCOMPAT: ret i64 %[[VAR1]]
// CHECK-LINUX: error: call to undeclared function '_CopyInt64FromDouble'

// _CountLeadingOnes: expects the argument to be inverted (xor with -1) and the
// inverted value to be passed to llvm.ctlz.i32. The ctlz operand is matched
// through the captured name of the xor result rather than a hard-coded SSA
// number, so the check does not depend on value numbering.
unsigned int check__CountLeadingOnes(unsigned LONG arg1) {
  return _CountLeadingOnes(arg1);
}

// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca i32, align 4
// CHECK-MSCOMPAT: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4
// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4
// CHECK-MSCOMPAT: %[[VAR1:.*]] = xor i32 %[[VAR0]], -1
// CHECK-MSCOMPAT: %[[VAR2:.*]] = call i32 @llvm.ctlz.i32(i32 %[[VAR1]], i1 false)
// CHECK-MSCOMPAT: ret i32 %[[VAR2]]
// CHECK-LINUX: error: call to undeclared function '_CountLeadingOnes'

// _CountLeadingOnes64: expects xor with -1, llvm.ctlz.i64 on the inverted
// value, then a truncation of the i64 count to i32. Operands are matched
// through captured names instead of hard-coded SSA numbers.
unsigned int check__CountLeadingOnes64(unsigned __int64 arg1) {
  return _CountLeadingOnes64(arg1);
}

// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca i64, align 8
// CHECK-MSCOMPAT: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8
// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8
// CHECK-MSCOMPAT: %[[VAR1:.*]] = xor i64 %[[VAR0]], -1
// CHECK-MSCOMPAT: %[[VAR2:.*]] = call i64 @llvm.ctlz.i64(i64 %[[VAR1]], i1 false)
// CHECK-MSCOMPAT: %[[VAR3:.*]] = trunc i64 %[[VAR2]] to i32
// CHECK-MSCOMPAT: ret i32 %[[VAR3]]
// CHECK-LINUX: error: call to undeclared function '_CountLeadingOnes64'

unsigned int check__CountLeadingSigns(__int32 arg1) {
return _CountLeadingSigns(arg1);
}

// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca i32, align 4
// CHECK-MSCOMPAT: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4
// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4
// CHECK-MSCOMPAT: %[[CLS:.*]] = call i32 @llvm.aarch64.cls(i32 %[[VAR0]])
// CHECK-MSCOMPAT: ret i32 %[[CLS]]
// CHECK-LINUX: error: call to undeclared function '_CountLeadingSigns'

// _CountLeadingSigns64: expects a direct call to llvm.aarch64.cls64, which
// takes the i64 argument and already yields the i32 that is returned.
unsigned int check__CountLeadingSigns64(__int64 arg1) {
return _CountLeadingSigns64(arg1);
}

// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca i64, align 8
// CHECK-MSCOMPAT: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8
// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8
// CHECK-MSCOMPAT: %[[CLS:.*]] = call i32 @llvm.aarch64.cls64(i64 %[[VAR0]])
// CHECK-MSCOMPAT: ret i32 %[[CLS]]
// CHECK-LINUX: error: call to undeclared function '_CountLeadingSigns64'

unsigned int check__CountLeadingZeros(__int32 arg1) {
return _CountLeadingZeros(arg1);
}

// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca i32, align 4
// CHECK-MSCOMPAT: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4
// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4
// CHECK-MSCOMPAT: %[[VAR1:.*]] = call i32 @llvm.ctlz.i32(i32 %[[VAR0]], i1 false)
// CHECK-MSCOMPAT: ret i32 %[[VAR1]]
// CHECK-LINUX: error: call to undeclared function '_CountLeadingZeros'

unsigned int check__CountLeadingZeros64(__int64 arg1) {
return _CountLeadingZeros64(arg1);
}

// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca i64, align 8
// CHECK-MSCOMPAT: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8
// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8
// CHECK-MSCOMPAT: %[[VAR1:.*]] = call i64 @llvm.ctlz.i64(i64 %[[VAR0]], i1 false)
// CHECK-MSCOMPAT: %[[VAR2:.*]] = trunc i64 %[[VAR1]] to i32
// CHECK-MSCOMPAT: ret i32 %[[VAR2]]
// CHECK-LINUX: error: call to undeclared function '_CountLeadingZeros64'

// _CountOneBits: expects a direct call to llvm.ctpop.i32 on the loaded
// argument. The operand is matched through the captured name of the load
// rather than a hard-coded SSA number.
unsigned int check_CountOneBits(unsigned LONG arg1) {
  return _CountOneBits(arg1);
}

// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca i32, align 4
// CHECK-MSCOMPAT: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4
// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4
// CHECK-MSCOMPAT: %[[VAR1:.*]] = call i32 @llvm.ctpop.i32(i32 %[[VAR0]])
// CHECK-MSCOMPAT: ret i32 %[[VAR1]]
// CHECK-LINUX: error: call to undeclared function '_CountOneBits'

// _CountOneBits64: expects llvm.ctpop.i64 on the loaded argument followed by
// a truncation of the i64 count to i32. Operands are matched through captured
// names instead of hard-coded SSA numbers.
unsigned int check_CountOneBits64(unsigned __int64 arg1) {
  return _CountOneBits64(arg1);
}

// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca i64, align 8
// CHECK-MSCOMPAT: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8
// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8
// CHECK-MSCOMPAT: %[[VAR1:.*]] = call i64 @llvm.ctpop.i64(i64 %[[VAR0]])
// CHECK-MSCOMPAT: %[[VAR2:.*]] = trunc i64 %[[VAR1]] to i32
// CHECK-MSCOMPAT: ret i32 %[[VAR2]]
// CHECK-LINUX: error: call to undeclared function '_CountOneBits64'


// CHECK-MSCOMPAT: ![[MD2]] = !{!"x18"}
// CHECK-MSCOMPAT: ![[MD3]] = !{!"sp"}

0 comments on commit 03c698a

Please sign in to comment.