Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[X86] Enhance kCFI type IDs with a 3-bit arity indicator. #117121

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2196,7 +2196,8 @@ llvm::ConstantInt *CodeGenModule::CreateCrossDsoCfiTypeId(llvm::Metadata *MD) {
}

llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T) {
if (auto *FnType = T->getAs<FunctionProtoType>())
auto *FnType = T->getAs<FunctionProtoType>();
if (FnType)
T = getContext().getFunctionType(
FnType->getReturnType(), FnType->getParamTypes(),
FnType->getExtProtoInfo().withExceptionSpec(EST_None));
Expand All @@ -2209,8 +2210,32 @@ llvm::ConstantInt *CodeGenModule::CreateKCFITypeId(QualType T) {
if (getCodeGenOpts().SanitizeCfiICallNormalizeIntegers)
Out << ".normalized";

return llvm::ConstantInt::get(Int32Ty,
static_cast<uint32_t>(llvm::xxHash64(OutName)));
uint32_t OutHash = static_cast<uint32_t>(llvm::xxHash64(OutName));
const auto &Triple = getTarget().getTriple();
if (FnType && Triple.isX86() && Triple.isArch64Bit() && Triple.isOSLinux()) {
// Estimate the function's arity (i.e., the number of arguments) at the ABI
// level by counting the number of parameters that are likely to be passed
// in registers, such as pointers and 64-bit (or smaller) integers. The
// Linux x86-64 ABI allows up to 6 parameters to be passed in GPRs.
// Additional parameters or parameters larger than 64 bits may be passed on
// the stack, in which case the arity is denoted as 7.
bool MayHaveStackArgs = FnType->getNumParams() > 6;

for (unsigned int i = 0; !MayHaveStackArgs && i < FnType->getNumParams();
++i) {
const Type *PT = FnType->getParamType(i).getTypePtr();
if (!(PT->isPointerType() || (PT->isIntegralOrEnumerationType() &&
getContext().getTypeSize(PT) <= 64)))
MayHaveStackArgs = true;
}

// The 3-bit arity is concatenated with the lower 29 bits of the KCFI hash
// to form an enhanced KCFI type ID. This can prevent, for example, a
// 3-arity function's ID from ever colliding with a 2-arity function's ID.
OutHash = (OutHash << 3) | (MayHaveStackArgs ? 7 : FnType->getNumParams());
}

return llvm::ConstantInt::get(Int32Ty, OutHash);
}

void CodeGenModule::SetLLVMFunctionAttributes(GlobalDecl GD,
Expand Down
18 changes: 12 additions & 6 deletions clang/test/CodeGen/kcfi-normalize.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,25 +10,31 @@
void foo(void (*fn)(int), int arg) {
// CHECK-LABEL: define{{.*}}foo
// CHECK-SAME: {{.*}}!kcfi_type ![[TYPE1:[0-9]+]]
// CHECK: call void %0(i32 noundef %1){{.*}}[ "kcfi"(i32 1162514891) ]
// KCFI ID = 0x2A548E59
// CHECK: call void %0(i32 noundef %1){{.*}}[ "kcfi"(i32 710184537) ]
fn(arg);
}

void bar(void (*fn)(int, int), int arg1, int arg2) {
// CHECK-LABEL: define{{.*}}bar
// CHECK-SAME: {{.*}}!kcfi_type ![[TYPE2:[0-9]+]]
// CHECK: call void %0(i32 noundef %1, i32 noundef %2){{.*}}[ "kcfi"(i32 448046469) ]
// KCFI ID = 0xD5A52C2A
// CHECK: call void %0(i32 noundef %1, i32 noundef %2){{.*}}[ "kcfi"(i32 -710595542) ]
fn(arg1, arg2);
}

void baz(void (*fn)(int, int, int), int arg1, int arg2, int arg3) {
// CHECK-LABEL: define{{.*}}baz
// CHECK-SAME: {{.*}}!kcfi_type ![[TYPE3:[0-9]+]]
// CHECK: call void %0(i32 noundef %1, i32 noundef %2, i32 noundef %3){{.*}}[ "kcfi"(i32 -2049681433) ]
// KCFI ID = 0x2EA2BF3B
// CHECK: call void %0(i32 noundef %1, i32 noundef %2, i32 noundef %3){{.*}}[ "kcfi"(i32 782417723) ]
fn(arg1, arg2, arg3);
}

// CHECK: ![[#]] = !{i32 4, !"cfi-normalize-integers", i32 1}
// CHECK: ![[TYPE1]] = !{i32 -1143117868}
// CHECK: ![[TYPE2]] = !{i32 -460921415}
// CHECK: ![[TYPE3]] = !{i32 -333839615}
// KCFI ID = 0xDEEB3EA2
// CHECK: ![[TYPE1]] = !{i32 -555008350}
// KCFI ID = 0x24372DCB
// CHECK: ![[TYPE2]] = !{i32 607595979}
// KCFI ID = 0x60D0180C
// CHECK: ![[TYPE3]] = !{i32 1624250380}
22 changes: 19 additions & 3 deletions clang/test/CodeGen/kcfi.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@

/// Must emit __kcfi_typeid symbols for address-taken function declarations
// CHECK: module asm ".weak __kcfi_typeid_[[F4:[a-zA-Z0-9_]+]]"
// CHECK: module asm ".set __kcfi_typeid_[[F4]], [[#%d,HASH:]]"
/// Must not emit __kcfi_typeid symbols for non-address-taken declarations
// CHECK-NOT: module asm ".weak __kcfi_typeid_{{f6|_Z2f6v}}"

Expand All @@ -29,7 +28,7 @@ int __call(fn_t f) __attribute__((__no_sanitize__("kcfi"))) {

// CHECK: define dso_local{{.*}} i32 @{{call|_Z4callPFivE}}(ptr{{.*}} %f){{.*}}
int call(fn_t f) {
// CHECK: call{{.*}} i32 %{{.}}(){{.*}} [ "kcfi"(i32 [[#HASH]]) ]
// CHECK: call{{.*}} i32 %{{.}}(){{.*}} [ "kcfi"(i32 [[#%d,HASH:]]) ]
return f();
}

Expand All @@ -48,6 +47,20 @@ static int f5(void) { return 2; }
// CHECK-DAG: declare !kcfi_type ![[#TYPE]]{{.*}} i32 @{{f6|_Z2f6v}}()
extern int f6(void);

// Aggregate of 17 pointers (one scalar member plus a 16-element array), so a
// value of this type is far wider than 64 bits.
typedef struct {
int *p1;
int *p2[16];
} s_t;

// CHECK: define internal{{.*}} i32 @{{f7|_ZL2f7PFi3s_tEPS_}}(ptr{{.*}} %f, ptr{{.*}} %s){{.*}}
// Calls `f` indirectly, passing it a by-value copy of `*s`, and returns the
// callee's result plus one.
static int f7(int (*f)(s_t), s_t *s) {
// CHECK: call{{.*}} i32 %{{.*}} [ "kcfi"(i32 [[#%d,HASH4:]]) ]
return f(*s) + 1;
}

// CHECK: define internal{{.*}} i32 @{{f8|_ZL2f83s_t}}(ptr{{.*}} %s){{.*}} !kcfi_type ![[#%d,TYPE4:]]
// Takes an s_t by value and always returns 0; test() passes it to f7 as the
// indirect-call target.
static int f8(s_t s) { return 0; }

#ifndef __cplusplus
// C: define internal ptr @resolver1() #[[#]] !kcfi_type ![[#]] {
int ifunc1(int) __attribute__((ifunc("resolver1")));
Expand All @@ -59,12 +72,14 @@ long ifunc2(long) __attribute__((ifunc("resolver2")));
#endif

int test(void) {
s_t s;
return call(f1) +
__call((fn_t)f2) +
call(f3) +
call(f4) +
f5() +
f6();
f6() +
f7(f8, &s);
}

#ifdef __cplusplus
Expand All @@ -85,3 +100,4 @@ void test_member_call(void) {
// CHECK-DAG: ![[#TYPE]] = !{i32 [[#HASH]]}
// CHECK-DAG: ![[#TYPE2]] = !{i32 [[#%d,HASH2:]]}
// MEMBER-DAG: ![[#TYPE3]] = !{i32 [[#HASH3]]}
// CHECK-DAG: ![[#TYPE4]] = !{i32 [[#HASH4]]}
31 changes: 28 additions & 3 deletions llvm/lib/Transforms/Utils/ModuleUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
Expand All @@ -21,6 +21,7 @@
#include "llvm/Support/MD5.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/xxhash.h"
#include "llvm/TargetParser/Triple.h"

using namespace llvm;

Expand Down Expand Up @@ -208,10 +209,34 @@ void llvm::setKCFIType(Module &M, Function &F, StringRef MangledType) {
std::string Type = MangledType.str();
if (M.getModuleFlag("cfi-normalize-integers"))
Type += ".normalized";

uint32_t OutHash = static_cast<uint32_t>(llvm::xxHash64(Type));
Triple T(M.getTargetTriple());
if (T.isX86() && T.isArch64Bit() && T.isOSLinux()) {
// Estimate the function's arity (i.e., the number of arguments) at the ABI
// level by counting the number of parameters that are likely to be passed
// in registers, such as pointers and 64-bit (or smaller) integers. The
// Linux x86-64 ABI allows up to 6 parameters to be passed in GPRs.
// Additional parameters or parameters larger than 64 bits may be passed on
// the stack, in which case the arity is denoted as 7.
size_t NumParams = F.arg_size();
bool MayHaveStackArgs = NumParams > 6;

for (unsigned int i = 0; !MayHaveStackArgs && i < NumParams; ++i) {
const llvm::Type *PT = F.getArg(i)->getType();
if (!(PT->isPointerTy() || PT->getIntegerBitWidth() <= 64))
Copy link
Contributor Author

@scottconstable scottconstable Nov 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the if condition equivalent to what I wrote in CodeGenModule::CreateKCFITypeId with clang::Type? Specifically, is

      // typeof(*PT) = clang::Type
      if (!(PT->isPointerType() || (PT->isIntegralOrEnumerationType() &&
                                    getContext().getTypeSize(PT) <= 64)))

equivalent to:

      // typeof(*PT) = llvm::Type
      if (!(PT->isPointerTy() || PT->getIntegerBitWidth() <= 64))

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A front end such as Clang has already solved this. I think we can simply check the number of parameters.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It appears that clang does not reserve stack for large arguments and instead this is done later by the LLVM X86 backend. For example:

struct S {
    int *p1;
    int *p2;
    int array[8];
};

int foo(struct S s, struct S *sp) {
    return *s.p1 + *s.p2 + *sp->p1 + *sp->p2;
}

Then when I compile to LLVM IR I see:

define dso_local i32 @foo(ptr noundef byval(%struct.S) align 8 %s, ptr noundef %sp) #0 {

Which suggests an arity of 2. But the X86 backend transforms foo to pass s on the stack, and then sp becomes the sole argument and is passed in rdi. Hence, by the chart in the PR description, this should be treated as an arity-7 function:

Arity Indicator Description
0 0 parameters
1 1 parameter in RDI
2 2 parameters in RDI and RSI
3 3 parameters in RDI, RSI, and RDX
4 4 parameters in RDI, RSI, RDX, and RCX
5 5 parameters in RDI, RSI, RDX, RCX, and R8
6 6 parameters in RDI, RSI, RDX, RCX, R8, and R9
7 At least one parameter may be passed on the stack

This predicate:

      // typeof(*PT) = llvm::Type
      if (!(PT->isPointerTy() || PT->getIntegerBitWidth() <= 64))
        MayHaveStackArgs = true;

should prevent s from being counted as a register argument and correctly set the arity field to 7.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, byval is an exception. You can use hasPassPointeeByValueCopyAttr() to check it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the suggestion. I looked at llvm::Argument::hasPassPointeeByValueCopyAttr(), but it looks like it is only available where a function is being defined. It does not appear to be available where a call is made through a function pointer. Therefore, I'm not sure that llvm::Argument::hasPassPointeeByValueCopyAttr() will be helpful since KCFI requires the ID to be computed identically at both the call site and the call target.

Or, do you think I am overlooking something, and that there is a way to use llvm::Argument::hasPassPointeeByValueCopyAttr() or something similar at an indirect call site? As far as I can tell, the only information that is available at an indirect call site is the function pointer type, which does contain the number of arguments and their types, but does not appear to contain an indication as to whether an argument may be passed on the stack.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is an argument attribute, so it should be identical at the definition and at the call; otherwise we would have a mismatch. I assume a simple use like this should be OK.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @phoebewang, KCFI only computes hashes for indirect calls, not direct ones. The example you cited uses CallBase::getCalledFunction(), whose documentation reads "Returns the function called, or null if this is an indirect function invocation or the function signature does not match the call signature."

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, I don't understand the point about indirect calls. Here setKCFIType has an argument F, which will never be null. Why would we need another getCalledFunction()?

MayHaveStackArgs = true;
}

// The 3-bit arity is concatenated with the lower 29 bits of the KCFI hash
// to form an enhanced KCFI type ID. This can prevent, for example, a
// 3-arity function's ID from ever colliding with a 2-arity function's ID.
OutHash = (OutHash << 3) | (MayHaveStackArgs ? 7 : NumParams);
}

F.setMetadata(LLVMContext::MD_kcfi_type,
MDNode::get(Ctx, MDB.createConstant(ConstantInt::get(
Type::getInt32Ty(Ctx),
static_cast<uint32_t>(xxHash64(Type))))));
Type::getInt32Ty(Ctx), OutHash))));
// If the module was compiled with -fpatchable-function-entry, ensure
// we use the same patchable-function-prefix.
if (auto *MD = mdconst::extract_or_null<ConstantInt>(
Expand Down
7 changes: 4 additions & 3 deletions llvm/test/Transforms/GCOVProfiling/kcfi-normalize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
; RUN: mkdir -p %t && cd %t
; RUN: opt < %s -S -passes=insert-gcov-profiling \
; RUN: -mtriple=x86_64-unknown-linux-gnu | FileCheck \
; RUN: --check-prefixes=CHECK,CHECK-CTOR-INIT %s
; RUN: --check-prefixes=CHECK,CHECK-CTOR-INIT,CHECK-X86 %s
; RUN: opt < %s -S -passes=insert-gcov-profiling \
; RUN: -mtriple=powerpc64-ibm-aix | FileCheck \
; RUN: --check-prefixes=CHECK,CHECK-RT-INIT %s
; RUN: --check-prefixes=CHECK,CHECK-RT-INIT,CHECK-PPC %s

; Check for gcov initialization function pointers when we initialize
; the writeout and reset functions in the runtime.
Expand Down Expand Up @@ -39,4 +39,5 @@ entry:
; CHECK-CTOR-INIT: define internal void @__llvm_gcov_init()
; CHECK-CTOR-INIT-SAME: !kcfi_type ![[#TYPE]]

; CHECK: ![[#TYPE]] = !{i32 -440107680}
; CHECK-PPC: ![[#TYPE]] = !{i32 -440107680}
; CHECK-X86: ![[#TYPE]] = !{i32 774105856}
7 changes: 4 additions & 3 deletions llvm/test/Transforms/GCOVProfiling/kcfi.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
; RUN: mkdir -p %t && cd %t
; RUN: opt < %s -S -passes=insert-gcov-profiling \
; RUN: -mtriple=x86_64-unknown-linux-gnu | FileCheck \
; RUN: --check-prefixes=CHECK,CHECK-CTOR-INIT %s
; RUN: --check-prefixes=CHECK,CHECK-CTOR-INIT,CHECK-X86 %s
; RUN: opt < %s -S -passes=insert-gcov-profiling \
; RUN: -mtriple=powerpc64-ibm-aix | FileCheck \
; RUN: --check-prefixes=CHECK,CHECK-RT-INIT %s
; RUN: --check-prefixes=CHECK,CHECK-RT-INIT,CHECK-PPC %s

; Check for gcov initialization function pointers when we initialize
; the writeout and reset functions in the runtime.
Expand Down Expand Up @@ -37,4 +37,5 @@ entry:
; CHECK-CTOR-INIT: define internal void @__llvm_gcov_init()
; CHECK-CTOR-INIT-SAME: !kcfi_type ![[#TYPE]]

; CHECK: ![[#TYPE]] = !{i32 -1522505972}
; CHECK-PPC: ![[#TYPE]] = !{i32 -1522505972}
; CHECK-X86: ![[#TYPE]] = !{i32 704854112}
Loading