Skip to content
This repository has been archived by the owner on May 21, 2019. It is now read-only.

Commit

Permalink
[XRay] ARM 32-bit no-Thumb support in compiler-rt
Browse files Browse the repository at this point in the history
This is a port of XRay to ARM 32-bit, without Thumb support yet.
This is one of three commits, each to a different repository, that together make up the XRay ARM port. The other two are:

https://reviews.llvm.org/D23931 (LLVM)
https://reviews.llvm.org/D23932 (Clang test)

Differential Revision: https://reviews.llvm.org/D23933

git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@281971 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
deanberris committed Sep 20, 2016
1 parent d1742d2 commit 2bc83b9
Show file tree
Hide file tree
Showing 9 changed files with 423 additions and 117 deletions.
2 changes: 1 addition & 1 deletion cmake/config-ix.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64})
set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64})
set(ALL_ESAN_SUPPORTED_ARCH ${X86_64})
set(ALL_SCUDO_SUPPORTED_ARCH ${X86_64})
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64})
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32})

if(APPLE)
include(CompilerRTDarwinUtils)
Expand Down
3 changes: 2 additions & 1 deletion lib/sanitizer_common/scripts/gen_dynamic_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import re
import subprocess
import sys
import platform

new_delete = set([
'_Znam', '_ZnamRKSt9nothrow_t', # operator new[](unsigned long)
Expand Down Expand Up @@ -50,7 +51,7 @@ def get_global_functions(library):
raise subprocess.CalledProcessError(nm_proc.returncode, nm)
func_symbols = ['T', 'W']
# On PowerPC, nm prints function descriptors from .data section.
if os.uname()[4] in ["powerpc", "ppc64"]:
if platform.uname()[4] in ["powerpc", "ppc64"]:
func_symbols += ['D']
for line in nm_out:
cols = line.split(' ')
Expand Down
8 changes: 8 additions & 0 deletions lib/xray/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,17 @@ set(XRAY_SOURCES
)

set(x86_64_SOURCES
xray_x86_64.cc
xray_trampoline_x86_64.S
${XRAY_SOURCES})

set(arm_SOURCES
xray_arm.cc
xray_trampoline_arm.S
${XRAY_SOURCES})

set(armhf_SOURCES ${arm_SOURCES})

include_directories(..)
include_directories(../../include)

Expand Down
131 changes: 131 additions & 0 deletions lib/xray/xray_arm.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
//===-- xray_arm.cc ---------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of XRay, a dynamic runtime instrumentation system.
//
// Implementation of ARM-specific routines (32-bit).
//
//===----------------------------------------------------------------------===//
#include "xray_interface_internal.h"
#include "sanitizer_common/sanitizer_common.h"
#include <atomic>
#include <cassert>

namespace __xray {

// Fixed 32-bit ARM (A32) instruction words that the runtime patcher writes
// into, or restores in, an XRay sled. Each value is the complete encoding of
// the instruction named in the trailing comment.
enum class PatchOpcodes : uint32_t {
  // Opening instruction of the original, unpatched sled.
  PO_B20 = 0xEA000005, // B #20

  // Opening instruction of a patched sled.
  PO_PushR0Lr = 0xE92D4001, // PUSH {r0, lr}

  // Closing instructions of a patched sled.
  PO_BlxIp = 0xE12FFF3C,  // BLX ip
  PO_PopR0Lr = 0xE8BD4001 // POP {r0, lr}
};

// Spreads the low 16 bits of |Value| into the immediate fields of a MOVW/MOVT
// instruction word: 0xUUUUWXYZ -> 0x000W0XYZ.
// The top nibble (W) of the 16-bit immediate lands in bits 19:16 and the
// remaining 12 bits (XYZ) stay in bits 11:0; the upper half of |Value| (UUUU)
// is discarded.
inline static uint32_t getMovwMask(const uint32_t Value) {
  const uint32_t Imm12 = Value & 0x0fffu;
  const uint32_t Imm4 = (Value >> 12) & 0xfu;
  return (Imm4 << 16) | Imm12;
}

// Same field layout as getMovwMask(), but built from the high 16 bits of
// |Value|: 0xWXYZUUUU -> 0x000W0XYZ.
inline static uint32_t getMovtMask(const uint32_t Value) {
  const uint32_t UpperHalf = Value >> 16;
  return getMovwMask(UpperHalf);
}

// Emits a two-instruction sequence at |Address| that loads the full 32-bit
// |Value| into register R<regNo>:
//   MOVW R<regNo>, #<lower 16 bits of the |Value|>
//   MOVT R<regNo>, #<higher 16 bits of the |Value|>
// Returns the address immediately after the two words written.
inline static uint32_t* write32bitLoadReg(uint8_t regNo, uint32_t* Address,
                                          const uint32_t Value) {
  // A register number outside 0..15 cannot be encoded; this is a fatal
  // error, so it is not merely reported before continuing.
  assert(regNo <= 15 && "Register number must be 0 to 15.");
  // The destination register occupies bits 15:12 of both instructions.
  const uint32_t RdField = uint32_t(regNo) << 12;
  // MOVW R, #0xWXYZ in machine code is 0xE30WRXYZ
  Address[0] = 0xE3000000 | RdField | getMovwMask(Value);
  // MOVT R, #0xWXYZ in machine code is 0xE34WRXYZ
  Address[1] = 0xE3400000 | RdField | getMovtMask(Value);
  return Address + 2;
}

// Emits at |Address| the pair of instructions that loads |Value| into r0:
//   MOVW r0, #<lower 16 bits of the |Value|>
//   MOVT r0, #<higher 16 bits of the |Value|>
// Returns the address just past the emitted instructions.
inline static uint32_t *Write32bitLoadR0(uint32_t *Address,
                                         const uint32_t Value) {
  const uint8_t R0 = 0;
  return write32bitLoadReg(R0, Address, Value);
}

// Emits at |Address| the pair of instructions that loads |Value| into ip
// (register number 12):
//   MOVW ip, #<lower 16 bits of the |Value|>
//   MOVT ip, #<higher 16 bits of the |Value|>
// Returns the address just past the emitted instructions.
inline static uint32_t *Write32bitLoadIP(uint32_t *Address,
                                         const uint32_t Value) {
  const uint8_t IP = 12;
  return write32bitLoadReg(IP, Address, Value);
}

// Patches or unpatches one XRay sled.
//
// Enable      - true to install the call to |TracingHook|, false to restore
//               the branch that skips the sled.
// FuncId      - function ID that the patched sled passes to the hook in r0.
// Sled        - sled descriptor; only Sled.Address (the address of the first
//               instruction of the sled) is used here.
// TracingHook - routine the patched sled calls through ip.
//
// Always returns true.
inline static bool patchSled(const bool Enable, const uint32_t FuncId,
                             const XRaySledEntry &Sled, void (*TracingHook)()) {
  // When |Enable| == true,
  // We replace the following compile-time stub (sled):
  //
  // xray_sled_n:
  //   B #20
  //   6 NOPs (24 bytes)
  //
  // With the following runtime patch:
  //
  // xray_sled_n:
  //   PUSH {r0, lr}
  //   MOVW r0, #<lower 16 bits of function ID>
  //   MOVT r0, #<higher 16 bits of function ID>
  //   MOVW ip, #<lower 16 bits of address of TracingHook>
  //   MOVT ip, #<higher 16 bits of address of TracingHook>
  //   BLX ip
  //   POP {r0, lr}
  //
  // Replacement of the first 4-byte instruction should be the last and atomic
  // operation, so that the user code which reaches the sled concurrently
  // either jumps over the whole sled, or executes the whole sled when the
  // latter is ready.
  //
  // When |Enable|==false, we set back the first instruction in the sled to be
  //   B #20

  uint32_t *FirstAddress = reinterpret_cast<uint32_t *>(Sled.Address);
  if (Enable) {
    uint32_t *CurAddress = FirstAddress + 1;
    // FuncId is already a uint32_t, so it is passed through unchanged.
    CurAddress = Write32bitLoadR0(CurAddress, FuncId);
    // Go through uintptr_t so the pointer-to-integer conversion is
    // well-formed regardless of pointer width; on 32-bit ARM the value is
    // unchanged.
    CurAddress = Write32bitLoadIP(
        CurAddress,
        static_cast<uint32_t>(reinterpret_cast<uintptr_t>(TracingHook)));
    *CurAddress = uint32_t(PatchOpcodes::PO_BlxIp);
    CurAddress++;
    *CurAddress = uint32_t(PatchOpcodes::PO_PopR0Lr);
    CurAddress++;
    // The words above were written as data. Make the tail of the sled visible
    // to instruction fetch *before* publishing the first instruction, so a
    // thread that sees the new PUSH does not run into stale NOPs.
    __builtin___clear_cache(reinterpret_cast<char *>(FirstAddress + 1),
                            reinterpret_cast<char *>(CurAddress));
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
        uint32_t(PatchOpcodes::PO_PushR0Lr), std::memory_order_release);
  } else {
    std::atomic_store_explicit(
        reinterpret_cast<std::atomic<uint32_t> *>(FirstAddress),
        uint32_t(PatchOpcodes::PO_B20), std::memory_order_release);
  }
  // Publish the rewritten first instruction to instruction fetch as well;
  // without this the core may keep executing the previous contents of the
  // sled from its instruction cache.
  __builtin___clear_cache(reinterpret_cast<char *>(FirstAddress),
                          reinterpret_cast<char *>(FirstAddress + 1));
  return true;
}

// Patches (|Enable| == true) or unpatches (|Enable| == false) the sled
// described by |Sled| so that, when patched, it calls __xray_FunctionEntry
// with |FuncId|. Returns the result of patchSled().
// NOTE(review): __xray_FunctionEntry is presumably the entry trampoline from
// xray_trampoline_arm.S - confirm against xray_interface_internal.h.
bool patchFunctionEntry(const bool Enable, const uint32_t FuncId,
                        const XRaySledEntry &Sled) {
  void (*const EntryHook)() = __xray_FunctionEntry;
  return patchSled(Enable, FuncId, Sled, EntryHook);
}

// Patches (|Enable| == true) or unpatches (|Enable| == false) the sled
// described by |Sled| so that, when patched, it calls __xray_FunctionExit
// with |FuncId|. Returns the result of patchSled().
// NOTE(review): __xray_FunctionExit is presumably the exit trampoline from
// xray_trampoline_arm.S - confirm against xray_interface_internal.h.
bool patchFunctionExit(const bool Enable, const uint32_t FuncId,
                       const XRaySledEntry &Sled) {
  void (*const ExitHook)() = __xray_FunctionExit;
  return patchSled(Enable, FuncId, Sled, ExitHook);
}

} // namespace __xray
52 changes: 48 additions & 4 deletions lib/xray/xray_inmemory_log.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,14 @@
#include <sys/types.h>
#include <thread>
#include <unistd.h>
#include <x86intrin.h>

#if defined(__x86_64__)
#include <x86intrin.h>
#elif defined(__arm__)
static const int64_t NanosecondsPerSecond = 1000LL*1000*1000;
#else
#error "Unsupported CPU Architecture"
#endif /* CPU architecture */

#include "sanitizer_common/sanitizer_libc.h"
#include "xray/xray_records.h"
Expand Down Expand Up @@ -61,6 +68,7 @@ static void retryingWriteAll(int Fd, char *Begin, char *End) {
}
}

#if defined(__x86_64__)
static std::pair<ssize_t, bool> retryingReadSome(int Fd, char *Begin,
char *End) {
auto BytesToRead = std::distance(Begin, End);
Expand Down Expand Up @@ -103,6 +111,8 @@ static bool readValueFromFile(const char *Filename, long long *Value) {
return Result;
}

#endif /* CPU architecture */

class ThreadExitFlusher {
int Fd;
XRayRecord *Start;
Expand Down Expand Up @@ -164,6 +174,7 @@ void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type) {

// Get the cycle frequency from SysFS on Linux.
long long CPUFrequency = -1;
#if defined(__x86_64__)
if (readValueFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz",
&CPUFrequency)) {
CPUFrequency *= 1000;
Expand All @@ -174,6 +185,20 @@ void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type) {
} else {
Report("Unable to determine CPU frequency for TSC accounting.");
}
#elif defined(__arm__)
// There is no instruction like RDTSCP in user mode on ARM. ARM's CP15 does
// not have a constant frequency like TSC on x86(_64), it may go faster
// or slower depending on CPU turbo or power saving mode. Furthermore,
// to read from CP15 on ARM a kernel modification or a driver is needed.
// We can not require this from users of compiler-rt.
// So on ARM we use clock_gettime() which gives the result in nanoseconds.
// To get the measurements per second, we scale this by the number of
// nanoseconds per second, pretending that the TSC frequency is 1GHz and
// one TSC tick is 1 nanosecond.
CPUFrequency = NanosecondsPerSecond;
#else
#error "Unsupported CPU Architecture"
#endif /* CPU architecture */

// Since we're here, we get to write the header. We set it up so that the
// header will only be written once, at the start, and let the threads
Expand Down Expand Up @@ -201,10 +226,29 @@ void __xray_InMemoryRawLog(int32_t FuncId, XRayEntryType Type) {
// First we get the useful data, and stuff it into the already aligned buffer
// through a pointer offset.
auto &R = reinterpret_cast<__xray::XRayRecord *>(InMemoryBuffer)[Offset];
unsigned CPU;
R.RecordType = RecordTypes::NORMAL;
R.TSC = __rdtscp(&CPU);
R.CPU = CPU;
#if defined(__x86_64__)
{
unsigned CPU;
R.TSC = __rdtscp(&CPU);
R.CPU = CPU;
}
#elif defined(__arm__)
{
timespec TS;
int result = clock_gettime(CLOCK_REALTIME, &TS);
if(result != 0)
{
Report("clock_gettime() returned %d, errno=%d.", result, int(errno));
TS.tv_sec = 0;
TS.tv_nsec = 0;
}
R.TSC = TS.tv_sec * NanosecondsPerSecond + TS.tv_nsec;
R.CPU = 0;
}
#else
#error "Unsupported CPU Architecture"
#endif /* CPU architecture */
R.TId = TId;
R.Type = Type;
R.FuncId = FuncId;
Expand Down
Loading

0 comments on commit 2bc83b9

Please sign in to comment.