Skip to content

Commit

Permalink
[SYCL] Add support for -fsycl -fsycl-targets and -Xsycl-target.
Browse files Browse the repository at this point in the history
These are just option hooks and some underlying support; full-fledged
support for -Xsycl-target is not there yet.  This enables an end-to-end
compilation flow that produces a fat binary containing the host object and
the sycldevice binary.  Compilation of multiple source files is supported.

Signed-off-by: Vladimir Lazarev <vladimir.lazarev@intel.com>
  • Loading branch information
vladimirlaz committed Jan 22, 2019
1 parent 08b20c1 commit ee585e9
Show file tree
Hide file tree
Showing 17 changed files with 905 additions and 41 deletions.
10 changes: 10 additions & 0 deletions clang/include/clang/Basic/DiagnosticDriverKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ def err_drv_Xopenmp_target_missing_triple : Error<
"cannot deduce implicit triple value for -Xopenmp-target, specify triple using -Xopenmp-target=<triple>">;
def err_drv_invalid_Xopenmp_target_with_args : Error<
"invalid -Xopenmp-target argument: '%0', options requiring arguments are unsupported">;
def err_drv_Xsycl_target_missing_triple : Error<
"cannot deduce implicit triple value for -Xsycl-target, specify triple using -Xsycl-target=<triple>">;
def err_drv_invalid_Xsycl_target_with_args : Error<
"invalid -Xsycl-target argument: '%0', options requiring arguments are unsupported">;
def err_drv_argument_only_allowed_with : Error<
"invalid argument '%0' only allowed with '%1'">;
def err_drv_argument_not_allowed_with : Error<
Expand Down Expand Up @@ -204,15 +208,21 @@ def err_drv_optimization_remark_pattern : Error<
"%0 in '%1'">;
def err_drv_no_neon_modifier : Error<"[no]neon is not accepted as modifier, please use [no]simd instead">;
def err_drv_invalid_omp_target : Error<"OpenMP target is invalid: '%0'">;
def err_drv_invalid_sycl_target : Error<"SYCL target is invalid: '%0'">;
def err_drv_omp_host_ir_file_not_found : Error<
"The provided host compiler IR file '%0' is required to generate code for OpenMP target regions but cannot be found.">;
def err_drv_omp_host_target_not_supported : Error<
"The target '%0' is not a supported OpenMP host target.">;
def err_drv_expecting_fopenmp_with_fopenmp_targets : Error<
"The option -fopenmp-targets must be used in conjunction with a -fopenmp option compatible with offloading, please use -fopenmp=libomp or -fopenmp=libiomp5.">;
def err_drv_expecting_fsycl_with_fsycl_targets : Error<
"The option -fsycl-targets must be used in conjunction with -fsycl to enable offloading.">;
def warn_drv_omp_offload_target_duplicate : Warning<
"The OpenMP offloading target '%0' is similar to target '%1' already specified - will be ignored.">,
InGroup<OpenMPTarget>;
def warn_drv_sycl_offload_target_duplicate : Warning<
"The SYCL offloading target '%0' is similar to target '%1' already specified - will be ignored.">,
InGroup<SyclTarget>;
def warn_drv_omp_offload_target_missingbcruntime : Warning<
"No library '%0' found in the default clang lib directory or in LIBRARY_PATH. Expect degraded performance due to no inlining of runtime functions on target devices.">,
InGroup<OpenMPTarget>;
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/DiagnosticGroups.td
Original file line number Diff line number Diff line change
Expand Up @@ -995,6 +995,9 @@ def OpenMPClauses : DiagGroup<"openmp-clauses">;
def OpenMPLoopForm : DiagGroup<"openmp-loop-form">;
def OpenMPTarget : DiagGroup<"openmp-target">;

// SYCL warnings
def SyclTarget : DiagGroup<"sycl-target">;

// Backend warnings.
def BackendInlineAsm : DiagGroup<"inline-asm">;
def BackendFrameLargerThanEQ : DiagGroup<"frame-larger-than=">;
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Driver/Action.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ class Action {
OFK_Cuda = 0x02,
OFK_OpenMP = 0x04,
OFK_HIP = 0x08,
OFK_SYCL = 0x10
};

static const char *getClassName(ActionClass AC);
Expand Down
10 changes: 10 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,11 @@ def Xopenmp_target : Separate<["-"], "Xopenmp-target">,
def Xopenmp_target_EQ : JoinedAndSeparate<["-"], "Xopenmp-target=">,
HelpText<"Pass <arg> to the target offloading toolchain identified by <triple>.">,
MetaVarName<"<triple> <arg>">;
def Xsycl_target : Separate<["-"], "Xsycl-target">,
HelpText<"Pass <arg> to the SYCL based target offloading toolchain.">, MetaVarName<"<arg>">;
def Xsycl_target_EQ : JoinedAndSeparate<["-"], "Xsycl-target=">,
HelpText<"Pass <arg> to the SYCL based target offloading toolchain identified by <triple>.">,
MetaVarName<"<triple> <arg>">;
def z : Separate<["-"], "z">, Flags<[LinkerInput, RenderAsInput]>,
HelpText<"Pass -z <arg> to the linker">, MetaVarName<"<arg>">,
Group<Link_Group>;
Expand Down Expand Up @@ -1677,6 +1682,11 @@ def fstrict_vtable_pointers: Flag<["-"], "fstrict-vtable-pointers">,
HelpText<"Enable optimizations based on the strict rules for overwriting "
"polymorphic C++ objects">;
def fstrict_overflow : Flag<["-"], "fstrict-overflow">, Group<f_Group>;
def fsycl : Flag<["-"], "fsycl">, Group<f_Group>, Flags<[CC1Option, NoArgumentUnused]>,
HelpText<"generate SYCL code.">;
def fno_sycl : Flag<["-"], "fno-sycl">, Group<f_Group>, Flags<[NoArgumentUnused]>;
def fsycl_targets_EQ : CommaJoined<["-"], "fsycl-targets=">, Flags<[DriverOption, CC1Option]>,
HelpText<"Specify comma-separated list of triples SYCL offloading targets to be supported">;
def fsyntax_only : Flag<["-"], "fsyntax-only">,
Flags<[DriverOption,CoreOption,CC1Option]>, Group<Action_Group>;
def ftabstop_EQ : Joined<["-"], "ftabstop=">, Group<f_Group>;
Expand Down
10 changes: 6 additions & 4 deletions clang/include/clang/Driver/ToolChain.h
Original file line number Diff line number Diff line change
Expand Up @@ -273,12 +273,14 @@ class ToolChain {
return nullptr;
}

/// TranslateOpenMPTargetArgs - Create a new derived argument list for
/// that contains the OpenMP target specific flags passed via
/// TranslateOffloadTargetArgs - Create a new derived argument list
/// that contains the offload target specific flags passed via
/// -Xopenmp-target -opt=val OR -Xopenmp-target=<triple> -opt=val
virtual llvm::opt::DerivedArgList *TranslateOpenMPTargetArgs(
/// Also handles -Xsycl-target OR -Xsycl-target=<triple>
virtual llvm::opt::DerivedArgList *TranslateOffloadTargetArgs(
const llvm::opt::DerivedArgList &Args, bool SameTripleAsHost,
SmallVectorImpl<llvm::opt::Arg *> &AllocatedArgs) const;
SmallVectorImpl<llvm::opt::Arg *> &AllocatedArgs,
Action::OffloadKind DeviceOffloadKind) const;

/// Choose a tool to use to handle the action \p JA.
///
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Driver/Types.def
Original file line number Diff line number Diff line change
Expand Up @@ -102,5 +102,6 @@ TYPE("dSYM", dSYM, INVALID, "dSYM", "A")
TYPE("dependencies", Dependencies, INVALID, "d", "")
TYPE("cuda-fatbin", CUDA_FATBIN, INVALID, "fatbin","A")
TYPE("spirv", SPIRV, INVALID, "spv", "")
TYPE("sycl-header", SYCL_Header, INVALID, "h", "")
TYPE("hip-fatbin", HIP_FATBIN, INVALID, "hipfb", "A")
TYPE("none", Nothing, INVALID, nullptr, "u")
6 changes: 6 additions & 0 deletions clang/lib/Driver/Action.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ std::string Action::getOffloadingKindPrefix() const {
return "device-openmp";
case OFK_HIP:
return "device-hip";
case OFK_SYCL:
return "device-sycl";

// TODO: Add other programming models here.
}
Expand All @@ -116,6 +118,8 @@ std::string Action::getOffloadingKindPrefix() const {
Res += "-hip";
if (ActiveOffloadKindMask & OFK_OpenMP)
Res += "-openmp";
if (ActiveOffloadKindMask & OFK_SYCL)
Res += "-sycl";

// TODO: Add other programming models here.

Expand Down Expand Up @@ -152,6 +156,8 @@ StringRef Action::GetOffloadKindName(OffloadKind Kind) {
return "openmp";
case OFK_HIP:
return "hip";
case OFK_SYCL:
return "sycl";

// TODO: Add other programming models here.
}
Expand Down
1 change: 1 addition & 0 deletions clang/lib/Driver/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ add_clang_library(clangDriver
ToolChains/TCE.cpp
ToolChains/WebAssembly.cpp
ToolChains/XCore.cpp
ToolChains/SYCL.cpp
Types.cpp
XRayArgs.cpp

Expand Down
10 changes: 6 additions & 4 deletions clang/lib/Driver/Compilation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,12 +69,14 @@ Compilation::getArgsForToolChain(const ToolChain *TC, StringRef BoundArch,
if (!Entry) {
SmallVector<Arg *, 4> AllocatedArgs;
DerivedArgList *OpenMPArgs = nullptr;
// Translate OpenMP toolchain arguments provided via the -Xopenmp-target flags.
if (DeviceOffloadKind == Action::OFK_OpenMP) {
// Translate offload toolchain arguments provided via the -Xopenmp-target
// or -Xsycl-target flags.
if (DeviceOffloadKind == Action::OFK_OpenMP ||
DeviceOffloadKind == Action::OFK_SYCL) {
const ToolChain *HostTC = getSingleOffloadToolChain<Action::OFK_Host>();
bool SameTripleAsHost = (TC->getTriple() == HostTC->getTriple());
OpenMPArgs = TC->TranslateOpenMPTargetArgs(
*TranslatedArgs, SameTripleAsHost, AllocatedArgs);
OpenMPArgs = TC->TranslateOffloadTargetArgs(
*TranslatedArgs, SameTripleAsHost, AllocatedArgs, DeviceOffloadKind);
}

if (!OpenMPArgs) {
Expand Down
207 changes: 207 additions & 0 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include "ToolChains/TCE.h"
#include "ToolChains/WebAssembly.h"
#include "ToolChains/XCore.h"
#include "ToolChains/SYCL.h"
#include "clang/Basic/Version.h"
#include "clang/Config/config.h"
#include "clang/Driver/Action.h"
Expand Down Expand Up @@ -693,6 +694,61 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C,
<< OpenMPTargets->getAsString(C.getInputArgs());
}

//
// SYCL
//
// A SYCL device toolchain is registered for every distinct, valid triple
// listed in the last -fsycl-targets= option on the command line.
if (Arg *TargetsArg =
        C.getInputArgs().getLastArg(options::OPT_fsycl_targets_EQ)) {
  if (!TargetsArg->getNumValues()) {
    // The option was spelled without any triple after the '='.
    Diag(clang::diag::warn_drv_empty_joined_argument)
        << TargetsArg->getAsString(C.getInputArgs());
  } else if (!C.getInputArgs().hasFlag(options::OPT_fsycl,
                                       options::OPT_fno_sycl, false)) {
    // -fsycl-targets is only accepted when -fsycl is in effect.
    Diag(clang::diag::err_drv_expecting_fsycl_with_fsycl_targets);
  } else {
    // Normalized triple -> spelling under which it was first seen; used to
    // report (and skip) repeated targets.
    llvm::StringMap<const char *> SeenTriples;
    for (const char *Spelling : TargetsArg->getValues()) {
      llvm::Triple DeviceTriple(Spelling);
      std::string Normalized = DeviceTriple.normalize();

      // A triple that normalizes to an already recorded one is ignored with
      // a warning naming both spellings.
      auto Previous = SeenTriples.find(Normalized);
      if (Previous != SeenTriples.end()) {
        Diag(clang::diag::warn_drv_sycl_offload_target_duplicate)
            << Spelling << Previous->second;
        continue;
      }

      // Record the triple before validating it so that later repetitions of
      // an invalid triple are reported as duplicates, not as errors again.
      SeenTriples[Normalized] = Spelling;

      // Reject triples whose architecture could not be recognized.
      if (DeviceTriple.getArch() == llvm::Triple::UnknownArch) {
        Diag(clang::diag::err_drv_invalid_sycl_target) << Spelling;
        continue;
      }

      const ToolChain *HostTC =
          C.getSingleOffloadToolChain<Action::OFK_Host>();

      // The device toolchain depends on both the device and the host triple,
      // so both take part in the key into the ToolChains map.
      std::string Key = DeviceTriple.str() + "/" + HostTC->getTriple().str();
      auto &DeviceTC = ToolChains[Key];
      if (!DeviceTC)
        DeviceTC = llvm::make_unique<toolchains::SYCLToolChain>(
            *this, DeviceTriple, *HostTC, C.getInputArgs());

      C.addOffloadDeviceToolChain(DeviceTC.get(), Action::OFK_SYCL);
    }
  }
}

//
// TODO: Add support for other offloading programming models here.
//
Expand Down Expand Up @@ -2852,6 +2908,154 @@ class OffloadingActionBuilder final {
}
};

/// SYCL action builder. The host bitcode is passed to the device frontend
/// and all the device linked images are passed to the host link phase.
/// SPIR related are wrapped before added to the fat binary
class SYCLActionBuilder final : public DeviceActionBuilder {
/// The SYCL actions for the current input.
ActionList SYCLDeviceActions;

/// The linker inputs obtained for each toolchain.
SmallVector<ActionList, 8> DeviceLinkerInputs;

/// The compiler inputs obtained for each toolchain
Action * DeviceCompilerInput = nullptr;

public:
SYCLActionBuilder(Compilation &C, DerivedArgList &Args,
const Driver::InputList &Inputs)
: DeviceActionBuilder(C, Args, Inputs, Action::OFK_SYCL) {}

ActionBuilderReturnCode
getDeviceDependences(OffloadAction::DeviceDependences &DA,
phases::ID CurPhase, phases::ID FinalPhase,
PhasesTy &Phases) override {

// We should always have an action for each input.
assert(SYCLDeviceActions.size() == ToolChains.size() &&
"Number of SYCL actions and toolchains do not match.");

// FIXME: This adds the integrated header generation pass before the
// Host compilation pass so the Host can use the header generated. This
// can be improved upon to where the header generation and spv generation
// is done in the same step. Currently, its not too efficient.
// The host depends on the generated integrated header from the device
// compilation.
if (CurPhase == phases::Compile) {
for (Action *&A : SYCLDeviceActions) {
DeviceCompilerInput =
C.MakeAction<CompileJobAction>(A, types::TY_SYCL_Header);
}
DA.add(*DeviceCompilerInput, *ToolChains.front(), /*BoundArch=*/nullptr,
Action::OFK_SYCL);
// Clear the input file, it is already a dependence to a host
// action.
DeviceCompilerInput = nullptr;
}

// The host only depends on device action in the linking phase, when all
// the device images have to be embedded in the host image.
if (CurPhase == phases::Link) {
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
"Toolchains and linker inputs sizes do not match.");
auto LI = DeviceLinkerInputs.begin();
for (auto *A : SYCLDeviceActions) {
LI->push_back(A);
++LI;
}

// We passed the device action as a host dependence, so we don't need to
// do anything else with them.
SYCLDeviceActions.clear();
return ABRT_Success;
}

// By default, we produce an action for each device arch.
for (Action *&A : SYCLDeviceActions) {
A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A,
AssociatedOffloadKind);
}

return ABRT_Success;
}

ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override {

// If this is an input action replicate it for each SYCL toolchain.
if (auto *IA = dyn_cast<InputAction>(HostAction)) {
SYCLDeviceActions.clear();
for (unsigned I = 0; I < ToolChains.size(); ++I)
SYCLDeviceActions.push_back(
C.MakeAction<InputAction>(IA->getInputArg(), IA->getType()));
return ABRT_Success;
}

// If this is an unbundling action use it as is for each SYCL toolchain.
if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(HostAction)) {
SYCLDeviceActions.clear();
for (unsigned I = 0; I < ToolChains.size(); ++I) {
SYCLDeviceActions.push_back(UA);
UA->registerDependentActionInfo(
ToolChains[I], /*BoundArch=*/StringRef(), Action::OFK_SYCL);
}
return ABRT_Success;
}
return ABRT_Success;
}

void appendTopLevelActions(ActionList &AL) override {
if (SYCLDeviceActions.empty())
return;

// We should always have an action for each input.
assert(SYCLDeviceActions.size() == ToolChains.size() &&
"Number of SYCL actions and toolchains do not match.");

// Append all device actions followed by the proper offload action.
auto TI = ToolChains.begin();
for (auto *A : SYCLDeviceActions) {
OffloadAction::DeviceDependences Dep;
Dep.add(*A, **TI, /*BoundArch=*/nullptr, Action::OFK_SYCL);
AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType()));
++TI;
}
// We no longer need the action stored in this builder.
SYCLDeviceActions.clear();
}

void appendLinkDependences(OffloadAction::DeviceDependences &DA) override {
assert(ToolChains.size() == DeviceLinkerInputs.size() &&
"Toolchains and linker inputs sizes do not match.");

// Append a new link action for each device.
auto TC = ToolChains.begin();
for (auto &LI : DeviceLinkerInputs) {
auto *DeviceLinkAction =
C.MakeAction<LinkJobAction>(LI, types::TY_Image);
DA.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr,
Action::OFK_SYCL);
++TC;
}
}

bool initialize() override {
// Get the SYCL toolchains. If we don't get any, the action builder will
// know there is nothing to do related to SYCL offloading.
auto SYCLTCRange = C.getOffloadToolChains<Action::OFK_SYCL>();
for (auto TI = SYCLTCRange.first, TE = SYCLTCRange.second; TI != TE;
++TI)
ToolChains.push_back(TI->second);

DeviceLinkerInputs.resize(ToolChains.size());
return false;
}

bool canUseBundlerUnbundler() const override {
// SYCL should use bundled files whenever possible.
return true;
}
};

///
/// TODO: Add the implementation for other specialized builders here.
///
Expand Down Expand Up @@ -2879,6 +3083,9 @@ class OffloadingActionBuilder final {
// Create a specialized builder for OpenMP.
SpecializedBuilders.push_back(new OpenMPActionBuilder(C, Args, Inputs));

// Create a specialized builder for SYCL.
SpecializedBuilders.push_back(new SYCLActionBuilder(C, Args, Inputs));

//
// TODO: Build other specialized builders here.
//
Expand Down
Loading

0 comments on commit ee585e9

Please sign in to comment.