Merge Fix dpcpp memory issue and capture exception in raw_free
This PR fixes a dpcpp memory issue between queues and captures exceptions in raw_free

Summary:
limits direct memcpy between different backends or devices (GPU)
adds a free-after-kernel test
captures exceptions in raw_free

Related PR: #832
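
For illustration only (not part of the commit), a minimal sketch of the user-visible effect of the queue restriction, assuming a machine with at least one DPC++ GPU device; the variable names and the umbrella include are hypothetical, while the executor calls themselves appear in the diff below:

#include <ginkgo/ginkgo.hpp>

int main()
{
    auto host = gko::ReferenceExecutor::create();
    // Two DPC++ executors on the same GPU device still own separate queues,
    // so after this change they report each other's memory as inaccessible.
    auto dpcpp_a = gko::DpcppExecutor::create(0, host, "gpu");
    auto dpcpp_b = gko::DpcppExecutor::create(0, host, "gpu");
    bool direct = dpcpp_a->memory_accessible(dpcpp_b);  // now false
    // A copy between the two executors is no longer a direct device memcpy:
    // raw_copy_to throws NotSupported (GKO_NOT_SUPPORTED) and
    // Executor::copy_from falls back to routing the data through the
    // master (host) executor.
    int vals[] = {3, 8};
    auto *on_a = dpcpp_a->alloc<int>(2);
    auto *on_b = dpcpp_b->alloc<int>(2);
    dpcpp_a->copy_from(host.get(), 2, vals, on_a);     // host    -> queue A
    dpcpp_b->copy_from(dpcpp_a.get(), 2, on_a, on_b);  // queue A -> queue B
    dpcpp_a->free(on_a);
    dpcpp_b->free(on_b);
    return direct ? 1 : 0;
}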
yhmtsai authored Jul 16, 2021
2 parents 26ee87e + 883dd5c commit 8b49b65
Showing 6 changed files with 174 additions and 32 deletions.
69 changes: 65 additions & 4 deletions .gitlab-ci.yml
@@ -722,7 +722,8 @@ build/dpcpp/cpu/release/static:
BUILD_SHARED_LIBS: "ON"
SYCL_DEVICE_TYPE: "CPU"

build/dpcpp/igpu/release/static:
# This makes two GPU backends available in the tests
build/dpcpp/igpu/release/shared:
<<: *default_build_with_test
extends:
- .full_test_condition
@@ -733,13 +734,59 @@ build/dpcpp/igpu/release/static:
CXX_COMPILER: "dpcpp"
BUILD_DPCPP: "ON"
BUILD_TYPE: "Release"
BUILD_SHARED_LIBS: "ON"
DPCPP_SINGLE_MODE: "ON"
SYCL_DEVICE_TYPE: "GPU"

build/dpcpp/opencl_igpu/release/static:
<<: *default_build_with_test
extends:
- .quick_test_condition
- .use_gko-oneapi-igpu
variables:
<<: *default_variables
C_COMPILER: "gcc"
CXX_COMPILER: "dpcpp"
BUILD_DPCPP: "ON"
BUILD_TYPE: "Release"
BUILD_SHARED_LIBS: "OFF"
DPCPP_SINGLE_MODE: "ON"
SYCL_DEVICE_FILTER: "OpenCL"
SYCL_DEVICE_TYPE: "GPU"

build/dpcpp/level_zero_igpu/debug/static:
<<: *default_build_with_test
extends:
- .full_test_condition
- .use_gko-oneapi-igpu
variables:
<<: *default_variables
C_COMPILER: "gcc"
CXX_COMPILER: "dpcpp"
BUILD_DPCPP: "ON"
BUILD_TYPE: "Debug"
BUILD_SHARED_LIBS: "OFF"
DPCPP_SINGLE_MODE: "ON"
SYCL_DEVICE_FILTER: "Level_Zero:GPU"

build/dpcpp/dgpu/debug/shared:
# This makes two GPU backends available in the tests
build/dpcpp/dgpu/release/static:
<<: *default_build_with_test
extends:
- .quick_test_condition
- .use_gko-oneapi-igpu
variables:
<<: *default_variables
C_COMPILER: "gcc"
CXX_COMPILER: "dpcpp"
BUILD_DPCPP: "ON"
BUILD_TYPE: "Release"
BUILD_SHARED_LIBS: "OF"
DPCPP_SINGLE_MODE: "ON"
SYCL_DEVICE_TYPE: "GPU"

build/dpcpp/level_zero_dgpu/release/shared:
<<: *default_build_with_test
image: localhost:5000/gko-oneapi
extends:
- .quick_test_condition
- .use_gko-oneapi-dgpu
@@ -748,10 +795,24 @@ build/dpcpp/dgpu/debug/shared:
C_COMPILER: "gcc"
CXX_COMPILER: "dpcpp"
BUILD_DPCPP: "ON"
BUILD_TYPE: "Debug"
BUILD_TYPE: "Release"
DPCPP_SINGLE_MODE: "ON"
SYCL_DEVICE_FILTER: "Level_Zero:GPU"

build/dpcpp/opencl_dgpu/debug/shared:
<<: *default_build_with_test
extends:
- .full_test_condition
- .use_gko-oneapi-dgpu
variables:
<<: *default_variables
C_COMPILER: "gcc"
CXX_COMPILER: "dpcpp"
BUILD_DPCPP: "ON"
BUILD_TYPE: "Debug"
DPCPP_SINGLE_MODE: "ON"
SYCL_DEVICE_FILTER: "OpenCL"
SYCL_DEVICE_TYPE: "GPU"

# Job treating important warnings as errors
warnings:
12 changes: 12 additions & 0 deletions core/test/base/executor.cpp
@@ -541,14 +541,20 @@ TEST(Executor, CanVerifyMemory)
std::shared_ptr<gko::DpcppExecutor> host_dpcpp;
std::shared_ptr<gko::DpcppExecutor> cpu_dpcpp;
std::shared_ptr<gko::DpcppExecutor> gpu_dpcpp;
std::shared_ptr<gko::DpcppExecutor> host_dpcpp_dup;
std::shared_ptr<gko::DpcppExecutor> cpu_dpcpp_dup;
std::shared_ptr<gko::DpcppExecutor> gpu_dpcpp_dup;
if (gko::DpcppExecutor::get_num_devices("host")) {
host_dpcpp = gko::DpcppExecutor::create(0, omp, "host");
host_dpcpp_dup = gko::DpcppExecutor::create(0, omp, "host");
}
if (gko::DpcppExecutor::get_num_devices("cpu")) {
cpu_dpcpp = gko::DpcppExecutor::create(0, omp, "cpu");
cpu_dpcpp_dup = gko::DpcppExecutor::create(0, omp, "cpu");
}
if (gko::DpcppExecutor::get_num_devices("gpu")) {
gpu_dpcpp = gko::DpcppExecutor::create(0, omp, "gpu");
gpu_dpcpp_dup = gko::DpcppExecutor::create(0, omp, "gpu");
}

ASSERT_EQ(false, ref->memory_accessible(omp));
@@ -566,18 +572,24 @@ TEST(Executor, CanVerifyMemory)
ASSERT_EQ(false, ref->memory_accessible(host_dpcpp));
ASSERT_EQ(true, host_dpcpp->memory_accessible(omp));
ASSERT_EQ(true, omp->memory_accessible(host_dpcpp));
ASSERT_EQ(true, host_dpcpp->memory_accessible(host_dpcpp_dup));
ASSERT_EQ(true, host_dpcpp_dup->memory_accessible(host_dpcpp));
}
if (gko::DpcppExecutor::get_num_devices("cpu")) {
ASSERT_EQ(false, ref->memory_accessible(cpu_dpcpp));
ASSERT_EQ(false, cpu_dpcpp->memory_accessible(ref));
ASSERT_EQ(true, cpu_dpcpp->memory_accessible(omp));
ASSERT_EQ(true, omp->memory_accessible(cpu_dpcpp));
ASSERT_EQ(true, cpu_dpcpp->memory_accessible(cpu_dpcpp_dup));
ASSERT_EQ(true, cpu_dpcpp_dup->memory_accessible(cpu_dpcpp));
}
if (gko::DpcppExecutor::get_num_devices("gpu")) {
ASSERT_EQ(false, gpu_dpcpp->memory_accessible(ref));
ASSERT_EQ(false, ref->memory_accessible(gpu_dpcpp));
ASSERT_EQ(false, gpu_dpcpp->memory_accessible(omp));
ASSERT_EQ(false, omp->memory_accessible(gpu_dpcpp));
ASSERT_EQ(false, gpu_dpcpp->memory_accessible(gpu_dpcpp_dup));
ASSERT_EQ(false, gpu_dpcpp_dup->memory_accessible(gpu_dpcpp));
}
#if GINKGO_HIP_PLATFORM_NVCC
ASSERT_EQ(true, hip->memory_accessible(cuda));
58 changes: 47 additions & 11 deletions dpcpp/base/executor.dp.cpp
@@ -102,8 +102,29 @@ void DpcppExecutor::populate_exec_info(const MachineTopology *mach_topo)

void DpcppExecutor::raw_free(void *ptr) const noexcept
{
queue_->wait_and_throw();
sycl::free(ptr, queue_->get_context());
// the free function may or may not synchronize execution, depending on
// the implementation or backend, so it is not guaranteed.
// TODO: maybe a light wait implementation?
try {
queue_->wait_and_throw();
sycl::free(ptr, queue_->get_context());
} catch (cl::sycl::exception &err) {
#if GKO_VERBOSE_LEVEL >= 1
// Unfortunately, if memory free fails, there's not much we can do
std::cerr << "Unrecoverable Dpcpp error on device "
<< this->get_device_id() << " in " << __func__ << ": "
<< err.what() << std::endl
<< "Exiting program" << std::endl;
#endif // GKO_VERBOSE_LEVEL >= 1
// OpenCL error codes use 0 for CL_SUCCESS and negative numbers for other
// errors. If the error does not come from OpenCL, get_cl_code() returns
// CL_SUCCESS.
int err_code = err.get_cl_code();
// if CL_SUCCESS is returned, exit with 1 to indicate a DPC++ error.
if (err_code == 0) {
err_code = 1;
}
std::exit(err_code);
}
}


@@ -144,7 +165,21 @@ void DpcppExecutor::raw_copy_to(const DpcppExecutor *dest, size_type num_bytes,
const void *src_ptr, void *dest_ptr) const
{
if (num_bytes > 0) {
dest->get_queue()->memcpy(dest_ptr, src_ptr, num_bytes).wait();
// If the queues differ and are not cpu/host, one queue can not
// transfer data to the other queue (even on the same device).
// Note: this may be relaxed once we ensure the behavior is as expected.
auto queue = this->get_queue();
auto dest_queue = dest->get_queue();
auto device = queue->get_device();
auto dest_device = dest_queue->get_device();
if (((device.is_host() || device.is_cpu()) &&
(dest_device.is_host() || dest_device.is_cpu())) ||
(queue == dest_queue)) {
dest->get_queue()->memcpy(dest_ptr, src_ptr, num_bytes).wait();
} else {
// memcpy only supports host<->device copies or copies within the same queue
GKO_NOT_SUPPORTED(dest);
}
}
}

@@ -176,15 +211,16 @@ bool DpcppExecutor::verify_memory_to(const OmpExecutor *dest_exec) const

bool DpcppExecutor::verify_memory_to(const DpcppExecutor *dest_exec) const
{
auto device = detail::get_devices(
get_exec_info().device_type)[get_exec_info().device_id];
auto other_device = detail::get_devices(
dest_exec->get_device_type())[dest_exec->get_device_id()];
// If the queues differ and are not cpu/host, one queue can not access
// data allocated on the other queue (even on the same device).
// Note: this may be relaxed once we ensure the behavior is as expected.
auto queue = this->get_queue();
auto dest_queue = dest_exec->get_queue();
auto device = queue->get_device();
auto dest_device = dest_queue->get_device();
return ((device.is_host() || device.is_cpu()) &&
(other_device.is_host() || other_device.is_cpu())) ||
(device.get_info<cl::sycl::info::device::device_type>() ==
other_device.get_info<cl::sycl::info::device::device_type>() &&
device.get() == other_device.get());
(dest_device.is_host() || dest_device.is_cpu())) ||
(queue == dest_queue);
}


59 changes: 43 additions & 16 deletions dpcpp/test/base/executor.dp.cpp
@@ -55,20 +55,20 @@ namespace {
class DpcppExecutor : public ::testing::Test {
protected:
DpcppExecutor()
: omp(gko::OmpExecutor::create()), dpcpp(nullptr), dpcpp2(nullptr)
: ref(gko::ReferenceExecutor::create()), dpcpp(nullptr), dpcpp2(nullptr)
{}

void SetUp()
{
if (gko::DpcppExecutor::get_num_devices("gpu") > 0) {
dpcpp = gko::DpcppExecutor::create(0, omp, "gpu");
dpcpp = gko::DpcppExecutor::create(0, ref, "gpu");
if (gko::DpcppExecutor::get_num_devices("gpu") > 1) {
dpcpp2 = gko::DpcppExecutor::create(1, omp, "gpu");
dpcpp2 = gko::DpcppExecutor::create(1, ref, "gpu");
}
} else if (gko::DpcppExecutor::get_num_devices("cpu") > 0) {
dpcpp = gko::DpcppExecutor::create(0, omp, "cpu");
dpcpp = gko::DpcppExecutor::create(0, ref, "cpu");
if (gko::DpcppExecutor::get_num_devices("cpu") > 1) {
dpcpp2 = gko::DpcppExecutor::create(1, omp, "cpu");
dpcpp2 = gko::DpcppExecutor::create(1, ref, "cpu");
}
} else {
GKO_NOT_IMPLEMENTED;
@@ -84,17 +84,17 @@ class DpcppExecutor : public ::testing::Test {
}
}

std::shared_ptr<gko::Executor> omp{};
std::shared_ptr<gko::Executor> ref{};
std::shared_ptr<const gko::DpcppExecutor> dpcpp{};
std::shared_ptr<const gko::DpcppExecutor> dpcpp2{};
};


TEST_F(DpcppExecutor, CanInstantiateTwoExecutorsOnOneDevice)
{
auto dpcpp = gko::DpcppExecutor::create(0, omp);
auto dpcpp = gko::DpcppExecutor::create(0, ref);
if (dpcpp2 != nullptr) {
auto dpcpp2 = gko::DpcppExecutor::create(0, omp);
auto dpcpp2 = gko::DpcppExecutor::create(0, ref);
}

// We want automatic deinitialization to not create any error
@@ -103,7 +103,7 @@ TEST_F(DpcppExecutor, CanInstantiateTwoExecutorsOnOneDevice)

TEST_F(DpcppExecutor, CanGetExecInfo)
{
dpcpp = gko::DpcppExecutor::create(0, omp);
dpcpp = gko::DpcppExecutor::create(0, ref);

ASSERT_TRUE(dpcpp->get_num_computing_units() > 0);
ASSERT_TRUE(dpcpp->get_subgroup_sizes().size() > 0);
@@ -191,17 +191,17 @@ TEST_F(DpcppExecutor, CopiesDataToCPU)
{
int orig[] = {3, 8};
auto *copy = dpcpp->alloc<int>(2);
gko::Array<bool> is_set(omp, 1);
gko::Array<bool> is_set(ref, 1);

dpcpp->copy_from(omp.get(), 2, orig, copy);
dpcpp->copy_from(ref.get(), 2, orig, copy);

is_set.set_executor(dpcpp);
ASSERT_NO_THROW(dpcpp->synchronize());
ASSERT_NO_THROW(dpcpp->get_queue()->submit([&](sycl::handler &cgh) {
auto *is_set_ptr = is_set.get_data();
cgh.single_task([=]() { check_data(copy, is_set_ptr); });
}));
is_set.set_executor(omp);
is_set.set_executor(ref);
ASSERT_EQ(*is_set.get_data(), true);
ASSERT_NO_THROW(dpcpp->synchronize());
dpcpp->free(copy);
@@ -221,7 +221,7 @@ TEST_F(DpcppExecutor, CopiesDataFromCPU)
cgh.single_task([=]() { init_data(orig); });
});

omp->copy_from(dpcpp.get(), 2, orig, copy);
ref->copy_from(dpcpp.get(), 2, orig, copy);

EXPECT_EQ(3, copy[0]);
ASSERT_EQ(8, copy[1]);
@@ -236,7 +236,7 @@ TEST_F(DpcppExecutor, CopiesDataFromDpcppToDpcpp)
}

int copy[2];
gko::Array<bool> is_set(omp, 1);
gko::Array<bool> is_set(ref, 1);
auto orig = dpcpp->alloc<int>(2);
dpcpp->get_queue()->submit([&](sycl::handler &cgh) {
cgh.single_task([=]() { init_data(orig); });
@@ -250,11 +250,11 @@ TEST_F(DpcppExecutor, CopiesDataFromDpcppToDpcpp)
auto *is_set_ptr = is_set.get_data();
cgh.single_task([=]() { check_data(copy_dpcpp2, is_set_ptr); });
}));
is_set.set_executor(omp);
is_set.set_executor(ref);
ASSERT_EQ(*is_set.get_data(), true);

// Put the results on the host and run CPU side assertions
omp->copy_from(dpcpp2.get(), 2, copy_dpcpp2, copy);
ref->copy_from(dpcpp2.get(), 2, copy_dpcpp2, copy);
EXPECT_EQ(3, copy[0]);
ASSERT_EQ(8, copy[1]);
dpcpp2->free(copy_dpcpp2);
@@ -269,4 +269,31 @@ TEST_F(DpcppExecutor, Synchronizes)
}


#define GTEST_ASSERT_NO_EXIT(statement) \
ASSERT_EXIT({ {statement} exit(0); }, ::testing::ExitedWithCode(0), "")


TEST_F(DpcppExecutor, FreeAfterKernel)
{
testing::FLAGS_gtest_death_test_style = "threadsafe";
GTEST_ASSERT_NO_EXIT({
size_t length = 10000;
auto dpcpp =
gko::DpcppExecutor::create(0, gko::ReferenceExecutor::create());
{
gko::Array<float> x(dpcpp, length);
gko::Array<float> y(dpcpp, length);
auto x_val = x.get_data();
auto y_val = y.get_data();
dpcpp->get_queue()->submit([&](sycl::handler &cgh) {
cgh.parallel_for(sycl::range<1>{length},
[=](sycl::id<1> i) { y_val[i] += x_val[i]; });
});
}
// ensure everything on the queue has finished.
dpcpp->synchronize();
});
}


} // namespace
6 changes: 6 additions & 0 deletions include/ginkgo/core/base/executor.hpp
@@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include <array>
#include <iostream>
#include <memory>
#include <mutex>
#include <sstream>
@@ -621,7 +622,12 @@ class Executor : public log::EnableLogging<Executor> {
this->raw_copy_from(src_exec, num_elems * sizeof(T), src_ptr,
dest_ptr);
} catch (NotSupported &) {
#if (GKO_VERBOSE_LEVEL >= 1) && !defined(NDEBUG)
// Unoptimized copy: try to go through the master executors.
// Logged only when GKO_VERBOSE_LEVEL >= 1 and in debug builds.
std::clog << "Not direct copy. Try to copy data from the masters."
<< std::endl;
#endif
auto src_master = src_exec->get_master().get();
if (num_elems > 0 && src_master != src_exec) {
auto *master_ptr = src_exec->get_master()->alloc<T>(num_elems);
2 changes: 1 addition & 1 deletion include/ginkgo/core/base/math.hpp
@@ -41,7 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <type_traits>


#ifdef CL_SYCL_LANGUAGE_VERSION
#ifdef SYCL_LANGUAGE_VERSION
#include <CL/sycl.hpp>
#endif

