Skip to content

Commit

Permalink
Merge various benchmark improvements
Browse files Browse the repository at this point in the history
* Remove ELL format from benchmarks for heavily imbalanced matrices
* Fix some Hybrid conversion issues in OpenMP
* Fix IterationControl usage in conversion benchmark
* Remove CudaExecutor temporaries from SpMV formats in benchmark
* Output exception messages into JSON

Related PR: #812
  • Loading branch information
upsj authored Jul 6, 2021
2 parents 9cebc13 + c5343ab commit 06924a9
Show file tree
Hide file tree
Showing 12 changed files with 229 additions and 70 deletions.
6 changes: 6 additions & 0 deletions benchmark/blas/blas.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,12 @@ void apply_blas(const char *operation_name, std::shared_ptr<gko::Executor> exec,
} catch (const std::exception &e) {
add_or_set_member(test_case["blas"][operation_name], "completed", false,
allocator);
if (FLAGS_keep_errors) {
rapidjson::Value msg_value;
msg_value.SetString(e.what(), allocator);
add_or_set_member(test_case["blas"][operation_name], "error",
msg_value, allocator);
}
std::cerr << "Error when processing test case " << test_case << "\n"
<< "what(): " << e.what() << std::endl;
}
Expand Down
13 changes: 9 additions & 4 deletions benchmark/conversions/conversions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,16 +88,22 @@ void convert_matrix(const gko::LinOp *matrix_from, const char *format_to,
matrix_to->copy_from(matrix_from);
}
add_or_set_member(conversion_case[conversion_name], "time",
timer->compute_average_time(), allocator);
ic.compute_average_time(), allocator);
add_or_set_member(conversion_case[conversion_name], "repetitions",
timer->get_num_repetitions(), allocator);
ic.get_num_repetitions(), allocator);

// compute and write benchmark data
add_or_set_member(conversion_case[conversion_name], "completed", true,
allocator);
} catch (const std::exception &e) {
add_or_set_member(test_case["conversions"][conversion_name],
"completed", false, allocator);
if (FLAGS_keep_errors) {
rapidjson::Value msg_value;
msg_value.SetString(e.what(), allocator);
add_or_set_member(test_case["conversions"][conversion_name],
"error", msg_value, allocator);
}
std::cerr << "Error when processing test case " << test_case << "\n"
<< "what(): " << e.what() << std::endl;
}
Expand Down Expand Up @@ -156,8 +162,7 @@ int main(int argc, char *argv[])
try {
auto matrix_from =
share(formats::matrix_factory.at(format_from)(exec, data));
for (const auto &format : formats::matrix_factory) {
const auto format_to = std::get<0>(format);
for (const auto &format_to : formats) {
if (format_from == format_to) {
continue;
}
Expand Down
6 changes: 6 additions & 0 deletions benchmark/preconditioner/preconditioner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,12 @@ void run_preconditioner(const char *precond_name,
rapidjson::Value(rapidjson::kObjectType), allocator);
add_or_set_member(test_case["preconditioner"][encoded_name.c_str()],
"completed", false, allocator);
if (FLAGS_keep_errors) {
rapidjson::Value msg_value;
msg_value.SetString(e.what(), allocator);
add_or_set_member(test_case["preconditioner"][encoded_name.c_str()],
"error", msg_value, allocator);
}
std::cerr << "Error when processing test case " << test_case << "\n"
<< "what(): " << e.what() << std::endl;
}
Expand Down
23 changes: 22 additions & 1 deletion benchmark/run_all_benchmarks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,16 @@ if [ ! "${EXECUTOR}" ]; then
echo "EXECUTOR environment variable not set - assuming \"${EXECUTOR}\"" 1>&2
fi

if [ ! "${REPETITIONS}" ]; then
REPETITIONS=10
echo "REPETITIONS environment variable not set - assuming ${REPETITIONS}" 1>&2
fi

if [ ! "${SOLVER_REPETITIONS}" ]; then
SOLVER_REPETITIONS=1
echo "SOLVER_REPETITIONS environment variable not set - assuming ${SOLVER_REPETITIONS}" 1>&2
fi

if [ ! "${SEGMENTS}" ]; then
echo "SEGMENTS environment variable not set - running entire suite" 1>&2
SEGMENTS=1
Expand All @@ -35,6 +45,11 @@ if [ ! "${FORMATS}" ]; then
FORMATS="csr,coo,ell,hybrid,sellp"
fi

if [ ! "${ELL_IMBALANCE_LIMIT}" ]; then
echo "ELL_IMBALANCE_LIMIT environment variable not set - assuming 100" 1>&2
ELL_IMBALANCE_LIMIT=100
fi

if [ ! "${SOLVERS}" ]; then
SOLVERS="bicgstab,cg,cgs,fcg,gmres,cb_gmres_reduce1,idr"
echo "SOLVERS environment variable not set - assuming \"${SOLVERS}\"" 1>&2
Expand Down Expand Up @@ -67,7 +82,7 @@ fi

if [ ! "${SOLVERS_JACOBI_MAX_BS}" ]; then
SOLVERS_JACOBI_MAX_BS="32"
"SOLVERS_JACOBI_MAX_BS environment variable not set - assuming \"${SOLVERS_JACOBI_MAX_BS}\"" 1>&2
echo "SOLVERS_JACOBI_MAX_BS environment variable not set - assuming \"${SOLVERS_JACOBI_MAX_BS}\"" 1>&2
fi

if [ ! "${BENCHMARK_PRECISION}" ]; then
Expand Down Expand Up @@ -202,6 +217,8 @@ run_conversion_benchmarks() {
./conversions/conversions${BENCH_SUFFIX} --backup="$1.bkp" --double_buffer="$1.bkp2" \
--executor="${EXECUTOR}" --formats="${FORMATS}" \
--device_id="${DEVICE_ID}" --gpu_timer=${GPU_TIMER} \
--repetitions="${REPETITIONS}" \
--ell_imbalance_limit="${ELL_IMBALANCE_LIMIT}" \
<"$1.imd" 2>&1 >"$1"
keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd"
}
Expand All @@ -218,6 +235,8 @@ run_spmv_benchmarks() {
./spmv/spmv${BENCH_SUFFIX} --backup="$1.bkp" --double_buffer="$1.bkp2" \
--executor="${EXECUTOR}" --formats="${FORMATS}" \
--device_id="${DEVICE_ID}" --gpu_timer=${GPU_TIMER} \
--repetitions="${REPETITIONS}" \
--ell_imbalance_limit="${ELL_IMBALANCE_LIMIT}" \
<"$1.imd" 2>&1 >"$1"
keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd"
}
Expand All @@ -239,6 +258,7 @@ run_solver_benchmarks() {
--gpu_timer=${GPU_TIMER} \
--jacobi_max_block_size=${SOLVERS_JACOBI_MAX_BS} --device_id="${DEVICE_ID}" \
--gmres_restart="${SOLVERS_GMRES_RESTART}" \
--repetitions="${SOLVER_REPETITIONS}" \
<"$1.imd" 2>&1 >"$1"
keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd"
}
Expand All @@ -265,6 +285,7 @@ run_preconditioner_benchmarks() {
--jacobi_max_block_size="${bsize}" \
--jacobi_storage="${prec}" \
--device_id="${DEVICE_ID}" --gpu_timer=${GPU_TIMER} \
--repetitions="${REPETITIONS}" \
<"$1.imd" 2>&1 >"$1"
keep_latest "$1" "$1.bkp" "$1.bkp2" "$1.imd"
done
Expand Down
6 changes: 6 additions & 0 deletions benchmark/solver/solver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -509,6 +509,12 @@ void solve_system(const std::string &solver_name,
} catch (const std::exception &e) {
add_or_set_member(test_case["solver"][precond_solver_name], "completed",
false, allocator);
if (FLAGS_keep_errors) {
rapidjson::Value msg_value;
msg_value.SetString(e.what(), allocator);
add_or_set_member(test_case["solver"][precond_solver_name], "error",
msg_value, allocator);
}
std::cerr << "Error when processing test case " << test_case << "\n"
<< "what(): " << e.what() << std::endl;
}
Expand Down
8 changes: 7 additions & 1 deletion benchmark/spmv/spmv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ void apply_spmv(const char *format_name, std::shared_ptr<gko::Executor> exec,
for (auto _ : ic_tuning.run()) {
system_matrix->apply(lend(b), lend(x_clone));
}
tuning_case["time"].PushBack(tuning_timer->compute_average_time(),
tuning_case["time"].PushBack(ic_tuning.compute_average_time(),
allocator);
tuning_case["values"].PushBack(val, allocator);
}
Expand All @@ -154,6 +154,12 @@ void apply_spmv(const char *format_name, std::shared_ptr<gko::Executor> exec,
} catch (const std::exception &e) {
add_or_set_member(test_case["spmv"][format_name], "completed", false,
allocator);
if (FLAGS_keep_errors) {
rapidjson::Value msg_value;
msg_value.SetString(e.what(), allocator);
add_or_set_member(test_case["spmv"][format_name], "error",
msg_value, allocator);
}
std::cerr << "Error when processing test case " << test_case << "\n"
<< "what(): " << e.what() << std::endl;
}
Expand Down
80 changes: 76 additions & 4 deletions benchmark/utils/formats.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/ginkgo.hpp>


#include <algorithm>
#include <map>
#include <string>

Expand Down Expand Up @@ -152,6 +153,10 @@ std::string format_command =
// the formats command-line argument
DEFINE_string(formats, "coo", formats::format_command.c_str());

// the ell_imbalance_limit command-line argument; check_ell_admissibility
// compares the ratio of the longest row length to the average row length
// against this value and throws if the ratio is larger
DEFINE_int64(ell_imbalance_limit, 100,
"Maximal storage overhead above which ELL benchmarks will be "
"skipped. Negative values mean no limit.");


namespace formats {

Expand Down Expand Up @@ -181,6 +186,51 @@ std::unique_ptr<MatrixType> read_matrix_from_data(
return mat;
}


/**
 * Builds a CSR strategy of type `Strategy` bound to the given executor when
 * that executor is a CUDA or HIP executor; every other executor receives a
 * csr::classical strategy instead.
 *
 * @tparam Strategy  one of csr::automatical or csr::load_balance
 *
 * @param exec  the executor the strategy is created for
 *
 * @return the strategy object, as a pointer to the csr::strategy_type base
 */
template <typename Strategy>
std::shared_ptr<csr::strategy_type> create_gpu_strategy(
    std::shared_ptr<const gko::Executor> exec)
{
    const auto *const raw_exec = exec.get();

    // CUDA executor: hand the strategy an owning pointer of the concrete type
    if (const auto *cuda_exec =
            dynamic_cast<const gko::CudaExecutor *>(raw_exec)) {
        return std::make_shared<Strategy>(cuda_exec->shared_from_this());
    }

    // HIP executor: same construction with the HIP executor type
    if (const auto *hip_exec =
            dynamic_cast<const gko::HipExecutor *>(raw_exec)) {
        return std::make_shared<Strategy>(hip_exec->shared_from_this());
    }

    // neither CUDA nor HIP: fall back to the classical strategy
    return std::make_shared<csr::classical>();
}


/**
 * Checks whether the given matrix data exceeds the ELL imbalance limit set by
 * the --ell_imbalance_limit flag.
 *
 * The imbalance is the length of the longest row divided by the average row
 * length (number of stored entries divided by number of rows). Matrices
 * without rows or without stored entries are always admissible, and a
 * negative flag value disables the check entirely.
 *
 * @param data  the matrix data to inspect
 *
 * @throws gko::Error if the imbalance limit is exceeded
 */
inline void check_ell_admissibility(const gko::matrix_data<etype> &data)
{
    const auto num_rows = data.size[0];
    // Nothing to measure (and no 0 / 0 average) for empty matrices; negative
    // limits mean "no limit".
    if (num_rows == 0 || data.nonzeros.empty() ||
        FLAGS_ell_imbalance_limit < 0) {
        return;
    }

    // count the stored entries of every row
    std::vector<gko::size_type> row_lengths(num_rows);
    for (const auto &nz : data.nonzeros) {
        ++row_lengths[nz.row];
    }

    const auto max_len =
        *std::max_element(row_lengths.begin(), row_lengths.end());
    const auto avg_len = static_cast<double>(data.nonzeros.size()) / num_rows;
    if (max_len / avg_len > FLAGS_ell_imbalance_limit) {
        throw gko::Error(__FILE__, __LINE__,
                         "Matrix exceeds ELL imbalance limit");
    }
}


/**
* Creates a Ginkgo matrix from the intermediate data representation format
* gko::matrix_data with support for variable arguments.
Expand All @@ -201,15 +251,36 @@ const std::map<std::string, std::function<std::unique_ptr<gko::LinOp>(
std::shared_ptr<const gko::Executor>,
const gko::matrix_data<etype> &)>>
matrix_factory{
{"csr", READ_MATRIX(csr, std::make_shared<csr::automatical>())},
{"csri", READ_MATRIX(csr, std::make_shared<csr::load_balance>())},
{"csr",
[](std::shared_ptr<const gko::Executor> exec,
const gko::matrix_data<etype> &data) -> std::unique_ptr<csr> {
auto mat =
csr::create(exec, create_gpu_strategy<csr::automatical>(exec));
mat->read(data);
return mat;
}},
{"csri",
[](std::shared_ptr<const gko::Executor> exec,
const gko::matrix_data<etype> &data) -> std::unique_ptr<csr> {
auto mat = csr::create(
exec, create_gpu_strategy<csr::load_balance>(exec));
mat->read(data);
return mat;
}},
{"csrm", READ_MATRIX(csr, std::make_shared<csr::merge_path>())},
{"csrc", READ_MATRIX(csr, std::make_shared<csr::classical>())},
{"coo", read_matrix_from_data<gko::matrix::Coo<etype>>},
{"ell", read_matrix_from_data<gko::matrix::Ell<etype>>},
{"ell", [](std::shared_ptr<const gko::Executor> exec,
const gko::matrix_data<etype> &data) {
check_ell_admissibility(data);
auto mat = gko::matrix::Ell<etype>::create(exec);
mat->read(data);
return mat;
}},
{"ell-mixed",
[](std::shared_ptr<const gko::Executor> exec,
const gko::matrix_data<etype> &data) {
check_ell_admissibility(data);
gko::matrix_data<gko::next_precision<etype>> conv_data;
conv_data.size = data.size;
conv_data.nonzeros.resize(data.nonzeros.size());
Expand All @@ -220,7 +291,8 @@ const std::map<std::string, std::function<std::unique_ptr<gko::LinOp>(
it->value = el.value;
++it;
}
auto mat = gko::matrix::Ell<gko::next_precision<etype>>::create(std::move(exec));
auto mat = gko::matrix::Ell<gko::next_precision<etype>>::create(
std::move(exec));
mat->read(conv_data);
return mat;
}},
Expand Down
4 changes: 4 additions & 0 deletions benchmark/utils/general.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ DEFINE_string(double_buffer, "",
DEFINE_bool(detailed, true,
"If set, performs several runs to obtain more detailed results");

// when set, the benchmarks' exception handlers additionally store the caught
// exception's what() string as an "error" member of the failed benchmark case
DEFINE_bool(keep_errors, false,
"If set, writes exception messages during the execution into the "
"JSON output");

DEFINE_bool(nested_names, false, "If set, separately logs nested operations");

DEFINE_uint32(seed, 42, "Seed used for the random number generator");
Expand Down
26 changes: 19 additions & 7 deletions omp/components/format_conversion.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <ginkgo/core/base/types.hpp>


#include "core/components/prefix_sum.hpp"


namespace gko {
namespace kernels {
namespace omp {
Expand Down Expand Up @@ -84,16 +87,25 @@ inline void convert_unsorted_idxs_to_ptrs(const IndexType *idxs,
template <typename IndexType>
inline void convert_sorted_idxs_to_ptrs(const IndexType *idxs,
size_type num_nonzeros, IndexType *ptrs,
size_type length)
size_type num_rows)
{
ptrs[0] = 0;
ptrs[length - 1] = num_nonzeros;

#pragma omp parallel for schedule( \
static, ceildiv(num_nonzeros, omp_get_max_threads()))
for (size_type i = 0; i < num_nonzeros - 1; i++) {
for (size_type j = idxs[i] + 1; j <= idxs[i + 1]; j++) {
ptrs[j] = i + 1;
if (num_nonzeros == 0) {
#pragma omp parallel for
for (size_type row = 0; row < num_rows; row++) {
ptrs[row + 1] = 0;
}
} else {
// add virtual sentinel values 0 and num_rows to handle empty first and
// last rows
#pragma omp parallel for
for (size_type i = 0; i <= num_nonzeros; i++) {
auto begin_row = i == 0 ? size_type{} : idxs[i - 1];
auto end_row = i == num_nonzeros ? num_rows : idxs[i];
for (auto row = begin_row; row < end_row; row++) {
ptrs[row + 1] = i;
}
}
}
}
Expand Down
7 changes: 3 additions & 4 deletions omp/matrix/coo_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,9 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
template <typename IndexType>
void convert_row_idxs_to_ptrs(std::shared_ptr<const OmpExecutor> exec,
const IndexType *idxs, size_type num_nonzeros,
IndexType *ptrs, size_type length)
IndexType *ptrs, size_type num_rows)
{
convert_sorted_idxs_to_ptrs(idxs, num_nonzeros, ptrs, length);
convert_sorted_idxs_to_ptrs(idxs, num_nonzeros, ptrs, num_rows);
}


Expand All @@ -222,8 +222,7 @@ void convert_to_csr(std::shared_ptr<const OmpExecutor> exec,

const auto source_row_idxs = source->get_const_row_idxs();

convert_row_idxs_to_ptrs(exec, source_row_idxs, nnz, row_ptrs,
num_rows + 1);
convert_row_idxs_to_ptrs(exec, source_row_idxs, nnz, row_ptrs, num_rows);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
Expand Down
Loading

0 comments on commit 06924a9

Please sign in to comment.