Skip to content

Commit

Permalink
Merge improved HIP build time
Browse files Browse the repository at this point in the history
Splitting up the mixed-precision SpMV kernel instantiations shaves at least an hour off the HIP build time.
This also outputs individual file compilation times in the CI job log.

Related PR: #1384
  • Loading branch information
upsj authored Aug 11, 2023
2 parents 7b4134d + 57af99b commit 0dfcbb6
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 34 deletions.
2 changes: 2 additions & 0 deletions .gitlab/scripts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
-DGINKGO_DPCPP_SINGLE_MODE=${DPCPP_SINGLE_MODE}
-DGINKGO_EXPORT_BUILD_DIR=${EXPORT_BUILD_DIR}
- ninja -j${NUM_CORES} -l${CI_LOAD_LIMIT} install
- awk '!/^#/ { print ($2 - $1)/1000 " " $4 }' .ninja_log | sort -nr
- if [ "${EXPORT_BUILD_DIR}" == "ON" ]; then ninja test_exportbuild; fi
- LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH ninja test_pkgconfig
dependencies: []
Expand Down Expand Up @@ -94,6 +95,7 @@
-DGINKGO_RUN_EXAMPLES=${RUN_EXAMPLES}
-DGINKGO_EXPORT_BUILD_DIR=${EXPORT_BUILD_DIR}
- ninja -j${NUM_CORES} -l${CI_LOAD_LIMIT} install
- awk '!/^#/ { print ($2 - $1)/1000 " " $4 }' .ninja_log | sort -nr
- |
(( $(ctest -N | tail -1 | sed 's/Total Tests: //') != 0 )) || exit 1
- ctest -V --timeout 6000
Expand Down
83 changes: 53 additions & 30 deletions core/base/mixed_precision_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,42 +39,65 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#ifdef GINKGO_MIXED_PRECISION
#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(_macro, ...) \
template _macro(float, float, float, __VA_ARGS__); \
template _macro(float, float, double, __VA_ARGS__); \
template _macro(float, double, float, __VA_ARGS__); \
template _macro(float, double, double, __VA_ARGS__); \
template _macro(double, float, float, __VA_ARGS__); \
template _macro(double, float, double, __VA_ARGS__); \
template _macro(double, double, float, __VA_ARGS__); \
template _macro(double, double, double, __VA_ARGS__); \
template _macro(std::complex<float>, std::complex<float>, \
std::complex<float>, __VA_ARGS__); \
template _macro(std::complex<float>, std::complex<float>, \
std::complex<double>, __VA_ARGS__); \
template _macro(std::complex<float>, std::complex<double>, \
std::complex<float>, __VA_ARGS__); \
template _macro(std::complex<float>, std::complex<double>, \
std::complex<double>, __VA_ARGS__); \
template _macro(std::complex<double>, std::complex<float>, \
std::complex<float>, __VA_ARGS__); \
template _macro(std::complex<double>, std::complex<float>, \
std::complex<double>, __VA_ARGS__); \
template _macro(std::complex<double>, std::complex<double>, \
std::complex<float>, __VA_ARGS__); \
template _macro(std::complex<double>, std::complex<double>, \

/**
 * First group of the mixed-precision instantiation list (used when
 * GINKGO_MIXED_PRECISION is defined): emits `template _macro(...)` for the
 * four value-type triples that start with `float` and whose remaining two
 * types are each `float` or `double`.
 *
 * @param _macro  macro invoked with the three value types followed by the
 *                forwarded extra arguments
 * @param ...  extra arguments appended unchanged to every `_macro` invocation
 *
 * @note The last statement carries no trailing semicolon; the call site
 *       supplies it.
 */
#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT1(_macro, ...) \
    template _macro(float, float, float, __VA_ARGS__);                \
    template _macro(float, float, double, __VA_ARGS__);               \
    template _macro(float, double, float, __VA_ARGS__);               \
    template _macro(float, double, double, __VA_ARGS__)

/**
 * Second group of the mixed-precision instantiation list (used when
 * GINKGO_MIXED_PRECISION is defined): emits `template _macro(...)` for the
 * four value-type triples that start with `double` and whose remaining two
 * types are each `float` or `double`.
 *
 * @param _macro  macro invoked with the three value types followed by the
 *                forwarded extra arguments
 * @param ...  extra arguments appended unchanged to every `_macro` invocation
 *
 * @note The last statement carries no trailing semicolon; the call site
 *       supplies it.
 */
#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT2(_macro, ...) \
    template _macro(double, float, float, __VA_ARGS__);               \
    template _macro(double, float, double, __VA_ARGS__);              \
    template _macro(double, double, float, __VA_ARGS__);              \
    template _macro(double, double, double, __VA_ARGS__)

/**
 * Third group of the mixed-precision instantiation list (used when
 * GINKGO_MIXED_PRECISION is defined): emits `template _macro(...)` for the
 * four value-type triples that start with `std::complex<float>` and whose
 * remaining two types are each `std::complex<float>` or
 * `std::complex<double>`.
 *
 * @param _macro  macro invoked with the three value types followed by the
 *                forwarded extra arguments
 * @param ...  extra arguments appended unchanged to every `_macro` invocation
 *
 * @note The last statement carries no trailing semicolon; the call site
 *       supplies it.
 */
#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT3(_macro, ...) \
    template _macro(std::complex<float>, std::complex<float>,         \
                    std::complex<float>, __VA_ARGS__);                \
    template _macro(std::complex<float>, std::complex<float>,         \
                    std::complex<double>, __VA_ARGS__);               \
    template _macro(std::complex<float>, std::complex<double>,        \
                    std::complex<float>, __VA_ARGS__);                \
    template _macro(std::complex<float>, std::complex<double>,        \
                    std::complex<double>, __VA_ARGS__)

/**
 * Fourth group of the mixed-precision instantiation list (used when
 * GINKGO_MIXED_PRECISION is defined): emits `template _macro(...)` for the
 * four value-type triples that start with `std::complex<double>` and whose
 * remaining two types are each `std::complex<float>` or
 * `std::complex<double>`.
 *
 * @param _macro  macro invoked with the three value types followed by the
 *                forwarded extra arguments
 * @param ...  extra arguments appended unchanged to every `_macro` invocation
 *
 * @note The last statement carries no trailing semicolon; the call site
 *       supplies it.
 */
#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT4(_macro, ...) \
    template _macro(std::complex<double>, std::complex<float>,        \
                    std::complex<float>, __VA_ARGS__);                \
    template _macro(std::complex<double>, std::complex<float>,        \
                    std::complex<double>, __VA_ARGS__);               \
    template _macro(std::complex<double>, std::complex<double>,       \
                    std::complex<float>, __VA_ARGS__);                \
    template _macro(std::complex<double>, std::complex<double>,       \
                    std::complex<double>, __VA_ARGS__)

#else
#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(_macro, ...) \
template _macro(float, float, float, __VA_ARGS__); \
template _macro(double, double, double, __VA_ARGS__); \
template _macro(std::complex<float>, std::complex<float>, \
std::complex<float>, __VA_ARGS__); \
template _macro(std::complex<double>, std::complex<double>, \

/**
 * First group of the instantiation list when GINKGO_MIXED_PRECISION is not
 * defined: emits `template _macro(...)` for the single uniform triple
 * (`float`, `float`, `float`).
 *
 * @param _macro  macro invoked with the three value types followed by the
 *                forwarded extra arguments
 * @param ...  extra arguments appended unchanged to the `_macro` invocation
 *
 * @note Like the other SPLIT macros, the expansion carries no trailing
 *       semicolon: every call site adds its own, so a semicolon inside the
 *       macro would leave a stray empty declaration behind.
 */
#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT1(_macro, ...) \
    template _macro(float, float, float, __VA_ARGS__)

/**
 * Second group of the instantiation list when GINKGO_MIXED_PRECISION is not
 * defined: emits `template _macro(...)` for the single uniform triple
 * (`double`, `double`, `double`).
 *
 * @param _macro  macro invoked with the three value types followed by the
 *                forwarded extra arguments
 * @param ...  extra arguments appended unchanged to the `_macro` invocation
 *
 * @note The expansion carries no trailing semicolon; the call site supplies
 *       it.
 */
#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT2(_macro, ...) \
    template _macro(double, double, double, __VA_ARGS__)

/**
 * Third group of the instantiation list when GINKGO_MIXED_PRECISION is not
 * defined: emits `template _macro(...)` for the single uniform triple of
 * `std::complex<float>`.
 *
 * @param _macro  macro invoked with the three value types followed by the
 *                forwarded extra arguments
 * @param ...  extra arguments appended unchanged to the `_macro` invocation
 *
 * @note The expansion carries no trailing semicolon; the call site supplies
 *       it.
 */
#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT3(_macro, ...) \
    template _macro(std::complex<float>, std::complex<float>,         \
                    std::complex<float>, __VA_ARGS__)

/**
 * Fourth group of the instantiation list when GINKGO_MIXED_PRECISION is not
 * defined: emits `template _macro(...)` for the single uniform triple of
 * `std::complex<double>`.
 *
 * @param _macro  macro invoked with the three value types followed by the
 *                forwarded extra arguments
 * @param ...  extra arguments appended unchanged to the `_macro` invocation
 *
 * @note The expansion carries no trailing semicolon; the call site supplies
 *       it.
 */
#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT4(_macro, ...) \
    template _macro(std::complex<double>, std::complex<double>,       \
                    std::complex<double>, __VA_ARGS__)

#endif


/**
 * Full instantiation list: chains the SPLIT1 through SPLIT4 groups in order,
 * passing `_macro` and the extra arguments on to each of them. Which type
 * triples each group contains depends on whether GINKGO_MIXED_PRECISION is
 * defined.
 *
 * @param _macro  macro handed to every SPLIT group
 * @param ...  extra arguments forwarded unchanged to every SPLIT group
 *
 * @note The last group is not followed by a semicolon; the call site supplies
 *       it.
 */
#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(_macro, ...)             \
    GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT1(_macro, __VA_ARGS__); \
    GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT2(_macro, __VA_ARGS__); \
    GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT3(_macro, __VA_ARGS__); \
    GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT4(_macro, __VA_ARGS__)


/**
 * Runs the full mixed value-type instantiation list twice: once with `int32`
 * and once with `int64` as the extra (index type) argument.
 *
 * @param _macro  macro handed to GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE
 *
 * @note The second list is not followed by a semicolon; the call site
 *       supplies it.
 */
#define GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE(_macro) \
    GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(_macro, int32);       \
    GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE(_macro, int64)
Expand Down
56 changes: 52 additions & 4 deletions hip/matrix/csr_kernels.instantiate.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,60 @@ namespace csr {
// begin
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_CONVERT_TO_FBCSR_KERNEL);


// Mixed-precision SpMV: each of the four value-type groups (SPLIT1..SPLIT4)
// is instantiated in its own section, first with int32 and then with int64
// as the trailing macro argument.
// NOTE(review): the bare marker comments between the sections look like
// build-system chunk delimiters -- presumably they must stay verbatim; confirm.
// split
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT1(GKO_DECLARE_CSR_SPMV_KERNEL,
int32);
// split
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT2(GKO_DECLARE_CSR_SPMV_KERNEL,
int32);
// split
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT3(GKO_DECLARE_CSR_SPMV_KERNEL,
int32);
// split
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT4(GKO_DECLARE_CSR_SPMV_KERNEL,
int32);
// split
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT1(GKO_DECLARE_CSR_SPMV_KERNEL,
int64);
// split
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT2(GKO_DECLARE_CSR_SPMV_KERNEL,
int64);
// split
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT3(GKO_DECLARE_CSR_SPMV_KERNEL,
int64);
// split
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT4(GKO_DECLARE_CSR_SPMV_KERNEL,
int64);


// Mixed-precision advanced SpMV: same per-group layout as the plain SpMV
// instantiations, again with int32 followed by int64.
// split
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT1(
GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, int32);
// split
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT2(
GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, int32);
// split
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT3(
GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, int32);
// split
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_SPMV_KERNEL);
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT4(
GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, int32);
// split
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL);
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT1(
GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, int64);
// split
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT2(
GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, int64);
// split
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT3(
GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, int64);
// split
GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_TYPE_SPLIT4(
GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL, int64);


// split
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL);
// split
Expand Down

0 comments on commit 0dfcbb6

Please sign in to comment.