Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-22.10' into thrust-1.17
Browse files Browse the repository at this point in the history
  • Loading branch information
bdice committed Aug 4, 2022
2 parents a51a42d + 217243c commit a454d0e
Show file tree
Hide file tree
Showing 51 changed files with 781 additions and 263 deletions.
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ repos:
# of dependencies, so we'll have to update this manually.
additional_dependencies:
- cmakelang==0.6.13
verbose: true
require_serial: true
- id: cmake-lint
name: cmake-lint
entry: ./cpp/scripts/run-cmake-format.sh cmake-lint
Expand All @@ -69,6 +71,8 @@ repos:
# of dependencies, so we'll have to update this manually.
additional_dependencies:
- cmakelang==0.6.13
verbose: true
require_serial: true
- id: copyright-check
name: copyright-check
# This hook's use of Git tools appears to conflict with
Expand Down
6 changes: 3 additions & 3 deletions ci/benchmark/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/"
export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"

# Dask & Distributed option to install main(nightly) or `conda-forge` packages.
export INSTALL_DASK_MAIN=1
export INSTALL_DASK_MAIN=0

function remove_libcudf_kernel_cache_dir {
EXITCODE=$?
Expand Down Expand Up @@ -82,8 +82,8 @@ if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then
gpuci_logger "gpuci_mamba_retry update dask"
gpuci_mamba_retry update dask
else
gpuci_logger "gpuci_mamba_retry install conda-forge::dask>=2022.05.2 conda-forge::distributed>=2022.05.2 conda-forge::dask-core>=2022.05.2 --force-reinstall"
gpuci_mamba_retry install conda-forge::dask>=2022.05.2 conda-forge::distributed>=2022.05.2 conda-forge::dask-core>=2022.05.2 --force-reinstall
gpuci_logger "gpuci_mamba_retry install conda-forge::dask==2022.7.1 conda-forge::distributed==2022.7.1 conda-forge::dask-core==2022.7.1 --force-reinstall"
gpuci_mamba_retry install conda-forge::dask==2022.7.1 conda-forge::distributed==2022.7.1 conda-forge::dask-core==2022.7.1 --force-reinstall
fi

# Install the master version of streamz
Expand Down
6 changes: 3 additions & 3 deletions ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
unset GIT_DESCRIBE_TAG

# Dask & Distributed option to install main(nightly) or `conda-forge` packages.
export INSTALL_DASK_MAIN=1
export INSTALL_DASK_MAIN=0

# ucx-py version
export UCX_PY_VERSION='0.28.*'
Expand Down Expand Up @@ -92,8 +92,8 @@ function install_dask {
gpuci_mamba_retry update dask
conda list
else
gpuci_logger "gpuci_mamba_retry install conda-forge::dask>=2022.05.2 conda-forge::distributed>=2022.05.2 conda-forge::dask-core>=2022.05.2 --force-reinstall"
gpuci_mamba_retry install conda-forge::dask>=2022.05.2 conda-forge::distributed>=2022.05.2 conda-forge::dask-core>=2022.05.2 --force-reinstall
gpuci_logger "gpuci_mamba_retry install conda-forge::dask==2022.7.1 conda-forge::distributed==2022.7.1 conda-forge::dask-core==2022.7.1 --force-reinstall"
gpuci_mamba_retry install conda-forge::dask==2022.7.1 conda-forge::distributed==2022.7.1 conda-forge::dask-core==2022.7.1 --force-reinstall
fi
# Install the main version of streamz
gpuci_logger "Install the main version of streamz"
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda11.5.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ dependencies:
- pydocstyle=6.1.1
- typing_extensions
- pre-commit
- dask>=2022.05.2
- distributed>=2022.05.2
- dask==2022.7.1
- distributed==2022.7.1
- streamz
- arrow-cpp=8
- dlpack>=0.5,<0.6.0a0
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/custreamz/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ requirements:
- python
- streamz
- cudf ={{ version }}
- dask>=2022.05.2
- distributed>=2022.05.2
- dask==2022.7.1
- distributed==2022.7.1
- python-confluent-kafka >=1.7.0,<1.8.0a0
- cudf_kafka ={{ version }}

Expand Down
8 changes: 4 additions & 4 deletions conda/recipes/dask-cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ requirements:
host:
- python
- cudf ={{ version }}
- dask>=2022.05.2
- distributed>=2022.05.2
- dask==2022.7.1
- distributed==2022.7.1
- cudatoolkit ={{ cuda_version }}
run:
- python
- cudf ={{ version }}
- dask>=2022.05.2
- distributed>=2022.05.2
- dask==2022.7.1
- distributed==2022.7.1
- {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }}

test: # [linux64]
Expand Down
6 changes: 6 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ project(
VERSION 22.10.00
LANGUAGES C CXX CUDA
)
# Stop at configure time when the CUDA compiler is NVIDIA's and its version is below 11.5. The
# detected compiler version is included in the error message.
if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.5)
message(
FATAL_ERROR
"libcudf requires CUDA Toolkit 11.5+ to compile (nvcc ${CMAKE_CUDA_COMPILER_VERSION} provided)"
)
endif()

# Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to
# have different values for the `Threads::Threads` target. Setting this flag ensures
Expand Down
7 changes: 6 additions & 1 deletion cpp/cmake/thirdparty/get_arrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,12 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB

endfunction()

set(CUDF_VERSION_Arrow 8.0.0)
# Provide 8.0.0 as the default Arrow version, stored as a cache entry. The guard leaves any value
# of CUDF_VERSION_Arrow that is already defined untouched.
if(NOT DEFINED CUDF_VERSION_Arrow)
set(CUDF_VERSION_Arrow
8.0.0
CACHE STRING "The version of Arrow to find (or build)"
)
endif()

find_and_configure_arrow(
${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3} ${CUDF_ENABLE_ARROW_ORC}
Expand Down
4 changes: 2 additions & 2 deletions cpp/doxygen/Doxyfile
Original file line number Diff line number Diff line change
Expand Up @@ -1146,7 +1146,7 @@ HTML_FILE_EXTENSION = .html
# of the possible markers and block names see the documentation.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_HEADER =
HTML_HEADER = header.html

# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each
# generated HTML page. If the tag is left blank doxygen will generate a standard
Expand All @@ -1156,7 +1156,7 @@ HTML_HEADER =
# that doxygen normally uses.
# This tag requires that the tag GENERATE_HTML is set to YES.

HTML_FOOTER = footer.html
HTML_FOOTER =

# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
# sheet that is used by each HTML page. It can be used to fine-tune the look of
Expand Down
4 changes: 0 additions & 4 deletions cpp/doxygen/footer.html

This file was deleted.

61 changes: 61 additions & 0 deletions cpp/doxygen/header.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
<!-- HTML header for doxygen 1.8.20-->
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/xhtml;charset=UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=9"/>
<meta name="generator" content="Doxygen $doxygenversion"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<!--BEGIN PROJECT_NAME--><title>$projectname: $title</title><!--END PROJECT_NAME-->
<!--BEGIN !PROJECT_NAME--><title>$title</title><!--END !PROJECT_NAME-->
<link href="$relpath^tabs.css" rel="stylesheet" type="text/css"/>
<script type="text/javascript" src="$relpath^jquery.js"></script>
<script type="text/javascript" src="$relpath^dynsections.js"></script>
$treeview
$search
$mathjax
<link href="$relpath^$stylesheet" rel="stylesheet" type="text/css" />
$extrastylesheet

<!-- RAPIDS CUSTOM JS & CSS: START, Please add these two lines back after every version upgrade -->
<script defer="defer" src="https://docs.rapids.ai/assets/js/custom.js"></script>
<link rel="stylesheet" href="https://docs.rapids.ai/assets/css/custom.css"/>
<!-- RAPIDS CUSTOM JS & CSS: END -->
</head>
<body>
<div id="top"><!-- do not remove this div, it is closed by doxygen! -->

<!--BEGIN TITLEAREA-->
<div id="titlearea">
<table cellspacing="0" cellpadding="0">
<tbody>
<tr style="height: 56px;">
<!--BEGIN PROJECT_LOGO-->
<td id="projectlogo"><img alt="Logo" src="$relpath^$projectlogo"/></td>
<!--END PROJECT_LOGO-->
<!--BEGIN PROJECT_NAME-->
<td id="projectalign" style="padding-left: 0.5em;">
<div id="projectname">$projectname
<!--BEGIN PROJECT_NUMBER-->&#160;<span id="projectnumber">$projectnumber</span><!--END PROJECT_NUMBER-->
</div>
<!--BEGIN PROJECT_BRIEF--><div id="projectbrief">$projectbrief</div><!--END PROJECT_BRIEF-->
</td>
<!--END PROJECT_NAME-->
<!--BEGIN !PROJECT_NAME-->
<!--BEGIN PROJECT_BRIEF-->
<td style="padding-left: 0.5em;">
<div id="projectbrief">$projectbrief</div>
</td>
<!--END PROJECT_BRIEF-->
<!--END !PROJECT_NAME-->
<!--BEGIN DISABLE_INDEX-->
<!--BEGIN SEARCHENGINE-->
<td>$searchbox</td>
<!--END SEARCHENGINE-->
<!--END DISABLE_INDEX-->
</tr>
</tbody>
</table>
</div>
<!--END TITLEAREA-->
<!-- end header part -->
28 changes: 28 additions & 0 deletions cpp/include/cudf/column/column.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

#include <memory>
Expand Down Expand Up @@ -75,6 +76,33 @@ class column {
*/
column(column&& other) noexcept;

/**
* @brief Construct a new column by taking ownership of the contents of a device_uvector.
*
* The column's data type is derived from `T` via `cudf::type_to_id<T>()` and its size is
* taken from `other.size()`. The size is checked with `CUDF_EXPECTS` to ensure it does not
* exceed the maximum value representable by `size_type`.
*
* @tparam T Element type of the device_uvector. This constructor is enabled only when
* `cudf::is_numeric<T>()` or `cudf::is_chrono<T>()` is true.
*
* @param other The device_uvector whose contents will be moved into the new column.
* @param null_mask Optional, column's null value indicator bitmask. May
* be empty if `null_count` is 0 or `UNKNOWN_NULL_COUNT`.
* @param null_count Optional, the count of null elements. If unknown, specify
* `UNKNOWN_NULL_COUNT` to indicate that the null count should be computed on
* the first invocation of `null_count()`.
*/
template <typename T, CUDF_ENABLE_IF(cudf::is_numeric<T>() or cudf::is_chrono<T>())>
column(rmm::device_uvector<T>&& other,
rmm::device_buffer&& null_mask = {},
size_type null_count = UNKNOWN_NULL_COUNT)
: _type{cudf::data_type{cudf::type_to_id<T>()}},
// Immediately-invoked lambda: validate that the element count fits in `size_type`, then
// narrow it. NOTE(review): this reads `other.size()` and presumably must run before
// `other.release()` below; that relies on `_size` being declared before `_data` in the
// class -- confirm against the member declaration order.
_size{[&]() {
CUDF_EXPECTS(
other.size() <= static_cast<std::size_t>(std::numeric_limits<size_type>::max()),
"The device_uvector size exceeds the maximum size_type.");
return static_cast<size_type>(other.size());
}()},
// Take over the storage handed back by `other.release()`.
_data{other.release()},
_null_mask{std::move(null_mask)},
_null_count{null_count}
{
}

/**
* @brief Construct a new column from existing device memory.
*
Expand Down
29 changes: 0 additions & 29 deletions cpp/include/cudf/detail/structs/utilities.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,35 +151,6 @@ flattened_table flatten_nested_columns(
std::vector<null_order> const& null_precedence,
column_nullability nullability = column_nullability::MATCH_INCOMING);

/**
* @brief Unflatten columns flattened as by `flatten_nested_columns()`,
* based on the provided `blueprint`.
*
* cudf::flatten_nested_columns() executes depth first, and serializes the struct null vector
* before the child/member columns.
* E.g. STRUCT_1< STRUCT_2< A, B >, C > is flattened to:
* 1. Null Vector for STRUCT_1
* 2. Null Vector for STRUCT_2
* 3. Member STRUCT_2::A
* 4. Member STRUCT_2::B
* 5. Member STRUCT_1::C
*
* `unflatten_nested_columns()` reconstructs nested columns from flattened input that follows
* the convention above.
*
* Note: This function requires a null-mask vector for each STRUCT column, including for nested
* STRUCT members.
*
* @param flattened "Flattened" `table` of input columns, following the conventions in
* `flatten_nested_columns()`.
* @param blueprint The exemplar `table_view` with nested columns intact, whose structure defines
* the nesting of the reconstructed output table.
* @return std::unique_ptr<cudf::table> Unflattened table (with nested STRUCT columns) reconstructed
* based on `blueprint`.
*/
std::unique_ptr<cudf::table> unflatten_nested_columns(std::unique_ptr<cudf::table>&& flattened,
table_view const& blueprint);

/**
* @brief Push down nulls from a parent mask into a child column, using bitwise AND.
*
Expand Down
26 changes: 12 additions & 14 deletions cpp/include/cudf/detail/utilities/device_atomics.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -116,18 +116,17 @@ struct genericAtomicOperationImpl<T, Op, 4> {
using T_int = unsigned int;

T old_value = *addr;
T assumed{old_value};
T_int assumed;
T_int ret;

do {
assumed = old_value;
const T new_value = op(old_value, update_value);
T_int const new_value = type_reinterpret<T_int, T>(op(old_value, update_value));

T_int ret = atomicCAS(reinterpret_cast<T_int*>(addr),
type_reinterpret<T_int, T>(assumed),
type_reinterpret<T_int, T>(new_value));
assumed = type_reinterpret<T_int, T>(old_value);
ret = atomicCAS(reinterpret_cast<T_int*>(addr), assumed, new_value);
old_value = type_reinterpret<T, T_int>(ret);

} while (assumed != old_value);
} while (assumed != ret);

return old_value;
}
Expand All @@ -142,18 +141,17 @@ struct genericAtomicOperationImpl<T, Op, 8> {
static_assert(sizeof(T) == sizeof(T_int));

T old_value = *addr;
T assumed{old_value};
T_int assumed;
T_int ret;

do {
assumed = old_value;
const T new_value = op(old_value, update_value);
T_int const new_value = type_reinterpret<T_int, T>(op(old_value, update_value));

T_int ret = atomicCAS(reinterpret_cast<T_int*>(addr),
type_reinterpret<T_int, T>(assumed),
type_reinterpret<T_int, T>(new_value));
assumed = type_reinterpret<T_int, T>(old_value);
ret = atomicCAS(reinterpret_cast<T_int*>(addr), assumed, new_value);
old_value = type_reinterpret<T, T_int>(ret);

} while (assumed != old_value);
} while (assumed != ret);

return old_value;
}
Expand Down
13 changes: 7 additions & 6 deletions cpp/include/cudf/io/orc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <rmm/mr/device/per_device_resource.hpp>

#include <memory>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>
Expand Down Expand Up @@ -51,8 +52,8 @@ class orc_reader_options_builder;
class orc_reader_options {
source_info _source;

// Names of column to read; empty is all
std::vector<std::string> _columns;
// Names of the columns to read; `nullopt` means all columns are read
std::optional<std::vector<std::string>> _columns;

// List of individual stripes to read (ignored if empty)
std::vector<std::vector<size_type>> _stripes;
Expand Down Expand Up @@ -105,18 +106,18 @@ class orc_reader_options {
[[nodiscard]] source_info const& get_source() const { return _source; }

/**
* @brief Returns names of the columns to read.
* @brief Returns names of the columns to read, if set.
*
* @return Names of the columns to read
* @return Names of the columns to read; `nullopt` if the option is not set
*/
[[nodiscard]] std::vector<std::string> const& get_columns() const { return _columns; }
[[nodiscard]] auto const& get_columns() const { return _columns; }

/**
* @brief Returns vector of vectors, stripes to read for each input source
*
* @return Vector of vectors, stripes to read for each input source
*/
std::vector<std::vector<size_type>> const& get_stripes() const { return _stripes; }
[[nodiscard]] auto const& get_stripes() const { return _stripes; }

/**
* @brief Returns number of rows to skip from the start.
Expand Down
Loading

0 comments on commit a454d0e

Please sign in to comment.