diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4051ff822be..4f838ba3f45 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -60,6 +60,8 @@ repos: # of dependencies, so we'll have to update this manually. additional_dependencies: - cmakelang==0.6.13 + verbose: true + require_serial: true - id: cmake-lint name: cmake-lint entry: ./cpp/scripts/run-cmake-format.sh cmake-lint @@ -69,6 +71,8 @@ repos: # of dependencies, so we'll have to update this manually. additional_dependencies: - cmakelang==0.6.13 + verbose: true + require_serial: true - id: copyright-check name: copyright-check # This hook's use of Git tools appears to conflict with diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh index 5d03a518fcf..5593633640a 100755 --- a/ci/benchmark/build.sh +++ b/ci/benchmark/build.sh @@ -37,7 +37,7 @@ export GBENCH_BENCHMARKS_DIR="$WORKSPACE/cpp/build/gbenchmarks/" export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache" # Dask & Distributed option to install main(nightly) or `conda-forge` packages. -export INSTALL_DASK_MAIN=1 +export INSTALL_DASK_MAIN=0 function remove_libcudf_kernel_cache_dir { EXITCODE=$? @@ -82,8 +82,8 @@ if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then gpuci_logger "gpuci_mamba_retry update dask" gpuci_mamba_retry update dask else - gpuci_logger "gpuci_mamba_retry install conda-forge::dask>=2022.05.2 conda-forge::distributed>=2022.05.2 conda-forge::dask-core>=2022.05.2 --force-reinstall" - gpuci_mamba_retry install conda-forge::dask>=2022.05.2 conda-forge::distributed>=2022.05.2 conda-forge::dask-core>=2022.05.2 --force-reinstall + gpuci_logger "gpuci_mamba_retry install conda-forge::dask==2022.7.1 conda-forge::distributed==2022.7.1 conda-forge::dask-core==2022.7.1 --force-reinstall" + gpuci_mamba_retry install conda-forge::dask==2022.7.1 conda-forge::distributed==2022.7.1 conda-forge::dask-core==2022.7.1 --force-reinstall fi # Install the master version of streamz diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 7b26519aa7d..8f215d1bb54 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -32,7 +32,7 @@ export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` unset GIT_DESCRIBE_TAG # Dask & Distributed option to install main(nightly) or `conda-forge` packages. 
-export INSTALL_DASK_MAIN=1 +export INSTALL_DASK_MAIN=0 # ucx-py version export UCX_PY_VERSION='0.28.*' @@ -92,8 +92,8 @@ function install_dask { gpuci_mamba_retry update dask conda list else - gpuci_logger "gpuci_mamba_retry install conda-forge::dask>=2022.05.2 conda-forge::distributed>=2022.05.2 conda-forge::dask-core>=2022.05.2 --force-reinstall" - gpuci_mamba_retry install conda-forge::dask>=2022.05.2 conda-forge::distributed>=2022.05.2 conda-forge::dask-core>=2022.05.2 --force-reinstall + gpuci_logger "gpuci_mamba_retry install conda-forge::dask==2022.7.1 conda-forge::distributed==2022.7.1 conda-forge::dask-core==2022.7.1 --force-reinstall" + gpuci_mamba_retry install conda-forge::dask==2022.7.1 conda-forge::distributed==2022.7.1 conda-forge::dask-core==2022.7.1 --force-reinstall fi # Install the main version of streamz gpuci_logger "Install the main version of streamz" diff --git a/conda/environments/cudf_dev_cuda11.5.yml b/conda/environments/cudf_dev_cuda11.5.yml index 56531a7ae58..1e323182ffd 100644 --- a/conda/environments/cudf_dev_cuda11.5.yml +++ b/conda/environments/cudf_dev_cuda11.5.yml @@ -48,8 +48,8 @@ dependencies: - pydocstyle=6.1.1 - typing_extensions - pre-commit - - dask>=2022.05.2 - - distributed>=2022.05.2 + - dask==2022.7.1 + - distributed==2022.7.1 - streamz - arrow-cpp=8 - dlpack>=0.5,<0.6.0a0 diff --git a/conda/recipes/custreamz/meta.yaml b/conda/recipes/custreamz/meta.yaml index acf85426d09..118f084b436 100644 --- a/conda/recipes/custreamz/meta.yaml +++ b/conda/recipes/custreamz/meta.yaml @@ -29,8 +29,8 @@ requirements: - python - streamz - cudf ={{ version }} - - dask>=2022.05.2 - - distributed>=2022.05.2 + - dask==2022.7.1 + - distributed==2022.7.1 - python-confluent-kafka >=1.7.0,<1.8.0a0 - cudf_kafka ={{ version }} diff --git a/conda/recipes/dask-cudf/meta.yaml b/conda/recipes/dask-cudf/meta.yaml index 3d7e7895578..c9a179301b0 100644 --- a/conda/recipes/dask-cudf/meta.yaml +++ b/conda/recipes/dask-cudf/meta.yaml @@ -24,14 +24,14 @@ requirements: host: - python - cudf ={{ version }} - - dask>=2022.05.2 - - distributed>=2022.05.2 + - dask==2022.7.1 + - distributed==2022.7.1 - cudatoolkit ={{ cuda_version }} run: - python - cudf ={{ version }} - - dask>=2022.05.2 - - distributed>=2022.05.2 + - dask==2022.7.1 + - distributed==2022.7.1 - {{ pin_compatible('cudatoolkit', max_pin='x', min_pin='x') }} test: # [linux64] diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8e5e2a53692..2f96b6ce9ae 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -28,6 +28,12 @@ project( VERSION 22.10.00 LANGUAGES C CXX CUDA ) +if(CMAKE_CUDA_COMPILER_ID STREQUAL "NVIDIA" AND CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.5) + message( + FATAL_ERROR + "libcudf requires CUDA Toolkit 11.5+ to compile (nvcc ${CMAKE_CUDA_COMPILER_VERSION} provided)" + ) +endif() # Needed because GoogleBenchmark changes the state of FindThreads.cmake, causing subsequent runs to # have different values for the `Threads::Threads` target. 
Setting this flag ensures
diff --git a/cpp/cmake/thirdparty/get_arrow.cmake b/cpp/cmake/thirdparty/get_arrow.cmake
index 116c5442dc3..e0f9a711776 100644
--- a/cpp/cmake/thirdparty/get_arrow.cmake
+++ b/cpp/cmake/thirdparty/get_arrow.cmake
@@ -273,7 +273,12 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB
 endfunction()
 
-set(CUDF_VERSION_Arrow 8.0.0)
+if(NOT DEFINED CUDF_VERSION_Arrow)
+  set(CUDF_VERSION_Arrow
+      8.0.0
+      CACHE STRING "The version of Arrow to find (or build)"
+  )
+endif()
 
 find_and_configure_arrow(
   ${CUDF_VERSION_Arrow} ${CUDF_USE_ARROW_STATIC} ${CUDF_ENABLE_ARROW_S3} ${CUDF_ENABLE_ARROW_ORC}
diff --git a/cpp/doxygen/Doxyfile b/cpp/doxygen/Doxyfile
index e6b12948d85..5f43f5af0e4 100644
--- a/cpp/doxygen/Doxyfile
+++ b/cpp/doxygen/Doxyfile
@@ -1146,7 +1146,7 @@ HTML_FILE_EXTENSION    = .html
 # of the possible markers and block names see the documentation.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-HTML_HEADER            =
+HTML_HEADER            = header.html
 
 # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each
 # generated HTML page. If the tag is left blank doxygen will generate a standard
@@ -1156,7 +1156,7 @@ HTML_HEADER            =
 # that doxygen normally uses.
 # This tag requires that the tag GENERATE_HTML is set to YES.
 
-HTML_FOOTER            = footer.html
+HTML_FOOTER            =
 
 # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style
 # sheet that is used by each HTML page. It can be used to fine-tune the look of
diff --git a/cpp/doxygen/footer.html b/cpp/doxygen/footer.html
deleted file mode 100644
index 9bd79eeb539..00000000000
--- a/cpp/doxygen/footer.html
+++ /dev/null
@@ -1,4 +0,0 @@
-[4-line custom HTML footer; its markup is not preserved in this excerpt]
diff --git a/cpp/doxygen/header.html b/cpp/doxygen/header.html
new file mode 100644
index 00000000000..569b8450e3a
--- /dev/null
+++ b/cpp/doxygen/header.html
@@ -0,0 +1,61 @@
+[61-line custom Doxygen HTML header template: a title block ($projectname: $title),
+ the $treeview/$search/$mathjax script hooks and $extrastylesheet, and a banner table
+ containing $projectname, $projectnumber, $projectbrief, and $searchbox; the HTML
+ markup itself is not preserved in this excerpt]
diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp
index ac3824dfc21..c5f6d339ae9 100644
--- a/cpp/include/cudf/column/column.hpp
+++ b/cpp/include/cudf/column/column.hpp
@@ -23,6 +23,7 @@
 #include
 #include
+#include <rmm/device_uvector.hpp>
 #include
 #include
 
@@ -75,6 +76,33 @@ class column {
    */
   column(column&& other) noexcept;
 
+  /**
+   * @brief Construct a new column by taking ownership of the contents of a device_uvector.
+   *
+   * @param other The device_uvector whose contents will be moved into the new column.
+   * @param null_mask Optional, column's null value indicator bitmask. May
+   * be empty if `null_count` is 0 or `UNKNOWN_NULL_COUNT`.
+   * @param null_count Optional, the count of null elements. If unknown, specify
+   * `UNKNOWN_NULL_COUNT` to indicate that the null count should be computed on
+   * the first invocation of `null_count()`.
+   */
+  template <typename T, CUDF_ENABLE_IF(cudf::is_numeric<T>() or cudf::is_chrono<T>())>
+  column(rmm::device_uvector<T>&& other,
+         rmm::device_buffer&& null_mask = {},
+         size_type null_count           = UNKNOWN_NULL_COUNT)
+    : _type{cudf::data_type{cudf::type_to_id<T>()}},
+      _size{[&]() {
+        CUDF_EXPECTS(
+          other.size() <= static_cast<std::size_t>(std::numeric_limits<size_type>::max()),
+          "The device_uvector size exceeds the maximum size_type.");
+        return static_cast<size_type>(other.size());
+      }()},
+      _data{other.release()},
+      _null_mask{std::move(null_mask)},
+      _null_count{null_count}
+  {
+  }
+
   /**
    * @brief Construct a new column from existing device memory.
    *
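The constructor above lets device buffers produced by RMM or Thrust algorithms be promoted into a cudf::column without a device-side copy. A minimal sketch of the intended usage (the fill step and the stream argument are illustrative assumptions, not part of this diff):

// Sketch: build a cudf::column directly from an rmm::device_uvector<int32_t>;
// ownership of the allocation moves into the column, so no copy is made.
#include <cudf/column/column.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <memory>
#include <utility>

std::unique_ptr<cudf::column> make_column(rmm::cuda_stream_view stream)
{
  rmm::device_uvector<int32_t> data(100, stream);
  // ... fill `data` on the device (e.g. with thrust::sequence) ...
  return std::make_unique<cudf::column>(std::move(data));
}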
diff --git a/cpp/include/cudf/detail/structs/utilities.hpp b/cpp/include/cudf/detail/structs/utilities.hpp
index 7d8ac5c9325..1a4b8f02dd3 100644
--- a/cpp/include/cudf/detail/structs/utilities.hpp
+++ b/cpp/include/cudf/detail/structs/utilities.hpp
@@ -151,35 +151,6 @@ flattened_table flatten_nested_columns(
   std::vector<null_order> const& null_precedence,
   column_nullability nullability = column_nullability::MATCH_INCOMING);
 
-/**
- * @brief Unflatten columns flattened as by `flatten_nested_columns()`,
- * based on the provided `blueprint`.
- *
- * cudf::flatten_nested_columns() executes depth first, and serializes the struct null vector
- * before the child/member columns.
- * E.g. STRUCT_1< STRUCT_2< A, B >, C > is flattened to:
- * 1. Null Vector for STRUCT_1
- * 2. Null Vector for STRUCT_2
- * 3. Member STRUCT_2::A
- * 4. Member STRUCT_2::B
- * 5. Member STRUCT_1::C
- *
- * `unflatten_nested_columns()` reconstructs nested columns from flattened input that follows
- * the convention above.
- *
- * Note: This function requires a null-mask vector for each STRUCT column, including for nested
- * STRUCT members.
- *
- * @param flattened "Flattened" `table` of input columns, following the conventions in
- * `flatten_nested_columns()`.
- * @param blueprint The exemplar `table_view` with nested columns intact, whose structure defines
- * the nesting of the reconstructed output table.
- * @return std::unique_ptr<cudf::table> Unflattened table (with nested STRUCT columns) reconstructed
- * based on `blueprint`.
- */
-std::unique_ptr<cudf::table> unflatten_nested_columns(std::unique_ptr<cudf::table>&& flattened,
-                                                      table_view const& blueprint);
-
 /**
  * @brief Push down nulls from a parent mask into a child column, using bitwise AND.
  *
diff --git a/cpp/include/cudf/detail/utilities/device_atomics.cuh b/cpp/include/cudf/detail/utilities/device_atomics.cuh
index f985135064f..0521418d2d3 100644
--- a/cpp/include/cudf/detail/utilities/device_atomics.cuh
+++ b/cpp/include/cudf/detail/utilities/device_atomics.cuh
@@ -116,18 +116,17 @@ struct genericAtomicOperationImpl<T, Op, 4> {
     using T_int = unsigned int;
 
     T old_value = *addr;
-    T assumed{old_value};
+    T_int assumed;
+    T_int ret;
 
     do {
-      assumed           = old_value;
-      const T new_value = op(old_value, update_value);
+      T_int const new_value = type_reinterpret<T_int, T>(op(old_value, update_value));
 
-      T_int ret = atomicCAS(reinterpret_cast<T_int*>(addr),
-                            type_reinterpret<T_int, T>(assumed),
-                            type_reinterpret<T_int, T>(new_value));
+      assumed   = type_reinterpret<T_int, T>(old_value);
+      ret       = atomicCAS(reinterpret_cast<T_int*>(addr), assumed, new_value);
       old_value = type_reinterpret<T, T_int>(ret);
-    } while (assumed != old_value);
+    } while (assumed != ret);
 
     return old_value;
   }
@@ -142,18 +141,17 @@ struct genericAtomicOperationImpl<T, Op, 8> {
     static_assert(sizeof(T) == sizeof(T_int));
 
     T old_value = *addr;
-    T assumed{old_value};
+    T_int assumed;
+    T_int ret;
 
     do {
-      assumed           = old_value;
-      const T new_value = op(old_value, update_value);
+      T_int const new_value = type_reinterpret<T_int, T>(op(old_value, update_value));
 
-      T_int ret = atomicCAS(reinterpret_cast<T_int*>(addr),
-                            type_reinterpret<T_int, T>(assumed),
-                            type_reinterpret<T_int, T>(new_value));
+      assumed   = type_reinterpret<T_int, T>(old_value);
+      ret       = atomicCAS(reinterpret_cast<T_int*>(addr), assumed, new_value);
       old_value = type_reinterpret<T, T_int>(ret);
-    } while (assumed != old_value);
+    } while (assumed != ret);
 
     return old_value;
   }
diff --git a/cpp/include/cudf/io/orc.hpp b/cpp/include/cudf/io/orc.hpp
index 30acf80548b..7f3cb95e4b2 100644
--- a/cpp/include/cudf/io/orc.hpp
+++ b/cpp/include/cudf/io/orc.hpp
@@ -24,6 +24,7 @@
 #include
 #include
+#include <optional>
 #include
 #include
 #include
@@ -51,8 +52,8 @@ class orc_reader_options_builder;
 
 class orc_reader_options {
   source_info _source;
 
-  // Names of column to read; empty is all
-  std::vector<std::string> _columns;
+  // Names of column to read; `nullopt` is all
+  std::optional<std::vector<std::string>> _columns;
 
   // List of individual stripes to read (ignored if empty)
   std::vector<std::vector<size_type>> _stripes;
@@ -105,18 +106,18 @@
   [[nodiscard]] source_info const& get_source() const { return _source; }
 
   /**
-   * @brief Returns names of the columns to read.
+   * @brief Returns names of the columns to read, if set.
    *
-   * @return Names of the columns to read
+   * @return Names of the columns to read; `nullopt` if the option is not set
    */
-  [[nodiscard]] std::vector<std::string> const& get_columns() const { return _columns; }
+  [[nodiscard]] auto const& get_columns() const { return _columns; }
 
   /**
    * @brief Returns vector of vectors, stripes to read for each input source
    *
    * @return Vector of vectors, stripes to read for each input source
    */
-  std::vector<std::vector<size_type>> const& get_stripes() const { return _stripes; }
+  [[nodiscard]] auto const& get_stripes() const { return _stripes; }
 
   /**
    * @brief Returns number of rows to skip from the start.
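Because `_columns` is now a `std::optional`, the ORC reader can distinguish "no selection was made" from "an explicitly empty selection"; the new `OrcReaderTest.EmptyColumnsParam` test further down depends on exactly this. A minimal sketch of the two call patterns (the file name is a hypothetical placeholder):

#include <cudf/io/orc.hpp>

void orc_column_selection()
{
  auto const src = cudf::io::source_info{"example.orc"};  // hypothetical input

  // No .columns() call: _columns stays nullopt, so every column is read.
  auto all = cudf::io::read_orc(cudf::io::orc_reader_options::builder(src).build());

  // An explicit empty list is now an empty selection: zero columns are read.
  auto none =
    cudf::io::read_orc(cudf::io::orc_reader_options::builder(src).columns({}).build());
}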
diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp
index 10368f84824..19156e01c1e 100644
--- a/cpp/include/cudf/io/parquet.hpp
+++ b/cpp/include/cudf/io/parquet.hpp
@@ -51,7 +51,7 @@ class parquet_reader_options_builder;
 
 class parquet_reader_options {
   source_info _source;
 
-  // Path in schema of column to read; empty is all
+  // Path in schema of column to read; `nullopt` is all
   std::optional<std::vector<std::string>> _columns;
 
   // List of individual row groups to read (ignored if empty)
@@ -152,17 +152,14 @@ class parquet_reader_options {
    *
    * @return Names of column to be read; `nullopt` if the option is not set
    */
-  [[nodiscard]] std::optional<std::vector<std::string>> const& get_columns() const
-  {
-    return _columns;
-  }
+  [[nodiscard]] auto const& get_columns() const { return _columns; }
 
   /**
    * @brief Returns list of individual row groups to be read.
    *
    * @return List of individual row groups to be read
    */
-  std::vector<std::vector<size_type>> const& get_row_groups() const { return _row_groups; }
+  [[nodiscard]] auto const& get_row_groups() const { return _row_groups; }
 
   /**
    * @brief Returns timestamp type used to cast timestamp columns.
diff --git a/cpp/include/cudf/io/types.hpp b/cpp/include/cudf/io/types.hpp
index 6504e790677..c31176ab51c 100644
--- a/cpp/include/cudf/io/types.hpp
+++ b/cpp/include/cudf/io/types.hpp
@@ -23,11 +23,11 @@
 #include
 
-#include <thrust/optional.h>
-
 #include
 #include
+#include <optional>
 #include
+#include
 #include
 
 // Forward declarations
@@ -383,12 +383,12 @@ class table_input_metadata;
 
 class column_in_metadata {
   friend table_input_metadata;
   std::string _name = "";
-  thrust::optional<bool> _nullable;
+  std::optional<bool> _nullable;
   bool _list_column_is_map  = false;
   bool _use_int96_timestamp = false;
   bool _output_as_binary    = false;
-  thrust::optional<uint8_t> _decimal_precision;
-  thrust::optional<int32_t> _parquet_field_id;
+  std::optional<uint8_t> _decimal_precision;
+  std::optional<int32_t> _parquet_field_id;
   std::vector<column_in_metadata> children;
 
  public:
diff --git a/cpp/include/cudf/utilities/traits.hpp b/cpp/include/cudf/utilities/traits.hpp
index af66eb32618..573d0c81380 100644
--- a/cpp/include/cudf/utilities/traits.hpp
+++ b/cpp/include/cudf/utilities/traits.hpp
@@ -373,6 +373,19 @@ constexpr inline bool is_floating_point(data_type type)
   return cudf::type_dispatcher(type, is_floating_point_impl{});
 }
 
+/**
+ * @brief Indicates whether `T` is a std::byte type.
+ *
+ * @tparam T The type to verify
+ * @return true `type` is std::byte
+ * @return false `type` is not std::byte
+ */
+template <typename T>
+constexpr inline bool is_byte()
+{
+  return std::is_same_v<std::remove_cv_t<T>, std::byte>;
+}
+
 /**
  * @brief Indicates whether `T` is a Boolean type.
  *
@@ -561,7 +574,8 @@ constexpr inline bool is_chrono(data_type type)
 template <typename T>
 constexpr bool is_rep_layout_compatible()
 {
-  return cudf::is_numeric<T>() or cudf::is_chrono<T>() or cudf::is_boolean<T>();
+  return cudf::is_numeric<T>() or cudf::is_chrono<T>() or cudf::is_boolean<T>() or
+         cudf::is_byte<T>();
 }
 
 /**
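The new trait slots std::byte into the fixed-width representation layout alongside numeric, chrono, and Boolean types, which is what allows the byte_array_view element-type change further down. A few compile-time checks restating the definitions above (a sketch, not part of this diff):

#include <cudf/utilities/traits.hpp>

#include <cstddef>
#include <cstdint>

static_assert(cudf::is_byte<std::byte>());
static_assert(cudf::is_byte<std::byte const>());  // cv-qualifiers are stripped first
static_assert(not cudf::is_byte<uint8_t>());      // uint8_t is numeric, not std::byte
static_assert(cudf::is_rep_layout_compatible<std::byte>());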
diff --git a/cpp/scripts/run-cmake-format.sh b/cpp/scripts/run-cmake-format.sh
index 9c981c6cdaa..b9157c76492 100755
--- a/cpp/scripts/run-cmake-format.sh
+++ b/cpp/scripts/run-cmake-format.sh
@@ -1,5 +1,7 @@
 #!/bin/bash
 
+# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+
 # This script is a wrapper for cmakelang that may be used with pre-commit. The
 # wrapping is necessary because RAPIDS libraries split configuration for
 # cmakelang linters between a local config file and a second config file that's
@@ -69,5 +71,14 @@ fi
 if [[ $1 == "cmake-format" ]]; then
   cmake-format -i --config-files cpp/cmake/config.json ${RAPIDS_CMAKE_FORMAT_FILE} -- ${@:2}
 elif [[ $1 == "cmake-lint" ]]; then
-  cmake-lint --config-files cpp/cmake/config.json ${RAPIDS_CMAKE_FORMAT_FILE} -- ${@:2}
+  # Since the pre-commit hook is verbose, we have to be careful to only
+  # present cmake-lint's output (which is quite verbose) if we actually
+  # observe a failure.
+  OUTPUT=$(cmake-lint --config-files cpp/cmake/config.json ${RAPIDS_CMAKE_FORMAT_FILE} -- ${@:2})
+  status=$?
+
+  if ! [ ${status} -eq 0 ]; then
+    echo "${OUTPUT}"
+  fi
+  exit ${status}
 fi
diff --git a/cpp/src/io/orc/aggregate_orc_metadata.cpp b/cpp/src/io/orc/aggregate_orc_metadata.cpp
index 82765c60c1e..df3dfca5fa9 100644
--- a/cpp/src/io/orc/aggregate_orc_metadata.cpp
+++ b/cpp/src/io/orc/aggregate_orc_metadata.cpp
@@ -18,6 +18,7 @@
 #include
 #include
+#include <optional>
 
 namespace cudf::io::orc::detail {
 
@@ -249,17 +250,17 @@ std::vector<metadata::stripe_source_mapping> aggregate_orc_metadata::select_stripes(
 }
 
 column_hierarchy aggregate_orc_metadata::select_columns(
-  std::vector<std::string> const& column_paths)
+  std::optional<std::vector<std::string>> const& column_paths)
 {
   auto const& pfm = per_file_metadata[0];
   column_hierarchy::nesting_map selected_columns;
 
-  if (column_paths.empty()) {
+  if (not column_paths.has_value()) {
     for (auto const& col_id : pfm.ff.types[0].subtypes) {
       add_column_to_mapping(selected_columns, pfm, col_id);
     }
   } else {
-    for (const auto& path : column_paths) {
+    for (const auto& path : column_paths.value()) {
       bool name_found = false;
       for (auto col_id = 1; col_id < pfm.get_num_columns(); ++col_id) {
         if (pfm.column_path(col_id) == path) {
diff --git a/cpp/src/io/orc/aggregate_orc_metadata.hpp b/cpp/src/io/orc/aggregate_orc_metadata.hpp
index 9d2380c0097..3ce1a922f31 100644
--- a/cpp/src/io/orc/aggregate_orc_metadata.hpp
+++ b/cpp/src/io/orc/aggregate_orc_metadata.hpp
@@ -17,6 +17,7 @@
 #include "orc.hpp"
 
 #include
+#include <optional>
 #include
 
 namespace cudf::io::orc::detail {
 
@@ -126,10 +127,11 @@ class aggregate_orc_metadata {
    * Paths are in format "grandparent_col.parent_col.child_col", where the root ORC column is
    * omitted to match the cuDF table hierarchy.
    *
-   * @param column_paths List of full column names (i.e. paths) to select from the ORC file
+   * @param column_paths List of full column names (i.e. paths) to select from the ORC file;
+   * `nullopt` if user did not select columns to read
    * @return Columns hierarchy - lists of children columns and sorted columns in each nesting level
    */
-  column_hierarchy select_columns(std::vector<std::string> const& column_paths);
+  column_hierarchy select_columns(std::optional<std::vector<std::string>> const& column_paths);
 };
 
 }  // namespace cudf::io::orc::detail
diff --git a/cpp/src/io/parquet/compact_protocol_reader.hpp b/cpp/src/io/parquet/compact_protocol_reader.hpp
index ff278f63366..74565b2f244 100644
--- a/cpp/src/io/parquet/compact_protocol_reader.hpp
+++ b/cpp/src/io/parquet/compact_protocol_reader.hpp
@@ -18,10 +18,9 @@
 
 #include "parquet.hpp"
 
-#include <thrust/optional.h>
-
 #include
 #include
+#include <optional>
 #include
 #include
 
@@ -264,10 +263,10 @@ class ParquetFieldInt32 {
  */
 class ParquetFieldOptionalInt32 {
   int field_val;
-  thrust::optional<int32_t>& val;
+  std::optional<int32_t>& val;
 
  public:
-  ParquetFieldOptionalInt32(int f, thrust::optional<int32_t>& v) : field_val(f), val(v) {}
+  ParquetFieldOptionalInt32(int f, std::optional<int32_t>& v) : field_val(f), val(v) {}
 
   inline bool operator()(CompactProtocolReader* cpr, int field_type)
   {
diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp
index b03ba23737e..a03fdf27953 100644
--- a/cpp/src/io/parquet/parquet.hpp
+++ b/cpp/src/io/parquet/parquet.hpp
@@ -18,9 +18,8 @@
 
 #include "parquet_common.hpp"
 
-#include <thrust/optional.h>
-
 #include
+#include <optional>
 #include
 #include
 
@@ -147,7 +146,7 @@ struct SchemaElement {
   int32_t num_children      = 0;
   int32_t decimal_scale     = 0;
   int32_t decimal_precision = 0;
-  thrust::optional<int32_t> field_id = thrust::nullopt;
+  std::optional<int32_t> field_id = std::nullopt;
   bool output_as_byte_array = false;
 
   // The following fields are filled in later during schema initialization
diff --git a/cpp/src/io/statistics/byte_array_view.cuh b/cpp/src/io/statistics/byte_array_view.cuh
index 315e753a732..c1958780321 100644
--- a/cpp/src/io/statistics/byte_array_view.cuh
+++ b/cpp/src/io/statistics/byte_array_view.cuh
@@ -28,7 +28,7 @@ namespace cudf::io::statistics {
  */
 class byte_array_view {
  public:
-  using element_type = uint8_t const;  ///< The type of the elements in the byte array
+  using element_type = std::byte const;  ///< The type of the elements in the byte array
 
   constexpr byte_array_view() noexcept {}
   /**
diff --git a/cpp/src/io/statistics/statistics.cuh b/cpp/src/io/statistics/statistics.cuh
index ab6674e4328..f2611f7cc26 100644
--- a/cpp/src/io/statistics/statistics.cuh
+++ b/cpp/src/io/statistics/statistics.cuh
@@ -85,7 +85,8 @@ struct t_array_stats {
   __host__ __device__ __forceinline__ operator ReturnType() { return ReturnType(ptr, length); }
 };
 using string_stats = t_array_stats;
-using byte_array_stats = t_array_stats;
+using byte_array_view  = statistics::byte_array_view;
+using byte_array_stats = t_array_stats;
 
 union statistics_val {
   string_stats str_val;  //!< string columns
@@ -129,10 +130,10 @@ template <typename T>
   auto const* d_offsets = col.child(lists_column_view::offsets_column_index).data<offset_type>();
   auto const* d_data = col.child(lists_column_view::child_column_index).data();
-  offset_type offset = d_offsets[index];
+  auto const offset = d_offsets[index];
   return T(d_data + offset, d_offsets[index + 1] - offset);
 }
diff --git a/cpp/src/strings/regex/regcomp.cpp b/cpp/src/strings/regex/regcomp.cpp
index 50d641c9a74..bc6bdd9dc7b 100644
--- a/cpp/src/strings/regex/regcomp.cpp
+++ b/cpp/src/strings/regex/regcomp.cpp
@@ -60,7 +60,7 @@ static reclass cclass_S(NCCLASS_S);  // \S
 static reclass cclass_D(NCCLASS_D);  // \D
 
 // Tables for analyzing quantifiers
-const std::array<int, 6> valid_preceding_inst_types{{CHAR, CCLASS, NCCLASS, ANY, ANYNL, RBRA}};
+const std::array<int, 5> valid_preceding_inst_types{{CHAR, CCLASS, NCCLASS, ANY, ANYNL}};
 const std::array<char, 5> quantifiers{{'*', '?', '+', '{', '|'}};
 // Valid regex characters that can be escaped and used as literals
 const std::array escapable_chars{
@@ -459,16 +459,42 @@ class regex_parser {
     }
 
     // The quantifiers require at least one "real" previous item.
-    // We are throwing an error in these two if-checks for invalid quantifiers.
+    // We are throwing errors for invalid quantifiers.
     // Another option is to just return CHAR silently here which effectively
     // treats the chr character as a literal instead as a quantifier.
     // This could lead to confusion where sometimes unescaped quantifier characters
     // are treated as regex expressions and sometimes they are not.
     if (_items.empty()) { CUDF_FAIL("invalid regex pattern: nothing to repeat at position 0"); }
 
+    // Check that the previous item can be used with quantifiers.
+    // If the previous item is a capture group, we need to check items inside the
+    // capture group can be used with quantifiers too.
+    // (Note that capture groups can be nested).
+    auto previous_type = _items.back().type;
+    if (previous_type == RBRA) {  // previous item is a capture group
+      // look for matching LBRA
+      auto nested_count = 1;
+      auto lbra_itr =
+        std::find_if(_items.rbegin(), _items.rend(), [nested_count](auto const& item) mutable {
+          auto const is_closing = (item.type == RBRA);
+          auto const is_opening = (item.type == LBRA || item.type == LBRA_NC);
+          nested_count += is_closing - is_opening;
+          return is_opening && (nested_count == 0);
+        });
+      // search for the first valid item within the LBRA-RBRA range
+      auto first_valid = std::find_first_of(
+        _items.rbegin() + 1,
+        lbra_itr,
+        valid_preceding_inst_types.begin(),
+        valid_preceding_inst_types.end(),
+        [](auto const item, auto const valid_type) { return item.type == valid_type; });
+      // set previous_type to be checked in next if-statement
+      previous_type = (first_valid == lbra_itr) ? (--lbra_itr)->type : first_valid->type;
+    }
+
     if (std::find(valid_preceding_inst_types.begin(),
                   valid_preceding_inst_types.end(),
-                  _items.back().type) == valid_preceding_inst_types.end()) {
+                  previous_type) == valid_preceding_inst_types.end()) {
       CUDF_FAIL("invalid regex pattern: nothing to repeat at position " +
                 std::to_string(_expr_ptr - _pattern_begin - 1));
     }
diff --git a/cpp/src/structs/utilities.cpp b/cpp/src/structs/utilities.cpp
index 1d5ebfaa7fc..bf4216b6983 100644
--- a/cpp/src/structs/utilities.cpp
+++ b/cpp/src/structs/utilities.cpp
@@ -209,98 +209,6 @@ flattened_table flatten_nested_columns(table_view const& input,
   return table_flattener{input, column_order, null_precedence, nullability}();
 }
 
-namespace {
-using vector_of_columns = std::vector<std::unique_ptr<cudf::column>>;
-using column_index_t    = typename vector_of_columns::size_type;
-
-// Forward declaration, to enable recursion via `unflattener`.
-std::unique_ptr<cudf::column> unflatten_struct(vector_of_columns& flattened,
-                                               column_index_t& current_index,
-                                               cudf::column_view const& blueprint);
-
-/**
- * @brief Helper functor to reconstruct STRUCT columns from its flattened member columns.
- *
- */
-class unflattener {
- public:
-  unflattener(vector_of_columns& flattened_, column_index_t& current_index_)
-    : flattened{flattened_}, current_index{current_index_}
-  {
-  }
-
-  auto operator()(column_view const& blueprint)
-  {
-    return is_struct(blueprint) ? unflatten_struct(flattened, current_index, blueprint)
-                                : std::move(flattened[current_index++]);
-  }
-
- private:
-  vector_of_columns& flattened;
-  column_index_t& current_index;
-
-};  // class unflattener;
-
-std::unique_ptr<cudf::column> unflatten_struct(vector_of_columns& flattened,
-                                               column_index_t& current_index,
-                                               cudf::column_view const& blueprint)
-{
-  // "Consume" columns from `flattened`, starting at `current_index`,
-  // based on the provided `blueprint` struct col. Recurse for struct children.
-  CUDF_EXPECTS(blueprint.type().id() == type_id::STRUCT,
-               "Expected blueprint column to be a STRUCT column.");
-
-  CUDF_EXPECTS(current_index < flattened.size(), "STRUCT column can't have 0 children.");
-
-  auto const num_rows = flattened[current_index]->size();
-
-  // cudf::flatten_nested_columns() executes depth first, and serializes the struct null vector
-  // before the child/member columns.
-  // E.g. STRUCT_1< STRUCT_2< A, B >, C > is flattened to:
-  // 1. Null Vector for STRUCT_1
-  // 2. Null Vector for STRUCT_2
-  // 3. Member STRUCT_2::A
-  // 4. Member STRUCT_2::B
-  // 5. Member STRUCT_1::C
-  //
-  // Extract null-vector *before* child columns are constructed.
-  auto struct_null_column_contents = flattened[current_index++]->release();
-  auto unflattening_iter =
-    thrust::make_transform_iterator(blueprint.child_begin(), unflattener{flattened, current_index});
-
-  return cudf::make_structs_column(
-    num_rows,
-    vector_of_columns{unflattening_iter, unflattening_iter + blueprint.num_children()},
-    UNKNOWN_NULL_COUNT,  // Do count?
-    std::move(*struct_null_column_contents.null_mask));
-}
-}  // namespace
-
-std::unique_ptr<cudf::table> unflatten_nested_columns(std::unique_ptr<cudf::table>&& flattened,
-                                                      table_view const& blueprint)
-{
-  // Bail, if LISTs are present.
-  auto const has_lists = std::any_of(blueprint.begin(), blueprint.end(), is_or_has_nested_lists);
-  CUDF_EXPECTS(not has_lists, "Unflattening LIST columns is not supported.");
-
-  // If there are no STRUCTs, unflattening is a NOOP.
-  auto const has_structs = std::any_of(blueprint.begin(), blueprint.end(), is_struct);
-  if (not has_structs) {
-    return std::move(flattened);  // Unchanged.
-  }
-
-  // There be struct columns.
-  // Note: Requires null vectors for all struct input columns.
-  auto flattened_columns = flattened->release();
-  auto current_idx       = column_index_t{0};
-
-  auto unflattening_iter =
-    thrust::make_transform_iterator(blueprint.begin(), unflattener{flattened_columns, current_idx});
-
-  return std::make_unique<cudf::table>(
-    vector_of_columns{unflattening_iter, unflattening_iter + blueprint.num_columns()});
-}
-
 // Helper function to superimpose validity of parent struct
 // over the specified member (child) column.
 void superimpose_parent_nulls(bitmask_type const* parent_null_mask,
diff --git a/cpp/tests/column/column_test.cu b/cpp/tests/column/column_test.cu
index 6fcabbcf823..801cee285b6 100644
--- a/cpp/tests/column/column_test.cu
+++ b/cpp/tests/column/column_test.cu
@@ -345,6 +345,42 @@ TYPED_TEST(TypedColumnTest, MoveConstructorWithMask)
   EXPECT_EQ(original_mask, moved_to_view.null_mask());
 }
 
+TYPED_TEST(TypedColumnTest, DeviceUvectorConstructorNoMask)
+{
+  rmm::device_uvector<TypeParam> original{static_cast<std::size_t>(this->num_elements()),
+                                          cudf::default_stream_value};
+  thrust::copy(thrust::device,
+               static_cast<TypeParam*>(this->data.data()),
+               static_cast<TypeParam*>(this->data.data()) + this->num_elements(),
+               original.begin());
+  auto original_data = original.data();
+  cudf::column moved_to{std::move(original)};
+  verify_column_views(moved_to);
+
+  // Verify move
+  cudf::column_view moved_to_view = moved_to;
+  EXPECT_EQ(original_data, moved_to_view.head());
+}
+
+TYPED_TEST(TypedColumnTest, DeviceUvectorConstructorWithMask)
+{
+  rmm::device_uvector<TypeParam> original{static_cast<std::size_t>(this->num_elements()),
+                                          cudf::default_stream_value};
+  thrust::copy(thrust::device,
+               static_cast<TypeParam*>(this->data.data()),
+               static_cast<TypeParam*>(this->data.data()) + this->num_elements(),
+               original.begin());
+  auto original_data = original.data();
+  auto original_mask = this->all_valid_mask.data();
+  cudf::column moved_to{std::move(original), std::move(this->all_valid_mask)};
+  verify_column_views(moved_to);
+
+  // Verify move
+  cudf::column_view moved_to_view = moved_to;
+  EXPECT_EQ(original_data, moved_to_view.head());
+  EXPECT_EQ(original_mask, moved_to_view.null_mask());
+}
+
 TYPED_TEST(TypedColumnTest, ConstructWithChildren)
 {
   std::vector<std::unique_ptr<cudf::column>> children;
diff --git a/cpp/tests/groupby/max_tests.cpp b/cpp/tests/groupby/max_tests.cpp
index 0b7e0d13c24..1d2c8c489f3 100644
--- a/cpp/tests/groupby/max_tests.cpp
+++ b/cpp/tests/groupby/max_tests.cpp
@@ -459,5 +459,30 @@ TYPED_TEST(groupby_max_floating_point_test, values_with_infinity)
     keys, vals, expected_keys, expected_vals, std::move(agg), force_use_sort_impl::YES);
 }
 
+TYPED_TEST(groupby_max_floating_point_test, values_with_nan)
+{
+  using T          = TypeParam;
+  using int32s_col = fixed_width_column_wrapper<int32_t>;
+  using floats_col = fixed_width_column_wrapper<T>;
+
+  auto constexpr nan = std::numeric_limits<T>::quiet_NaN();
+
+  auto const keys = int32s_col{1, 1};
+  auto const vals = floats_col{nan, nan};
+
+  std::vector<groupby::aggregation_request> requests;
+  requests.emplace_back(groupby::aggregation_request());
+  requests[0].values = vals;
+  requests[0].aggregations.emplace_back(cudf::make_max_aggregation<groupby_aggregation>());
+
+  // Without properly handling NaN, this will hang forever in hash-based aggregate (which is the
+  // default back-end for min/max in groupby context).
+  // This test is just to verify that the aggregate operation does not hang.
+  auto gb_obj       = groupby::groupby(table_view({keys}));
+  auto const result = gb_obj.aggregate(requests);
+
+  EXPECT_EQ(result.first->num_rows(), 1);
+}
+
 }  // namespace test
 }  // namespace cudf
diff --git a/cpp/tests/groupby/min_tests.cpp b/cpp/tests/groupby/min_tests.cpp
index a12ec7c8739..9606c8c55ee 100644
--- a/cpp/tests/groupby/min_tests.cpp
+++ b/cpp/tests/groupby/min_tests.cpp
@@ -458,5 +458,30 @@ TYPED_TEST(groupby_min_floating_point_test, values_with_infinity)
     keys, vals, expected_keys, expected_vals, std::move(agg), force_use_sort_impl::YES);
 }
 
+TYPED_TEST(groupby_min_floating_point_test, values_with_nan)
+{
+  using T          = TypeParam;
+  using int32s_col = fixed_width_column_wrapper<int32_t>;
+  using floats_col = fixed_width_column_wrapper<T>;
+
+  auto constexpr nan = std::numeric_limits<T>::quiet_NaN();
+
+  auto const keys = int32s_col{1, 1};
+  auto const vals = floats_col{nan, nan};
+
+  std::vector<groupby::aggregation_request> requests;
+  requests.emplace_back(groupby::aggregation_request());
+  requests[0].values = vals;
+  requests[0].aggregations.emplace_back(cudf::make_min_aggregation<groupby_aggregation>());
+
+  // Without properly handling NaN, this will hang forever in hash-based aggregate (which is the
+  // default back-end for min/max in groupby context).
+  // This test is just to verify that the aggregate operation does not hang.
+  auto gb_obj       = groupby::groupby(table_view({keys}));
+  auto const result = gb_obj.aggregate(requests);
+
+  EXPECT_EQ(result.first->num_rows(), 1);
+}
+
 }  // namespace test
 }  // namespace cudf
diff --git a/cpp/tests/io/json_test.cpp b/cpp/tests/io/json_test.cpp
index b6a6270ca8b..c8aefece94f 100644
--- a/cpp/tests/io/json_test.cpp
+++ b/cpp/tests/io/json_test.cpp
@@ -915,4 +915,13 @@ TEST_F(JsonReaderTest, BadDtypeParams)
   EXPECT_THROW(cudf_io::read_json(options_map), cudf::logic_error);
 }
 
+TEST_F(JsonReaderTest, ExperimentalParam)
+{
+  cudf_io::json_reader_options const options =
+    cudf_io::json_reader_options::builder(cudf_io::source_info{nullptr, 0}).experimental(true);
+
+  // should throw for now
+  EXPECT_THROW(cudf_io::read_json(options), cudf::logic_error);
+}
+
 CUDF_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp
index b3df2c8a8dd..76ffc92e243 100644
--- a/cpp/tests/io/orc_test.cpp
+++ b/cpp/tests/io/orc_test.cpp
@@ -1514,4 +1514,23 @@ TEST_F(OrcWriterTest, DecimalOptionsNested)
                                  result.tbl->view().column(0).child(1).child(0).child(1));
 }
 
+TEST_F(OrcReaderTest, EmptyColumnsParam)
+{
+  srand(31337);
+  auto const expected = create_random_fixed_table<int>(2, 4, false);
+
+  std::vector<char> out_buffer;
+  cudf_io::orc_writer_options args =
+    cudf_io::orc_writer_options::builder(cudf_io::sink_info{&out_buffer}, *expected);
+  cudf_io::write_orc(args);
+
+  cudf_io::orc_reader_options read_opts =
+    cudf_io::orc_reader_options::builder(cudf_io::source_info{out_buffer.data(), out_buffer.size()})
+      .columns({});
+  auto const result = cudf_io::read_orc(read_opts);
+
+  EXPECT_EQ(result.tbl->num_columns(), 0);
+  EXPECT_EQ(result.tbl->num_rows(), 0);
+}
+
 CUDF_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/strings/contains_tests.cpp b/cpp/tests/strings/contains_tests.cpp
index 70f28aa139d..d725f3d5dd0 100644
--- a/cpp/tests/strings/contains_tests.cpp
+++ b/cpp/tests/strings/contains_tests.cpp
@@ -424,6 +424,25 @@ TEST_F(StringsContainsTests, FixedQuantifier)
   }
 }
 
+TEST_F(StringsContainsTests, QuantifierErrors)
+{
+  auto input = cudf::test::strings_column_wrapper({"a", "aa", "aaa", "aaaa", "aaaaa", "aaaaaa"});
+  auto sv    = cudf::strings_column_view(input);
+
+  EXPECT_THROW(cudf::strings::contains_re(sv, "^+"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::count_re(sv, "$+"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::count_re(sv, "(^)+"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::contains_re(sv, "($)+"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::count_re(sv, "\\A+"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::count_re(sv, "\\Z+"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::contains_re(sv, "(\\A)+"), cudf::logic_error);
+  EXPECT_THROW(cudf::strings::contains_re(sv, "(\\Z)+"), cudf::logic_error);
+
+  EXPECT_THROW(cudf::strings::contains_re(sv, "(^($))+"), cudf::logic_error);
+  EXPECT_NO_THROW(cudf::strings::contains_re(sv, "(^a($))+"));
+  EXPECT_NO_THROW(cudf::strings::count_re(sv, "(^(a$))+"));
+}
+
 TEST_F(StringsContainsTests, OverlappedClasses)
 {
   auto input = cudf::test::strings_column_wrapper({"abcdefg", "defghí", "", "éééééé", "ghijkl"});
diff --git a/cpp/tests/structs/utilities_tests.cpp b/cpp/tests/structs/utilities_tests.cpp
index b26ea87c5b8..d58568cd1b5 100644
--- a/cpp/tests/structs/utilities_tests.cpp
+++ b/cpp/tests/structs/utilities_tests.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -30,26 +30,13 @@
 
 namespace cudf::test {
 
-/**
- * @brief Round-trip input table through flatten/unflatten,
- * verify that the table remains equivalent.
- */
-void flatten_unflatten_compare(table_view const& input_table)
-{
-  using namespace cudf::structs::detail;
-
-  auto flattened = flatten_nested_columns(input_table, {}, {}, column_nullability::FORCE);
-  auto unflattened =
-    unflatten_nested_columns(std::make_unique<cudf::table>(flattened), input_table);
-
-  CUDF_TEST_EXPECT_TABLES_EQUIVALENT(input_table, unflattened->view());
-}
-
 using namespace cudf;
 using namespace iterators;
+using namespace cudf::structs::detail;
 using strings    = strings_column_wrapper;
 using dictionary = dictionary_column_wrapper;
 using structs    = structs_column_wrapper;
+using bools      = fixed_width_column_wrapper<bool>;
 
 template <typename T>
 using nums = fixed_width_column_wrapper<T, int32_t>;
@@ -66,7 +53,7 @@ struct TypedStructUtilitiesTest : StructUtilitiesTest {
 
 TYPED_TEST_SUITE(TypedStructUtilitiesTest, FixedWidthTypes);
 
-TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevelUnsupported)
+TYPED_TEST(TypedStructUtilitiesTest, ListsAtTopLevel)
 {
   using T     = TypeParam;
   using lists = lists_column_wrapper<T, int32_t>;
 
   auto lists_col = lists{{0, 1}, {22, 33}, {44, 55, 66}};
   auto nums_col  = nums<T>{{0, 1, 2}, null_at(6)};
 
-  EXPECT_THROW(flatten_unflatten_compare(cudf::table_view{{lists_col, nums_col}}),
-               cudf::logic_error);
+  auto table = cudf::table_view{{lists_col, nums_col}};
+
+  CUDF_TEST_EXPECT_TABLES_EQUAL(table,
+                                flatten_nested_columns(table, {}, {}, column_nullability::FORCE));
 }
 
 TYPED_TEST(TypedStructUtilitiesTest, NestedListsUnsupported)
 {
   using T     = TypeParam;
   using lists = lists_column_wrapper<T, int32_t>;
 
   auto lists_member = lists{{0, 1}, {22, 33}, {44, 55, 66}};
   auto nums_member  = nums<T>{{0, 1, 2}, null_at(6)};
   auto structs_col  = structs{{nums_member, lists_member}};
+  auto nums_col     = nums<T>{{0, 1, 2}, null_at(6)};
 
-  auto nums_col = nums<T>{{0, 1, 2}, null_at(6)};
-
-  EXPECT_THROW(flatten_unflatten_compare(cudf::table_view{{nums_col, structs_col}}),
+  EXPECT_THROW(flatten_nested_columns(
+                 cudf::table_view{{nums_col, structs_col}}, {}, {},
column_nullability::FORCE), cudf::logic_error); } @@ -104,7 +93,10 @@ TYPED_TEST(TypedStructUtilitiesTest, NoStructs) auto strings_col = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; auto nuther_nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; - flatten_unflatten_compare(cudf::table_view{{nums_col, strings_col, nuther_nums_col}}); + auto table = cudf::table_view{{nums_col, strings_col, nuther_nums_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(table, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct) @@ -116,8 +108,19 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStruct) auto strings_member = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; auto structs_col = structs{{nums_member, strings_member}}; auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; - - flatten_unflatten_compare(cudf::table_view{{nums_col, structs_col}}); + auto table = cudf::table_view{{nums_col, structs_col}}; + + auto expected_nums_col_1 = cudf::column(nums_col); + auto expected_structs_col = bools{{1, 1, 1, 1, 1, 1, 1}}; + auto expected_nums_col_2 = + cudf::column(static_cast(structs_col).get_sliced_child(0)); + auto expected_strings_col = + cudf::column(static_cast(structs_col).get_sliced_child(1)); + auto expected = cudf::table_view{ + {expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls) @@ -129,8 +132,19 @@ TYPED_TEST(TypedStructUtilitiesTest, SingleLevelStructWithNulls) auto strings_member = strings{{"", "1", "22", "333", "4444", "55555", "666666"}, null_at(1)}; auto structs_col = structs{{nums_member, strings_member}, null_at(2)}; auto nums_col = nums{{0, 1, 2, 3, 4, 5, 6}, null_at(6)}; - - flatten_unflatten_compare(cudf::table_view{{nums_col, structs_col}}); + auto table = cudf::table_view{{nums_col, structs_col}}; + + auto expected_nums_col_1 = cudf::column(nums_col); + auto expected_structs_col = bools{{1, 1, 0, 1, 1, 1, 1}, null_at(2)}; + auto expected_nums_col_2 = + cudf::column(static_cast(structs_col).get_sliced_child(0)); + auto expected_strings_col = + cudf::column(static_cast(structs_col).get_sliced_child(1)); + auto expected = cudf::table_view{ + {expected_nums_col_1, expected_structs_col, expected_nums_col_2, expected_strings_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct) @@ -147,8 +161,26 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStruct) auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}}; - - flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}}); + auto table = cudf::table_view{{nums_col, struct_of_structs_col}}; + + auto expected_nums_col_1 = cudf::column(nums_col); + auto expected_structs_col_1 = bools{{1, 1, 1, 1, 1, 1, 1}}; + auto expected_nums_col_2 = + cudf::column(static_cast(struct_of_structs_col).get_sliced_child(0)); + auto expected_structs_col_2 = bools{{1, 1, 1, 1, 1, 1, 1}}; + auto expected_nums_col_3 = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(0)); + auto expected_strings_col = cudf::column( + 
static_cast(struct_of_structs_col).get_sliced_child(1).child(1)); + auto expected = cudf::table_view{{expected_nums_col_1, + expected_structs_col_1, + expected_nums_col_2, + expected_structs_col_2, + expected_nums_col_3, + expected_strings_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel) @@ -166,8 +198,26 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtLeafLevel) auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}}; - - flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}}); + auto table = cudf::table_view{{nums_col, struct_of_structs_col}}; + + auto expected_nums_col_1 = cudf::column(nums_col); + auto expected_structs_col_1 = bools{{1, 1, 1, 1, 1, 1, 1}}; + auto expected_nums_col_2 = + cudf::column(static_cast(struct_of_structs_col).get_sliced_child(0)); + auto expected_structs_col_2 = bools{{1, 1, 0, 1, 1, 1, 1}, null_at(2)}; + auto expected_nums_col_3 = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(0)); + auto expected_strings_col = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(1)); + auto expected = cudf::table_view{{expected_nums_col_1, + expected_structs_col_1, + expected_nums_col_2, + expected_structs_col_2, + expected_nums_col_3, + expected_strings_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel) @@ -185,8 +235,26 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtTopLevel) auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}, null_at(4)}; - - flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}}); + auto table = cudf::table_view{{nums_col, struct_of_structs_col}}; + + auto expected_nums_col_1 = cudf::column(nums_col); + auto expected_structs_col_1 = bools{{1, 1, 1, 1, 0, 1, 1}, null_at(4)}; + auto expected_nums_col_2 = + cudf::column(static_cast(struct_of_structs_col).get_sliced_child(0)); + auto expected_structs_col_2 = bools{{1, 1, 1, 1, 0, 1, 1}, null_at(4)}; + auto expected_nums_col_3 = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(0)); + auto expected_strings_col = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(1)); + auto expected = cudf::table_view{{expected_nums_col_1, + expected_structs_col_1, + expected_nums_col_2, + expected_structs_col_2, + expected_nums_col_3, + expected_strings_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels) @@ -205,8 +273,26 @@ TYPED_TEST(TypedStructUtilitiesTest, StructOfStructWithNullsAtAllLevels) auto struct_1_nums_member = nums{{0, 1, 22, 33, 44, 55, 66}, null_at(3)}; auto struct_of_structs_col = structs{{struct_1_nums_member, structs_1_structs_member}, null_at(4)}; - - flatten_unflatten_compare(cudf::table_view{{nums_col, struct_of_structs_col}}); + auto table = cudf::table_view{{nums_col, struct_of_structs_col}}; + + auto expected_nums_col_1 = cudf::column(nums_col); + auto expected_structs_col_1 = 
bools{{1, 1, 1, 1, 0, 1, 1}, null_at(4)}; + auto expected_nums_col_2 = + cudf::column(static_cast(struct_of_structs_col).get_sliced_child(0)); + auto expected_structs_col_2 = bools{{1, 1, 0, 1, 0, 1, 1}, {1, 1, 0, 1, 0, 1, 1}}; + auto expected_nums_col_3 = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(0)); + auto expected_strings_col = cudf::column( + static_cast(struct_of_structs_col).get_sliced_child(1).child(1)); + auto expected = cudf::table_view{{expected_nums_col_1, + expected_structs_col_1, + expected_nums_col_2, + expected_structs_col_2, + expected_nums_col_3, + expected_strings_col}}; + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, + flatten_nested_columns(table, {}, {}, column_nullability::FORCE)); } TYPED_TEST(TypedStructUtilitiesTest, ListsAreUnsupported) @@ -222,7 +308,8 @@ TYPED_TEST(TypedStructUtilitiesTest, ListsAreUnsupported) auto structs_with_lists_col = structs{lists_member, ints_member}; - EXPECT_THROW(flatten_unflatten_compare(cudf::table_view{{structs_with_lists_col}}), + EXPECT_THROW(flatten_nested_columns( + cudf::table_view{{structs_with_lists_col}}, {}, {}, column_nullability::FORCE), cudf::logic_error); } diff --git a/java/src/main/java/ai/rapids/cudf/ParquetOptions.java b/java/src/main/java/ai/rapids/cudf/ParquetOptions.java index dd771cab7ea..1ae1b91b962 100644 --- a/java/src/main/java/ai/rapids/cudf/ParquetOptions.java +++ b/java/src/main/java/ai/rapids/cudf/ParquetOptions.java @@ -18,6 +18,10 @@ package ai.rapids.cudf; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; + /** * Options for reading a parquet file */ @@ -26,24 +30,32 @@ public class ParquetOptions extends ColumnFilterOptions { public static ParquetOptions DEFAULT = new ParquetOptions(new Builder()); private final DType unit; - - + private final boolean[] readBinaryAsString; private ParquetOptions(Builder builder) { super(builder); unit = builder.unit; + readBinaryAsString = new boolean[builder.binaryAsStringColumns.size()]; + for (int i = 0 ; i < builder.binaryAsStringColumns.size() ; i++) { + readBinaryAsString[i] = builder.binaryAsStringColumns.get(i); + } } DType timeUnit() { return unit; } - public static Builder builder() { + boolean[] getReadBinaryAsString() { + return readBinaryAsString; + } + + public static ParquetOptions.Builder builder() { return new Builder(); } public static class Builder extends ColumnFilterOptions.Builder { private DType unit = DType.EMPTY; + final List binaryAsStringColumns = new ArrayList<>(); /** * Specify the time unit to use when returning timestamps. @@ -56,6 +68,43 @@ public Builder withTimeUnit(DType unit) { return this; } + /** + * Include one or more specific columns. Any column not included will not be read. + * @param names the name of the column, or more than one if you want. + */ + @Override + public Builder includeColumn(String... names) { + super.includeColumn(names); + for (int i = 0 ; i < names.length ; i++) { + binaryAsStringColumns.add(true); + } + return this; + } + + /** + * Include this column. + * @param name the name of the column + * @param isBinary whether this column is to be read in as binary + */ + public Builder includeColumn(String name, boolean isBinary) { + includeColumnNames.add(name); + binaryAsStringColumns.add(!isBinary); + return this; + } + + /** + * Include one or more specific columns. Any column not included will not be read. + * @param names the name of the column, or more than one if you want. 
+ */ + @Override + public Builder includeColumn(Collection names) { + super.includeColumn(names); + for (int i = 0 ; i < names.size() ; i++) { + binaryAsStringColumns.add(true); + } + return this; + } + public ParquetOptions build() { return new ParquetOptions(this); } diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index c8f842fcc63..e5194b8b7eb 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -239,16 +239,20 @@ private static native long[] readJSON(String[] columnNames, String filePath, long address, long length, boolean dayFirst, boolean lines) throws CudfException; + private static native long readAndInferJSON(long address, long length, + boolean dayFirst, boolean lines) throws CudfException; + /** * Read in Parquet formatted data. * @param filterColumnNames name of the columns to read, or an empty array if we want to read * all of them + * @param binaryToString whether to convert this column to String if binary * @param filePath the path of the file to read, or null if no path should be read. * @param address the address of the buffer to read from or 0 if we should not. * @param length the length of the buffer to read from. * @param timeUnit return type of TimeStamp in units */ - private static native long[] readParquet(String[] filterColumnNames, String filePath, + private static native long[] readParquet(String[] filterColumnNames, boolean[] binaryToString, String filePath, long address, long length, int timeUnit) throws CudfException; /** @@ -918,6 +922,26 @@ public static Table readJSON(Schema schema, JSONOptions opts, byte[] buffer, lon } } + /** + * Read JSON formatted data and infer the column names and schema. + * @param opts various JSON parsing options. + * @param buffer raw UTF8 formatted bytes. + * @param offset the starting offset into buffer. + * @param len the number of bytes to parse. + * @return the data parsed as a table on the GPU and the metadata for the table returned. + */ + public static TableWithMeta readJSON(JSONOptions opts, HostMemoryBuffer buffer, + long offset, long len) { + if (len <= 0) { + len = buffer.length - offset; + } + assert len > 0; + assert len <= buffer.length - offset; + assert offset >= 0 && offset < buffer.length; + return new TableWithMeta(readAndInferJSON(buffer.getAddress() + offset, len, + opts.isDayFirst(), opts.isLines())); + } + /** * Read JSON formatted data. * @param schema the schema of the data. You may use Schema.INFERRED to infer the schema. @@ -956,7 +980,7 @@ public static Table readParquet(File path) { * @return the file parsed as a table on the GPU. 
*/ public static Table readParquet(ParquetOptions opts, File path) { - return new Table(readParquet(opts.getIncludeColumnNames(), + return new Table(readParquet(opts.getIncludeColumnNames(), opts.getReadBinaryAsString(), path.getAbsolutePath(), 0, 0, opts.timeUnit().typeId.getNativeId())); } @@ -1016,7 +1040,7 @@ public static Table readParquet(ParquetOptions opts, HostMemoryBuffer buffer, assert len > 0; assert len <= buffer.getLength() - offset; assert offset >= 0 && offset < buffer.length; - return new Table(readParquet(opts.getIncludeColumnNames(), + return new Table(readParquet(opts.getIncludeColumnNames(), opts.getReadBinaryAsString(), null, buffer.getAddress() + offset, len, opts.timeUnit().typeId.getNativeId())); } diff --git a/java/src/main/java/ai/rapids/cudf/TableWithMeta.java b/java/src/main/java/ai/rapids/cudf/TableWithMeta.java new file mode 100644 index 00000000000..9baa127d39d --- /dev/null +++ b/java/src/main/java/ai/rapids/cudf/TableWithMeta.java @@ -0,0 +1,67 @@ +/* + * + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + + +package ai.rapids.cudf; + +/** + * A table along with some metadata about the table. This is typically returned when + * reading data from an input file where the metadata can be important. + */ +public class TableWithMeta implements AutoCloseable { + private long handle; + + TableWithMeta(long handle) { + this.handle = handle; + } + + /** + * Get the table out of this metadata. Note that this can only be called once. Later calls + * will return a null. + */ + public Table releaseTable() { + long[] ptr = releaseTable(handle); + if (ptr == null) { + return null; + } else { + return new Table(ptr); + } + } + + /** + * Get the names of the top level columns. In the future new APIs can be added to get + * names of child columns. 
+ */ + public String[] getColumnNames() { + return getColumnNames(handle); + } + + @Override + public void close() throws Exception { + if (handle != 0) { + close(handle); + handle = 0; + } + } + + private static native void close(long handle); + + private static native long[] releaseTable(long handle); + + private static native String[] getColumnNames(long handle); +} diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index d511512431b..44c08aec110 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -1314,6 +1314,77 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readCSV( CATCH_STD(env, NULL); } +JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readAndInferJSON( + JNIEnv *env, jclass, jlong buffer, jlong buffer_length, jboolean day_first, jboolean lines) { + + JNI_NULL_CHECK(env, buffer, "buffer cannot be null", 0); + if (buffer_length <= 0) { + JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "An empty buffer is not supported", 0); + } + + try { + cudf::jni::auto_set_device(env); + + auto source = cudf::io::source_info{reinterpret_cast(buffer), + static_cast(buffer_length)}; + + cudf::io::json_reader_options_builder opts = cudf::io::json_reader_options::builder(source) + .dayfirst(static_cast(day_first)) + .lines(static_cast(lines)); + + auto result = + std::make_unique(cudf::io::read_json(opts.build())); + + return reinterpret_cast(result.release()); + } + CATCH_STD(env, 0); +} + +JNIEXPORT void JNICALL Java_ai_rapids_cudf_TableWithMeta_close(JNIEnv *env, jclass, jlong handle) { + JNI_NULL_CHECK(env, handle, "handle is null", ); + + try { + cudf::jni::auto_set_device(env); + delete reinterpret_cast(handle); + } + CATCH_STD(env, ); +} + +JNIEXPORT jobjectArray JNICALL Java_ai_rapids_cudf_TableWithMeta_getColumnNames(JNIEnv *env, jclass, + jlong handle) { + JNI_NULL_CHECK(env, handle, "handle is null", nullptr); + + try { + cudf::jni::auto_set_device(env); + auto ptr = reinterpret_cast(handle); + auto length = ptr->metadata.column_names.size(); + auto ret = static_cast( + env->NewObjectArray(length, env->FindClass("java/lang/String"), nullptr)); + for (size_t i = 0; i < length; i++) { + env->SetObjectArrayElement(ret, i, env->NewStringUTF(ptr->metadata.column_names[i].c_str())); + } + + return ret; + } + CATCH_STD(env, nullptr); +} + +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_TableWithMeta_releaseTable(JNIEnv *env, jclass, + jlong handle) { + JNI_NULL_CHECK(env, handle, "handle is null", nullptr); + + try { + cudf::jni::auto_set_device(env); + auto ptr = reinterpret_cast(handle); + if (ptr->tbl) { + return convert_table_for_return(env, ptr->tbl); + } else { + return nullptr; + } + } + CATCH_STD(env, nullptr); +} + JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readJSON( JNIEnv *env, jclass, jobjectArray col_names, jintArray j_types, jintArray j_scales, jstring inputfilepath, jlong buffer, jlong buffer_length, jboolean day_first, jboolean lines) { @@ -1428,11 +1499,11 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readJSON( CATCH_STD(env, NULL); } -JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readParquet(JNIEnv *env, jclass, - jobjectArray filter_col_names, - jstring inputfilepath, - jlong buffer, - jlong buffer_length, jint unit) { +JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readParquet( + JNIEnv *env, jclass, jobjectArray filter_col_names, jbooleanArray j_col_binary_read, + jstring inputfilepath, jlong buffer, jlong buffer_length, jint 
diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp
index d511512431b..44c08aec110 100644
--- a/java/src/main/native/src/TableJni.cpp
+++ b/java/src/main/native/src/TableJni.cpp
@@ -1314,6 +1314,77 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readCSV(
   CATCH_STD(env, NULL);
 }
 
+JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readAndInferJSON(
+    JNIEnv *env, jclass, jlong buffer, jlong buffer_length, jboolean day_first, jboolean lines) {
+
+  JNI_NULL_CHECK(env, buffer, "buffer cannot be null", 0);
+  if (buffer_length <= 0) {
+    JNI_THROW_NEW(env, "java/lang/IllegalArgumentException", "An empty buffer is not supported", 0);
+  }
+
+  try {
+    cudf::jni::auto_set_device(env);
+
+    auto source = cudf::io::source_info{reinterpret_cast<char *>(buffer),
+                                        static_cast<std::size_t>(buffer_length)};
+
+    cudf::io::json_reader_options_builder opts = cudf::io::json_reader_options::builder(source)
+                                                     .dayfirst(static_cast<bool>(day_first))
+                                                     .lines(static_cast<bool>(lines));
+
+    auto result =
+        std::make_unique<cudf::io::table_with_metadata>(cudf::io::read_json(opts.build()));
+
+    return reinterpret_cast<jlong>(result.release());
+  }
+  CATCH_STD(env, 0);
+}
+
+JNIEXPORT void JNICALL Java_ai_rapids_cudf_TableWithMeta_close(JNIEnv *env, jclass, jlong handle) {
+  JNI_NULL_CHECK(env, handle, "handle is null", );
+
+  try {
+    cudf::jni::auto_set_device(env);
+    delete reinterpret_cast<cudf::io::table_with_metadata *>(handle);
+  }
+  CATCH_STD(env, );
+}
+
+JNIEXPORT jobjectArray JNICALL Java_ai_rapids_cudf_TableWithMeta_getColumnNames(JNIEnv *env, jclass,
+                                                                                jlong handle) {
+  JNI_NULL_CHECK(env, handle, "handle is null", nullptr);
+
+  try {
+    cudf::jni::auto_set_device(env);
+    auto ptr = reinterpret_cast<cudf::io::table_with_metadata *>(handle);
+    auto length = ptr->metadata.column_names.size();
+    auto ret = static_cast<jobjectArray>(
+        env->NewObjectArray(length, env->FindClass("java/lang/String"), nullptr));
+    for (size_t i = 0; i < length; i++) {
+      env->SetObjectArrayElement(ret, i, env->NewStringUTF(ptr->metadata.column_names[i].c_str()));
+    }
+
+    return ret;
+  }
+  CATCH_STD(env, nullptr);
+}
+
+JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_TableWithMeta_releaseTable(JNIEnv *env, jclass,
+                                                                            jlong handle) {
+  JNI_NULL_CHECK(env, handle, "handle is null", nullptr);
+
+  try {
+    cudf::jni::auto_set_device(env);
+    auto ptr = reinterpret_cast<cudf::io::table_with_metadata *>(handle);
+    if (ptr->tbl) {
+      return convert_table_for_return(env, ptr->tbl);
+    } else {
+      return nullptr;
+    }
+  }
+  CATCH_STD(env, nullptr);
+}
+
 JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readJSON(
     JNIEnv *env, jclass, jobjectArray col_names, jintArray j_types, jintArray j_scales,
     jstring inputfilepath, jlong buffer, jlong buffer_length, jboolean day_first, jboolean lines) {
@@ -1428,11 +1499,11 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readJSON(
   CATCH_STD(env, NULL);
 }
 
-JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readParquet(JNIEnv *env, jclass,
-                                                                   jobjectArray filter_col_names,
-                                                                   jstring inputfilepath,
-                                                                   jlong buffer,
-                                                                   jlong buffer_length, jint unit) {
+JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readParquet(
+    JNIEnv *env, jclass, jobjectArray filter_col_names, jbooleanArray j_col_binary_read,
+    jstring inputfilepath, jlong buffer, jlong buffer_length, jint unit) {
+
+  JNI_NULL_CHECK(env, j_col_binary_read, "null col_binary_read", 0);
   bool read_buffer = true;
   if (buffer == 0) {
     JNI_NULL_CHECK(env, inputfilepath, "input file or buffer must be supplied", NULL);
@@ -1454,6 +1525,7 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readParquet(JNIEnv *env,
   }
 
   cudf::jni::native_jstringArray n_filter_col_names(env, filter_col_names);
+  cudf::jni::native_jbooleanArray n_col_binary_read(env, j_col_binary_read);
 
   auto source = read_buffer ? cudf::io::source_info(reinterpret_cast<char *>(buffer),
                                                     static_cast<std::size_t>(buffer_length)) :
@@ -1461,7 +1533,8 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readParquet(JNIEnv *env,
 
   auto builder = cudf::io::parquet_reader_options::builder(source);
   if (n_filter_col_names.size() > 0) {
-    builder = builder.columns(n_filter_col_names.as_cpp_vector());
+    builder = builder.columns(n_filter_col_names.as_cpp_vector())
+                  .convert_binary_to_strings(n_col_binary_read.to_vector());
   }
 
   cudf::io::parquet_reader_options opts =
@@ -1678,10 +1751,13 @@ JNIEXPORT jlongArray JNICALL Java_ai_rapids_cudf_Table_readORC(
                       cudf::io::source_info(reinterpret_cast<char *>(buffer), buffer_length) :
                       cudf::io::source_info(filename.get());
 
+    auto builder = cudf::io::orc_reader_options::builder(source);
+    if (n_filter_col_names.size() > 0) {
+      builder = builder.columns(n_filter_col_names.as_cpp_vector());
+    }
+
     cudf::io::orc_reader_options opts =
-        cudf::io::orc_reader_options::builder(source)
-            .columns(n_filter_col_names.as_cpp_vector())
-            .use_index(false)
+        builder.use_index(false)
             .use_np_dtypes(static_cast<bool>(usingNumPyTypes))
             .timestamp_type(cudf::data_type(static_cast<cudf::type_id>(unit)))
             .decimal128_columns(n_dec128_col_names.as_cpp_vector())
diff --git a/java/src/test/java/ai/rapids/cudf/TableTest.java b/java/src/test/java/ai/rapids/cudf/TableTest.java
index 7ef47d6a7cc..c7e6fecea26 100644
--- a/java/src/test/java/ai/rapids/cudf/TableTest.java
+++ b/java/src/test/java/ai/rapids/cudf/TableTest.java
@@ -78,6 +78,7 @@ public class TableTest extends CudfTestBase {
   private static final File TEST_PARQUET_FILE = TestUtils.getResourceAsFile("acq.parquet");
+  private static final File TEST_PARQUET_FILE_BINARY = TestUtils.getResourceAsFile("binary.parquet");
   private static final File TEST_ORC_FILE = TestUtils.getResourceAsFile("TestOrcFile.orc");
   private static final File TEST_ORC_TIMESTAMP_DATE_FILE = TestUtils.getResourceAsFile("timestamp-date-test.orc");
   private static final File TEST_DECIMAL_PARQUET_FILE = TestUtils.getResourceAsFile("decimal.parquet");
@@ -566,6 +567,19 @@ void testReadParquet() {
     }
   }
 
+  @Test
+  void testReadParquetBinary() {
+    ParquetOptions opts = ParquetOptions.builder()
+        .includeColumn("value1", true)
+        .includeColumn("value2", false)
+        .build();
+    try (Table table = Table.readParquet(opts, TEST_PARQUET_FILE_BINARY)) {
+      assertTableTypes(new DType[]{DType.LIST, DType.STRING}, table);
+      ColumnView columnView = table.getColumn(0);
+      assertEquals(DType.INT8, columnView.getChildColumnView(0).getType());
+    }
+  }
+
   @Test
   void testReadParquetBuffer() throws IOException {
     ParquetOptions opts = ParquetOptions.builder()
diff --git a/java/src/test/resources/binary.parquet b/java/src/test/resources/binary.parquet
new file mode 100644
index 00000000000..b72be9f36cc
Binary files /dev/null and b/java/src/test/resources/binary.parquet differ
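One behavioral note on the ORC hunk above: `columns(...)` is now applied only when the filter list is non-empty, so a read without any column filter keeps every column instead of selecting none. A minimal sketch, assuming default options and a placeholder file name:

```java
import ai.rapids.cudf.ORCOptions;
import ai.rapids.cudf.Table;

import java.io.File;

public class OrcReadAllColumnsSketch {
  public static void main(String[] args) {
    // No column filter: with the change above, the builder skips columns(...)
    // entirely and the reader returns all columns in the file.
    try (Table table = Table.readORC(ORCOptions.DEFAULT, new File("example.orc"))) {
      System.out.println(table.getNumberOfColumns() + " columns read");
    }
  }
}
```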
diff --git a/python/cudf/cudf/_lib/cpp/io/json.pxd b/python/cudf/cudf/_lib/cpp/io/json.pxd
index 2c65e329bb0..bc9d87a5cbf 100644
--- a/python/cudf/cudf/_lib/cpp/io/json.pxd
+++ b/python/cudf/cudf/_lib/cpp/io/json.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2022, NVIDIA CORPORATION.
 
 from libc.stdint cimport uint8_t
 from libcpp cimport bool
@@ -24,6 +24,7 @@ cdef extern from "cudf/io/json.hpp" \
         size_type get_byte_range_size() except+
         bool is_enabled_lines() except+
         bool is_enabled_dayfirst() except+
+        bool is_enabled_experimental() except+
 
         # setter
         void set_dtypes(vector[data_type] types) except+
@@ -35,6 +36,7 @@ cdef extern from "cudf/io/json.hpp" \
         void set_byte_range_size(size_type size) except+
         void enable_lines(bool val) except+
         void enable_dayfirst(bool val) except+
+        void enable_experimental(bool val) except+
 
         @staticmethod
         json_reader_options_builder builder(
@@ -70,6 +72,9 @@ cdef extern from "cudf/io/json.hpp" \
         json_reader_options_builder& dayfirst(
             bool val
         ) except+
+        json_reader_options_builder& experimental(
+            bool val
+        ) except+
 
         json_reader_options build() except+
diff --git a/python/cudf/cudf/_lib/cpp/io/orc.pxd b/python/cudf/cudf/_lib/cpp/io/orc.pxd
index 62ff5eb4f53..3e44ef98348 100644
--- a/python/cudf/cudf/_lib/cpp/io/orc.pxd
+++ b/python/cudf/cudf/_lib/cpp/io/orc.pxd
@@ -19,7 +19,6 @@ cdef extern from "cudf/io/orc.hpp" \
         orc_reader_options() except+
 
         cudf_io_types.source_info get_source() except+
-        vector[string] get_columns() except+
         vector[vector[size_type]] get_stripes() except+
         size_type get_skip_rows() except+
         size_type get_num_rows() except+
diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx
index 9c820a56104..0ee6062e7f2 100644
--- a/python/cudf/cudf/_lib/json.pyx
+++ b/python/cudf/cudf/_lib/json.pyx
@@ -31,7 +31,8 @@ cpdef read_json(object filepaths_or_buffers,
                 object dtype,
                 bool lines,
                 object compression,
-                object byte_range):
+                object byte_range,
+                bool experimental):
     """
     Cython function to call into libcudf API, see `read_json`.
@@ -98,6 +99,7 @@ cpdef read_json(object filepaths_or_buffers,
             .lines(c_lines)
             .byte_range_offset(c_range_offset)
             .byte_range_size(c_range_size)
+            .experimental(experimental)
             .build()
         )
     if is_list_like_dtypes:
diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx
index 4d1090d8434..11c70317a39 100644
--- a/python/cudf/cudf/_lib/orc.pyx
+++ b/python/cudf/cudf/_lib/orc.pyx
@@ -103,7 +103,7 @@ cpdef read_orc(object filepaths_or_buffers,
     """
     cdef orc_reader_options c_orc_reader_options = make_orc_reader_options(
         filepaths_or_buffers,
-        columns or [],
+        columns,
         stripes or [],
         get_size_t_arg(skip_rows, "skip_rows"),
         get_size_t_arg(num_rows, "num_rows"),
@@ -325,16 +325,11 @@ cdef orc_reader_options make_orc_reader_options(
     for i, datasource in enumerate(filepaths_or_buffers):
         if isinstance(datasource, NativeFile):
             filepaths_or_buffers[i] = NativeFileDatasource(datasource)
-    cdef vector[string] c_column_names
     cdef vector[vector[size_type]] strps = stripes
-    c_column_names.reserve(len(column_names))
-    for col in column_names:
-        c_column_names.push_back(str(col).encode())
     cdef orc_reader_options opts
     cdef source_info src = make_source_info(filepaths_or_buffers)
     opts = move(
         orc_reader_options.builder(src)
-        .columns(c_column_names)
         .stripes(strps)
         .skip_rows(skip_rows)
         .num_rows(num_rows)
@@ -343,6 +338,13 @@ cdef orc_reader_options make_orc_reader_options(
         .build()
     )
 
+    cdef vector[string] c_column_names
+    if column_names is not None:
+        c_column_names.reserve(len(column_names))
+        for col in column_names:
+            c_column_names.push_back(str(col).encode())
+        opts.set_columns(c_column_names)
+
     return opts
diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx
index 264b1fb507b..c25360b307d 100644
--- a/python/cudf/cudf/_lib/parquet.pyx
+++ b/python/cudf/cudf/_lib/parquet.pyx
@@ -177,9 +177,8 @@ cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None,
     allow_range_index = True
     if columns is not None:
         cpp_columns.reserve(len(columns))
-        if len(cpp_columns) == 0:
-            allow_range_index = False
-        for col in columns or []:
+        allow_range_index = False
+        for col in columns:
             cpp_columns.push_back(str(col).encode())
         args.set_columns(cpp_columns)
diff --git a/python/cudf/cudf/io/json.py b/python/cudf/cudf/io/json.py
index 85f024e2420..e1e8e7cdb3d 100644
--- a/python/cudf/cudf/io/json.py
+++ b/python/cudf/cudf/io/json.py
@@ -30,7 +30,7 @@ def read_json(
         raise ValueError("cudf engine only supports JSON Lines format")
     if engine == "auto":
         engine = "cudf" if lines else "pandas"
-    if engine == "cudf":
+    if engine == "cudf" or engine == "cudf_experimental":
         # Multiple sources are passed as a list. If a single source is passed,
         # wrap it in a list for unified processing downstream.
         if not is_list_like(path_or_buf):
@@ -58,7 +58,12 @@ def read_json(
                 filepaths_or_buffers.append(tmp_source)
 
         df = libjson.read_json(
-            filepaths_or_buffers, dtype, lines, compression, byte_range
+            filepaths_or_buffers,
+            dtype,
+            lines,
+            compression,
+            byte_range,
+            engine == "cudf_experimental",
         )
     else:
         warnings.warn(
diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py
index 358687d36c3..236fd619b8e 100644
--- a/python/cudf/cudf/tests/test_index.py
+++ b/python/cudf/cudf/tests/test_index.py
@@ -2674,7 +2674,7 @@ def test_rangeindex_join_user_option(default_integer_bitwidth):
     idx1 = cudf.RangeIndex(0, 10)
     idx2 = cudf.RangeIndex(5, 15)
 
-    actual = idx1.join(idx2, how="inner")
+    actual = idx1.join(idx2, how="inner", sort=True)
     expected = cudf.Index(
         [5, 6, 7, 8, 9], dtype=f"int{default_integer_bitwidth}", name=0
     )
diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py
index 84cf5872219..800ed68e8a4 100644
--- a/python/cudf/cudf/tests/test_json.py
+++ b/python/cudf/cudf/tests/test_json.py
@@ -573,3 +573,9 @@ def test_default_float_bitwidth(default_float_bitwidth):
     )
     assert df["a"].dtype == np.dtype(f"f{default_float_bitwidth//8}")
     assert df["b"].dtype == np.dtype(f"f{default_float_bitwidth//8}")
+
+
+def test_json_experimental():
+    # should raise an exception, for now
+    with pytest.raises(RuntimeError):
+        cudf.read_json("", engine="cudf_experimental")
diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py
index 132eb528cd0..4373ef9afdf 100644
--- a/python/cudf/cudf/tests/test_orc.py
+++ b/python/cudf/cudf/tests/test_orc.py
@@ -1758,3 +1758,25 @@ def test_orc_writer_zlib_compression(list_struct_buff):
             pytest.mark.xfail(reason="nvcomp build doesn't have deflate")
         else:
             raise e
+
+
+@pytest.mark.parametrize("index", [True, False, None])
+@pytest.mark.parametrize("columns", [None, [], ["b", "a"]])
+def test_orc_columns_and_index_param(index, columns):
+    buffer = BytesIO()
+    df = cudf.DataFrame({"a": [1, 2, 3], "b": ["a", "b", "c"]})
+    df.to_orc(buffer, index=index)
+
+    expected = pd.read_orc(buffer, columns=columns)
+    got = cudf.read_orc(buffer, columns=columns)
+
+    if columns:
+        # TODO: Remove workaround after this issue is fixed:
+        # https://github.com/pandas-dev/pandas/issues/47944
+        assert_eq(
+            expected.sort_index(axis=1),
+            got.sort_index(axis=1),
+            check_index_type=True,
+        )
+    else:
+        assert_eq(expected, got, check_index_type=True)
diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py
index 3771587eb47..d3c41de842a 100644
--- a/python/cudf/cudf/utils/ioutils.py
+++ b/python/cudf/cudf/utils/ioutils.py
@@ -463,7 +463,7 @@
 function or `StringIO`). Multiple inputs may be provided as a list. If a list
 is specified each list entry may be of a different input type as long as
 each input is of a valid type and all input JSON schema(s) match.
-engine : {{ 'auto', 'cudf', 'pandas' }}, default 'auto'
+engine : {{ 'auto', 'cudf', 'cudf_experimental', 'pandas' }}, default 'auto'
     Parser engine to use. If 'auto' is passed, the engine will be
     automatically selected based on the other parameters.
 orient : string,
diff --git a/python/dask_cudf/setup.py b/python/dask_cudf/setup.py
index 575683bc5fa..7d8a6d7c3a3 100644
--- a/python/dask_cudf/setup.py
+++ b/python/dask_cudf/setup.py
@@ -10,8 +10,8 @@
 
 install_requires = [
     "cudf",
-    "dask>=2022.05.2",
-    "distributed>=2022.05.2",
+    "dask==2022.7.1",
+    "distributed==2022.7.1",
     "fsspec>=0.6.0",
     "numpy",
     "pandas>=1.0,<1.5.0dev0",