Skip to content

Commit

Permalink
Use hypothesis (#5759)
Browse files Browse the repository at this point in the history
* Use hypothesis

* Allow int64 array interface for groups

* Add packages to Windows CI

* Add to travis

* Make sure device index is set correctly

* Fix dask-cudf test

* appveyor
  • Loading branch information
RAMitchell authored Jun 16, 2020
1 parent 02884b0 commit b47b5ac
Show file tree
Hide file tree
Showing 17 changed files with 414 additions and 442 deletions.
4 changes: 2 additions & 2 deletions Jenkinsfile-win64
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def TestWin64CPU() {
"""
echo "Installing Python dependencies..."
bat """
conda activate && conda upgrade scikit-learn pandas numpy
conda activate && conda install -y hypothesis && conda upgrade scikit-learn pandas numpy hypothesis
"""
echo "Running Python tests..."
bat "conda activate && python -m pytest -v -s --fulltrace tests\\python"
Expand All @@ -138,7 +138,7 @@ def TestWin64GPU(args) {
"""
echo "Installing Python dependencies..."
bat """
conda activate && conda upgrade scikit-learn pandas numpy
conda activate && conda install -y hypothesis && conda upgrade scikit-learn pandas numpy hypothesis
"""
echo "Running Python tests..."
bat """
Expand Down
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ install:
- if /i "%DO_PYTHON%" == "on" (
conda config --set always_yes true &&
conda update -q conda &&
conda install -y numpy scipy pandas matplotlib pytest scikit-learn graphviz python-graphviz
conda install -y numpy scipy pandas matplotlib pytest scikit-learn graphviz python-graphviz hypothesis
)
- set PATH=C:\Miniconda3-x64\Library\bin\graphviz;%PATH%
# R: based on https://github.com/krlmlr/r-appveyor
Expand Down
35 changes: 25 additions & 10 deletions src/data/data.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,30 @@ void CopyInfoImpl(ArrayInterface column, HostDeviceVector<float>* out) {
});
}

/*!
 * \brief Convert an array-interface column of per-group sizes into a group
 *        pointer stored on the host.
 *
 * The result has `column.num_rows + 1` entries: a leading 0 followed by the
 * running sum of the group sizes.
 *
 * \param column Array interface describing the group sizes; its type code must
 *               be signed ('i') or unsigned ('u') integer.
 * \param out    Host vector that receives the cumulative group offsets.
 */
void CopyGroupInfoImpl(ArrayInterface column, std::vector<bst_group_t>* out) {
  CHECK(column.type[1] == 'i' || column.type[1] == 'u')
      << "Expected integer metainfo";

  // Look up which device owns the input buffer and make it current, so the
  // temporary allocation and the kernel launch below target that device.
  cudaPointerAttributes attributes;
  dh::safe_cuda(cudaPointerGetAttributes(&attributes, column.data));
  int32_t const device = attributes.device;
  dh::safe_cuda(cudaSetDevice(device));

  // Convert every element to bst_group_t on the device; GetElement() handles
  // whatever integer width the interface declares.
  dh::TemporaryArray<bst_group_t> group_sizes(column.num_rows);
  auto d_sizes = group_sizes.data();
  dh::LaunchN(device, column.num_rows, [=] __device__(size_t i) {
    d_sizes[i] = column.GetElement(i);
  });

  // Copy the sizes behind a leading zero, then accumulate in place to turn
  // sizes into offsets.
  auto const n_groups = column.num_rows;
  out->resize(n_groups + 1);
  out->at(0) = 0;
  thrust::copy(d_sizes, d_sizes + n_groups, out->begin() + 1);
  std::partial_sum(out->begin(), out->end(), out->begin());
}

void MetaInfo::SetInfo(const char * c_key, std::string const& interface_str) {
Json j_interface = Json::Load({interface_str.c_str(), interface_str.size()});
auto const& j_arr = get<Array>(j_interface);
Expand All @@ -53,16 +77,7 @@ void MetaInfo::SetInfo(const char * c_key, std::string const& interface_str) {
} else if (key == "base_margin") {
CopyInfoImpl(array_interface, &base_margin_);
} else if (key == "group") {
// Ranking is not performed on device.
thrust::device_ptr<uint32_t> p_src{
reinterpret_cast<uint32_t*>(array_interface.data)};

auto length = array_interface.num_rows;
group_ptr_.resize(length + 1);
group_ptr_[0] = 0;
thrust::copy(p_src, p_src + length, group_ptr_.begin() + 1);
std::partial_sum(group_ptr_.begin(), group_ptr_.end(), group_ptr_.begin());

CopyGroupInfoImpl(array_interface, &group_ptr_);
return;
} else {
LOG(FATAL) << "Unknown metainfo: " << key;
Expand Down
2 changes: 1 addition & 1 deletion tests/ci_build/Dockerfile.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ ENV GOSU_VERSION 1.10
# Install Python packages in default env
RUN \
pip install pyyaml cpplint pylint astroid sphinx numpy scipy pandas matplotlib sh \
recommonmark guzzle_sphinx_theme mock breathe graphviz \
recommonmark guzzle_sphinx_theme mock breathe graphviz hypothesis\
pytest scikit-learn wheel kubernetes urllib3 jsonschema boto3 && \
pip install https://h2o-release.s3.amazonaws.com/datatable/stable/datatable-0.7.0/datatable-0.7.0-cp37-cp37m-linux_x86_64.whl && \
pip install "dask[complete]"
Expand Down
2 changes: 1 addition & 1 deletion tests/ci_build/Dockerfile.cudf
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ ENV PATH=/opt/python/bin:$PATH
RUN \
conda create -n cudf_test -c rapidsai -c nvidia -c conda-forge -c defaults \
python=3.7 cudf cudatoolkit=$CUDA_VERSION dask dask-cuda dask-cudf cupy \
numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz
numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis

ENV GOSU_VERSION 1.10

Expand Down
2 changes: 1 addition & 1 deletion tests/ci_build/Dockerfile.gpu
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ ENV PATH=/opt/python/bin:$PATH
RUN \
conda create -n gpu_test -c rapidsai -c nvidia -c conda-forge -c defaults \
python=3.7 dask dask-cuda numpy pytest scipy scikit-learn pandas \
matplotlib wheel python-kubernetes urllib3 graphviz
matplotlib wheel python-kubernetes urllib3 graphviz hypothesis

ENV GOSU_VERSION 1.10

Expand Down
30 changes: 23 additions & 7 deletions tests/cpp/data/test_metainfo.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ std::string PrepareData(std::string typestr, thrust::device_vector<T>* out, cons

std::vector<Json> j_shape {Json(Integer(static_cast<Integer::Int>(kRows)))};
column["shape"] = Array(j_shape);
column["strides"] = Array(std::vector<Json>{Json(Integer(static_cast<Integer::Int>(4)))});
column["strides"] = Array(std::vector<Json>{Json(Integer(static_cast<Integer::Int>(sizeof(T))))});
column["version"] = Integer(static_cast<Integer::Int>(1));
column["typestr"] = String(typestr);

Expand Down Expand Up @@ -78,16 +78,32 @@ TEST(MetaInfo, FromInterface) {

// Checks that "group" metainfo passed through the array interface is turned
// into cumulative offsets for several integer widths, and that a
// floating-point column is rejected.
TEST(MetaInfo, Group) {
  cudaSetDevice(0);

  // Unsigned 32-bit group sizes.
  {
    thrust::device_vector<uint32_t> d_uint;
    std::string uint_str = PrepareData<uint32_t>("<u4", &d_uint);
    MetaInfo info;
    info.SetInfo("group", uint_str.c_str());
    auto const& h_group = info.group_ptr_;
    ASSERT_EQ(h_group.size(), d_uint.size() + 1);
    for (size_t i = 1; i < h_group.size(); ++i) {
      ASSERT_EQ(h_group[i], d_uint[i - 1] + h_group[i - 1]) << "i: " << i;
    }
  }

  // Signed 64-bit group sizes. Compare against the int64 buffer that was
  // actually passed in, not the uint32 buffer from the previous case.
  {
    thrust::device_vector<int64_t> d_int64;
    std::string int_str = PrepareData<int64_t>("<i8", &d_int64);
    MetaInfo info;
    info.SetInfo("group", int_str.c_str());
    auto const& h_group = info.group_ptr_;
    ASSERT_EQ(h_group.size(), d_int64.size() + 1);
    for (size_t i = 1; i < h_group.size(); ++i) {
      // Read the element back to the host, then narrow it to the group type so
      // both sides of the comparison have the same signedness and width.
      int64_t const group_size = d_int64[i - 1];
      ASSERT_EQ(h_group[i],
                static_cast<bst_group_t>(group_size) + h_group[i - 1])
          << "i: " << i;
    }
  }

  // Incorrect type: floating-point group sizes must be rejected.
  {
    thrust::device_vector<float> d_float;
    std::string float_str = PrepareData<float>("<f4", &d_float);
    MetaInfo info;
    EXPECT_ANY_THROW(info.SetInfo("group", float_str.c_str()));
  }
}
} // namespace xgboost
70 changes: 45 additions & 25 deletions tests/python-gpu/test_gpu_linear.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,50 @@
import sys
import pytest
import unittest
from hypothesis import strategies, given, settings, assume
import xgboost as xgb
sys.path.append("tests/python")
import testing as tm

sys.path.append('tests/python/')
import test_linear # noqa: E402
import testing as tm # noqa: E402

parameter_strategy = strategies.fixed_dictionaries({
'booster': strategies.just('gblinear'),
'eta': strategies.floats(0.01, 0.25),
'tolerance': strategies.floats(1e-5, 1e-2),
'nthread': strategies.integers(1, 4),
'feature_selector': strategies.sampled_from(['cyclic', 'shuffle',
'greedy', 'thrifty']),
'top_k': strategies.integers(1, 10),
})

class TestGPULinear(unittest.TestCase):
datasets = ["Boston", "Digits", "Cancer", "Sparse regression"]
common_param = {
'booster': ['gblinear'],
'updater': ['gpu_coord_descent'],
'eta': [0.5],
'top_k': [10],
'tolerance': [1e-5],
'alpha': [.1],
'lambda': [0.005],
'coordinate_selection': ['cyclic', 'random', 'greedy']}
def train_result(param, dmat, num_rounds):
    """Train on ``dmat`` for ``num_rounds`` rounds and return the evaluation history.

    ``dmat`` is also used as the only evaluation set, registered under the
    name ``'train'``. The returned dict is the one filled in through
    ``evals_result``.
    """
    history = {}
    watchlist = [(dmat, 'train')]
    xgb.train(param, dmat, num_rounds, watchlist,
              verbose_eval=False, evals_result=history)
    return history

@pytest.mark.skipif(**tm.no_sklearn())
def test_gpu_coordinate(self):
parameters = self.common_param.copy()
parameters['gpu_id'] = [0]
for param in test_linear.parameter_combinations(parameters):
results = test_linear.run_suite(
param, 100, self.datasets, scale_features=True)
test_linear.assert_regression_result(results, 1e-2)
test_linear.assert_classification_result(results)

class TestGPULinear:
    """Hypothesis-driven tests for the ``gpu_coord_descent`` updater."""

    @given(parameter_strategy, strategies.integers(10, 50),
           tm.dataset_strategy)
    @settings(deadline=None)
    def test_gpu_coordinate(self, param, num_rounds, dataset):
        """The training metric must be non-increasing over all rounds."""
        assume(len(dataset.y) > 0)
        param.update({'updater': 'gpu_coord_descent'})
        param = dataset.set_params(param)
        history = train_result(param, dataset.get_dmat(), num_rounds)
        metric_values = history['train'][dataset.metric]
        assert tm.non_increasing(metric_values)

    @given(parameter_strategy, strategies.integers(10, 50),
           tm.dataset_strategy, strategies.floats(1e-5, 2.0),
           strategies.floats(1e-5, 2.0))
    @settings(deadline=None)
    def test_gpu_coordinate_regularised(self, param, num_rounds, dataset, alpha, lambd):
        """With regularisation, only compare the first and last metric values.

        The loss is not guaranteed to decrease on every round once the
        regularisation parameters are set, so this test uses a weaker
        condition: the loss must not have increased between the first and the
        last iteration.
        """
        assume(len(dataset.y) > 0)
        param.update({
            'updater': 'gpu_coord_descent',
            'alpha': alpha,
            'lambda': lambd,
        })
        param = dataset.set_params(param)
        history = train_result(param, dataset.get_dmat(), num_rounds)
        metric_values = history['train'][dataset.metric]
        first, last = metric_values[0], metric_values[-1]
        assert tm.non_increasing([first, last])
5 changes: 4 additions & 1 deletion tests/python-gpu/test_gpu_pickling.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,13 @@
import numpy as np
import subprocess
import os
import sys
import json
import pytest

sys.path.append("tests/python")
import testing as tm

import xgboost as xgb
from xgboost import XGBClassifier

Expand Down Expand Up @@ -90,7 +94,6 @@ def test_wrap_gpu_id(self):
)
status = subprocess.call(args, env=env)
assert status == 0

os.remove(model_path)

def test_pickled_predictor(self):
Expand Down
2 changes: 1 addition & 1 deletion tests/python-gpu/test_gpu_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,10 @@ def test_inplace_predict_cudf(self):
rows = 1000
cols = 10
rng = np.random.RandomState(1994)
cp.cuda.runtime.setDevice(0)
X = rng.randn(rows, cols)
X = pd.DataFrame(X)
y = rng.randn(rows)

X = cudf.from_pandas(X)

dtrain = xgb.DMatrix(X, y)
Expand Down
Loading

0 comments on commit b47b5ac

Please sign in to comment.