Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Topi] Fix GPU Dynamic Topk by Improving Dynamic Strided Slice in Topi #7018

Merged
merged 7 commits into from
Dec 4, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion include/tvm/topi/nn.h
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,7 @@ inline tvm::te::Tensor batch_to_space_nd(const tvm::te::Tensor& data,
out = reshape(out, r_p_shape);

// Crop the start and end of dimensions of out
Array<Integer> begin_idx, end_idx, strides;
Array<PrimExpr> begin_idx, end_idx, strides;
for (size_t i = 0; i < r_p_shape.size(); ++i) {
strides.push_back(Integer(1));
if (i > 0 && i <= num_block_dims) {
Expand Down
70 changes: 61 additions & 9 deletions include/tvm/topi/transform.h
Original file line number Diff line number Diff line change
Expand Up @@ -598,17 +598,69 @@ inline te::Tensor dynamic_strided_slice(const te::Tensor& x, const te::Tensor& b
*
* \return A Tensor whose op member is the split operation
*/
inline Tensor strided_slice(const Tensor& x, const Array<Integer>& begin, const Array<Integer>& end,
const Array<Integer>& strides, std::string slice_mode = "end",
std::string name = "T_strided_slice", std::string tag = kInjective) {
inline Tensor strided_slice(const Tensor& x, const Array<PrimExpr>& begin,
const Array<PrimExpr>& end, const Array<PrimExpr>& strides,
std::string slice_mode = "end", std::string name = "T_strided_slice",
std::string tag = kInjective) {
size_t src_tensor_dim = static_cast<size_t>(x->shape.size());
// Quick path for dynamic shape strided slice.
// This is for ease of use to dynamice strided slice in topi.
bool is_dyn = false;
for (size_t i = 0; i < src_tensor_dim; ++i) {
if (!IsConstInt(x->shape[i])) {
is_dyn = true;
break;
}
}
if (!is_dyn) {
for (size_t i = 0; i < begin.size(); ++i) {
if (begin[i].defined() && !IsConstInt(begin[i])) {
is_dyn = true;
break;
}
}
}
if (!is_dyn) {
for (size_t i = 0; i < end.size(); ++i) {
if (end[i].defined() && !IsConstInt(end[i])) {
is_dyn = true;
break;
}
}
}
if (!is_dyn) {
for (size_t i = 0; i < strides.size(); ++i) {
if (strides[i].defined() && !IsConstInt(strides[i])) {
is_dyn = true;
break;
}
}
}
mbrookhart marked this conversation as resolved.
Show resolved Hide resolved

Array<PrimExpr> out_shape;
if (is_dyn) {
for (size_t i = 0; i < src_tensor_dim; ++i) {
out_shape.push_back(indexdiv(end[i] - begin[i], strides[i]));
}
return te::compute(
out_shape,
[&](const Array<tvm::tir::Var>& indices) {
Array<PrimExpr> real_indices;
for (size_t i = 0; i < src_tensor_dim; ++i) {
real_indices.push_back(indices[i] * strides[i] + begin[i]);
}
return x(real_indices);
},
name, tag);
}

mbrookhart marked this conversation as resolved.
Show resolved Hide resolved
// Setup the ranges.
// NOTE: this code duplicates the shape inference logic relay.op
// Consider to refactor in the future.
std::vector<int64_t> stride_vec(src_tensor_dim, 1);
for (size_t i = 0; i < strides.size(); ++i) {
ICHECK(strides[i].defined());
stride_vec[i] = strides[i]->value;
stride_vec[i] = GetConstInt(strides[i]);
}

const int64_t max_range = std::numeric_limits<int64_t>::max();
Expand All @@ -619,7 +671,7 @@ inline Tensor strided_slice(const Tensor& x, const Array<Integer>& begin, const
// value=None
begin_vec.push_back(stride_vec[i] > 0 ? 0 : max_range);
} else {
begin_vec.push_back(begin[i]->value);
begin_vec.push_back(GetConstInt(begin[i]));
}
}
for (size_t i = begin_vec.size(); i < src_tensor_dim; ++i) {
Expand All @@ -633,20 +685,20 @@ inline Tensor strided_slice(const Tensor& x, const Array<Integer>& begin, const
if (!end[i].defined()) {
end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range);
} else if (slice_mode == "size") {
if (end[i]->value < 0) {
int64_t end_val = GetConstInt(end[i]);
if (end_val < 0) {
end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range);
} else {
end_vec.push_back(begin_vec[i] + end[i]->value);
end_vec.push_back(begin_vec[i] + end_val);
}
} else {
end_vec.push_back(end[i]->value);
end_vec.push_back(GetConstInt(end[i]));
}
}
for (size_t i = end_vec.size(); i < src_tensor_dim; ++i) {
end_vec.push_back(stride_vec[i] < 0 ? 0 : max_range);
}
// Compute
Array<PrimExpr> out_shape;
Array<PrimExpr> begin_expr;
Array<PrimExpr> strides_expr;

Expand Down
28 changes: 25 additions & 3 deletions python/tvm/topi/cuda/sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,7 @@ def topk(data, k=1, axis=-1, ret_type="both", is_ascend=False, dtype="int64"):
out : tvm.te.Tensor or List[tvm.te.Tensor]
The computed result.
"""
return topk_thrust(data, k=1, axis=-1, ret_type="both", is_ascend=False, dtype="int64")
assert ret_type in ["both", "values", "indices"]
ndim = len(data.shape)
axis = axis + ndim if axis < 0 else axis
Expand Down Expand Up @@ -561,10 +562,31 @@ def topk_thrust(data, k=1, axis=-1, ret_type="both", is_ascend=False, dtype="int
tag="topk_gpu",
)

if k > 0:
is_dyn = not isinstance(k, int)
for dim in data.shape:
if not isinstance(dim, tvm.tir.IntImm):
is_dyn = True
break

if not is_dyn:
if k > 0:
beg = [0] * ndim
end = data.shape[:axis] + [k] + data.shape[axis:]
out = [strided_slice(o, beg, end) for o in out]
else:
beg = [0] * ndim
end = data.shape[:-1] + [k]
out = [strided_slice(o, beg, end) for o in out]
end = []
for i in range(len(data.shape)):
if i == axis:
if isinstance(k, int):
end.append(data.shape[i] if k <= 0 else k)
else:
end.append(tvm.te.size_var("dim"))
else:
end.append(data.shape[i])

strides = [1] * ndim
out = [strided_slice(o, beg, end, strides) for o in out]

if axis != ndim - 1:
axes = swap(list(range(ndim)), axis)
Expand Down
19 changes: 14 additions & 5 deletions src/relay/op/tensor/transform.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2312,6 +2312,7 @@ Array<te::Tensor> StridedSliceCompute(const Attrs& attrs, const Array<te::Tensor
const StridedSliceAttrs* param = attrs.as<StridedSliceAttrs>();
ICHECK(param != nullptr);
Array<Integer> begin, end, strides;
Array<PrimExpr> begin_expr, end_expr, strides_expr;
begin = param->begin.value();
end = param->end.value();
strides = param->strides.value();
Expand All @@ -2324,8 +2325,6 @@ Array<te::Tensor> StridedSliceCompute(const Attrs& attrs, const Array<te::Tensor
for (size_t i = 0; i < src_tensor_dim; ++i) {
out_shape.push_back(tvm::tir::Var("dim"));
}
Array<PrimExpr> begin_expr;
Array<PrimExpr> strides_expr;
for (size_t i = 0; i < src_tensor_dim; ++i) {
int64_t begin_i = begin[i]->value;
if (begin_i < 0) {
Expand All @@ -2346,8 +2345,19 @@ Array<te::Tensor> StridedSliceCompute(const Attrs& attrs, const Array<te::Tensor
return input(real_indices);
},
std::string{"T_strided_slice_dynamic"}, std::string{topi::kInjective})};
} else {
for (size_t i = 0; i < begin.size(); ++i) {
begin_expr.push_back(begin[i]);
}
for (size_t i = 0; i < end.size(); ++i) {
end_expr.push_back(end[i]);
}
for (size_t i = 0; i < strides.size(); ++i) {
strides_expr.push_back(strides[i]);
}
}
return Array<te::Tensor>{topi::strided_slice(inputs[0], begin, end, strides, param->slice_mode)};
return Array<te::Tensor>{
topi::strided_slice(inputs[0], begin_expr, end_expr, strides_expr, param->slice_mode)};
}

// Positional relay function to create StridedSlice operator used by frontend FFI.
Expand Down Expand Up @@ -2663,8 +2673,7 @@ Array<te::Tensor> SliceLikeCompute(const Attrs& attrs, const Array<te::Tensor>&
<< topi::GetConstInt(src_shape[axis]);
}
}
return Array<te::Tensor>{topi::strided_slice(inputs[0], GetIntArray(begin_idx),
GetIntArray(end_idx), GetIntArray(strides), "end")};
return Array<te::Tensor>{topi::strided_slice(inputs[0], begin_idx, end_idx, strides, "end")};
}

TVM_REGISTER_GLOBAL("relay.op._make.slice_like").set_body_typed(MakeSliceLike);
Expand Down
8 changes: 1 addition & 7 deletions tests/python/relay/test_any.py
Original file line number Diff line number Diff line change
Expand Up @@ -815,13 +815,7 @@ def verify_any_topk(data_shape, kval, np_dshape, dtype, const_k=False):
else:
ref_out = sorted[0:kval]

for kind in ["debug", "vm"]:
ex = relay.create_executor(kind, mod=mod, ctx=tvm.cpu(), target="llvm")
result = ex.evaluate()(*in_vals)
tvm.testing.assert_allclose(result.asnumpy(), ref_out)

# TODO(@zhiics) Fix topk cuda schedule for dynamic inputs
# check_result(in_vals, mod, ref_out)
check_result(in_vals, mod, ref_out)


def test_any_topk():
Expand Down