From b1fa42734fd56f2f713271522a3661f45c0e9b1f Mon Sep 17 00:00:00 2001 From: Giuseppe Rossini Date: Thu, 19 Nov 2020 13:30:49 +0000 Subject: [PATCH 1/2] [Bug-fix] Fix tir allocation with multiple lanes This PR stemmed from https://github.com/apache/incubator-tvm/pull/6907 and fixes a small error in the getter and setter of a buffer for the case where `t.lanes > 1`. I also added a test to stress the issue. --- python/tvm/tir/ir_builder.py | 4 ++-- tests/python/unittest/test_tir_ir_builder.py | 16 ++++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/python/tvm/tir/ir_builder.py b/python/tvm/tir/ir_builder.py index 77fe79b327b6..a904cd52d903 100644 --- a/python/tvm/tir/ir_builder.py +++ b/python/tvm/tir/ir_builder.py @@ -103,7 +103,7 @@ def __getitem__(self, index): index = self._linear_index(index) if t.lanes > 1: base = index * t.lanes - index = _expr.Ramp(base, const(1, base.dtype), t.lanes) + index = _expr.Ramp(base, 1, t.lanes) return _expr.Load(self._content_type, self._buffer_var, index) def __setitem__(self, index, value): @@ -116,7 +116,7 @@ def __setitem__(self, index, value): t = DataType(self._content_type) if t.lanes > 1: base = index * t.lanes - index = _expr.Ramp(base, const(1, base.dtype), t.lanes) + index = _expr.Ramp(base, 1, t.lanes) self._builder.emit(_stmt.Store(self._buffer_var, value, index)) diff --git a/tests/python/unittest/test_tir_ir_builder.py b/tests/python/unittest/test_tir_ir_builder.py index b84ee09b9fd9..1311518091e8 100644 --- a/tests/python/unittest/test_tir_ir_builder.py +++ b/tests/python/unittest/test_tir_ir_builder.py @@ -20,6 +20,21 @@ import tvm.testing +def test_allocate_with_lanes(): + ib = tvm.tir.ir_builder.create() + n = te.size_var("n") + A = ib.allocate("float32x4", n, name="A", scope="global") + B = ib.allocate("float32x4", n, name="A", scope="global") + A[0] = tvm.tir.const(0, "float32x4") + B[0] = A[0] + body = ib.get() + assert A == A + print(body) + assert isinstance(body, 
tvm.tir.AttrStmt) + body = body.body + assert isinstance(body, tvm.tir.Allocate) + + def test_for(): ib = tvm.tir.ir_builder.create() n = te.size_var("n") @@ -177,5 +192,6 @@ def check_target(target): test_prefetch() test_if() test_for() + test_allocate_with_lanes() test_cpu() test_gpu() From a48ea9497e2589ba92a40b714dd2f4923aedd78a Mon Sep 17 00:00:00 2001 From: Giuseppe Rossini Date: Thu, 19 Nov 2020 18:07:00 +0000 Subject: [PATCH 2/2] Address dtyped vs non-dtyped constant cases --- python/tvm/tir/ir_builder.py | 6 ++++-- tests/python/unittest/test_tir_ir_builder.py | 16 ---------------- .../test_tir_transform_narrow_datatype.py | 3 ++- 3 files changed, 6 insertions(+), 19 deletions(-) diff --git a/python/tvm/tir/ir_builder.py b/python/tvm/tir/ir_builder.py index a904cd52d903..75c5c2921ff4 100644 --- a/python/tvm/tir/ir_builder.py +++ b/python/tvm/tir/ir_builder.py @@ -103,7 +103,8 @@ def __getitem__(self, index): index = self._linear_index(index) if t.lanes > 1: base = index * t.lanes - index = _expr.Ramp(base, 1, t.lanes) + stride = 1 if (not hasattr(base, "dtype")) else const(1, base.dtype) + index = _expr.Ramp(base, stride, t.lanes) return _expr.Load(self._content_type, self._buffer_var, index) def __setitem__(self, index, value): @@ -116,7 +117,8 @@ def __setitem__(self, index, value): t = DataType(self._content_type) if t.lanes > 1: base = index * t.lanes - index = _expr.Ramp(base, 1, t.lanes) + stride = 1 if (not hasattr(base, "dtype")) else const(1, base.dtype) + index = _expr.Ramp(base, stride, t.lanes) self._builder.emit(_stmt.Store(self._buffer_var, value, index)) diff --git a/tests/python/unittest/test_tir_ir_builder.py b/tests/python/unittest/test_tir_ir_builder.py index 1311518091e8..b84ee09b9fd9 100644 --- a/tests/python/unittest/test_tir_ir_builder.py +++ b/tests/python/unittest/test_tir_ir_builder.py @@ -20,21 +20,6 @@ import tvm.testing -def test_allocate_with_lanes(): - ib = tvm.tir.ir_builder.create() - n = te.size_var("n") - A = 
ib.allocate("float32x4", n, name="A", scope="global") - B = ib.allocate("float32x4", n, name="A", scope="global") - A[0] = tvm.tir.const(0, "float32x4") - B[0] = A[0] - body = ib.get() - assert A == A - print(body) - assert isinstance(body, tvm.tir.AttrStmt) - body = body.body - assert isinstance(body, tvm.tir.Allocate) - - def test_for(): ib = tvm.tir.ir_builder.create() n = te.size_var("n") @@ -192,6 +177,5 @@ def check_target(target): test_prefetch() test_if() test_for() - test_allocate_with_lanes() test_cpu() test_gpu() diff --git a/tests/python/unittest/test_tir_transform_narrow_datatype.py b/tests/python/unittest/test_tir_transform_narrow_datatype.py index b1a9eae7893a..cb8968cfc880 100644 --- a/tests/python/unittest/test_tir_transform_narrow_datatype.py +++ b/tests/python/unittest/test_tir_transform_narrow_datatype.py @@ -126,9 +126,10 @@ def check(m, lanes, target_bits, target_dtype): B = ib.buffer_ptr(Bb) with ib.for_range(0, m, name="i", dtype=m.dtype) as i: B[i] = A[i] + 1 + A[0] = B[1] stmt = ib.get() stmt = lower_stmt([Ab, Bb], stmt, target_bits) - assert stmt.loop_var.dtype == target_dtype + assert stmt.seq[0].loop_var.dtype == target_dtype # i32 -> i32 check(const(2 ** 10, dtype="int32"), 2, target_bits=32, target_dtype="int32")