Skip to content

Commit

Permalink
Improve tests, improve deferred alloc check
Browse files Browse the repository at this point in the history
  • Loading branch information
ThrudPrimrose committed Dec 9, 2024
1 parent e915607 commit 9d646dc
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 80 deletions.
16 changes: 6 additions & 10 deletions dace/codegen/targets/framecode.py
Original file line number Diff line number Diff line change
Expand Up @@ -971,20 +971,16 @@ def generate_code(self,
ctypedef = size_nodedesc.dtype.ctype
from dace.codegen.targets import cpp
array = [v for v in sdfg.arrays.values() if v.size_desc_name is not None and v.size_desc_name == size_desc_name]
if len(array) != 1:
print(array)
assert len(array) <= 1
if len(array) == 1:
array = array[0]
if any(["__dace_defer" in str(dim) for dim in array.shape]):
if type(array) == dace.data.Array and array.is_deferred_array:
dimensions = ["0" if cpp.sym2cpp(dim).startswith("__dace_defer") else cpp.sym2cpp(dim) for dim in array.shape]
if any(["__dace_defer" in cpp.sym2cpp(dim) for dim in array.shape]):
size_str = ",".join(dimensions)
assert len(size_nodedesc.shape) == 1
print("BB", size_nodedesc.shape, dimensions, array.shape)
alloc_str = f'{ctypedef} {size_desc_name}[{size_nodedesc.shape[0]}]{{{size_str}}};\n'
callsite_stream.write(alloc_str)
self.dispatcher.defined_vars.add(size_desc_name, disp.DefinedType.Pointer, ctypedef)
size_str = ",".join(dimensions)
assert len(size_nodedesc.shape) == 1
alloc_str = f'{ctypedef} {size_desc_name}[{size_nodedesc.shape[0]}]{{{size_str}}};\n'
callsite_stream.write(alloc_str)
self.dispatcher.defined_vars.add(size_desc_name, disp.DefinedType.Pointer, ctypedef)

#######################################################################
# Generate actual program body
Expand Down
4 changes: 4 additions & 0 deletions dace/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1388,6 +1388,7 @@ class Array(Data):
pool = Property(dtype=bool, default=False, desc='Hint to the allocator that using a memory pool is preferred')

is_size_array = Property(dtype=bool, default=False, desc='Special array that is used to track the size of an another array')
is_deferred_array = Property(dtype=bool, default=False, desc='Array that requires deferred allocation')

def __init__(self,
dtype,
Expand Down Expand Up @@ -1440,6 +1441,9 @@ def __init__(self,
self.offset = cp.copy(offset)
else:
self.offset = [0] * len(shape)

self.is_deferred_array = any(["__dace_defer" in str(dim) for dim in self.shape])

self.validate()

def __repr__(self):
Expand Down
90 changes: 58 additions & 32 deletions dace/sdfg/sdfg.py
Original file line number Diff line number Diff line change
Expand Up @@ -764,8 +764,10 @@ def replace_dict(self,
if validate_name(new_name):
_replace_dict_keys(self.arrays, name, new_name, non_size_arrays)
# Size desc names are updated later
if "__return" not in new_name: # To catch __return_0, __return_1, gpu__return
if "__return" not in new_name: # To catch __return_0, __return_1, gpu__return, fpga__return
size_desc_map[new_name] = new_name + "_size"
else:
size_desc_map[new_name] = None
_replace_dict_keys(self.symbols, name, new_name)
_replace_dict_keys(self.constants_prop, name, new_name)
_replace_dict_keys(self.callback_mapping, name, new_name)
Expand All @@ -779,12 +781,28 @@ def replace_dict(self,
arr = self.arrays[arr_name] if arr_name in self.arrays else None
if arr is not None:
size_desc_name_before = arr.size_desc_name
if arr.transient and type(arr) == dt.Array and size_desc_name_before is not None:
arr.size_desc_name = size_desc_name if "__return" not in new_name else None
# If we change the name of an array, then we need to change its size array accordingly
if (arr.transient and type(arr) == dt.Array and size_desc_name_before is not None
and size_desc_name is not None):
arr.size_desc_name = size_desc_name
assert (arr.size_desc_name == size_desc_name)
self.arrays[size_desc_name] = self.arrays.pop(size_desc_name_before)
# If the new size array is None, then we can remove the previous (and now unused size array)
if arr.size_desc_name is None and size_desc_name_before is not None:
size_ararys_to_rm.add(size_desc_name_before)
for size_arr_name in size_ararys_to_rm and size_arr_name in self.arrays:
del self.arrays[size_arr_name]
# If the new size array is not None, but it was non before we need to add the size array
if size_desc_name_before is None and arr.size_desc_name is not None:
retval = self._get_size_arr(arr_name, arr)
if retval is not None:
size_desc_name, size_desc = retval
assert (size_desc_name == arr.size_desc_name)
self._arrays[size_desc_name] = size_desc
self._add_symbols(size_desc)

# Rm any size array we need to remove
for size_arr_name in size_ararys_to_rm:
if size_arr_name in self.arrays:
del self.arrays[size_arr_name]

# Replace inside data descriptors
for array in self.arrays.values():
Expand Down Expand Up @@ -2062,6 +2080,37 @@ def _add_symbols(self, desc: dt.Data):
if sym.name not in self.symbols:
self.add_symbol(sym.name, sym.dtype)

def _get_size_arr(self, name: str, datadesc: dt.Data):
    """Build the descriptor of the hidden size array for a deferred-allocation array.

    :param name: Name of the data descriptor being added to the SDFG.
    :param datadesc: The data descriptor to inspect.
    :return: A ``(size_desc_name, size_desc)`` tuple when ``datadesc`` needs a
             companion size array, otherwise ``None``.
    """
    needs_size_array = (
        datadesc.transient is True
        and type(datadesc) == dt.Array
        and "__return" not in name
        and datadesc.lifetime is not dtypes.AllocationLifetime.External
        and datadesc.lifetime is not dtypes.AllocationLifetime.Persistent
        and datadesc.is_deferred_array
    )
    if not needs_size_array:
        return None

    size_desc_name = f"{name}_size"
    # Regardless of the owning array's scope and storage, the size array is kept
    # on the CPU heap and allocated at the start of the (nested) SDFG; not
    # setting the SDFG prevents to_gpu assertions from failing. Lifetime and
    # storage are pinned explicitly so optimizations do not move it to
    # FPGA/GPU storage.
    size_desc = dt.Array(dtype=dace.uint64,
                         shape=(len(datadesc.shape), ),
                         storage=dtypes.StorageType.CPU_Heap,
                         location=None,
                         allow_conflicts=False,
                         transient=True,
                         strides=(1, ),
                         offset=(0, ),
                         lifetime=dtypes.AllocationLifetime.State,
                         alignment=datadesc.alignment,
                         debuginfo=datadesc.debuginfo,
                         may_alias=False,
                         size_desc_name=None)
    size_desc.is_size_array = True
    return size_desc_name, size_desc

def add_datadesc(self, name: str, datadesc: dt.Data, find_new_name=False) -> str:
""" Adds an existing data descriptor to the SDFG array store.
Expand Down Expand Up @@ -2105,33 +2154,10 @@ def add_datadesc(self, name: str, datadesc: dt.Data, find_new_name=False) -> str
# Add the data descriptor to the SDFG and all symbols that are not yet known.
self._arrays[name] = datadesc
self._add_symbols(datadesc)
if (
datadesc.transient is True and
type(datadesc) == dt.Array and
"__return" not in name and
datadesc.lifetime is not dtypes.AllocationLifetime.External and
datadesc.lifetime is not dtypes.AllocationLifetime.Persistent and
any(["__dace_defer" in str(dim) for dim in datadesc.shape])
):
size_desc_name = f"{name}_size"
# Regardless of the scope and storage it is allocated as a register array
# And at the start of the SDFG (or nested SDFG), not setting SDFG prevents to_gpu assertions
# from failing. To lifetime and storage are set explicitly to
# to prevent optimizations to putting them to FPGA/GPU storage
size_desc = dt.Array(dtype=dace.uint64,
shape=(len(list(datadesc.shape)),),
storage=dtypes.StorageType.CPU_Heap,
location=None,
allow_conflicts=False,
transient=True,
strides=(1,),
offset=(0,),
lifetime=dtypes.AllocationLifetime.State,
alignment=datadesc.alignment,
debuginfo=datadesc.debuginfo,
may_alias=False,
size_desc_name=None)
size_desc.is_size_array = True

retval = self._get_size_arr(name, datadesc)
if retval is not None:
size_desc_name, size_desc = retval
self._arrays[size_desc_name] = size_desc
# In case find_new_name and a new name is returned
# we need to update the size descriptor name of the array
Expand Down
18 changes: 18 additions & 0 deletions dace/sdfg/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,24 @@ def validate_sdfg(sdfg: 'dace.sdfg.SDFG', references: Set[int] = None, **context
"Arrays that use a multibank access pattern must have the size of the first dimension equal"
f" the number of banks and have at least 2 dimensions for array {name}", sdfg, None)

# Check the size array shapes match
if type(desc) == dt.Array:
if desc.is_size_array is False and desc.size_desc_name is not None:
# It is an array which is not a size array and needs to have a size array
size_desc = sdfg._arrays[desc.size_desc_name]
size_arr_len = size_desc.shape[0]
if not isinstance(size_arr_len, int) and (isinstance(size_arr_len, dace.symbolic.symbol) and not size_arr_len.is_integer):
raise InvalidSDFGError(
f"Size arrays need to be one-dimensional and have an integer length known at compile time. {desc.size_desc_name}: {size_desc.shape}"
, sdfg, None
)
# TODO: This check can be implemented as part of a getter/setter on the dimensions of the array?
if int(size_arr_len) != len(desc.shape):
raise InvalidSDFGError(
f"Size arrays size needs to match to shape of its array: {desc.size_desc_name}, {size_desc.shape}: {name}, {desc.shape}"
, sdfg, None
)

# Check if SDFG is located within a GPU kernel
context['in_gpu'] = is_devicelevel_gpu(sdfg, None, None)
context['in_fpga'] = is_devicelevel_fpga(sdfg, None, None)
Expand Down
80 changes: 42 additions & 38 deletions tests/deferred_alloc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def schedule_type(storage_type):
return dace.dtypes.ScheduleType.GPU_Device

def _get_trivial_alloc_sdfg(storage_type: dace.dtypes.StorageType, transient: bool, write_size="0:2"):
sdfg = dace.sdfg.SDFG(name="deferred_alloc_test")
sdfg = dace.sdfg.SDFG(name=f"deferred_alloc_test_1")

sdfg.add_array(name="A", shape=(15, "__dace_defer"), dtype=dace.float32, storage=storage_type, transient=transient)

Expand All @@ -37,9 +37,8 @@ def _get_trivial_alloc_sdfg(storage_type: dace.dtypes.StorageType, transient: bo

return sdfg


def _get_assign_map_sdfg(storage_type: dace.dtypes.StorageType, transient: bool, schedule_type: dace.dtypes.ScheduleType.Default):
sdfg = dace.sdfg.SDFG(name="deferred_alloc_test_2")
sdfg = dace.sdfg.SDFG(name=f"deferred_alloc_test_2")

sdfg.add_array(name="A", shape=(15, "__dace_defer"), dtype=dace.float32, storage=storage_type,
lifetime=dace.dtypes.AllocationLifetime.SDFG, transient=transient)
Expand Down Expand Up @@ -100,21 +99,20 @@ def _get_assign_map_sdfg(storage_type: dace.dtypes.StorageType, transient: bool,

return sdfg


def _valid_to_reallocate(transient, storage_type, scope):
def _valid_to_reallocate(transient, storage_type):
    """Deferred (re)allocation is only legal for transient CPU_Heap or GPU_Global arrays."""
    reallocatable = (dace.dtypes.StorageType.CPU_Heap, dace.dtypes.StorageType.GPU_Global)
    return transient and storage_type in reallocatable

def test_trivial_realloc(storage_type: dace.dtypes.StorageType, transient: bool):
def _test_trivial_realloc(storage_type: dace.dtypes.StorageType, transient: bool):
sdfg = _get_trivial_alloc_sdfg(storage_type, transient)
try:
sdfg.validate()
except Exception:
if not _valid_to_reallocate(transient, storage_type, None):
if not _valid_to_reallocate(transient, storage_type):
return
else:
raise AssertionError("Realloc with transient data failed when it was expected not to.")

if not _valid_to_reallocate(transient, storage_type, None):
if not _valid_to_reallocate(transient, storage_type):
raise AssertionError("Realloc with non-transient data did not fail when it was expected to.")

sdfg.compile()
Expand All @@ -124,17 +122,18 @@ def test_trivial_realloc(storage_type: dace.dtypes.StorageType, transient: bool)
sdfg.validate()
sdfg.compile()

def test_realloc_use(storage_type: dace.dtypes.StorageType, transient: bool, schedule_type: dace.dtypes.ScheduleType):

def _test_realloc_use(storage_type: dace.dtypes.StorageType, transient: bool, schedule_type: dace.dtypes.ScheduleType):
sdfg = _get_assign_map_sdfg(storage_type, transient, schedule_type)
try:
sdfg.validate()
except Exception:
if not _valid_to_reallocate(transient, storage_type, None):
if not _valid_to_reallocate(transient, storage_type):
return
else:
raise AssertionError("Realloc-use with transient data failed when it was expected not to.")

if not _valid_to_reallocate(transient, storage_type, None):
if not _valid_to_reallocate(transient, storage_type):
raise AssertionError("Realloc-use with non-transient data did not fail when it was expected to.")

compiled_sdfg = sdfg.compile()
Expand Down Expand Up @@ -174,13 +173,23 @@ def test_realloc_use(storage_type: dace.dtypes.StorageType, transient: bool, sch
compiled_sdfg(user_size=user_size, example_array=arr)
assert ( arr.get()[0] == 3.0 )

def test_realloc_inside_map():
pass
@pytest.mark.gpu
def test_realloc_use_gpu(transient: bool):
    """GPU variant: realloc-then-use with GPU_Global storage on a GPU_Device schedule."""
    storage = dace.dtypes.StorageType.GPU_Global
    schedule = dace.dtypes.ScheduleType.GPU_Device
    _test_realloc_use(storage, transient, schedule)

def test_realloc_use_cpu(transient: bool):
    """CPU variant: realloc-then-use with CPU_Heap storage on a sequential schedule."""
    storage = dace.dtypes.StorageType.CPU_Heap
    schedule = dace.dtypes.ScheduleType.Sequential
    _test_realloc_use(storage, transient, schedule)

def test_all_combinations(storage_type, transient, schedule_type):
test_trivial_realloc(storage_type, transient)
test_realloc_use(storage_type, transient, schedule_type)
@pytest.mark.gpu
def test_trivial_realloc_gpu(transient: bool):
    """GPU variant: trivial deferred reallocation with GPU_Global storage."""
    storage = dace.dtypes.StorageType.GPU_Global
    _test_trivial_realloc(storage, transient)

def test_trivial_realloc_cpu(transient: bool):
    """CPU variant: trivial deferred reallocation with CPU_Heap storage."""
    storage = dace.dtypes.StorageType.CPU_Heap
    _test_trivial_realloc(storage, transient)


def test_realloc_inside_map():
    # TODO: not implemented yet — deferred reallocation triggered from inside a
    # map is currently uncovered; this stub keeps the test name reserved.
    pass

def test_incomplete_write_dimensions_1():
sdfg = _get_trivial_alloc_sdfg(dace.dtypes.StorageType.CPU_Heap, True, "1:2")
Expand All @@ -202,28 +211,23 @@ def test_incomplete_write_dimensions_2():


if __name__ == "__main__":
    # The pytest entry points (test_*_cpu / test_*_gpu) take ONLY `transient`;
    # storage and schedule are fixed inside each variant. The previous driver
    # passed storage_type/schedule_type as extra positional arguments, which
    # raises TypeError at call time — call them with just the transient flag.
    for transient in (True, False):
        suffix = "" if transient else " on non-transient data"
        print(f"Trivial Realloc with storage {dace.dtypes.StorageType.CPU_Heap}{suffix}")
        test_trivial_realloc_cpu(transient)
        print(f"Trivial Realloc-Use with storage {dace.dtypes.StorageType.CPU_Heap}{suffix}")
        test_realloc_use_cpu(transient)
        print(f"Trivial Realloc with storage {dace.dtypes.StorageType.GPU_Global}{suffix}")
        test_trivial_realloc_gpu(transient)
        print(f"Trivial Realloc-Use with storage {dace.dtypes.StorageType.GPU_Global}{suffix}")
        test_realloc_use_gpu(transient)

print(f"Realloc with incomplete write 1")
test_incomplete_write_dimensions_1()
Expand Down

0 comments on commit 9d646dc

Please sign in to comment.