diff --git a/pyproject.toml b/pyproject.toml
index e859c9b4f7..8b91890d9b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,7 @@ classifiers = [
     'Topic :: Scientific/Engineering :: Physics'
 ]
 dependencies = [
+    "array-api-compat>=1.9.1;python_version>='3.10'",
     "astunparse>=1.6.3;python_version<'3.9'",
     'attrs>=21.3',
     'black>=22.3',
@@ -237,8 +238,9 @@ module = 'gt4py.next.iterator.runtime'
 [tool.pytest.ini_options]
 markers = [
     'all: special marker that skips all tests',
-    'requires_atlas: tests that require `atlas4py` bindings package',
-    'requires_dace: tests that require `dace` package',
+    'requires_atlas: tests that require the `atlas4py` bindings package',
+    'requires_dace: tests that require the `dace` package',
+    'requires_jax: tests that require the `jax` package',
     'requires_gpu: tests that require a NVidia GPU (`cupy` and `cudatoolkit` are required)',
     'uses_applied_shifts: tests that require backend support for applied-shifts',
     'uses_constant_fields: tests that require backend support for constant fields',
@@ -264,6 +266,7 @@ markers = [
     'uses_unstructured_shift: tests that use a unstructured connectivity',
     'uses_max_over: tests that use the max_over builtin',
     'uses_mesh_with_skip_values: tests that use a mesh with skip values',
+    'slices_out_argument: tests that slice the out argument in a field_operator call',
     'checks_specific_error: tests that rely on the backend to produce a specific error message'
 ]
 norecursedirs = ['dist', 'build', 'cpp_backend_tests/build*', '_local/*', '.*']
diff --git a/src/gt4py/_core/definitions.py b/src/gt4py/_core/definitions.py
index 8f62788b8f..f6250b1b60 100644
--- a/src/gt4py/_core/definitions.py
+++ b/src/gt4py/_core/definitions.py
@@ -27,6 +27,7 @@
     Iterator,
     Literal,
     Protocol,
+    Self,
     Sequence,
     Tuple,
     Type,
@@ -405,6 +406,7 @@ class DeviceType(enum.IntEnum):
     MetalDeviceTyping,
     VPIDeviceTyping,
     ROCMDeviceTyping,
+    covariant=True,
 )
 
 
@@ -454,7 +456,7 @@ def astype(self, dtype: npt.DTypeLike) -> NDArrayObject: ...
 
     def any(self) -> bool: ...
 
-    def __getitem__(self, item: Any) -> NDArrayObject: ...
+    def __getitem__(self, item: Any) -> Self: ...
 
     def __abs__(self) -> NDArrayObject: ...
 
@@ -505,3 +507,49 @@ def __and__(self, other: NDArrayObject | Scalar) -> NDArrayObject: ...
 
     def __or__(self, other: NDArrayObject | Scalar) -> NDArrayObject: ...
 
     def __xor__(self, other: NDArrayObject | Scalar) -> NDArrayObject: ...
+
+
+class MutableNDArrayObject(NDArrayObject, Protocol):
+    def __setitem__(self, index: Any, value: Any) -> None: ...
+
+
+class ArrayApiNamespace(Protocol):
+    def empty(self, shape: Sequence[int], *, dtype: Any = None, device: Any = None) -> Any: ...
+    def zeros(self, shape: Sequence[int], *, dtype: Any = None, device: Any = None) -> Any: ...
+    def ones(self, shape: Sequence[int], *, dtype: Any = None, device: Any = None) -> Any: ...
+    def full(
+        self, shape: Sequence[int], fill_value: Scalar, *, dtype: Any = None, device: Any = None
+    ) -> Any: ...
+    def asarray(self, obj: Any, *, dtype: Any = None, copy: Any = None) -> Any: ...
+
+    # @property  # once all relevant implementations have this attribute
+    # def __array_api_version__(self) -> str: ...  # noqa: ERA001
+
+    # TODO(havogt): add relevant methods and attributes or wait for the standard to provide it, see e.g. https://github.com/data-apis/array-api/issues/697
+
+
+def is_array_api_namespace(obj: Any) -> TypeGuard[ArrayApiNamespace]:
+    # return hasattr(obj, "__array_api_version__")  # noqa: ERA001  # once all relevant implementations have this attribute
+    return (
+        hasattr(obj, "empty")
+        and hasattr(obj, "zeros")
+        and hasattr(obj, "ones")
+        and hasattr(obj, "full")
+        and hasattr(obj, "asarray")
+    )
+
+
+def to_array_api_dtype(xp: ArrayApiNamespace, dtype_: DTypeLike | None) -> Any:
+    """
+    Converts a GT4Py `DTypeLike` to the dtype object of the given Array API namespace.
+
+    Note: For convenience `None` is passed through, as it has a consistent meaning in all Array API implementations.
+    """
+    if dtype_ is None:
+        return None
+    else:
+        dtype_ = dtype(dtype_)
+        assert (
+            dtype_.tensor_shape == ()
+        )  # TODO(havogt): support tensor shapes (or remove from our DType)
+        return getattr(xp, dtype_.scalar_type.__name__)
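
The two helpers added to `gt4py._core.definitions` above are small enough to exercise standalone. A minimal sketch (not part of the patch; the `probe_*` names are illustrative, and plain NumPy stands in for the Array API namespace) of the duck-typed namespace check and the name-based dtype lookup:

```python
import numpy as np


def probe_is_array_api_namespace(obj) -> bool:
    # Same structural check as `is_array_api_namespace`: until
    # `__array_api_version__` is universally available, probe for the
    # construction functions the allocators actually call.
    return all(hasattr(obj, name) for name in ("empty", "zeros", "ones", "full", "asarray"))


def probe_to_array_api_dtype(xp, scalar_type: type):
    # `to_array_api_dtype` resolves the dtype by name on the target
    # namespace, e.g. numpy.float32 -> xp.float32.
    return getattr(xp, scalar_type.__name__)


assert probe_is_array_api_namespace(np)
assert probe_to_array_api_dtype(np, np.float32) is np.float32
```
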
diff --git a/src/gt4py/next/allocators.py b/src/gt4py/next/_allocators.py
similarity index 50%
rename from src/gt4py/next/allocators.py
rename to src/gt4py/next/_allocators.py
index 864f8c1b09..184785246a 100644
--- a/src/gt4py/next/allocators.py
+++ b/src/gt4py/next/_allocators.py
@@ -10,6 +10,8 @@
 import dataclasses
 import functools
 
+import array_api_compat
+
 import gt4py._core.definitions as core_defs
 import gt4py.next.common as common
 import gt4py.storage.allocators as core_allocators
@@ -41,6 +43,63 @@
 )
 
 
+class GTArrayAllocationNamespace(Protocol):
+    """
+    Standard Array API-like construction functions based on `domain` instead of `shape`.
+
+    The reason to use `domain` is:
+    - we need the `Dimension`s for getting the desired ordering of strides
+    - `aligned_index` refers to a point in the domain (absolute position), not relative to the array shape
+    """
+
+    # Notes:
+    # - this concept could be evolved to a more general `GTArrayNamespace` that adds all array functions that we use in embedded
+    # - maybe for advanced indexing use-case: extend the namespace with standard compliant fallback functions.
+
+    def empty(
+        self,
+        domain: common.DomainLike,
+        *,
+        dtype: Optional[core_defs.DTypeLike] = None,
+        aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+    ) -> core_defs.NDArrayObject: ...
+
+    def zeros(
+        self,
+        domain: common.DomainLike,
+        *,
+        dtype: Optional[core_defs.DTypeLike] = None,
+        aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+    ) -> core_defs.NDArrayObject: ...
+
+    def ones(
+        self,
+        domain: common.DomainLike,
+        *,
+        dtype: Optional[core_defs.DTypeLike] = None,
+        aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+    ) -> core_defs.NDArrayObject: ...
+
+    def full(
+        self,
+        domain: common.DomainLike,
+        fill_value: core_defs.Scalar,
+        *,
+        dtype: Optional[core_defs.DTypeLike] = None,
+        aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+    ) -> core_defs.NDArrayObject: ...
+
+    def asarray(
+        self,
+        data: core_defs.NDArrayObject,
+        *,
+        domain: common.DomainLike,
+        dtype: Optional[core_defs.DTypeLike] = None,
+        copy: Optional[bool] = None,
+        aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+    ) -> core_defs.NDArrayObject: ...
+
+
 FieldLayoutMapper: TypeAlias = Callable[
     [Sequence[common.Dimension]], core_allocators.BufferLayoutMap
 ]
@@ -60,7 +119,7 @@ def __gt_allocate__(
         dtype: core_defs.DType[core_defs.ScalarT],
         device_id: int = 0,
         aligned_index: Optional[Sequence[common.NamedIndex]] = None,  # absolute position
-    ) -> core_allocators.TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]: ...
+    ) -> core_defs.MutableNDArrayObject: ...
 
 
 def is_field_allocator(obj: Any) -> TypeGuard[FieldBufferAllocatorProtocol]:
@@ -160,7 +219,7 @@ def __gt_allocate__(
         dtype: core_defs.DType[core_defs.ScalarT],
         device_id: int = 0,
         aligned_index: Optional[Sequence[common.NamedIndex]] = None,  # absolute position
-    ) -> core_allocators.TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]:
+    ) -> core_defs.MutableNDArrayObject:
         shape = domain.shape
         layout_map = self.layout_mapper(domain.dims)
         # TODO(egparedes): add support for non-empty aligned index values
@@ -242,7 +301,7 @@ def __gt_allocate__(
         dtype: core_defs.DType[core_defs.ScalarT],
         device_id: int = 0,
         aligned_index: Optional[Sequence[common.NamedIndex]] = None,  # absolute position
-    ) -> core_allocators.TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]:
+    ) -> core_defs.MutableNDArrayObject:
         raise self.exception
 
 
@@ -293,15 +352,16 @@ def __init__(self) -> None:
 
 
 def allocate(
-    domain: common.DomainLike,
-    dtype: core_defs.DType[core_defs.ScalarT],
     *,
+    domain: common.Domain,
+    dtype: core_defs.DType[core_defs.ScalarT],
+    allocator: FieldBufferAllocatorProtocol,
+    device: core_defs.Device,
     aligned_index: Optional[Sequence[common.NamedIndex]] = None,
-    allocator: Optional[FieldBufferAllocationUtil] = None,
-    device: Optional[core_defs.Device] = None,
-) -> core_allocators.TensorBuffer:
+) -> core_defs.MutableNDArrayObject:
     """
-    Allocate a TensorBuffer for the given domain and device or allocator.
+    TODO: docstring
+    Allocate an NDArrayObject for the given domain and device or allocator.
 
     The arguments `device` and `allocator` are mutually exclusive.
     If `device` is specified, the corresponding default allocator
@@ -323,20 +383,215 @@ def allocate(
             If illegal or inconsistent arguments are specified.
     """
-    if device is None and allocator is None:
-        raise ValueError("No 'device' or 'allocator' specified.")
-    actual_allocator = get_allocator(allocator)
-    if actual_allocator is None:
-        assert device is not None  # for mypy
-        actual_allocator = device_allocators[device.device_type]
-    elif device is None:
-        device = core_defs.Device(actual_allocator.__gt_device_type__, 0)
-    elif device.device_type != actual_allocator.__gt_device_type__:
-        raise ValueError(f"Device '{device}' and allocator '{actual_allocator}' are incompatible.")
-
-    return actual_allocator.__gt_allocate__(
-        domain=common.domain(domain),
+    return allocator.__gt_allocate__(
+        domain=domain,
         dtype=dtype,
         device_id=device.device_id,
         aligned_index=aligned_index,
     )
+
+
+def _check_unsupported_device_and_aligned_index(
+    device: Optional[core_defs.Device], aligned_index: Optional[Sequence[common.NamedIndex]]
+) -> None:
+    if aligned_index is not None:
+        raise NotImplementedError("Aligned index is not supported for Array API namespaces.")
+    if device is not None:
+        # TODO(havogt): this requires translating our device object into the concrete Array API implementation's device object
+        raise NotImplementedError("Device specification is not yet supported.")
+
+
+def _get_actual_allocator_and_device(
+    allocator: Optional[FieldBufferAllocationUtil], device: Optional[core_defs.Device]
+) -> tuple[FieldBufferAllocatorProtocol, core_defs.Device]:
+    if allocator is None and device is not None:
+        return device_allocators[device.device_type], device
+
+    actual_allocator = get_allocator(allocator, default=device_allocators[core_defs.DeviceType.CPU])
+    assert actual_allocator is not None
+    if device is None:
+        device = core_defs.Device(actual_allocator.__gt_device_type__, 0)
+    elif device.device_type != actual_allocator.__gt_device_type__:
+        raise ValueError(f"Device '{device}' and allocator '{actual_allocator}' are incompatible.")
+    return actual_allocator, device
+
+
+def get_array_allocation_namespace(
+    allocator: Optional[FieldBufferAllocationUtil | core_defs.ArrayApiNamespace],
+    device: Optional[core_defs.Device] = None,
+) -> GTArrayAllocationNamespace:
+    if core_defs.is_array_api_namespace(allocator):
+        assert core_defs.is_array_api_namespace(allocator)
+        array_ns = array_api_compat.array_namespace(allocator.empty([0]))
+
+        class _ArrayNamespaceWrapper:
+            @staticmethod
+            def empty(
+                domain: common.DomainLike,
+                *,
+                dtype: Optional[core_defs.DTypeLike] = None,
+                aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+            ) -> core_defs.NDArrayObject:
+                _check_unsupported_device_and_aligned_index(device, aligned_index)
+                return array_ns.empty(
+                    shape=common.domain(domain).shape,
+                    dtype=core_defs.to_array_api_dtype(array_ns, dtype),
+                )
+
+            @staticmethod
+            def zeros(
+                domain: common.DomainLike,
+                *,
+                dtype: Optional[core_defs.DTypeLike] = None,
+                aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+            ) -> core_defs.NDArrayObject:
+                _check_unsupported_device_and_aligned_index(device, aligned_index)
+                return array_ns.zeros(
+                    shape=common.domain(domain).shape,
+                    dtype=core_defs.to_array_api_dtype(array_ns, dtype),
+                )
+
+            @staticmethod
+            def ones(
+                domain: common.DomainLike,
+                *,
+                dtype: Optional[core_defs.DTypeLike] = None,
+                aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+            ) -> core_defs.NDArrayObject:
+                _check_unsupported_device_and_aligned_index(device, aligned_index)
+                return array_ns.ones(
+                    shape=common.domain(domain).shape,
+                    dtype=core_defs.to_array_api_dtype(array_ns, dtype),
+                )
+
+            @staticmethod
+            def full(
+                domain: common.DomainLike,
+                fill_value: core_defs.Scalar,
+                *,
+                dtype: Optional[core_defs.DTypeLike] = None,
+                aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+            ) -> core_defs.NDArrayObject:
+                _check_unsupported_device_and_aligned_index(device, aligned_index)
+                return array_ns.full(
+                    shape=common.domain(domain).shape,
+                    fill_value=fill_value,
+                    dtype=core_defs.to_array_api_dtype(array_ns, dtype),
+                )
+
+            @staticmethod
+            def asarray(
+                data: core_defs.NDArrayObject,
+                *,
+                domain: common.DomainLike,
+                dtype: Optional[core_defs.DTypeLike] = None,
+                copy: Optional[bool] = None,
+                aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+            ) -> core_defs.NDArrayObject:
+                _check_unsupported_device_and_aligned_index(device, aligned_index)
+                if not data.shape == common.domain(domain).shape:
+                    raise ValueError(
+                        f"Array of shape '{data.shape}' is incompatible with domain '{domain}'."
+                    )
+
+                return array_ns.asarray(
+                    data, dtype=core_defs.to_array_api_dtype(array_ns, dtype), copy=copy
+                )
+
+        return _ArrayNamespaceWrapper
+
+    assert is_field_allocation_tool(allocator) or allocator is None
+    actual_allocator, actual_device = _get_actual_allocator_and_device(allocator, device)
+
+    class _CustomAllocationArrayNamespace:
+        @staticmethod
+        def empty(
+            domain: common.DomainLike,
+            *,
+            dtype: Optional[core_defs.DTypeLike] = None,
+            aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+        ) -> core_defs.NDArrayObject:
+            return allocate(
+                domain=common.domain(domain),
+                dtype=core_defs.dtype(dtype),
+                aligned_index=aligned_index,
+                allocator=actual_allocator,
+                device=actual_device,
+            )
+
+        @staticmethod
+        def zeros(
+            domain: common.DomainLike,
+            *,
+            dtype: Optional[core_defs.DTypeLike] = None,
+            aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+        ) -> core_defs.NDArrayObject:
+            buffer = allocate(
+                domain=common.domain(domain),
+                dtype=core_defs.dtype(dtype),
+                aligned_index=aligned_index,
+                allocator=actual_allocator,
+                device=actual_device,
+            )
+            buffer[...] = 0
+            return buffer
+
+        @staticmethod
+        def ones(
+            domain: common.DomainLike,
+            *,
+            dtype: Optional[core_defs.DTypeLike] = None,
+            aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+        ) -> core_defs.NDArrayObject:
+            buffer = allocate(
+                domain=common.domain(domain),
+                dtype=core_defs.dtype(dtype),
+                aligned_index=aligned_index,
+                allocator=actual_allocator,
+                device=actual_device,
+            )
+            buffer[...] = 1
+            return buffer
+
+        @staticmethod
+        def full(
+            domain: common.DomainLike,
+            fill_value: core_defs.Scalar,
+            *,
+            dtype: Optional[core_defs.DTypeLike] = None,
+            aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+        ) -> core_defs.NDArrayObject:
+            buffer = allocate(
+                domain=common.domain(domain),
+                dtype=core_defs.dtype(dtype),  # TODO check all dtypes
+                aligned_index=aligned_index,
+                allocator=actual_allocator,
+                device=actual_device,
+            )
+            buffer[...] = fill_value
+            return buffer
+
+        @staticmethod
+        def asarray(
+            data: core_defs.NDArrayObject,
+            *,
+            domain: common.DomainLike,
+            dtype: Optional[core_defs.DTypeLike] = None,
+            copy: Optional[bool] = None,
+            aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+        ) -> core_defs.NDArrayObject:
+            if not copy:
+                raise NotImplementedError("Zero-copy construction is not yet supported.")
+            dtype = core_defs.dtype(data.dtype) if dtype is None else core_defs.dtype(dtype)
+            buffer = allocate(
+                domain=common.domain(domain),
+                dtype=dtype,
+                aligned_index=aligned_index,
+                allocator=actual_allocator,
+                device=actual_device,
+            )
+            buffer[...] = array_api_compat.array_namespace(buffer).asarray(data)
+            return buffer
+
+    return _CustomAllocationArrayNamespace
diff --git a/src/gt4py/next/backend.py b/src/gt4py/next/backend.py
index e223d7771c..5a02628012 100644
--- a/src/gt4py/next/backend.py
+++ b/src/gt4py/next/backend.py
@@ -13,7 +13,7 @@
 from typing import Any, Generic
 
 from gt4py._core import definitions as core_defs
-from gt4py.next import allocators as next_allocators
+from gt4py.next import _allocators as next_allocators
 from gt4py.next.ffront import (
     foast_to_gtir,
     foast_to_itir,
diff --git a/src/gt4py/next/common.py b/src/gt4py/next/common.py
index 9b2870e1c0..8d3bc832d2 100644
--- a/src/gt4py/next/common.py
+++ b/src/gt4py/next/common.py
@@ -950,6 +950,9 @@ def _field(
     /,
     *,
     domain: Optional[DomainLike] = None,
+    allocation_ns: Optional[
+        Any
+    ] = None,  # TODO: should be `next_allocators.GTArrayAllocationNamespace`
     dtype: Optional[core_defs.DType] = None,
 ) -> Field:
     raise NotImplementedError
@@ -963,6 +966,9 @@ def _connectivity(
     codomain: Dimension,
     *,
     domain: Optional[DomainLike] = None,
+    allocation_ns: Optional[
+        Any
+    ] = None,  # TODO: should be `next_allocators.GTArrayAllocationNamespace`
    dtype: Optional[core_defs.DType] = None,
     skip_value: Optional[core_defs.IntegralScalar] = None,
 ) -> Connectivity:
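
The renamed `_allocators` module above builds a `GTArrayAllocationNamespace` in two ways: by wrapping a plain Array API namespace, or by routing through a GT4Py buffer allocator. A sketch (assumptions: the domain is simplified to an ordered name-to-size mapping; not the patch's exact code) of the two core ideas — the probe-array trick and the domain-to-shape reduction:

```python
from typing import Mapping, Optional

import array_api_compat
import numpy as np


def domain_empty(namespace, domain: Mapping[str, int], dtype: Optional[type] = None):
    # Probe-array trick from `get_array_allocation_namespace`: allocate a
    # zero-sized array and let array-api-compat hand back a standard-compliant
    # namespace for it (normalizing numpy/cupy/jax differences).
    xp = array_api_compat.array_namespace(namespace.empty([0]))
    # Domain-based construction reduces to a shape-based Array API call; the
    # real wrapper additionally rejects `device`/`aligned_index` for now.
    return xp.empty(shape=tuple(domain.values()), dtype=dtype)


buf = domain_empty(np, {"I": 3, "J": 4}, dtype=np.float32)
assert buf.shape == (3, 4)
```
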
diff --git a/src/gt4py/next/constructors.py b/src/gt4py/next/constructors.py
index 7b39511674..21b4b63636 100644
--- a/src/gt4py/next/constructors.py
+++ b/src/gt4py/next/constructors.py
@@ -14,7 +14,7 @@
 import gt4py._core.definitions as core_defs
 import gt4py.eve as eve
 import gt4py.eve.extended_typing as xtyping
-import gt4py.next.allocators as next_allocators
+import gt4py.next._allocators as next_allocators
 import gt4py.next.common as common
 import gt4py.next.embedded.nd_array_field as nd_array_field
 import gt4py.storage.cartesian.utils as storage_utils
@@ -26,9 +26,12 @@ def empty(
     dtype: core_defs.DTypeLike = core_defs.Float64DType(()),  # noqa: B008 [function-call-in-default-argument]
     *,
     aligned_index: Optional[Sequence[common.NamedIndex]] = None,
-    allocator: Optional[next_allocators.FieldBufferAllocationUtil] = None,
+    allocator: Optional[
+        next_allocators.FieldBufferAllocationUtil | core_defs.ArrayApiNamespace
+    ] = None,  # TODO make sure numpy/cupy etc namespaces are accepted by mypy (maybe we have to allow Any)
     device: Optional[core_defs.Device] = None,
 ) -> nd_array_field.NdArrayField:
+    # TODO: update doc
     """Create a `Field` of uninitialized (undefined) values using the given (or device-default) allocator.
 
     This function supports partial binding of arguments, see :class:`eve.utils.partial` for details.
@@ -74,13 +77,9 @@ def empty(
     >>> b.shape
     (3, 3)
     """
-    dtype = core_defs.dtype(dtype)
-    if allocator is None and device is None:
-        device = core_defs.Device(core_defs.DeviceType.CPU, device_id=0)
-    buffer = next_allocators.allocate(
-        domain, dtype, aligned_index=aligned_index, allocator=allocator, device=device
-    )
-    res = common._field(buffer.ndarray, domain=domain)
+    gtarray_namespace = next_allocators.get_array_allocation_namespace(allocator, device)
+    buffer = gtarray_namespace.empty(domain, dtype=dtype, aligned_index=aligned_index)
+    res = common._field(buffer, domain=domain, allocation_ns=gtarray_namespace)
     assert isinstance(res, common.MutableField)
     assert isinstance(res, nd_array_field.NdArrayField)
     return res
@@ -106,11 +105,12 @@ def zeros(
     >>> gtx.zeros({IDim: range(3, 10)}, allocator=gtx.itir_python).ndarray
     array([0., 0., 0., 0., 0., 0., 0.])
     """
-    field = empty(
-        domain=domain, dtype=dtype, aligned_index=aligned_index, allocator=allocator, device=device
-    )
-    field[...] = field.dtype.scalar_type(0)
-    return field
+    gtarray_namespace = next_allocators.get_array_allocation_namespace(allocator, device)
+    buffer = gtarray_namespace.zeros(domain, dtype=dtype, aligned_index=aligned_index)
+    res = common._field(buffer, domain=domain, allocation_ns=gtarray_namespace)
+    assert isinstance(res, common.MutableField)
+    assert isinstance(res, nd_array_field.NdArrayField)
+    return res
 
 
 @eve.utils.with_fluid_partial
@@ -133,11 +133,12 @@ def ones(
     >>> gtx.ones({IDim: range(3, 10)}, allocator=gtx.itir_python).ndarray
     array([1., 1., 1., 1., 1., 1., 1.])
     """
-    field = empty(
-        domain=domain, dtype=dtype, aligned_index=aligned_index, allocator=allocator, device=device
-    )
-    field[...] = field.dtype.scalar_type(1)
-    return field
+    gtarray_namespace = next_allocators.get_array_allocation_namespace(allocator, device)
+    buffer = gtarray_namespace.ones(domain, dtype=dtype, aligned_index=aligned_index)
+    res = common._field(buffer, domain=domain, allocation_ns=gtarray_namespace)
+    assert isinstance(res, common.MutableField)
+    assert isinstance(res, nd_array_field.NdArrayField)
+    return res
 
 
 @eve.utils.with_fluid_partial
@@ -166,15 +167,53 @@ def full(
     >>> gtx.full({IDim: 3}, 5, allocator=gtx.itir_python).ndarray
     array([5, 5, 5])
     """
-    field = empty(
-        domain=domain,
+    gtarray_namespace = next_allocators.get_array_allocation_namespace(allocator, device)
+    buffer = gtarray_namespace.full(
+        domain,
+        fill_value,
         dtype=dtype if dtype is not None else core_defs.dtype(type(fill_value)),
         aligned_index=aligned_index,
-        allocator=allocator,
-        device=device,
     )
-    field[...] = field.dtype.scalar_type(fill_value)
-    return field
+    res = common._field(buffer, domain=domain, allocation_ns=gtarray_namespace)
+    assert isinstance(res, common.MutableField)
+    assert isinstance(res, nd_array_field.NdArrayField)
+    return res
+
+
+def _actual_domain(
+    dims_or_domain: common.DomainLike | Sequence[common.Dimension],
+    shape: Sequence[int],
+    origin: Optional[Mapping[common.Dimension, int]] = None,
+) -> common.Domain:
+    if isinstance(dims_or_domain, Sequence) and all(
+        isinstance(dim, common.Dimension) for dim in dims_or_domain
+    ):
+        dims = cast(Sequence[common.Dimension], dims_or_domain)
+        if len(dims) != len(shape):
+            raise ValueError(
+                f"Cannot construct 'Field' from array of shape '{shape}' and domain '{dims}'."
+            )
+        if origin:
+            domain_dims = set(dims)
+            if unknown_dims := set(origin.keys()) - domain_dims:
+                raise ValueError(f"Origin keys {unknown_dims} not in domain {dims}.")
+        else:
+            origin = {}
+        return common.domain(
+            [
+                (d, (-(start_offset := origin.get(d, 0)), s - start_offset))
+                for d, s in zip(dims, shape)
+            ]
+        )
+    else:
+        domain = common.domain(cast(common.DomainLike, dims_or_domain))
+        if origin:
+            raise ValueError(f"Cannot specify origin for domain {domain}")
+        if domain.shape != shape:
+            raise ValueError(
+                f"Cannot construct 'Field' from array of shape '{shape}' and domain '{domain}'."
+            )
+        return domain
 
 
 @eve.utils.with_fluid_partial
@@ -187,7 +226,7 @@ def as_field(
     aligned_index: Optional[Sequence[common.NamedIndex]] = None,
     allocator: Optional[next_allocators.FieldBufferAllocatorProtocol] = None,
     device: Optional[core_defs.Device] = None,
-    # TODO: copy=False
+    # TODO(havogt): copy=False
 ) -> nd_array_field.NdArrayField:
     """Create a Field from an array-like object using the given (or device-default) allocator.
 
@@ -232,53 +271,25 @@ def as_field(
     >>> gtx.as_field({IDim: range(-1, 2)}, xdata).domain.ranges[0]
     UnitRange(-1, 2)
     """
-    if isinstance(domain, Sequence) and all(isinstance(dim, common.Dimension) for dim in domain):
-        domain = cast(Sequence[common.Dimension], domain)
-        if len(domain) != data.ndim:
-            raise ValueError(
-                f"Cannot construct 'Field' from array of shape '{data.shape}' and domain '{domain}'."
-            )
-        if origin:
-            domain_dims = set(domain)
-            if unknown_dims := set(origin.keys()) - domain_dims:
-                raise ValueError(f"Origin keys {unknown_dims} not in domain {domain}.")
-        else:
-            origin = {}
-        actual_domain = common.domain(
-            [
-                (d, (-(start_offset := origin.get(d, 0)), s - start_offset))
-                for d, s in zip(domain, data.shape)
-            ]
-        )
-    else:
-        if origin:
-            raise ValueError(f"Cannot specify origin for domain {domain}")
-        actual_domain = common.domain(cast(common.DomainLike, domain))
+    actual_domain = _actual_domain(dims_or_domain=domain, shape=data.shape, origin=origin)
 
     # TODO(egparedes): allow zero-copy construction (no reallocation) if buffer has
     # already the correct layout and device.
-    shape = storage_utils.asarray(data).shape
-    if shape != actual_domain.shape:
-        raise ValueError(f"Cannot construct 'Field' from array of shape '{shape}'.")
-    if dtype is None:
-        dtype = storage_utils.asarray(data).dtype
-    dtype = core_defs.dtype(dtype)
-    assert dtype.tensor_shape == ()  # TODO
 
     if (allocator is None) and (device is None) and xtyping.supports_dlpack(data):
         device = core_defs.Device(*data.__dlpack_device__())
-    field = empty(
+    gtarray_namespace = next_allocators.get_array_allocation_namespace(allocator, device)
+    buffer = gtarray_namespace.asarray(
+        data,
         domain=actual_domain,
         dtype=dtype,
+        copy=True,  # TODO(havogt) add support for zero-copy construction
         aligned_index=aligned_index,
-        allocator=allocator,
-        device=device,
     )
+    res = common._field(buffer, domain=actual_domain, allocation_ns=gtarray_namespace)
 
-    field[...] = field.array_ns.asarray(data)
-
-    return field
+    return res  # type: ignore[return-value]  # it is an NDArrayField
 
 
 @eve.utils.with_fluid_partial
@@ -349,12 +360,22 @@ def as_connectivity(
     if (allocator is None) and (device is None) and xtyping.supports_dlpack(data):
         device = core_defs.Device(*data.__dlpack_device__())
 
-    buffer = next_allocators.allocate(actual_domain, dtype, allocator=allocator, device=device)
-    # TODO(havogt): consider adding MutableNDArrayObject
-    buffer.ndarray[...] = storage_utils.asarray(data)  # type: ignore[index]
+
+    gtarray_namespace = next_allocators.get_array_allocation_namespace(allocator, device)
+    buffer = gtarray_namespace.asarray(
+        data,
+        domain=actual_domain,
+        dtype=dtype,
+        copy=True,  # TODO(havogt) add support for zero-copy construction
+    )
     connectivity_field = common._connectivity(
-        buffer.ndarray, codomain=codomain, domain=actual_domain, skip_value=skip_value
+        buffer,
+        codomain=codomain,
+        domain=actual_domain,
+        skip_value=skip_value,
+        allocation_ns=gtarray_namespace,
     )
+    assert isinstance(connectivity_field, nd_array_field.NdArrayConnectivityField)
     return connectivity_field
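
The origin arithmetic that `_actual_domain` factors out of `as_field` is compact but easy to misread; a standalone sketch (illustrative names, strings standing in for `Dimension` objects) of the mapping it computes:

```python
from typing import Mapping, Sequence


def ranges_from_origin(dims: Sequence[str], shape: Sequence[int], origin: Mapping[str, int]):
    # For each dimension, an origin offset o maps an array of size s to the
    # unit range [-o, s - o), i.e. index 0 of the data sits at position -o.
    return {d: (-origin.get(d, 0), s - origin.get(d, 0)) for d, s in zip(dims, shape)}


# Matches the `gtx.as_field({IDim: range(-1, 2)}, xdata)` doctest above:
assert ranges_from_origin(["I"], [3], {"I": 1}) == {"I": (-1, 2)}
```
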
diff --git a/src/gt4py/next/embedded/nd_array_field.py b/src/gt4py/next/embedded/nd_array_field.py
index e15fb4266a..06eeecb66a 100644
--- a/src/gt4py/next/embedded/nd_array_field.py
+++ b/src/gt4py/next/embedded/nd_array_field.py
@@ -29,7 +29,7 @@
     TypeVar,
     cast,
 )
-from gt4py.next import common
+from gt4py.next import _allocators, common
 from gt4py.next.embedded import (
     common as embedded_common,
     context as embedded_context,
@@ -116,6 +116,7 @@ class NdArrayField(
 
     _domain: common.Domain
     _ndarray: core_defs.NDArrayObject
+    _allocation_ns: Optional[_allocators.GTArrayAllocationNamespace]
 
     array_ns: ClassVar[ModuleType]  # TODO(havogt) introduce a NDArrayNamespace protocol
@@ -167,6 +168,9 @@ def from_array(
         /,
         *,
         domain: common.DomainLike,
+        allocation_ns: Optional[
+            _allocators.GTArrayAllocationNamespace
+        ] = None,  # TODO: maybe an NDArrayField always has an allocator?
         dtype: Optional[core_defs.DTypeLike] = None,
     ) -> NdArrayField:
         domain = common.domain(domain)
@@ -184,7 +188,7 @@ def from_array(
         assert len(domain) == array.ndim
         assert all(s == 1 or len(r) == s for r, s in zip(domain.ranges, array.shape))
 
-        return cls(domain, array)
+        return cls(domain, array, allocation_ns)
 
     def premap(
         self: NdArrayField,
@@ -330,7 +334,9 @@ def restrict(self, index: common.AnyIndexSpec) -> NdArrayField:
         new_domain, buffer_slice = self._slice(index)
         new_buffer = self.ndarray[buffer_slice]
         new_buffer = self.__class__.array_ns.asarray(new_buffer)
-        return self.__class__.from_array(new_buffer, domain=new_domain)
+        return self.__class__.from_array(
+            new_buffer, domain=new_domain, allocation_ns=self._allocation_ns
+        )
 
     __getitem__ = restrict
 
@@ -428,6 +434,24 @@ def _slice(
         assert common.is_relative_index_sequence(slice_)
         return new_domain, slice_
 
+    def __copy__(self) -> NdArrayField:
+        # Note: `copy` copies the data, following NumPy behavior
+        allocation_ns = (
+            self._allocation_ns
+            if self._allocation_ns is not None
+            else _allocators.get_array_allocation_namespace(self.array_ns)
+        )
+        ndarray_copy = allocation_ns.asarray(
+            self.ndarray,
+            domain=self.domain,
+            dtype=self.dtype,
+            copy=True,  # aligned_index???
+        )
+        return self.__class__(self.domain, ndarray_copy, _allocation_ns=self._allocation_ns)
+
+    def __deepcopy__(self, _: Any) -> NdArrayField:
+        return self.__copy__()
+
     if dace:
         # Extension of NdArrayField adding SDFGConvertible support in GT4Py Programs
         def _dace_data_ptr(self) -> int:
@@ -513,6 +537,7 @@ def from_array(  # type: ignore[override]
         codomain: common.DimT,
         *,
         domain: common.DomainLike,
+        allocation_ns: Optional[_allocators.GTArrayAllocationNamespace] = None,
         dtype: Optional[core_defs.DTypeLike] = None,
         skip_value: Optional[core_defs.IntegralScalar] = None,
     ) -> NdArrayConnectivityField:
@@ -533,7 +558,7 @@ def from_array(  # type: ignore[override]
 
         assert isinstance(codomain, common.Dimension)
 
-        return cls(domain, array, codomain, _skip_value=skip_value)
+        return cls(domain, array, allocation_ns, codomain, _skip_value=skip_value)
 
     def inverse_image(self, image_range: common.UnitRange | common.NamedRange) -> common.Domain:
         cache_key = hash((id(self.ndarray), self.domain, image_range))
@@ -570,7 +595,13 @@ def restrict(self, index: common.AnyIndexSpec) -> NdArrayConnectivityField:
             xp = cls.array_ns
             new_domain, buffer_slice = self._slice(index)
             new_buffer = xp.asarray(self.ndarray[buffer_slice])
-            restricted_connectivity = cls(new_domain, new_buffer, self.codomain, self.skip_value)
+            restricted_connectivity = cls(
+                new_domain,
+                new_buffer,
+                _allocation_ns=self._allocation_ns,
+                _codomain=self._codomain,
+                _skip_value=self._skip_value,
+            )
             self._cache[cache_key] = restricted_connectivity
 
         return restricted_connectivity
@@ -594,7 +625,9 @@ def _domain_premap(data: NdArrayField, *connectivities: common.Connectivity) ->
         new_ranges = connectivity.inverse_image(current_range)
         new_domain = new_domain.replace(dim_idx, *new_ranges)
 
-    return data.__class__.from_array(data._ndarray, domain=new_domain, dtype=data.dtype)
+    return data.__class__.from_array(
+        data._ndarray, domain=new_domain, dtype=data.dtype, allocation_ns=data._allocation_ns
+    )
 
 
 def _reshuffling_premap(
@@ -634,7 +667,10 @@ def _reshuffling_premap(
     conn_ndarray = xp.broadcast_to(conn_ndarray, data.domain.shape)
     if conn_ndarray is not conn.ndarray:
         conn = conn.__class__.from_array(
-            conn_ndarray, domain=data.domain, codomain=conn.codomain
+            conn_ndarray,
+            domain=data.domain,
+            codomain=conn.codomain,
+            allocation_ns=conn._allocation_ns,
         )
     conn_map[conn.codomain] = conn
     dim_idx = data.domain.dim_index(conn.codomain, allow_missing=False)
@@ -664,6 +700,7 @@ def _reshuffling_premap(
         new_buffer,
         domain=new_domain,
         dtype=data.dtype,
+        allocation_ns=data._allocation_ns,
     )
 
 
@@ -704,6 +741,7 @@ def _remapping_premap(data: NdArrayField, connectivity: common.Connectivity) ->
         new_buffer,
         domain=new_domain,
         dtype=data.dtype,
+        allocation_ns=data._allocation_ns,
     )
 
 
@@ -872,6 +910,7 @@ def _intersect_fields(
         nd_array_class.from_array(
             f.ndarray[_get_slices_from_domain_slice(f.domain, intersected_domain)],
             domain=intersected_domain,
+            # TODO allocator
         )
         for f, intersected_domain in zip(broadcasted_fields, intersected_domains, strict=True)
     )
@@ -908,6 +947,7 @@ def _concat(*fields: common.Field, dim: common.Dimension) -> common.Field:
             [nd_array_class.array_ns.broadcast_to(f.ndarray, f.domain.shape) for f in fields],
             axis=new_domain.dim_index(dim, allow_missing=False),
         ),
+        # TODO allocator
        domain=new_domain,
     )
@@ -1076,8 +1116,16 @@ def __setitem__(
         index: common.AnyIndexSpec,
         value: common.Field | core_defs.NDArrayObject | core_defs.ScalarT,
     ) -> None:
-        # TODO(havogt): use something like `self.ndarray = self.ndarray.at(index).set(value)`
-        raise NotImplementedError("'__setitem__' for JaxArrayField not yet implemented.")
+        target_domain, target_slice = self._slice(index)
+        if isinstance(value, NdArrayField):
+            if not value.domain == target_domain:
+                raise ValueError(
+                    f"Incompatible `Domain` in assignment. Source domain = {value.domain}, target domain = {target_domain}."
+                )
+            value = value.ndarray
+
+        assert hasattr(self._ndarray, "at")
+        object.__setattr__(self, "_ndarray", self._ndarray.at[target_slice].set(value))
 
     common._field.register(jnp.ndarray, JaxArrayField.from_array)
diff --git a/src/gt4py/next/ffront/decorator.py b/src/gt4py/next/ffront/decorator.py
index 61756f30c9..a8a7c4e730 100644
--- a/src/gt4py/next/ffront/decorator.py
+++ b/src/gt4py/next/ffront/decorator.py
@@ -24,7 +24,7 @@
 from gt4py._core import definitions as core_defs
 from gt4py.eve import extended_typing as xtyping
 from gt4py.next import (
-    allocators as next_allocators,
+    _allocators as next_allocators,
     backend as next_backend,
     common,
     embedded as next_embedded,
diff --git a/src/gt4py/next/program_processors/runners/dace.py b/src/gt4py/next/program_processors/runners/dace.py
index 1b3b930818..45bc9908ef 100644
--- a/src/gt4py/next/program_processors/runners/dace.py
+++ b/src/gt4py/next/program_processors/runners/dace.py
@@ -9,7 +9,7 @@
 import factory
 
 import gt4py._core.definitions as core_defs
-import gt4py.next.allocators as next_allocators
+import gt4py.next._allocators as next_allocators
 from gt4py.next import backend
 from gt4py.next.otf import workflow
 from gt4py.next.program_processors.runners.dace_fieldview import workflow as dace_fieldview_workflow
diff --git a/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py b/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py
index 40d44f5ab0..edf510ca2e 100644
--- a/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py
+++ b/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py
@@ -16,7 +16,7 @@
 import factory
 
 from gt4py._core import definitions as core_defs
-from gt4py.next import allocators as gtx_allocators, common, config
+from gt4py.next import _allocators as gtx_allocators, common, config
 from gt4py.next.iterator import ir as itir, transforms as itir_transforms
 from gt4py.next.otf import languages, recipes, stages, step_types, workflow
 from gt4py.next.otf.binding import interface
diff --git a/src/gt4py/next/program_processors/runners/gtfn.py b/src/gt4py/next/program_processors/runners/gtfn.py
index 55f479c665..2847540e6c 100644
--- a/src/gt4py/next/program_processors/runners/gtfn.py
+++ b/src/gt4py/next/program_processors/runners/gtfn.py
@@ -17,7 +17,7 @@
 import filelock
 
 import gt4py._core.definitions as core_defs
-import gt4py.next.allocators as next_allocators
+import gt4py.next._allocators as next_allocators
 from gt4py.eve import utils
 from gt4py.eve.utils import content_hash
 from gt4py.next import backend, common, config
diff --git a/src/gt4py/next/program_processors/runners/roundtrip.py b/src/gt4py/next/program_processors/runners/roundtrip.py
index 1dd568b95a..57df1fe228 100644
--- a/src/gt4py/next/program_processors/runners/roundtrip.py
+++ b/src/gt4py/next/program_processors/runners/roundtrip.py
@@ -20,7 +20,7 @@
 from gt4py.eve import codegen
 from gt4py.eve.codegen import FormatTemplate as as_fmt, MakoTemplate as as_mako
-from gt4py.next import allocators as next_allocators, backend as next_backend, common, config
+from gt4py.next import _allocators as next_allocators, backend as next_backend, common, config
 from gt4py.next.ffront import foast_to_gtir, foast_to_past, past_to_itir
 from gt4py.next.iterator import ir as itir, transforms as itir_transforms
 from gt4py.next.otf import stages, workflow
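
The new `JaxArrayField.__setitem__` above rebinds `_ndarray` (via `object.__setattr__`, since the field is a frozen dataclass) instead of mutating, because JAX arrays are immutable and in-place updates must be expressed functionally through the `.at[...]` interface. A standalone illustration, independent of the patch:

```python
import jax.numpy as jnp

a = jnp.zeros((4,))
b = a.at[1:3].set(7.0)  # returns a new array; `a` is unchanged
assert float(a[1]) == 0.0 and float(b[1]) == 7.0
```
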
diff --git a/src/gt4py/storage/allocators.py b/src/gt4py/storage/allocators.py
index 298b9c2e5a..dbd3113a77 100644
--- a/src/gt4py/storage/allocators.py
+++ b/src/gt4py/storage/allocators.py
@@ -17,12 +17,10 @@
 import types
 
 import numpy as np
-import numpy.typing as npt
 
 from gt4py._core import definitions as core_defs
 from gt4py.eve import extended_typing as xtyping
 from gt4py.eve.extended_typing import (
-    TYPE_CHECKING,
     Any,
     Callable,
     Generic,
@@ -31,7 +29,6 @@
     Protocol,
     Sequence,
     Tuple,
-    Type,
     TypeAlias,
     TypeGuard,
     Union,
@@ -64,88 +61,6 @@ def is_valid_layout_map(value: Sequence[Any]) -> TypeGuard[BufferLayoutMap]:
     )
 
 
-@dataclasses.dataclass(frozen=True)
-class TensorBuffer(Generic[core_defs.DeviceTypeT, core_defs.ScalarT]):
-    """
-    N-dimensional (tensor-like) memory buffer.
-
-    The actual class of the stored buffer and ndarray instances is
-    represented in the `NDBufferT` parameter and might be any n-dimensional
-    buffer-like class with a compatible buffer interface (e.g. NumPy
-    or CuPy `ndarray`.)
-
-    Attributes:
-        buffer: Raw allocated buffer.
-        memory_address: Memory address of the buffer.
-        device: Device where the buffer is allocated.
-        dtype: Data type descriptor.
-        shape: Tuple with lengths of the corresponding tensor dimensions.
-        strides: Tuple with sizes (in bytes) of the steps in each dimension.
-        layout_map: Tuple with the order of the dimensions in the buffer
-            layout_map[i] = j means that the i-th dimension of the tensor
-            corresponds to the j-th dimension in the (C-layout) buffer.
-        byte_offset: Offset (in bytes) from the beginning of the buffer to
-            the first valid element.
-        byte_alignment: Alignment (in bytes) of the first valid element.
-        aligned_index: N-dimensional index of the first aligned element.
-        ndarray: N-dimensional tensor view of the allocated buffer.
-    """
-
-    buffer: _NDBuffer = dataclasses.field(hash=False)
-    memory_address: int
-    device: core_defs.Device[core_defs.DeviceTypeT]
-    dtype: core_defs.DType[core_defs.ScalarT]
-    shape: core_defs.TensorShape
-    strides: Tuple[int, ...]
-    layout_map: BufferLayoutMap
-    byte_offset: int
-    byte_alignment: int
-    aligned_index: Tuple[int, ...]
-    ndarray: core_defs.NDArrayObject = dataclasses.field(hash=False)
-
-    @property
-    def ndim(self):
-        """Order of the tensor (`len(tensor_buffer.shape)`)."""
-        return len(self.shape)
-
-    def __array__(self, dtype: Optional[npt.DTypeLike] = None, /) -> np.ndarray:
-        if not xtyping.supports_array(self.ndarray):
-            raise TypeError("Cannot export tensor buffer as NumPy array.")
-
-        return self.ndarray.__array__(dtype)
-
-    @property
-    def __array_interface__(self) -> dict[str, Any]:
-        if not xtyping.supports_array_interface(self.ndarray):
-            raise TypeError("Cannot export tensor buffer to NumPy array interface.")
-
-        return self.ndarray.__array_interface__
-
-    @property
-    def __cuda_array_interface__(self) -> dict[str, Any]:
-        if not xtyping.supports_cuda_array_interface(self.ndarray):
-            raise TypeError("Cannot export tensor buffer to CUDA array interface.")
-
-        return self.ndarray.__cuda_array_interface__
-
-    def __dlpack__(self, *, stream: Optional[int] = None) -> Any:
-        if not hasattr(self.ndarray, "__dlpack__"):
-            raise TypeError("Cannot export tensor buffer to DLPack.")
-        return self.ndarray.__dlpack__(stream=stream)  # type: ignore[call-arg,arg-type]  # stream is not always supported
-
-    def __dlpack_device__(self) -> xtyping.DLPackDevice:
-        if not hasattr(self.ndarray, "__dlpack_device__"):
-            raise TypeError("Cannot extract DLPack device from tensor buffer.")
-        return self.ndarray.__dlpack_device__()
-
-
-if TYPE_CHECKING:
-    # TensorBuffer should be compatible with all the expected buffer interfaces
-    __TensorBufferAsArrayInterfaceT: Type[xtyping.ArrayInterface] = TensorBuffer
-    __TensorBufferAsCUDAArrayInterfaceT: Type[xtyping.CUDAArrayInterface] = TensorBuffer
-    __TensorBufferAsDLPackBufferT: Type[xtyping.DLPackBuffer] = TensorBuffer
-
-
 class BufferAllocator(Protocol[core_defs.DeviceTypeT]):
     """Protocol for buffer allocators."""
 
@@ -160,9 +75,9 @@ def allocate(
         layout_map: BufferLayoutMap,
         byte_alignment: int,
         aligned_index: Optional[Sequence[int]] = None,
-    ) -> TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]:
+    ) -> core_defs.MutableNDArrayObject:
         """
-        Allocate a TensorBuffer with the given shape, layout and alignment settings.
+        Allocate an NDArrayObject with the given shape, layout and alignment settings.
 
         Args:
             shape: Tensor dimensions.
@@ -194,7 +109,7 @@ def allocate(
         layout_map: BufferLayoutMap,
         byte_alignment: int,
         aligned_index: Optional[Sequence[int]] = None,
-    ) -> TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]:
+    ) -> core_defs.MutableNDArrayObject:
         if not core_defs.is_valid_tensor_shape(shape):
             raise ValueError(f"Invalid shape {shape}")
         ndim = len(shape)
@@ -254,24 +169,7 @@ def allocate(
         ) % byte_alignment
         byte_offset = (aligned_index_offset + allocation_mismatch_offset) % byte_alignment
 
-        # Create shaped view from buffer
-        ndarray = self.tensorize(
-            buffer, dtype, shape, padded_shape, item_size, strides, byte_offset
-        )
-
-        return TensorBuffer(
-            buffer=buffer,
-            memory_address=memory_address,
-            device=core_defs.Device(self.device_type, device_id),
-            dtype=dtype,
-            shape=shape,
-            strides=strides,
-            layout_map=layout_map,
-            byte_offset=byte_offset,
-            byte_alignment=byte_alignment,
-            aligned_index=aligned_index,
-            ndarray=ndarray,
-        )
+        return self.tensorize(buffer, dtype, shape, padded_shape, item_size, strides, byte_offset)
 
     @property
     @abc.abstractmethod
@@ -292,7 +190,8 @@ def tensorize(
         item_size: int,
         strides: Sequence[int],
         byte_offset: int,
-    ) -> core_defs.NDArrayObject:
+    ) -> core_defs.MutableNDArrayObject:
+        """Create shaped view from buffer."""
         pass
 
 
@@ -301,7 +200,7 @@ class ArrayUtils:
     array_ns: types.ModuleType
     empty: Callable[..., _NDBuffer]
     byte_bounds: Callable[[_NDBuffer], Tuple[int, int]]
-    as_strided: Callable[..., core_defs.NDArrayObject]
+    as_strided: Callable[..., core_defs.MutableNDArrayObject]
 
 
 numpy_array_utils = ArrayUtils(
@@ -359,7 +258,7 @@ def tensorize(
         item_size: int,
         strides: Sequence[int],
         byte_offset: int,
-    ) -> core_defs.NDArrayObject:
+    ) -> core_defs.MutableNDArrayObject:
         aligned_buffer = buffer[byte_offset : byte_offset + math.prod(allocated_shape) * item_size]  # type: ignore[index]  # TODO(egparedes): should we extend `_NDBuffer`s to cover __getitem__?
         flat_ndarray = aligned_buffer.view(dtype=np.dtype(dtype))
         tensor_view = self._array_utils.as_strided(
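
With `TensorBuffer` gone, `allocate` above now returns the shaped view produced by `tensorize` directly, and the raw buffer is kept alive only through the view. A rough sketch (not the patch's code; simplified to a 1-D, C-layout NumPy case) of the underlying technique — over-allocate a flat byte buffer, then slice from the first aligned address and reinterpret:

```python
import numpy as np


def aligned_empty(n: int, dtype: np.dtype, byte_alignment: int) -> np.ndarray:
    item_size = dtype.itemsize
    # Over-allocate so an aligned start address always fits in the buffer.
    buffer = np.empty(n * item_size + byte_alignment, dtype=np.uint8)
    address = buffer.__array_interface__["data"][0]
    byte_offset = (-address) % byte_alignment
    # The caller only ever sees the shaped, aligned view.
    return buffer[byte_offset : byte_offset + n * item_size].view(dtype)


arr = aligned_empty(10, np.dtype(np.float64), byte_alignment=64)
assert arr.__array_interface__["data"][0] % 64 == 0
```
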
diff --git a/src/gt4py/storage/cartesian/interface.py b/src/gt4py/storage/cartesian/interface.py
index 8b38bcdd42..01f884a555 100644
--- a/src/gt4py/storage/cartesian/interface.py
+++ b/src/gt4py/storage/cartesian/interface.py
@@ -97,9 +97,7 @@ def empty(
     assert allocators.is_valid_layout_map(layout_map)
 
     dtype = np.dtype(dtype)
-    _, res = allocate_f(shape, layout_map, dtype, alignment * dtype.itemsize, aligned_index)
-
-    return res
+    return allocate_f(shape, layout_map, dtype, alignment * dtype.itemsize, aligned_index)
 
 
 def ones(
diff --git a/src/gt4py/storage/cartesian/utils.py b/src/gt4py/storage/cartesian/utils.py
index 50500e536b..b94abb915c 100644
--- a/src/gt4py/storage/cartesian/utils.py
+++ b/src/gt4py/storage/cartesian/utils.py
@@ -248,9 +248,9 @@ def allocate_cpu(
     dtype: DTypeLike,
     alignment_bytes: int,
     aligned_index: Optional[Sequence[int]],
-) -> Tuple[allocators._NDBuffer, np.ndarray]:
+) -> np.ndarray:
     device = core_defs.Device(core_defs.DeviceType.CPU, 0)
-    buffer = _CPUBufferAllocator.allocate(
+    ndarray = _CPUBufferAllocator.allocate(
         shape,
         core_defs.dtype(dtype),
         device_id=device.device_id,
@@ -258,7 +258,7 @@ def allocate_cpu(
         byte_alignment=alignment_bytes,
         aligned_index=aligned_index,
     )
-    return buffer.buffer, cast(np.ndarray, buffer.ndarray)
+    return cast(np.ndarray, ndarray)
 
 
 def _allocate_gpu(
@@ -267,14 +267,14 @@ def _allocate_gpu(
     dtype: DTypeLike,
     alignment_bytes: int,
     aligned_index: Optional[Sequence[int]],
-) -> Tuple["cp.ndarray", "cp.ndarray"]:
+) -> "cp.ndarray":
     assert cp is not None
     assert _GPUBufferAllocator is not None, "GPU allocation library or device not found"
     device = core_defs.Device(  # type: ignore[type-var]
         (core_defs.DeviceType.ROCM if gt_config.GT4PY_USE_HIP else core_defs.DeviceType.CUDA),
         0,
     )
-    buffer = _GPUBufferAllocator.allocate(
+    ndarray = _GPUBufferAllocator.allocate(
         shape,
         core_defs.dtype(dtype),
         device_id=device.device_id,
@@ -283,9 +283,7 @@ def _allocate_gpu(
         aligned_index=aligned_index,
     )
 
-    buffer_ndarray = cast("cp.ndarray", buffer.ndarray)
-
-    return buffer.buffer, buffer_ndarray
+    return cast("cp.ndarray", ndarray)
 
 
 allocate_gpu = _allocate_gpu
@@ -321,8 +319,8 @@ def _allocate_gpu_rocm(
         dtype: DTypeLike,
         alignment_bytes: int,
         aligned_index: Optional[Sequence[int]],
-    ) -> Tuple["cp.ndarray", "cp.ndarray"]:
-        buffer, ndarray = _allocate_gpu(shape, layout_map, dtype, alignment_bytes, aligned_index)
-        return buffer, CUDAArrayInterfaceNDArray(ndarray)
+    ) -> "cp.ndarray":
+        ndarray = _allocate_gpu(shape, layout_map, dtype, alignment_bytes, aligned_index)
+        return CUDAArrayInterfaceNDArray(ndarray)
 
     allocate_gpu = _allocate_gpu_rocm
diff --git a/tests/next_tests/definitions.py b/tests/next_tests/definitions.py
index d7413f32d7..85acf23300 100644
--- a/tests/next_tests/definitions.py
+++ b/tests/next_tests/definitions.py
@@ -14,13 +14,21 @@
 
 import pytest
 
-from gt4py.next import allocators as next_allocators
+from gt4py.next import _allocators as next_allocators
 
 
 # Skip definitions
 XFAIL = pytest.xfail
 SKIP = pytest.skip
 
+try:
+    import jax
+    import jax.numpy as jnp
+
+    jax.config.update("jax_enable_x64", True)
+except ImportError:
+    jnp = None
+
 
 # Program processors
 class _PythonObjectIdMixin:
@@ -56,13 +64,19 @@ class EmbeddedDummyBackend:
     allocator: next_allocators.FieldBufferAllocatorProtocol
 
 
-numpy_execution = EmbeddedDummyBackend(next_allocators.StandardCPUFieldBufferAllocator())
+import numpy as np
+
+
+# numpy_execution = EmbeddedDummyBackend(next_allocators.StandardCPUFieldBufferAllocator())
+numpy_execution = EmbeddedDummyBackend(np)
 cupy_execution = EmbeddedDummyBackend(next_allocators.StandardGPUFieldBufferAllocator())
+jax_execution = EmbeddedDummyBackend(jnp)
 
 
 class EmbeddedIds(_PythonObjectIdMixin, str, enum.Enum):
     NUMPY_EXECUTION = "next_tests.definitions.numpy_execution"
     CUPY_EXECUTION = "next_tests.definitions.cupy_execution"
+    JAX_EXECUTION = "next_tests.definitions.jax_execution"
 
 
 class OptionalProgramBackendId(_PythonObjectIdMixin, str, enum.Enum):
@@ -112,6 +126,7 @@ class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum):
 USES_MAX_OVER = "uses_max_over"
 USES_MESH_WITH_SKIP_VALUES = "uses_mesh_with_skip_values"
 USES_SCALAR_IN_DOMAIN_AND_FO = "uses_scalar_in_domain_and_fo"
+SLICES_OUT_ARGUMENT = "slices_out_argument"
 CHECKS_SPECIFIC_ERROR = "checks_specific_error"
 
 # Skip messages (available format keys: 'marker', 'backend')
@@ -147,6 +162,9 @@ class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum):
         UNSUPPORTED_MESSAGE,
     ),  # we can't extract the field type from scan args
 ]
+JAX_SKIP_LIST = EMBEDDED_SKIP_LIST + [
+    (SLICES_OUT_ARGUMENT, XFAIL, UNSUPPORTED_MESSAGE),
+]
 ROUNDTRIP_SKIP_LIST = DOMAIN_INFERENCE_SKIP_LIST + [
     (USES_SPARSE_FIELDS_AS_OUTPUT, XFAIL, UNSUPPORTED_MESSAGE),
 ]
@@ -169,6 +187,7 @@ class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum):
 BACKEND_SKIP_TEST_MATRIX = {
     EmbeddedIds.NUMPY_EXECUTION: EMBEDDED_SKIP_LIST,
     EmbeddedIds.CUPY_EXECUTION: EMBEDDED_SKIP_LIST,
+    EmbeddedIds.JAX_EXECUTION: JAX_SKIP_LIST,
     OptionalProgramBackendId.DACE_CPU: DACE_SKIP_TEST_LIST,
     OptionalProgramBackendId.DACE_GPU: DACE_SKIP_TEST_LIST,
     OptionalProgramBackendId.DACE_CPU_NO_OPT: DACE_SKIP_TEST_LIST,
diff --git a/tests/next_tests/integration_tests/cases.py b/tests/next_tests/integration_tests/cases.py
index 759cd1cf1f..9faff154da 100644
--- a/tests/next_tests/integration_tests/cases.py
+++ b/tests/next_tests/integration_tests/cases.py
@@ -23,11 +23,12 @@
 from gt4py.eve import extended_typing as xtyping
 from gt4py.eve.extended_typing import Self
 from gt4py.next import (
-    allocators as next_allocators,
+    _allocators as next_allocators,
     backend as next_backend,
     common,
     constructors,
     field_utils,
+    utils as gt_utils,
 )
 from gt4py.next.ffront import decorator
 from gt4py.next.type_system import type_specifications as ts, type_translation
@@ -55,7 +56,6 @@
     mesh_descriptor,
 )
 
-from gt4py.next import utils as gt_utils
 
 # mypy does not accept [IDim, ...] as a type
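
The `requires_jax` marker registered in `pyproject.toml` still needs a collection hook to take effect; a hypothetical `conftest.py` sketch of the usual pattern (the repository's actual skip logic may differ):

```python
import importlib.util

import pytest


def pytest_collection_modifyitems(config, items):
    # Skip `requires_jax`-marked tests when jax is not importable.
    if importlib.util.find_spec("jax") is None:
        skip_jax = pytest.mark.skip(reason="'jax' package is not installed")
        for item in items:
            if "requires_jax" in item.keywords:
                item.add_marker(skip_jax)
```
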
diff --git a/tests/next_tests/integration_tests/feature_tests/dace/test_orchestration.py b/tests/next_tests/integration_tests/feature_tests/dace/test_orchestration.py
index 08904c06f3..a5ce949e04 100644
--- a/tests/next_tests/integration_tests/feature_tests/dace/test_orchestration.py
+++ b/tests/next_tests/integration_tests/feature_tests/dace/test_orchestration.py
@@ -6,11 +6,13 @@
 # Please, refer to the LICENSE file in the root directory.
 # SPDX-License-Identifier: BSD-3-Clause
 
+from typing import Optional
+
 import numpy as np
 import pytest
 
 import gt4py.next as gtx
-from gt4py.next import allocators as gtx_allocators, common as gtx_common
+from gt4py.next import _allocators as gtx_allocators, common as gtx_common
 
 from next_tests.integration_tests import cases
 from next_tests.integration_tests.cases import cartesian_case, unstructured_case
diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/ffront_test_utils.py b/tests/next_tests/integration_tests/feature_tests/ffront_tests/ffront_test_utils.py
index 1147f4bc3e..c83c433545 100644
--- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/ffront_test_utils.py
+++ b/tests/next_tests/integration_tests/feature_tests/ffront_tests/ffront_test_utils.py
@@ -7,14 +7,14 @@
 # SPDX-License-Identifier: BSD-3-Clause
 
 import types
-from typing import Any, Protocol, TypeVar
+from typing import Protocol, TypeVar
 
 import numpy as np
 import pytest
 
 import gt4py.next as gtx
 from gt4py._core import definitions as core_defs
-from gt4py.next import backend as next_backend, common, allocators as next_allocators
+from gt4py.next import _allocators as next_allocators, backend as next_backend, common
 from gt4py.next.ffront import decorator
 
 import next_tests
@@ -57,6 +57,9 @@ def __gt_allocator__(
     pytest.param(
         next_tests.definitions.EmbeddedIds.CUPY_EXECUTION, marks=pytest.mark.requires_gpu
     ),
+    pytest.param(
+        next_tests.definitions.EmbeddedIds.JAX_EXECUTION, marks=pytest.mark.requires_jax
+    ),
     pytest.param(
         next_tests.definitions.OptionalProgramBackendId.DACE_CPU,
         marks=pytest.mark.requires_dace,
diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_program.py b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_program.py
index f1cb8ffb17..57607fefc3 100644
--- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_program.py
+++ b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_program.py
@@ -53,6 +53,7 @@ def test_identity_fo_execution(cartesian_case, identity_def):
 
 
 @pytest.mark.uses_cartesian_shift
+@pytest.mark.slices_out_argument
 def test_shift_by_one_execution(cartesian_case):
     @gtx.field_operator
     def shift_by_one(in_field: cases.IFloatField) -> cases.IFloatField:
@@ -95,6 +96,7 @@ def test_double_copy_execution(cartesian_case, double_copy_program_def):
     )
 
 
+@pytest.mark.slices_out_argument
 def test_copy_restricted_execution(cartesian_case, copy_restrict_program_def):
     copy_restrict_program = gtx.program(copy_restrict_program_def, backend=cartesian_case.backend)
 
@@ -154,6 +156,7 @@ def prog(
     assert np.allclose((a.asnumpy(), b.asnumpy()), (out_a.asnumpy(), out_b.asnumpy()))
 
 
+@pytest.mark.slices_out_argument
 def test_tuple_program_return_constructed_inside_with_slicing(cartesian_case):
     @gtx.field_operator
     def pack_tuple(
diff --git a/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_ffront_fvm_nabla.py b/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_ffront_fvm_nabla.py
index 6c6ca7e4bc..5c914ecede 100644
--- a/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_ffront_fvm_nabla.py
+++ b/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_ffront_fvm_nabla.py
@@ -11,10 +11,11 @@
 import numpy as np
 import pytest
 
+
 pytest.importorskip("atlas4py")
 
 from gt4py import next as gtx
-from gt4py.next import allocators, neighbor_sum
+from gt4py.next import _allocators, neighbor_sum
 from gt4py.next.iterator import atlas_utils
 
 from next_tests.integration_tests.feature_tests.ffront_tests.ffront_test_utils import (
diff --git a/tests/next_tests/unit_tests/embedded_tests/test_nd_array_field.py b/tests/next_tests/unit_tests/embedded_tests/test_nd_array_field.py
index 9dde5bb40a..af5dd03102 100644
--- a/tests/next_tests/unit_tests/embedded_tests/test_nd_array_field.py
+++ b/tests/next_tests/unit_tests/embedded_tests/test_nd_array_field.py
@@ -6,6 +6,7 @@
 # Please, refer to the LICENSE file in the root directory.
 # SPDX-License-Identifier: BSD-3-Clause
 
+import copy
 import math
 import operator
 from typing import Callable, Iterable, Optional
@@ -261,6 +262,13 @@ def test_as_scalar(nd_array_implementation):
     assert isinstance(result, np.float32)
 
 
+@pytest.mark.parametrize("copy", [copy.copy, copy.deepcopy])
+def test_copy(copy, nd_array_implementation):
+    testee = _make_field_or_scalar([[0, 1], [2, 3]], nd_array_implementation)
+    result = copy(testee)
+    assert np.array_equal(testee.ndarray, result.ndarray)
+
+
 def product_nd_array_implementation_params():
     for xp1 in nd_array_field._nd_array_implementations:
         for xp2 in nd_array_field._nd_array_implementations:
diff --git a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace.py b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace.py
index 62d88d9f0a..4ef69ec429 100644
--- a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace.py
+++ b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace.py
@@ -173,13 +173,13 @@ def verify_testee(offset_provider):
     )
     mock_fast_call.assert_called_once()
 
-    if gtx.allocators.is_field_allocator_for(
+    if gtx._allocators.is_field_allocator_for(
         unstructured_case.backend.allocator, core_defs.DeviceType.CPU
     ):
         offset_provider = unstructured_case.offset_provider
     else:
-        assert gtx.allocators.is_field_allocator_for(
-            unstructured_case.backend.allocator, gtx.allocators.CUPY_DEVICE
+        assert gtx._allocators.is_field_allocator_for(
+            unstructured_case.backend.allocator, gtx._allocators.CUPY_DEVICE
         )
         import cupy as cp
diff --git a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/test_gtfn.py b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/test_gtfn.py
index 3d82dd8ee5..3df9d11015 100644
--- a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/test_gtfn.py
+++ b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/test_gtfn.py
@@ -20,7 +20,7 @@
 """
 
 import gt4py._core.definitions as core_defs
-from gt4py.next import allocators, config
+from gt4py.next import _allocators, config
 from gt4py.next.iterator import transforms
 from gt4py.next.iterator.transforms import global_tmps
 from gt4py.next.otf import workflow
@@ -40,8 +40,8 @@ def test_backend_factory_trait_device():
     assert cpu_version.executor.decoration.keywords["device"] is core_defs.DeviceType.CPU
     assert gpu_version.executor.decoration.keywords["device"] is core_defs.DeviceType.CUDA
 
-    assert allocators.is_field_allocator_for(cpu_version.allocator, core_defs.DeviceType.CPU)
-    assert allocators.is_field_allocator_for(gpu_version.allocator, core_defs.DeviceType.CUDA)
+    assert _allocators.is_field_allocator_for(cpu_version.allocator, core_defs.DeviceType.CPU)
+    assert _allocators.is_field_allocator_for(gpu_version.allocator, core_defs.DeviceType.CUDA)
 
 
 def test_backend_factory_trait_cached():
diff --git a/tests/next_tests/unit_tests/test_allocators.py b/tests/next_tests/unit_tests/test_allocators.py
index d3001779e1..1aeff1d3ec 100644
--- a/tests/next_tests/unit_tests/test_allocators.py
+++ b/tests/next_tests/unit_tests/test_allocators.py
@@ -12,7 +12,7 @@
 import pytest
 
 import gt4py._core.definitions as core_defs
-import gt4py.next.allocators as next_allocators
+import gt4py.next._allocators as next_allocators
 import gt4py.next.common as common
 import gt4py.storage.allocators as core_allocators
 
@@ -26,7 +26,7 @@ def __gt_allocate__(
         dtype: core_defs.DType[core_defs.ScalarT],
         device_id: int = 0,
         aligned_index: Optional[Sequence[common.NamedIndex]] = None,
-    ) -> core_allocators.TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]:
+    ) -> core_defs.NDArrayObject:
         pass
 
 
@@ -108,7 +108,7 @@ def test_get_allocator():
 
 
 def test_horizontal_first_layout_mapper():
-    from gt4py.next.allocators import horizontal_first_layout_mapper
+    from gt4py.next._allocators import horizontal_first_layout_mapper
 
     # Test with only horizontal dimensions
     dims = [
@@ -152,7 +152,7 @@ def test_allocate(self):
 
 
 def test_allocate():
-    from gt4py.next.allocators import StandardCPUFieldBufferAllocator, allocate
+    from gt4py.next._allocators import StandardCPUFieldBufferAllocator, allocate
 
     I = common.Dimension("I")
     J = common.Dimension("J")
@@ -161,27 +161,25 @@ def test_allocate():
 
     # Test with a explicit field allocator
     allocator = StandardCPUFieldBufferAllocator()
-    tensor_buffer = allocate(domain, dtype, allocator=allocator)
+    tensor_buffer = allocate(domain=domain, dtype=dtype, allocator=allocator)
     assert tensor_buffer.shape == domain.shape
     assert tensor_buffer.dtype == dtype
-    assert tensor_buffer.device == core_defs.Device(core_defs.DeviceType.CPU, 0)
 
     # Test with a device
     device = core_defs.Device(core_defs.DeviceType.CPU, 0)
-    tensor_buffer = allocate(domain, dtype, device=device)
+    tensor_buffer = allocate(domain=domain, dtype=dtype, device=device)
     assert tensor_buffer.shape == domain.shape
     assert tensor_buffer.dtype == dtype
-    assert tensor_buffer.device == core_defs.Device(core_defs.DeviceType.CPU, 0)
 
     # Test with both allocator and device
     with pytest.raises(ValueError, match="are incompatible"):
         allocate(
-            domain,
-            dtype,
+            domain=domain,
+            dtype=dtype,
             allocator=allocator,
             device=core_defs.Device(core_defs.DeviceType.CUDA, 0),
         )
 
     # Test with no device or allocator
     with pytest.raises(ValueError, match="No 'device' or 'allocator' specified"):
-        allocate(domain, dtype)
+        allocate(domain=domain, dtype=dtype)
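
A short usage sketch of the now keyword-only `allocate` entry point exercised by the updated test above (hedged: the `common.domain({...: int})` and `core_defs.dtype(float)` spellings are assumptions about the helper APIs, not taken from the patch):

```python
import gt4py._core.definitions as core_defs
import gt4py.next.common as common
from gt4py.next._allocators import StandardCPUFieldBufferAllocator, allocate

domain = common.domain({common.Dimension("I"): 5})
buffer = allocate(
    domain=domain,
    dtype=core_defs.dtype(float),
    allocator=StandardCPUFieldBufferAllocator(),
    device=core_defs.Device(core_defs.DeviceType.CPU, 0),
)
buffer[...] = 0.0  # the result is now a writable ndarray, not a TensorBuffer
```
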
diff --git a/tests/next_tests/unit_tests/test_allocators.py b/tests/next_tests/unit_tests/test_allocators.py
index d3001779e1..1aeff1d3ec 100644
--- a/tests/next_tests/unit_tests/test_allocators.py
+++ b/tests/next_tests/unit_tests/test_allocators.py
@@ -12,7 +12,7 @@
 import pytest
 
 import gt4py._core.definitions as core_defs
-import gt4py.next.allocators as next_allocators
+import gt4py.next._allocators as next_allocators
 import gt4py.next.common as common
 import gt4py.storage.allocators as core_allocators
@@ -26,7 +26,7 @@ def __gt_allocate__(
         dtype: core_defs.DType[core_defs.ScalarT],
         device_id: int = 0,
         aligned_index: Optional[Sequence[common.NamedIndex]] = None,
-    ) -> core_allocators.TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]:
+    ) -> core_defs.NDArrayObject:
         pass
@@ -108,7 +108,7 @@ def test_get_allocator():
 
 
 def test_horizontal_first_layout_mapper():
-    from gt4py.next.allocators import horizontal_first_layout_mapper
+    from gt4py.next._allocators import horizontal_first_layout_mapper
 
     # Test with only horizontal dimensions
     dims = [
@@ -152,7 +152,7 @@ def test_allocate(self):
 
 
 def test_allocate():
-    from gt4py.next.allocators import StandardCPUFieldBufferAllocator, allocate
+    from gt4py.next._allocators import StandardCPUFieldBufferAllocator, allocate
 
     I = common.Dimension("I")
     J = common.Dimension("J")
@@ -161,27 +161,25 @@ def test_allocate():
 
     # Test with a explicit field allocator
     allocator = StandardCPUFieldBufferAllocator()
-    tensor_buffer = allocate(domain, dtype, allocator=allocator)
+    tensor_buffer = allocate(domain=domain, dtype=dtype, allocator=allocator)
     assert tensor_buffer.shape == domain.shape
     assert tensor_buffer.dtype == dtype
-    assert tensor_buffer.device == core_defs.Device(core_defs.DeviceType.CPU, 0)
 
     # Test with a device
     device = core_defs.Device(core_defs.DeviceType.CPU, 0)
-    tensor_buffer = allocate(domain, dtype, device=device)
+    tensor_buffer = allocate(domain=domain, dtype=dtype, device=device)
     assert tensor_buffer.shape == domain.shape
     assert tensor_buffer.dtype == dtype
-    assert tensor_buffer.device == core_defs.Device(core_defs.DeviceType.CPU, 0)
 
     # Test with both allocator and device
     with pytest.raises(ValueError, match="are incompatible"):
         allocate(
-            domain,
-            dtype,
+            domain=domain,
+            dtype=dtype,
            allocator=allocator,
             device=core_defs.Device(core_defs.DeviceType.CUDA, 0),
         )
 
     # Test with no device or allocator
     with pytest.raises(ValueError, match="No 'device' or 'allocator' specified"):
-        allocate(domain, dtype)
+        allocate(domain=domain, dtype=dtype)
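Note: the test changes above suggest `allocate` now expects keyword arguments and no longer exposes a `.device` attribute on the returned buffer. A minimal usage sketch of the new calling convention, assembled from the calls exercised in this diff (assuming a CPU-only environment):

    import numpy as np

    import gt4py._core.definitions as core_defs
    import gt4py.next.common as common
    from gt4py.next._allocators import StandardCPUFieldBufferAllocator, allocate

    I = common.Dimension("I")
    domain = common.Domain(dims=(I,), ranges=(common.UnitRange(0, 4),))
    dtype = core_defs.dtype(np.float32)

    # keyword-only call; the old positional form allocate(domain, dtype, ...) is gone
    buf = allocate(domain=domain, dtype=dtype, allocator=StandardCPUFieldBufferAllocator())
    assert buf.shape == domain.shape
    assert buf.dtype == dtype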
diff --git a/tests/next_tests/unit_tests/test_constructors.py b/tests/next_tests/unit_tests/test_constructors.py
index 0998ab8eab..7e592a687d 100644
--- a/tests/next_tests/unit_tests/test_constructors.py
+++ b/tests/next_tests/unit_tests/test_constructors.py
@@ -6,12 +6,28 @@
 # Please, refer to the LICENSE file in the root directory.
 # SPDX-License-Identifier: BSD-3-Clause
 
+import copy
+from types import ModuleType
+from typing import Any
+
 import numpy as np
+
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
+
+try:
+    import jax.numpy as jnp
+except ImportError:
+    jnp = None
+
 import pytest
 
 from gt4py import next as gtx
 from gt4py._core import definitions as core_defs
-from gt4py.next import allocators as next_allocators, common
+from gt4py.next import _allocators as next_allocators, common
 
 
 I = gtx.Dimension("I")
@@ -21,16 +37,49 @@
 sizes = {I: 10, J: 10, K: 10}
 
 
-# TODO: parametrize with gpu backend and compare with cupy array
-@pytest.mark.parametrize(
-    "allocator, device",
-    [
-        [next_allocators.StandardCPUFieldBufferAllocator(), None],
-        [None, core_defs.Device(core_defs.DeviceType.CPU, 0)],
-    ],
-)
-def test_empty(allocator, device):
-    ref = np.empty([sizes[I], sizes[J]]).astype(gtx.float32)
+def _pretty_print(val):
+    if val is None:
+        return "None"
+    if isinstance(val, ModuleType):
+        return val.__name__
+    return val.__class__.__name__
+
+
+def _pretty_print_allocator_device_namespace(val: tuple[Any, Any, Any]):
+    return f"allocator={_pretty_print(val[0])}-device={_pretty_print(val[1])}-ref_namespace={_pretty_print(val[2])}"
+
+
+def allocator_device_refnamespace_params():
+    for v in [
+        [next_allocators.StandardCPUFieldBufferAllocator(), None, np],
+        [None, core_defs.Device(core_defs.DeviceType.CPU, 0), np],
+        [np, None, np],
+    ]:
+        yield pytest.param(
+            v,
+            id=_pretty_print_allocator_device_namespace(v),
+        )
+    for v in [
+        [next_allocators.StandardGPUFieldBufferAllocator(), None, cp],
+        [None, core_defs.Device(core_defs.DeviceType.CUDA, 0), cp],  # TODO: CUDA or HIP...
+    ]:
+        yield pytest.param(
+            v, id=_pretty_print_allocator_device_namespace(v), marks=pytest.mark.requires_gpu
+        )
+    for v in [[jnp, None, jnp]]:
+        yield pytest.param(
+            v, id=_pretty_print_allocator_device_namespace(v), marks=pytest.mark.requires_jax
+        )
+
+
+@pytest.fixture(params=allocator_device_refnamespace_params())
+def allocator_device_refnamespace(request):
+    return request.param
+
+
+def test_empty(allocator_device_refnamespace):
+    allocator, device, xp = allocator_device_refnamespace
+    ref = xp.empty([sizes[I], sizes[J]]).astype(gtx.float32)
     a = gtx.empty(
         domain={I: range(sizes[I]), J: range(sizes[J])},
         dtype=core_defs.dtype(np.float32),
@@ -40,15 +89,8 @@ def test_empty(allocator, device):
     assert a.shape == ref.shape
 
 
-# TODO: parametrize with gpu backend and compare with cupy array
-@pytest.mark.parametrize(
-    "allocator, device",
-    [
-        [next_allocators.StandardCPUFieldBufferAllocator(), None],
-        [None, core_defs.Device(core_defs.DeviceType.CPU, 0)],
-    ],
-)
-def test_zeros(allocator, device):
+def test_zeros(allocator_device_refnamespace):
+    allocator, device, xp = allocator_device_refnamespace
     a = gtx.zeros(
         common.Domain(
             dims=(I, J), ranges=(common.UnitRange(0, sizes[I]), common.UnitRange(0, sizes[J]))
@@ -57,40 +99,26 @@
         allocator=allocator,
         device=device,
     )
-    ref = np.zeros((sizes[I], sizes[J])).astype(gtx.float32)
+    ref = xp.zeros((sizes[I], sizes[J])).astype(gtx.float32)
 
-    assert np.array_equal(a.ndarray, ref)
+    assert xp.array_equal(a.ndarray, ref)
 
 
-# TODO: parametrize with gpu backend and compare with cupy array
-@pytest.mark.parametrize(
-    "allocator, device",
-    [
-        [next_allocators.StandardCPUFieldBufferAllocator(), None],
-        [None, core_defs.Device(core_defs.DeviceType.CPU, 0)],
-    ],
-)
-def test_ones(allocator, device):
+def test_ones(allocator_device_refnamespace):
+    allocator, device, xp = allocator_device_refnamespace
     a = gtx.ones(
         common.Domain(dims=(I, J), ranges=(common.UnitRange(0, 10), common.UnitRange(0, 10))),
         dtype=core_defs.dtype(np.float32),
         allocator=allocator,
         device=device,
     )
-    ref = np.ones((sizes[I], sizes[J])).astype(gtx.float32)
+    ref = xp.ones((sizes[I], sizes[J])).astype(gtx.float32)
 
-    assert np.array_equal(a.ndarray, ref)
+    assert xp.array_equal(a.ndarray, ref)
 
 
-# TODO: parametrize with gpu backend and compare with cupy array
-@pytest.mark.parametrize(
-    "allocator, device",
-    [
-        [next_allocators.StandardCPUFieldBufferAllocator(), None],
-        [None, core_defs.Device(core_defs.DeviceType.CPU, 0)],
-    ],
-)
-def test_full(allocator, device):
+def test_full(allocator_device_refnamespace):
+    allocator, device, xp = allocator_device_refnamespace
     a = gtx.full(
         domain={I: range(sizes[I] - 2), J: (sizes[J] - 2)},
         fill_value=42.0,
@@ -98,9 +126,21 @@
         allocator=allocator,
         device=device,
     )
-    ref = np.full((sizes[I] - 2, sizes[J] - 2), 42.0).astype(gtx.float32)
+    ref = xp.full((sizes[I] - 2, sizes[J] - 2), 42.0).astype(gtx.float32)
+
+    assert xp.array_equal(a.ndarray, ref)
 
-    assert np.array_equal(a.ndarray, ref)
+
+def test_copy():
+    """Ensure data AND layout are preserved."""
+
+    testee = gtx.as_field([I, J], np.random.rand(sizes[I], sizes[J]))
+    result = copy.deepcopy(testee)
+    assert testee.ndarray.strides == result.ndarray.strides
+    assert (
+        result.ndarray.strides != result.ndarray.copy().strides
+    )  # sanity check for this test: make sure our allocator doesn't produce C-contiguous strides
+    assert np.array_equal(testee.ndarray, result.ndarray)
 
 
 def test_as_field():
@@ -144,9 +184,6 @@ def test_field_wrong_origin():
     with pytest.raises(ValueError, match=(r"Origin keys {'J'} not in domain")):
         gtx.as_field([I], np.random.rand(sizes[I]).astype(gtx.float32), origin={"J": 0})
 
-    with pytest.raises(ValueError, match=(r"Cannot specify origin for domain I")):
-        gtx.as_field("I", np.random.rand(sizes[J]).astype(gtx.float32), origin={"J": 0})
-
 
 @pytest.mark.xfail(reason="aligned_index not supported yet")
 def test_aligned_index():
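Note: the new `allocator_device_refnamespace` fixture yields `(allocator, device, reference namespace)` triples so each constructor test compares against the matching array library (numpy, cupy, or jax.numpy). A stripped-down, standalone sketch of the pattern, with hypothetical names, showing how `pytest.param(..., id=..., marks=...)` gives each triple a readable test id:

    import numpy as np
    import pytest


    def _params():
        # one CPU triple; real code adds GPU/jax triples with skip markers
        yield pytest.param((None, None, np), id="allocator=None-device=None-ref_namespace=numpy")


    @pytest.fixture(params=_params())
    def triple(request):
        return request.param


    def test_example(triple):
        _, _, xp = triple
        assert xp.array_equal(xp.zeros(3), xp.zeros(3))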
diff --git a/tests/storage_tests/unit_tests/test_interface.py b/tests/storage_tests/unit_tests/test_interface.py
index ba7bc2aaef..038b09aa4c 100644
--- a/tests/storage_tests/unit_tests/test_interface.py
+++ b/tests/storage_tests/unit_tests/test_interface.py
@@ -121,16 +121,7 @@ def test_allocate_cpu(param_dict):
     shape = param_dict["shape"]
     layout_map = param_dict["layout_order"]
 
-    raw_buffer, field = allocate_cpu(shape, layout_map, dtype, alignment_bytes, aligned_index)
-
-    # check that memory of field is contained in raw_buffer
-    np_byte_bounds = (
-        np.byte_bounds if hasattr(np, "byte_bounds") else np.lib.array_utils.byte_bounds
-    )
-    assert (
-        np_byte_bounds(field)[0] >= np_byte_bounds(raw_buffer)[0]
-        and np_byte_bounds(field)[1] <= np_byte_bounds(raw_buffer)[1]
-    )
+    field = allocate_cpu(shape, layout_map, dtype, alignment_bytes, aligned_index)
 
     # check if the first compute-domain point in the last dimension is aligned for 100 random "columns"
     import random
@@ -185,17 +176,7 @@ def test_allocate_gpu(param_dict):
     aligned_index = param_dict["aligned_index"]
     shape = param_dict["shape"]
     layout_map = param_dict["layout_order"]
-    device_raw_buffer, device_field = allocate_gpu(
-        shape, layout_map, dtype, alignment_bytes, aligned_index
-    )
-
-    # Would like to check device_field.base against device_raw_buffer but
-    # as_strided returns an ndarray where device_field.base is set to None.
-    # Instead, check that the memory of field is contained in raws buffer
-    assert (
-        device_field.data.ptr >= device_raw_buffer.data.ptr
-        and device_field[-1:].data.ptr <= device_raw_buffer[-1:].data.ptr
-    )
+    device_field = allocate_gpu(shape, layout_map, dtype, alignment_bytes, aligned_index)
 
     # check if the first compute-domain point in the last dimension is aligned for 100 random "columns"
     import random
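Note: with `allocate_cpu`/`allocate_gpu` no longer returning the raw buffer, the byte-bounds containment checks are dropped and only the alignment check of the first compute-domain point remains. A hedged sketch of what such an alignment check computes, using a plain numpy array as a stand-in for the allocated field (the `is_aligned` helper is ours; a generic `np.empty` buffer is not guaranteed to satisfy it, hence no assert):

    import random

    import numpy as np


    def is_aligned(field: np.ndarray, index: tuple, alignment_bytes: int) -> bool:
        # byte address of field[index], from the base pointer plus strides
        addr = field.ctypes.data + sum(i * s for i, s in zip(index, field.strides))
        return addr % alignment_bytes == 0


    field = np.empty((8, 8, 16))  # stand-in; the real test uses allocate_cpu(...)
    for _ in range(100):
        # random "column": random leading indices, aligned point in the last dimension
        column = tuple(random.randint(0, s - 1) for s in field.shape[:-1]) + (0,)
        print(is_aligned(field, column, 64))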