diff --git a/pyproject.toml b/pyproject.toml
index e859c9b4f7..8b91890d9b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,6 +24,7 @@ classifiers = [
     'Topic :: Scientific/Engineering :: Physics'
 ]
 dependencies = [
+    "array-api-compat>=1.9.1;python_version>='3.10'",
     "astunparse>=1.6.3;python_version<'3.9'",
     'attrs>=21.3',
     'black>=22.3',
@@ -237,8 +238,9 @@ module = 'gt4py.next.iterator.runtime'
 [tool.pytest.ini_options]
 markers = [
     'all: special marker that skips all tests',
-    'requires_atlas: tests that require `atlas4py` bindings package',
-    'requires_dace: tests that require `dace` package',
+    'requires_atlas: tests that require the `atlas4py` bindings package',
+    'requires_dace: tests that require the `dace` package',
+    'requires_jax: tests that require the `jax` package',
     'requires_gpu: tests that require a NVidia GPU (`cupy` and `cudatoolkit` are required)',
     'uses_applied_shifts: tests that require backend support for applied-shifts',
     'uses_constant_fields: tests that require backend support for constant fields',
@@ -264,6 +266,7 @@ markers = [
     'uses_unstructured_shift: tests that use a unstructured connectivity',
     'uses_max_over: tests that use the max_over builtin',
     'uses_mesh_with_skip_values: tests that use a mesh with skip values',
+    'slices_out_argument: tests that slice the out argument in a field_operator call',
     'checks_specific_error: tests that rely on the backend to produce a specific error message'
 ]
 norecursedirs = ['dist', 'build', 'cpp_backend_tests/build*', '_local/*', '.*']
diff --git a/src/gt4py/_core/definitions.py b/src/gt4py/_core/definitions.py
index 8f62788b8f..f6250b1b60 100644
--- a/src/gt4py/_core/definitions.py
+++ b/src/gt4py/_core/definitions.py
@@ -27,6 +27,7 @@
     Iterator,
     Literal,
     Protocol,
+    Self,
     Sequence,
     Tuple,
     Type,
@@ -405,6 +406,7 @@ class DeviceType(enum.IntEnum):
     MetalDeviceTyping,
     VPIDeviceTyping,
     ROCMDeviceTyping,
+    covariant=True,
 )
 
 
@@ -454,7 +456,7 @@ def astype(self, dtype: npt.DTypeLike) -> NDArrayObject: ...
 
     def any(self) -> bool: ...
 
-    def __getitem__(self, item: Any) -> NDArrayObject: ...
+    def __getitem__(self, item: Any) -> Self: ...
 
     def __abs__(self) -> NDArrayObject: ...
 
@@ -505,3 +507,49 @@ def __and__(self, other: NDArrayObject | Scalar) -> NDArrayObject: ...
 
     def __or__(self, other: NDArrayObject | Scalar) -> NDArrayObject: ...
 
     def __xor__(self, other: NDArrayObject | Scalar) -> NDArrayObject: ...
+
+
+class MutableNDArrayObject(NDArrayObject, Protocol):
+    def __setitem__(self, index: Any, value: Any) -> None: ...
+
+
+class ArrayApiNamespace(Protocol):
+    def empty(self, shape: Sequence[int], *, dtype: Any = None, device: Any = None) -> Any: ...
+    def zeros(self, shape: Sequence[int], *, dtype: Any = None, device: Any = None) -> Any: ...
+    def ones(self, shape: Sequence[int], *, dtype: Any = None, device: Any = None) -> Any: ...
+    def full(
+        self, shape: Sequence[int], fill_value: Scalar, *, dtype: Any = None, device: Any = None
+    ) -> Any: ...
+    def asarray(self, obj: Any, *, dtype: Any = None, copy: Any = None) -> Any: ...
+
+    # @property  # once all relevant implementations have this attribute
+    # def __array_api_version__(self) -> str: ...  # noqa: ERA001
+
+    # TODO(havogt): add relevant methods and attributes or wait for the standard to provide it, see e.g. https://github.com/data-apis/array-api/issues/697
+
+
+def is_array_api_namespace(obj: Any) -> TypeGuard[ArrayApiNamespace]:
+    # return hasattr(obj, "__array_api_version__")  # noqa: ERA001  # once all relevant implementations have this attribute
+    return (
+        hasattr(obj, "empty")
+        and hasattr(obj, "zeros")
+        and hasattr(obj, "ones")
+        and hasattr(obj, "full")
+        and hasattr(obj, "asarray")
+    )
+
+
+def to_array_api_dtype(xp: ArrayApiNamespace, dtype_: DTypeLike | None) -> Any:
+    """
+    Converts a GT4Py `DTypeLike` to the dtype object of the given Array API namespace.
+
+    Note: For convenience `None` is passed through, as it has a consistent meaning in all Array API implementations.
+    """
+    if dtype_ is None:
+        return None
+    else:
+        dtype_ = dtype(dtype_)
+        assert (
+            dtype_.tensor_shape == ()
+        )  # TODO(havogt): support tensor shapes (or remove from our DType)
+        return getattr(xp, dtype_.scalar_type.__name__)
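
The two helpers added to `gt4py._core.definitions` above are small enough to exercise standalone. A minimal sketch (not part of the patch; the `probe_*` names are illustrative, and plain NumPy stands in for the Array API namespace) of the duck-typed namespace check and the name-based dtype lookup:

```python
import numpy as np


def probe_is_array_api_namespace(obj) -> bool:
    # Same structural check as `is_array_api_namespace`: until
    # `__array_api_version__` is universally available, probe for the
    # construction functions the allocators actually call.
    return all(hasattr(obj, name) for name in ("empty", "zeros", "ones", "full", "asarray"))


def probe_to_array_api_dtype(xp, scalar_type: type):
    # `to_array_api_dtype` resolves the dtype by name on the target
    # namespace, e.g. numpy.float32 -> xp.float32.
    return getattr(xp, scalar_type.__name__)


assert probe_is_array_api_namespace(np)
assert probe_to_array_api_dtype(np, np.float32) is np.float32
```
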
diff --git a/src/gt4py/next/allocators.py b/src/gt4py/next/_allocators.py
similarity index 50%
rename from src/gt4py/next/allocators.py
rename to src/gt4py/next/_allocators.py
index 864f8c1b09..184785246a 100644
--- a/src/gt4py/next/allocators.py
+++ b/src/gt4py/next/_allocators.py
@@ -10,6 +10,8 @@
 import dataclasses
 import functools
 
+import array_api_compat
+
 import gt4py._core.definitions as core_defs
 import gt4py.next.common as common
 import gt4py.storage.allocators as core_allocators
@@ -41,6 +43,63 @@
 )
 
 
+class GTArrayAllocationNamespace(Protocol):
+    """
+    Standard Array API-like construction functions based on `domain` instead of `shape`.
+
+    The reason to use `domain` is:
+    - we need the `Dimension`s for getting the desired ordering of strides
+    - `aligned_index` refers to a point in the domain (absolute position), not relative to the array shape
+    """
+
+    # Notes:
+    # - this concept could be evolved to a more general `GTArrayNamespace` that adds all array functions that we use in embedded
+    # - maybe for advanced indexing use-case: extend the namespace with standard compliant fallback functions.
+
+    def empty(
+        self,
+        domain: common.DomainLike,
+        *,
+        dtype: Optional[core_defs.DTypeLike] = None,
+        aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+    ) -> core_defs.NDArrayObject: ...
+
+    def zeros(
+        self,
+        domain: common.DomainLike,
+        *,
+        dtype: Optional[core_defs.DTypeLike] = None,
+        aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+    ) -> core_defs.NDArrayObject: ...
+
+    def ones(
+        self,
+        domain: common.DomainLike,
+        *,
+        dtype: Optional[core_defs.DTypeLike] = None,
+        aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+    ) -> core_defs.NDArrayObject: ...
+
+    def full(
+        self,
+        domain: common.DomainLike,
+        fill_value: core_defs.Scalar,
+        *,
+        dtype: Optional[core_defs.DTypeLike] = None,
+        aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+    ) -> core_defs.NDArrayObject: ...
+
+    def asarray(
+        self,
+        data: core_defs.NDArrayObject,
+        *,
+        domain: common.DomainLike,
+        dtype: Optional[core_defs.DTypeLike] = None,
+        copy: Optional[bool] = None,
+        aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+    ) -> core_defs.NDArrayObject: ...
+
+
 FieldLayoutMapper: TypeAlias = Callable[
     [Sequence[common.Dimension]], core_allocators.BufferLayoutMap
 ]
@@ -60,7 +119,7 @@ def __gt_allocate__(
         dtype: core_defs.DType[core_defs.ScalarT],
         device_id: int = 0,
         aligned_index: Optional[Sequence[common.NamedIndex]] = None,  # absolute position
-    ) -> core_allocators.TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]: ...
+    ) -> core_defs.MutableNDArrayObject: ...
 
 
 def is_field_allocator(obj: Any) -> TypeGuard[FieldBufferAllocatorProtocol]:
@@ -160,7 +219,7 @@ def __gt_allocate__(
         dtype: core_defs.DType[core_defs.ScalarT],
         device_id: int = 0,
         aligned_index: Optional[Sequence[common.NamedIndex]] = None,  # absolute position
-    ) -> core_allocators.TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]:
+    ) -> core_defs.MutableNDArrayObject:
         shape = domain.shape
         layout_map = self.layout_mapper(domain.dims)
         # TODO(egparedes): add support for non-empty aligned index values
@@ -242,7 +301,7 @@ def __gt_allocate__(
         dtype: core_defs.DType[core_defs.ScalarT],
         device_id: int = 0,
         aligned_index: Optional[Sequence[common.NamedIndex]] = None,  # absolute position
-    ) -> core_allocators.TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]:
+    ) -> core_defs.MutableNDArrayObject:
         raise self.exception
 
 
@@ -293,15 +352,16 @@ def __init__(self) -> None:
 
 
 def allocate(
-    domain: common.DomainLike,
-    dtype: core_defs.DType[core_defs.ScalarT],
     *,
+    domain: common.Domain,
+    dtype: core_defs.DType[core_defs.ScalarT],
+    allocator: FieldBufferAllocatorProtocol,
+    device: core_defs.Device,
     aligned_index: Optional[Sequence[common.NamedIndex]] = None,
-    allocator: Optional[FieldBufferAllocationUtil] = None,
-    device: Optional[core_defs.Device] = None,
-) -> core_allocators.TensorBuffer:
+) -> core_defs.MutableNDArrayObject:
     """
-    Allocate a TensorBuffer for the given domain and device or allocator.
+    TODO: docstring
+    Allocate an NDArrayObject for the given domain and device or allocator.
 
     The arguments `device` and `allocator` are mutually exclusive.
     If `device` is specified, the corresponding default allocator
@@ -323,20 +383,215 @@ def allocate(
             If illegal or inconsistent arguments are specified.
     """
-    if device is None and allocator is None:
-        raise ValueError("No 'device' or 'allocator' specified.")
-    actual_allocator = get_allocator(allocator)
-    if actual_allocator is None:
-        assert device is not None  # for mypy
-        actual_allocator = device_allocators[device.device_type]
-    elif device is None:
-        device = core_defs.Device(actual_allocator.__gt_device_type__, 0)
-    elif device.device_type != actual_allocator.__gt_device_type__:
-        raise ValueError(f"Device '{device}' and allocator '{actual_allocator}' are incompatible.")
-
-    return actual_allocator.__gt_allocate__(
-        domain=common.domain(domain),
+    return allocator.__gt_allocate__(
+        domain=domain,
         dtype=dtype,
         device_id=device.device_id,
         aligned_index=aligned_index,
     )
+
+
+def _check_unsupported_device_and_aligned_index(
+    device: Optional[core_defs.Device], aligned_index: Optional[Sequence[common.NamedIndex]]
+) -> None:
+    if aligned_index is not None:
+        raise NotImplementedError("Aligned index is not supported for Array API namespaces.")
+    if device is not None:
+        # TODO(havogt): this requires translating our device object into the concrete Array API implementation's device object
+        raise NotImplementedError("Device specification is not yet supported.")
+
+
+def _get_actual_allocator_and_device(
+    allocator: Optional[FieldBufferAllocationUtil], device: Optional[core_defs.Device]
+) -> tuple[FieldBufferAllocatorProtocol, core_defs.Device]:
+    if allocator is None and device is not None:
+        return device_allocators[device.device_type], device
+
+    actual_allocator = get_allocator(allocator, default=device_allocators[core_defs.DeviceType.CPU])
+    assert actual_allocator is not None
+    if device is None:
+        device = core_defs.Device(actual_allocator.__gt_device_type__, 0)
+    elif device.device_type != actual_allocator.__gt_device_type__:
+        raise ValueError(f"Device '{device}' and allocator '{actual_allocator}' are incompatible.")
+    return actual_allocator, device
+
+
+def get_array_allocation_namespace(
+    allocator: Optional[FieldBufferAllocationUtil | core_defs.ArrayApiNamespace],
+    device: Optional[core_defs.Device] = None,
+) -> GTArrayAllocationNamespace:
+    if core_defs.is_array_api_namespace(allocator):
+        assert core_defs.is_array_api_namespace(allocator)
+        array_ns = array_api_compat.array_namespace(allocator.empty([0]))
+
+        class _ArrayNamespaceWrapper:
+            @staticmethod
+            def empty(
+                domain: common.DomainLike,
+                *,
+                dtype: Optional[core_defs.DTypeLike] = None,
+                aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+            ) -> core_defs.NDArrayObject:
+                _check_unsupported_device_and_aligned_index(device, aligned_index)
+                return array_ns.empty(
+                    shape=common.domain(domain).shape,
+                    dtype=core_defs.to_array_api_dtype(array_ns, dtype),
+                )
+
+            @staticmethod
+            def zeros(
+                domain: common.DomainLike,
+                *,
+                dtype: Optional[core_defs.DTypeLike] = None,
+                aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+            ) -> core_defs.NDArrayObject:
+                _check_unsupported_device_and_aligned_index(device, aligned_index)
+                return array_ns.zeros(
+                    shape=common.domain(domain).shape,
+                    dtype=core_defs.to_array_api_dtype(array_ns, dtype),
+                )
+
+            @staticmethod
+            def ones(
+                domain: common.DomainLike,
+                *,
+                dtype: Optional[core_defs.DTypeLike] = None,
+                aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+            ) -> core_defs.NDArrayObject:
+                _check_unsupported_device_and_aligned_index(device, aligned_index)
+                return array_ns.ones(
+                    shape=common.domain(domain).shape,
+                    dtype=core_defs.to_array_api_dtype(array_ns, dtype),
+                )
+
+            @staticmethod
+            def full(
+                domain: common.DomainLike,
+                fill_value: core_defs.Scalar,
+                *,
+                dtype: Optional[core_defs.DTypeLike] = None,
+                aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+            ) -> core_defs.NDArrayObject:
+                _check_unsupported_device_and_aligned_index(device, aligned_index)
+                return array_ns.full(
+                    shape=common.domain(domain).shape,
+                    fill_value=fill_value,
+                    dtype=core_defs.to_array_api_dtype(array_ns, dtype),
+                )
+
+            @staticmethod
+            def asarray(
+                data: core_defs.NDArrayObject,
+                *,
+                domain: common.DomainLike,
+                dtype: Optional[core_defs.DTypeLike] = None,
+                copy: Optional[bool] = None,
+                aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+            ) -> core_defs.NDArrayObject:
+                _check_unsupported_device_and_aligned_index(device, aligned_index)
+                if not data.shape == common.domain(domain).shape:
+                    raise ValueError(
+                        f"Array of shape '{data.shape}' is incompatible with domain '{domain}'."
+                    )
+
+                return array_ns.asarray(
+                    data, dtype=core_defs.to_array_api_dtype(array_ns, dtype), copy=copy
+                )
+
+        return _ArrayNamespaceWrapper
+
+    assert is_field_allocation_tool(allocator) or allocator is None
+    actual_allocator, actual_device = _get_actual_allocator_and_device(allocator, device)
+
+    class _CustomAllocationArrayNamespace:
+        @staticmethod
+        def empty(
+            domain: common.DomainLike,
+            *,
+            dtype: Optional[core_defs.DTypeLike] = None,
+            aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+        ) -> core_defs.NDArrayObject:
+            return allocate(
+                domain=common.domain(domain),
+                dtype=core_defs.dtype(dtype),
+                aligned_index=aligned_index,
+                allocator=actual_allocator,
+                device=actual_device,
+            )
+
+        @staticmethod
+        def zeros(
+            domain: common.DomainLike,
+            *,
+            dtype: Optional[core_defs.DTypeLike] = None,
+            aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+        ) -> core_defs.NDArrayObject:
+            buffer = allocate(
+                domain=common.domain(domain),
+                dtype=core_defs.dtype(dtype),
+                aligned_index=aligned_index,
+                allocator=actual_allocator,
+                device=actual_device,
+            )
+            buffer[...] = 0
+            return buffer
+
+        @staticmethod
+        def ones(
+            domain: common.DomainLike,
+            *,
+            dtype: Optional[core_defs.DTypeLike] = None,
+            aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+        ) -> core_defs.NDArrayObject:
+            buffer = allocate(
+                domain=common.domain(domain),
+                dtype=core_defs.dtype(dtype),
+                aligned_index=aligned_index,
+                allocator=actual_allocator,
+                device=actual_device,
+            )
+            buffer[...] = 1
+            return buffer
+
+        @staticmethod
+        def full(
+            domain: common.DomainLike,
+            fill_value: core_defs.Scalar,
+            *,
+            dtype: Optional[core_defs.DTypeLike] = None,
+            aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+        ) -> core_defs.NDArrayObject:
+            buffer = allocate(
+                domain=common.domain(domain),
+                dtype=core_defs.dtype(dtype),  # TODO check all dtypes
+                aligned_index=aligned_index,
+                allocator=actual_allocator,
+                device=actual_device,
+            )
+            buffer[...] = fill_value
+            return buffer
+
+        @staticmethod
+        def asarray(
+            data: core_defs.NDArrayObject,
+            *,
+            domain: common.DomainLike,
+            dtype: Optional[core_defs.DTypeLike] = None,
+            copy: Optional[bool] = None,
+            aligned_index: Optional[Sequence[common.NamedIndex]] = None,
+        ) -> core_defs.NDArrayObject:
+            if not copy:
+                raise NotImplementedError("Zero-copy construction is not yet supported.")
+            dtype = core_defs.dtype(data.dtype) if dtype is None else core_defs.dtype(dtype)
+            buffer = allocate(
+                domain=common.domain(domain),
+                dtype=dtype,
+                aligned_index=aligned_index,
+                allocator=actual_allocator,
+                device=actual_device,
+            )
+            buffer[...] = array_api_compat.array_namespace(buffer).asarray(data)
+            return buffer
+
+    return _CustomAllocationArrayNamespace
diff --git a/src/gt4py/next/backend.py b/src/gt4py/next/backend.py
index e223d7771c..5a02628012 100644
--- a/src/gt4py/next/backend.py
+++ b/src/gt4py/next/backend.py
@@ -13,7 +13,7 @@
 from typing import Any, Generic
 
 from gt4py._core import definitions as core_defs
-from gt4py.next import allocators as next_allocators
+from gt4py.next import _allocators as next_allocators
 from gt4py.next.ffront import (
     foast_to_gtir,
     foast_to_itir,
diff --git a/src/gt4py/next/common.py b/src/gt4py/next/common.py
index 9b2870e1c0..8d3bc832d2 100644
--- a/src/gt4py/next/common.py
+++ b/src/gt4py/next/common.py
@@ -950,6 +950,9 @@ def _field(
     /,
     *,
     domain: Optional[DomainLike] = None,
+    allocation_ns: Optional[
+        Any
+    ] = None,  # TODO: should be `next_allocators.GTArrayAllocationNamespace`
     dtype: Optional[core_defs.DType] = None,
 ) -> Field:
     raise NotImplementedError
@@ -963,6 +966,9 @@ def _connectivity(
     codomain: Dimension,
     *,
     domain: Optional[DomainLike] = None,
+    allocation_ns: Optional[
+        Any
+    ] = None,  # TODO: should be `next_allocators.GTArrayAllocationNamespace`
    dtype: Optional[core_defs.DType] = None,
     skip_value: Optional[core_defs.IntegralScalar] = None,
 ) -> Connectivity:
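
The renamed `_allocators` module above builds a `GTArrayAllocationNamespace` in two ways: by wrapping a plain Array API namespace, or by routing through a GT4Py buffer allocator. A sketch (assumptions: the domain is simplified to an ordered name-to-size mapping; not the patch's exact code) of the two core ideas — the probe-array trick and the domain-to-shape reduction:

```python
from typing import Mapping, Optional

import array_api_compat
import numpy as np


def domain_empty(namespace, domain: Mapping[str, int], dtype: Optional[type] = None):
    # Probe-array trick from `get_array_allocation_namespace`: allocate a
    # zero-sized array and let array-api-compat hand back a standard-compliant
    # namespace for it (normalizing numpy/cupy/jax differences).
    xp = array_api_compat.array_namespace(namespace.empty([0]))
    # Domain-based construction reduces to a shape-based Array API call; the
    # real wrapper additionally rejects `device`/`aligned_index` for now.
    return xp.empty(shape=tuple(domain.values()), dtype=dtype)


buf = domain_empty(np, {"I": 3, "J": 4}, dtype=np.float32)
assert buf.shape == (3, 4)
```
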
diff --git a/src/gt4py/next/constructors.py b/src/gt4py/next/constructors.py
index 7b39511674..21b4b63636 100644
--- a/src/gt4py/next/constructors.py
+++ b/src/gt4py/next/constructors.py
@@ -14,7 +14,7 @@
 import gt4py._core.definitions as core_defs
 import gt4py.eve as eve
 import gt4py.eve.extended_typing as xtyping
-import gt4py.next.allocators as next_allocators
+import gt4py.next._allocators as next_allocators
 import gt4py.next.common as common
 import gt4py.next.embedded.nd_array_field as nd_array_field
 import gt4py.storage.cartesian.utils as storage_utils
@@ -26,9 +26,12 @@ def empty(
     dtype: core_defs.DTypeLike = core_defs.Float64DType(()),  # noqa: B008 [function-call-in-default-argument]
     *,
     aligned_index: Optional[Sequence[common.NamedIndex]] = None,
-    allocator: Optional[next_allocators.FieldBufferAllocationUtil] = None,
+    allocator: Optional[
+        next_allocators.FieldBufferAllocationUtil | core_defs.ArrayApiNamespace
+    ] = None,  # TODO make sure numpy/cupy etc namespaces are accepted by mypy (maybe we have to allow Any)
     device: Optional[core_defs.Device] = None,
 ) -> nd_array_field.NdArrayField:
+    # TODO: update doc
     """Create a `Field` of uninitialized (undefined) values using the given (or device-default) allocator.
 
     This function supports partial binding of arguments, see :class:`eve.utils.partial` for details.
@@ -74,13 +77,9 @@ def empty(
     >>> b.shape
     (3, 3)
     """
-    dtype = core_defs.dtype(dtype)
-    if allocator is None and device is None:
-        device = core_defs.Device(core_defs.DeviceType.CPU, device_id=0)
-    buffer = next_allocators.allocate(
-        domain, dtype, aligned_index=aligned_index, allocator=allocator, device=device
-    )
-    res = common._field(buffer.ndarray, domain=domain)
+    gtarray_namespace = next_allocators.get_array_allocation_namespace(allocator, device)
+    buffer = gtarray_namespace.empty(domain, dtype=dtype, aligned_index=aligned_index)
+    res = common._field(buffer, domain=domain, allocation_ns=gtarray_namespace)
     assert isinstance(res, common.MutableField)
     assert isinstance(res, nd_array_field.NdArrayField)
     return res
@@ -106,11 +105,12 @@ def zeros(
     >>> gtx.zeros({IDim: range(3, 10)}, allocator=gtx.itir_python).ndarray
     array([0., 0., 0., 0., 0., 0., 0.])
     """
-    field = empty(
-        domain=domain, dtype=dtype, aligned_index=aligned_index, allocator=allocator, device=device
-    )
-    field[...] = field.dtype.scalar_type(0)
-    return field
+    gtarray_namespace = next_allocators.get_array_allocation_namespace(allocator, device)
+    buffer = gtarray_namespace.zeros(domain, dtype=dtype, aligned_index=aligned_index)
+    res = common._field(buffer, domain=domain, allocation_ns=gtarray_namespace)
+    assert isinstance(res, common.MutableField)
+    assert isinstance(res, nd_array_field.NdArrayField)
+    return res
 
 
 @eve.utils.with_fluid_partial
@@ -133,11 +133,12 @@ def ones(
     >>> gtx.ones({IDim: range(3, 10)}, allocator=gtx.itir_python).ndarray
     array([1., 1., 1., 1., 1., 1., 1.])
     """
-    field = empty(
-        domain=domain, dtype=dtype, aligned_index=aligned_index, allocator=allocator, device=device
-    )
-    field[...] = field.dtype.scalar_type(1)
-    return field
+    gtarray_namespace = next_allocators.get_array_allocation_namespace(allocator, device)
+    buffer = gtarray_namespace.ones(domain, dtype=dtype, aligned_index=aligned_index)
+    res = common._field(buffer, domain=domain, allocation_ns=gtarray_namespace)
+    assert isinstance(res, common.MutableField)
+    assert isinstance(res, nd_array_field.NdArrayField)
+    return res
 
 
 @eve.utils.with_fluid_partial
@@ -166,15 +167,53 @@ def full(
     >>> gtx.full({IDim: 3}, 5, allocator=gtx.itir_python).ndarray
     array([5, 5, 5])
     """
-    field = empty(
-        domain=domain,
+    gtarray_namespace = next_allocators.get_array_allocation_namespace(allocator, device)
+    buffer = gtarray_namespace.full(
+        domain,
+        fill_value,
         dtype=dtype if dtype is not None else core_defs.dtype(type(fill_value)),
         aligned_index=aligned_index,
-        allocator=allocator,
-        device=device,
     )
-    field[...] = field.dtype.scalar_type(fill_value)
-    return field
+    res = common._field(buffer, domain=domain, allocation_ns=gtarray_namespace)
+    assert isinstance(res, common.MutableField)
+    assert isinstance(res, nd_array_field.NdArrayField)
+    return res
+
+
+def _actual_domain(
+    dims_or_domain: common.DomainLike | Sequence[common.Dimension],
+    shape: Sequence[int],
+    origin: Optional[Mapping[common.Dimension, int]] = None,
+) -> common.Domain:
+    if isinstance(dims_or_domain, Sequence) and all(
+        isinstance(dim, common.Dimension) for dim in dims_or_domain
+    ):
+        dims = cast(Sequence[common.Dimension], dims_or_domain)
+        if len(dims) != len(shape):
+            raise ValueError(
+                f"Cannot construct 'Field' from array of shape '{shape}' and domain '{dims}'."
+            )
+        if origin:
+            domain_dims = set(dims)
+            if unknown_dims := set(origin.keys()) - domain_dims:
+                raise ValueError(f"Origin keys {unknown_dims} not in domain {dims}.")
+        else:
+            origin = {}
+        return common.domain(
+            [
+                (d, (-(start_offset := origin.get(d, 0)), s - start_offset))
+                for d, s in zip(dims, shape)
+            ]
+        )
+    else:
+        domain = common.domain(cast(common.DomainLike, dims_or_domain))
+        if origin:
+            raise ValueError(f"Cannot specify origin for domain {domain}")
+        if domain.shape != shape:
+            raise ValueError(
+                f"Cannot construct 'Field' from array of shape '{shape}' and domain '{domain}'."
+            )
+        return domain
 
 
 @eve.utils.with_fluid_partial
@@ -187,7 +226,7 @@ def as_field(
     aligned_index: Optional[Sequence[common.NamedIndex]] = None,
     allocator: Optional[next_allocators.FieldBufferAllocatorProtocol] = None,
     device: Optional[core_defs.Device] = None,
-    # TODO: copy=False
+    # TODO(havogt): copy=False
 ) -> nd_array_field.NdArrayField:
     """Create a Field from an array-like object using the given (or device-default) allocator.
 
@@ -232,53 +271,25 @@ def as_field(
     >>> gtx.as_field({IDim: range(-1, 2)}, xdata).domain.ranges[0]
     UnitRange(-1, 2)
     """
-    if isinstance(domain, Sequence) and all(isinstance(dim, common.Dimension) for dim in domain):
-        domain = cast(Sequence[common.Dimension], domain)
-        if len(domain) != data.ndim:
-            raise ValueError(
-                f"Cannot construct 'Field' from array of shape '{data.shape}' and domain '{domain}'."
-            )
-        if origin:
-            domain_dims = set(domain)
-            if unknown_dims := set(origin.keys()) - domain_dims:
-                raise ValueError(f"Origin keys {unknown_dims} not in domain {domain}.")
-        else:
-            origin = {}
-        actual_domain = common.domain(
-            [
-                (d, (-(start_offset := origin.get(d, 0)), s - start_offset))
-                for d, s in zip(domain, data.shape)
-            ]
-        )
-    else:
-        if origin:
-            raise ValueError(f"Cannot specify origin for domain {domain}")
-        actual_domain = common.domain(cast(common.DomainLike, domain))
+    actual_domain = _actual_domain(dims_or_domain=domain, shape=data.shape, origin=origin)
 
     # TODO(egparedes): allow zero-copy construction (no reallocation) if buffer has
     # already the correct layout and device.
-    shape = storage_utils.asarray(data).shape
-    if shape != actual_domain.shape:
-        raise ValueError(f"Cannot construct 'Field' from array of shape '{shape}'.")
-    if dtype is None:
-        dtype = storage_utils.asarray(data).dtype
-    dtype = core_defs.dtype(dtype)
-    assert dtype.tensor_shape == ()  # TODO
 
     if (allocator is None) and (device is None) and xtyping.supports_dlpack(data):
         device = core_defs.Device(*data.__dlpack_device__())
-    field = empty(
+    gtarray_namespace = next_allocators.get_array_allocation_namespace(allocator, device)
+    buffer = gtarray_namespace.asarray(
+        data,
         domain=actual_domain,
         dtype=dtype,
+        copy=True,  # TODO(havogt) add support for zero-copy construction
         aligned_index=aligned_index,
-        allocator=allocator,
-        device=device,
     )
+    res = common._field(buffer, domain=actual_domain, allocation_ns=gtarray_namespace)
 
-    field[...] = field.array_ns.asarray(data)
-
-    return field
+    return res  # type: ignore[return-value]  # it is an NDArrayField
 
 
 @eve.utils.with_fluid_partial
@@ -349,12 +360,22 @@ def as_connectivity(
     if (allocator is None) and (device is None) and xtyping.supports_dlpack(data):
         device = core_defs.Device(*data.__dlpack_device__())
 
-    buffer = next_allocators.allocate(actual_domain, dtype, allocator=allocator, device=device)
-    # TODO(havogt): consider adding MutableNDArrayObject
-    buffer.ndarray[...] = storage_utils.asarray(data)  # type: ignore[index]
+
+    gtarray_namespace = next_allocators.get_array_allocation_namespace(allocator, device)
+    buffer = gtarray_namespace.asarray(
+        data,
+        domain=actual_domain,
+        dtype=dtype,
+        copy=True,  # TODO(havogt) add support for zero-copy construction
+    )
     connectivity_field = common._connectivity(
-        buffer.ndarray, codomain=codomain, domain=actual_domain, skip_value=skip_value
+        buffer,
+        codomain=codomain,
+        domain=actual_domain,
+        skip_value=skip_value,
+        allocation_ns=gtarray_namespace,
     )
+    assert isinstance(connectivity_field, nd_array_field.NdArrayConnectivityField)
     return connectivity_field
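
The origin arithmetic that `_actual_domain` factors out of `as_field` is compact but easy to misread; a standalone sketch (illustrative names, strings standing in for `Dimension` objects) of the mapping it computes:

```python
from typing import Mapping, Sequence


def ranges_from_origin(dims: Sequence[str], shape: Sequence[int], origin: Mapping[str, int]):
    # For each dimension, an origin offset o maps an array of size s to the
    # unit range [-o, s - o), i.e. index 0 of the data sits at position -o.
    return {d: (-origin.get(d, 0), s - origin.get(d, 0)) for d, s in zip(dims, shape)}


# Matches the `gtx.as_field({IDim: range(-1, 2)}, xdata)` doctest above:
assert ranges_from_origin(["I"], [3], {"I": 1}) == {"I": (-1, 2)}
```
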
diff --git a/src/gt4py/next/embedded/nd_array_field.py b/src/gt4py/next/embedded/nd_array_field.py
index e15fb4266a..06eeecb66a 100644
--- a/src/gt4py/next/embedded/nd_array_field.py
+++ b/src/gt4py/next/embedded/nd_array_field.py
@@ -29,7 +29,7 @@
     TypeVar,
     cast,
 )
-from gt4py.next import common
+from gt4py.next import _allocators, common
 from gt4py.next.embedded import (
     common as embedded_common,
     context as embedded_context,
@@ -116,6 +116,7 @@ class NdArrayField(
 
     _domain: common.Domain
     _ndarray: core_defs.NDArrayObject
+    _allocation_ns: Optional[_allocators.GTArrayAllocationNamespace]
 
     array_ns: ClassVar[ModuleType]  # TODO(havogt) introduce a NDArrayNamespace protocol
@@ -167,6 +168,9 @@ def from_array(
         /,
         *,
         domain: common.DomainLike,
+        allocation_ns: Optional[
+            _allocators.GTArrayAllocationNamespace
+        ] = None,  # TODO: maybe an NDArrayField always has an allocator?
         dtype: Optional[core_defs.DTypeLike] = None,
     ) -> NdArrayField:
         domain = common.domain(domain)
@@ -184,7 +188,7 @@ def from_array(
         assert len(domain) == array.ndim
         assert all(s == 1 or len(r) == s for r, s in zip(domain.ranges, array.shape))
 
-        return cls(domain, array)
+        return cls(domain, array, allocation_ns)
 
     def premap(
         self: NdArrayField,
@@ -330,7 +334,9 @@ def restrict(self, index: common.AnyIndexSpec) -> NdArrayField:
         new_domain, buffer_slice = self._slice(index)
         new_buffer = self.ndarray[buffer_slice]
         new_buffer = self.__class__.array_ns.asarray(new_buffer)
-        return self.__class__.from_array(new_buffer, domain=new_domain)
+        return self.__class__.from_array(
+            new_buffer, domain=new_domain, allocation_ns=self._allocation_ns
+        )
 
     __getitem__ = restrict
 
@@ -428,6 +434,24 @@ def _slice(
         assert common.is_relative_index_sequence(slice_)
         return new_domain, slice_
 
+    def __copy__(self) -> NdArrayField:
+        # Note: `copy` copies the data, following NumPy behavior
+        allocation_ns = (
+            self._allocation_ns
+            if self._allocation_ns is not None
+            else _allocators.get_array_allocation_namespace(self.array_ns)
+        )
+        ndarray_copy = allocation_ns.asarray(
+            self.ndarray,
+            domain=self.domain,
+            dtype=self.dtype,
+            copy=True,  # aligned_index???
+        )
+        return self.__class__(self.domain, ndarray_copy, _allocation_ns=self._allocation_ns)
+
+    def __deepcopy__(self, _: Any) -> NdArrayField:
+        return self.__copy__()
+
     if dace:
         # Extension of NdArrayField adding SDFGConvertible support in GT4Py Programs
         def _dace_data_ptr(self) -> int:
@@ -513,6 +537,7 @@ def from_array(  # type: ignore[override]
         codomain: common.DimT,
         *,
         domain: common.DomainLike,
+        allocation_ns: Optional[_allocators.GTArrayAllocationNamespace] = None,
         dtype: Optional[core_defs.DTypeLike] = None,
         skip_value: Optional[core_defs.IntegralScalar] = None,
     ) -> NdArrayConnectivityField:
@@ -533,7 +558,7 @@ def from_array(  # type: ignore[override]
 
         assert isinstance(codomain, common.Dimension)
 
-        return cls(domain, array, codomain, _skip_value=skip_value)
+        return cls(domain, array, allocation_ns, codomain, _skip_value=skip_value)
 
     def inverse_image(self, image_range: common.UnitRange | common.NamedRange) -> common.Domain:
         cache_key = hash((id(self.ndarray), self.domain, image_range))
@@ -570,7 +595,13 @@ def restrict(self, index: common.AnyIndexSpec) -> NdArrayConnectivityField:
             xp = cls.array_ns
             new_domain, buffer_slice = self._slice(index)
             new_buffer = xp.asarray(self.ndarray[buffer_slice])
-            restricted_connectivity = cls(new_domain, new_buffer, self.codomain, self.skip_value)
+            restricted_connectivity = cls(
+                new_domain,
+                new_buffer,
+                _allocation_ns=self._allocation_ns,
+                _codomain=self._codomain,
+                _skip_value=self._skip_value,
+            )
             self._cache[cache_key] = restricted_connectivity
 
         return restricted_connectivity
@@ -594,7 +625,9 @@ def _domain_premap(data: NdArrayField, *connectivities: common.Connectivity) ->
         new_ranges = connectivity.inverse_image(current_range)
         new_domain = new_domain.replace(dim_idx, *new_ranges)
 
-    return data.__class__.from_array(data._ndarray, domain=new_domain, dtype=data.dtype)
+    return data.__class__.from_array(
+        data._ndarray, domain=new_domain, dtype=data.dtype, allocation_ns=data._allocation_ns
+    )
 
 
 def _reshuffling_premap(
@@ -634,7 +667,10 @@ def _reshuffling_premap(
     conn_ndarray = xp.broadcast_to(conn_ndarray, data.domain.shape)
     if conn_ndarray is not conn.ndarray:
         conn = conn.__class__.from_array(
-            conn_ndarray, domain=data.domain, codomain=conn.codomain
+            conn_ndarray,
+            domain=data.domain,
+            codomain=conn.codomain,
+            allocation_ns=conn._allocation_ns,
         )
     conn_map[conn.codomain] = conn
     dim_idx = data.domain.dim_index(conn.codomain, allow_missing=False)
@@ -664,6 +700,7 @@ def _reshuffling_premap(
         new_buffer,
         domain=new_domain,
         dtype=data.dtype,
+        allocation_ns=data._allocation_ns,
     )
 
 
@@ -704,6 +741,7 @@ def _remapping_premap(data: NdArrayField, connectivity: common.Connectivity) ->
         new_buffer,
         domain=new_domain,
         dtype=data.dtype,
+        allocation_ns=data._allocation_ns,
     )
 
 
@@ -872,6 +910,7 @@ def _intersect_fields(
         nd_array_class.from_array(
             f.ndarray[_get_slices_from_domain_slice(f.domain, intersected_domain)],
             domain=intersected_domain,
+            # TODO allocator
         )
         for f, intersected_domain in zip(broadcasted_fields, intersected_domains, strict=True)
     )
@@ -908,6 +947,7 @@ def _concat(*fields: common.Field, dim: common.Dimension) -> common.Field:
             [nd_array_class.array_ns.broadcast_to(f.ndarray, f.domain.shape) for f in fields],
             axis=new_domain.dim_index(dim, allow_missing=False),
         ),
+        # TODO allocator
        domain=new_domain,
     )
@@ -1076,8 +1116,16 @@ def __setitem__(
         index: common.AnyIndexSpec,
         value: common.Field | core_defs.NDArrayObject | core_defs.ScalarT,
     ) -> None:
-        # TODO(havogt): use something like `self.ndarray = self.ndarray.at(index).set(value)`
-        raise NotImplementedError("'__setitem__' for JaxArrayField not yet implemented.")
+        target_domain, target_slice = self._slice(index)
+        if isinstance(value, NdArrayField):
+            if not value.domain == target_domain:
+                raise ValueError(
+                    f"Incompatible `Domain` in assignment. Source domain = {value.domain}, target domain = {target_domain}."
+                )
+            value = value.ndarray
+
+        assert hasattr(self._ndarray, "at")
+        object.__setattr__(self, "_ndarray", self._ndarray.at[target_slice].set(value))
 
     common._field.register(jnp.ndarray, JaxArrayField.from_array)
diff --git a/src/gt4py/next/ffront/decorator.py b/src/gt4py/next/ffront/decorator.py
index 61756f30c9..a8a7c4e730 100644
--- a/src/gt4py/next/ffront/decorator.py
+++ b/src/gt4py/next/ffront/decorator.py
@@ -24,7 +24,7 @@
 from gt4py._core import definitions as core_defs
 from gt4py.eve import extended_typing as xtyping
 from gt4py.next import (
-    allocators as next_allocators,
+    _allocators as next_allocators,
     backend as next_backend,
     common,
     embedded as next_embedded,
diff --git a/src/gt4py/next/program_processors/runners/dace.py b/src/gt4py/next/program_processors/runners/dace.py
index 1b3b930818..45bc9908ef 100644
--- a/src/gt4py/next/program_processors/runners/dace.py
+++ b/src/gt4py/next/program_processors/runners/dace.py
@@ -9,7 +9,7 @@
 import factory
 
 import gt4py._core.definitions as core_defs
-import gt4py.next.allocators as next_allocators
+import gt4py.next._allocators as next_allocators
 from gt4py.next import backend
 from gt4py.next.otf import workflow
 from gt4py.next.program_processors.runners.dace_fieldview import workflow as dace_fieldview_workflow
diff --git a/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py b/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py
index 40d44f5ab0..edf510ca2e 100644
--- a/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py
+++ b/src/gt4py/next/program_processors/runners/dace_fieldview/workflow.py
@@ -16,7 +16,7 @@
 import factory
 
 from gt4py._core import definitions as core_defs
-from gt4py.next import allocators as gtx_allocators, common, config
+from gt4py.next import _allocators as gtx_allocators, common, config
 from gt4py.next.iterator import ir as itir, transforms as itir_transforms
 from gt4py.next.otf import languages, recipes, stages, step_types, workflow
 from gt4py.next.otf.binding import interface
diff --git a/src/gt4py/next/program_processors/runners/gtfn.py b/src/gt4py/next/program_processors/runners/gtfn.py
index 55f479c665..2847540e6c 100644
--- a/src/gt4py/next/program_processors/runners/gtfn.py
+++ b/src/gt4py/next/program_processors/runners/gtfn.py
@@ -17,7 +17,7 @@
 import filelock
 
 import gt4py._core.definitions as core_defs
-import gt4py.next.allocators as next_allocators
+import gt4py.next._allocators as next_allocators
 from gt4py.eve import utils
 from gt4py.eve.utils import content_hash
 from gt4py.next import backend, common, config
diff --git a/src/gt4py/next/program_processors/runners/roundtrip.py b/src/gt4py/next/program_processors/runners/roundtrip.py
index 1dd568b95a..57df1fe228 100644
--- a/src/gt4py/next/program_processors/runners/roundtrip.py
+++ b/src/gt4py/next/program_processors/runners/roundtrip.py
@@ -20,7 +20,7 @@
 from gt4py.eve import codegen
 from gt4py.eve.codegen import FormatTemplate as as_fmt, MakoTemplate as as_mako
-from gt4py.next import allocators as next_allocators, backend as next_backend, common, config
+from gt4py.next import _allocators as next_allocators, backend as next_backend, common, config
 from gt4py.next.ffront import foast_to_gtir, foast_to_past, past_to_itir
 from gt4py.next.iterator import ir as itir, transforms as itir_transforms
 from gt4py.next.otf import stages, workflow
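
The new `JaxArrayField.__setitem__` above rebinds `_ndarray` (via `object.__setattr__`, since the field is a frozen dataclass) instead of mutating, because JAX arrays are immutable and in-place updates must be expressed functionally through the `.at[...]` interface. A standalone illustration, independent of the patch:

```python
import jax.numpy as jnp

a = jnp.zeros((4,))
b = a.at[1:3].set(7.0)  # returns a new array; `a` is unchanged
assert float(a[1]) == 0.0 and float(b[1]) == 7.0
```
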
diff --git a/src/gt4py/storage/allocators.py b/src/gt4py/storage/allocators.py
index 298b9c2e5a..dbd3113a77 100644
--- a/src/gt4py/storage/allocators.py
+++ b/src/gt4py/storage/allocators.py
@@ -17,12 +17,10 @@
 import types
 
 import numpy as np
-import numpy.typing as npt
 
 from gt4py._core import definitions as core_defs
 from gt4py.eve import extended_typing as xtyping
 from gt4py.eve.extended_typing import (
-    TYPE_CHECKING,
     Any,
     Callable,
     Generic,
@@ -31,7 +29,6 @@
     Protocol,
     Sequence,
     Tuple,
-    Type,
     TypeAlias,
     TypeGuard,
     Union,
@@ -64,88 +61,6 @@ def is_valid_layout_map(value: Sequence[Any]) -> TypeGuard[BufferLayoutMap]:
     )
 
 
-@dataclasses.dataclass(frozen=True)
-class TensorBuffer(Generic[core_defs.DeviceTypeT, core_defs.ScalarT]):
-    """
-    N-dimensional (tensor-like) memory buffer.
-
-    The actual class of the stored buffer and ndarray instances is
-    represented in the `NDBufferT` parameter and might be any n-dimensional
-    buffer-like class with a compatible buffer interface (e.g. NumPy
-    or CuPy `ndarray`.)
-
-    Attributes:
-        buffer: Raw allocated buffer.
-        memory_address: Memory address of the buffer.
-        device: Device where the buffer is allocated.
-        dtype: Data type descriptor.
-        shape: Tuple with lengths of the corresponding tensor dimensions.
-        strides: Tuple with sizes (in bytes) of the steps in each dimension.
-        layout_map: Tuple with the order of the dimensions in the buffer
-            layout_map[i] = j means that the i-th dimension of the tensor
-            corresponds to the j-th dimension in the (C-layout) buffer.
-        byte_offset: Offset (in bytes) from the beginning of the buffer to
-            the first valid element.
-        byte_alignment: Alignment (in bytes) of the first valid element.
-        aligned_index: N-dimensional index of the first aligned element.
-        ndarray: N-dimensional tensor view of the allocated buffer.
-    """
-
-    buffer: _NDBuffer = dataclasses.field(hash=False)
-    memory_address: int
-    device: core_defs.Device[core_defs.DeviceTypeT]
-    dtype: core_defs.DType[core_defs.ScalarT]
-    shape: core_defs.TensorShape
-    strides: Tuple[int, ...]
-    layout_map: BufferLayoutMap
-    byte_offset: int
-    byte_alignment: int
-    aligned_index: Tuple[int, ...]
-    ndarray: core_defs.NDArrayObject = dataclasses.field(hash=False)
-
-    @property
-    def ndim(self):
-        """Order of the tensor (`len(tensor_buffer.shape)`)."""
-        return len(self.shape)
-
-    def __array__(self, dtype: Optional[npt.DTypeLike] = None, /) -> np.ndarray:
-        if not xtyping.supports_array(self.ndarray):
-            raise TypeError("Cannot export tensor buffer as NumPy array.")
-
-        return self.ndarray.__array__(dtype)
-
-    @property
-    def __array_interface__(self) -> dict[str, Any]:
-        if not xtyping.supports_array_interface(self.ndarray):
-            raise TypeError("Cannot export tensor buffer to NumPy array interface.")
-
-        return self.ndarray.__array_interface__
-
-    @property
-    def __cuda_array_interface__(self) -> dict[str, Any]:
-        if not xtyping.supports_cuda_array_interface(self.ndarray):
-            raise TypeError("Cannot export tensor buffer to CUDA array interface.")
-
-        return self.ndarray.__cuda_array_interface__
-
-    def __dlpack__(self, *, stream: Optional[int] = None) -> Any:
-        if not hasattr(self.ndarray, "__dlpack__"):
-            raise TypeError("Cannot export tensor buffer to DLPack.")
-        return self.ndarray.__dlpack__(stream=stream)  # type: ignore[call-arg,arg-type]  # stream is not always supported
-
-    def __dlpack_device__(self) -> xtyping.DLPackDevice:
-        if not hasattr(self.ndarray, "__dlpack_device__"):
-            raise TypeError("Cannot extract DLPack device from tensor buffer.")
-        return self.ndarray.__dlpack_device__()
-
-
-if TYPE_CHECKING:
-    # TensorBuffer should be compatible with all the expected buffer interfaces
-    __TensorBufferAsArrayInterfaceT: Type[xtyping.ArrayInterface] = TensorBuffer
-    __TensorBufferAsCUDAArrayInterfaceT: Type[xtyping.CUDAArrayInterface] = TensorBuffer
-    __TensorBufferAsDLPackBufferT: Type[xtyping.DLPackBuffer] = TensorBuffer
-
-
 class BufferAllocator(Protocol[core_defs.DeviceTypeT]):
     """Protocol for buffer allocators."""
 
@@ -160,9 +75,9 @@ def allocate(
         layout_map: BufferLayoutMap,
         byte_alignment: int,
         aligned_index: Optional[Sequence[int]] = None,
-    ) -> TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]:
+    ) -> core_defs.MutableNDArrayObject:
         """
-        Allocate a TensorBuffer with the given shape, layout and alignment settings.
+        Allocate an NDArrayObject with the given shape, layout and alignment settings.
 
         Args:
             shape: Tensor dimensions.
@@ -194,7 +109,7 @@ def allocate(
         layout_map: BufferLayoutMap,
         byte_alignment: int,
         aligned_index: Optional[Sequence[int]] = None,
-    ) -> TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]:
+    ) -> core_defs.MutableNDArrayObject:
         if not core_defs.is_valid_tensor_shape(shape):
             raise ValueError(f"Invalid shape {shape}")
         ndim = len(shape)
@@ -254,24 +169,7 @@ def allocate(
         ) % byte_alignment
         byte_offset = (aligned_index_offset + allocation_mismatch_offset) % byte_alignment
 
-        # Create shaped view from buffer
-        ndarray = self.tensorize(
-            buffer, dtype, shape, padded_shape, item_size, strides, byte_offset
-        )
-
-        return TensorBuffer(
-            buffer=buffer,
-            memory_address=memory_address,
-            device=core_defs.Device(self.device_type, device_id),
-            dtype=dtype,
-            shape=shape,
-            strides=strides,
-            layout_map=layout_map,
-            byte_offset=byte_offset,
-            byte_alignment=byte_alignment,
-            aligned_index=aligned_index,
-            ndarray=ndarray,
-        )
+        return self.tensorize(buffer, dtype, shape, padded_shape, item_size, strides, byte_offset)
 
     @property
     @abc.abstractmethod
@@ -292,7 +190,8 @@ def tensorize(
         item_size: int,
         strides: Sequence[int],
         byte_offset: int,
-    ) -> core_defs.NDArrayObject:
+    ) -> core_defs.MutableNDArrayObject:
+        """Create shaped view from buffer."""
         pass
 
 
@@ -301,7 +200,7 @@ class ArrayUtils:
     array_ns: types.ModuleType
     empty: Callable[..., _NDBuffer]
     byte_bounds: Callable[[_NDBuffer], Tuple[int, int]]
-    as_strided: Callable[..., core_defs.NDArrayObject]
+    as_strided: Callable[..., core_defs.MutableNDArrayObject]
 
 
 numpy_array_utils = ArrayUtils(
@@ -359,7 +258,7 @@ def tensorize(
         item_size: int,
         strides: Sequence[int],
         byte_offset: int,
-    ) -> core_defs.NDArrayObject:
+    ) -> core_defs.MutableNDArrayObject:
         aligned_buffer = buffer[byte_offset : byte_offset + math.prod(allocated_shape) * item_size]  # type: ignore[index]  # TODO(egparedes): should we extend `_NDBuffer`s to cover __getitem__?
         flat_ndarray = aligned_buffer.view(dtype=np.dtype(dtype))
         tensor_view = self._array_utils.as_strided(
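
With `TensorBuffer` gone, `allocate` above now returns the shaped view produced by `tensorize` directly, and the raw buffer is kept alive only through the view. A rough sketch (not the patch's code; simplified to a 1-D, C-layout NumPy case) of the underlying technique — over-allocate a flat byte buffer, then slice from the first aligned address and reinterpret:

```python
import numpy as np


def aligned_empty(n: int, dtype: np.dtype, byte_alignment: int) -> np.ndarray:
    item_size = dtype.itemsize
    # Over-allocate so an aligned start address always fits in the buffer.
    buffer = np.empty(n * item_size + byte_alignment, dtype=np.uint8)
    address = buffer.__array_interface__["data"][0]
    byte_offset = (-address) % byte_alignment
    # The caller only ever sees the shaped, aligned view.
    return buffer[byte_offset : byte_offset + n * item_size].view(dtype)


arr = aligned_empty(10, np.dtype(np.float64), byte_alignment=64)
assert arr.__array_interface__["data"][0] % 64 == 0
```
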
diff --git a/src/gt4py/storage/cartesian/interface.py b/src/gt4py/storage/cartesian/interface.py
index 8b38bcdd42..01f884a555 100644
--- a/src/gt4py/storage/cartesian/interface.py
+++ b/src/gt4py/storage/cartesian/interface.py
@@ -97,9 +97,7 @@ def empty(
     assert allocators.is_valid_layout_map(layout_map)
 
     dtype = np.dtype(dtype)
-    _, res = allocate_f(shape, layout_map, dtype, alignment * dtype.itemsize, aligned_index)
-
-    return res
+    return allocate_f(shape, layout_map, dtype, alignment * dtype.itemsize, aligned_index)
 
 
 def ones(
diff --git a/src/gt4py/storage/cartesian/utils.py b/src/gt4py/storage/cartesian/utils.py
index 50500e536b..b94abb915c 100644
--- a/src/gt4py/storage/cartesian/utils.py
+++ b/src/gt4py/storage/cartesian/utils.py
@@ -248,9 +248,9 @@ def allocate_cpu(
     dtype: DTypeLike,
     alignment_bytes: int,
     aligned_index: Optional[Sequence[int]],
-) -> Tuple[allocators._NDBuffer, np.ndarray]:
+) -> np.ndarray:
     device = core_defs.Device(core_defs.DeviceType.CPU, 0)
-    buffer = _CPUBufferAllocator.allocate(
+    ndarray = _CPUBufferAllocator.allocate(
         shape,
         core_defs.dtype(dtype),
         device_id=device.device_id,
@@ -258,7 +258,7 @@ def allocate_cpu(
         byte_alignment=alignment_bytes,
         aligned_index=aligned_index,
     )
-    return buffer.buffer, cast(np.ndarray, buffer.ndarray)
+    return cast(np.ndarray, ndarray)
 
 
 def _allocate_gpu(
@@ -267,14 +267,14 @@ def _allocate_gpu(
     dtype: DTypeLike,
     alignment_bytes: int,
     aligned_index: Optional[Sequence[int]],
-) -> Tuple["cp.ndarray", "cp.ndarray"]:
+) -> "cp.ndarray":
     assert cp is not None
     assert _GPUBufferAllocator is not None, "GPU allocation library or device not found"
     device = core_defs.Device(  # type: ignore[type-var]
         (core_defs.DeviceType.ROCM if gt_config.GT4PY_USE_HIP else core_defs.DeviceType.CUDA),
         0,
     )
-    buffer = _GPUBufferAllocator.allocate(
+    ndarray = _GPUBufferAllocator.allocate(
         shape,
         core_defs.dtype(dtype),
         device_id=device.device_id,
@@ -283,9 +283,7 @@ def _allocate_gpu(
         aligned_index=aligned_index,
     )
 
-    buffer_ndarray = cast("cp.ndarray", buffer.ndarray)
-
-    return buffer.buffer, buffer_ndarray
+    return cast("cp.ndarray", ndarray)
 
 
 allocate_gpu = _allocate_gpu
@@ -321,8 +319,8 @@ def _allocate_gpu_rocm(
         dtype: DTypeLike,
         alignment_bytes: int,
         aligned_index: Optional[Sequence[int]],
-    ) -> Tuple["cp.ndarray", "cp.ndarray"]:
-        buffer, ndarray = _allocate_gpu(shape, layout_map, dtype, alignment_bytes, aligned_index)
-        return buffer, CUDAArrayInterfaceNDArray(ndarray)
+    ) -> "cp.ndarray":
+        ndarray = _allocate_gpu(shape, layout_map, dtype, alignment_bytes, aligned_index)
+        return CUDAArrayInterfaceNDArray(ndarray)
 
     allocate_gpu = _allocate_gpu_rocm
diff --git a/tests/next_tests/definitions.py b/tests/next_tests/definitions.py
index d7413f32d7..85acf23300 100644
--- a/tests/next_tests/definitions.py
+++ b/tests/next_tests/definitions.py
@@ -14,13 +14,21 @@
 
 import pytest
 
-from gt4py.next import allocators as next_allocators
+from gt4py.next import _allocators as next_allocators
 
 
 # Skip definitions
 XFAIL = pytest.xfail
 SKIP = pytest.skip
 
+try:
+    import jax
+    import jax.numpy as jnp
+
+    jax.config.update("jax_enable_x64", True)
+except ImportError:
+    jnp = None
+
 
 # Program processors
 class _PythonObjectIdMixin:
@@ -56,13 +64,19 @@ class EmbeddedDummyBackend:
     allocator: next_allocators.FieldBufferAllocatorProtocol
 
 
-numpy_execution = EmbeddedDummyBackend(next_allocators.StandardCPUFieldBufferAllocator())
+import numpy as np
+
+
+# numpy_execution = EmbeddedDummyBackend(next_allocators.StandardCPUFieldBufferAllocator())
+numpy_execution = EmbeddedDummyBackend(np)
 cupy_execution = EmbeddedDummyBackend(next_allocators.StandardGPUFieldBufferAllocator())
+jax_execution = EmbeddedDummyBackend(jnp)
 
 
 class EmbeddedIds(_PythonObjectIdMixin, str, enum.Enum):
     NUMPY_EXECUTION = "next_tests.definitions.numpy_execution"
     CUPY_EXECUTION = "next_tests.definitions.cupy_execution"
+    JAX_EXECUTION = "next_tests.definitions.jax_execution"
 
 
 class OptionalProgramBackendId(_PythonObjectIdMixin, str, enum.Enum):
@@ -112,6 +126,7 @@ class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum):
 USES_MAX_OVER = "uses_max_over"
 USES_MESH_WITH_SKIP_VALUES = "uses_mesh_with_skip_values"
 USES_SCALAR_IN_DOMAIN_AND_FO = "uses_scalar_in_domain_and_fo"
+SLICES_OUT_ARGUMENT = "slices_out_argument"
 CHECKS_SPECIFIC_ERROR = "checks_specific_error"
 
 # Skip messages (available format keys: 'marker', 'backend')
@@ -147,6 +162,9 @@ class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum):
         UNSUPPORTED_MESSAGE,
     ),  # we can't extract the field type from scan args
 ]
+JAX_SKIP_LIST = EMBEDDED_SKIP_LIST + [
+    (SLICES_OUT_ARGUMENT, XFAIL, UNSUPPORTED_MESSAGE),
+]
 ROUNDTRIP_SKIP_LIST = DOMAIN_INFERENCE_SKIP_LIST + [
     (USES_SPARSE_FIELDS_AS_OUTPUT, XFAIL, UNSUPPORTED_MESSAGE),
 ]
@@ -169,6 +187,7 @@ class ProgramFormatterId(_PythonObjectIdMixin, str, enum.Enum):
 BACKEND_SKIP_TEST_MATRIX = {
     EmbeddedIds.NUMPY_EXECUTION: EMBEDDED_SKIP_LIST,
     EmbeddedIds.CUPY_EXECUTION: EMBEDDED_SKIP_LIST,
+    EmbeddedIds.JAX_EXECUTION: JAX_SKIP_LIST,
     OptionalProgramBackendId.DACE_CPU: DACE_SKIP_TEST_LIST,
     OptionalProgramBackendId.DACE_GPU: DACE_SKIP_TEST_LIST,
     OptionalProgramBackendId.DACE_CPU_NO_OPT: DACE_SKIP_TEST_LIST,
diff --git a/tests/next_tests/integration_tests/cases.py b/tests/next_tests/integration_tests/cases.py
index 759cd1cf1f..9faff154da 100644
--- a/tests/next_tests/integration_tests/cases.py
+++ b/tests/next_tests/integration_tests/cases.py
@@ -23,11 +23,12 @@
 from gt4py.eve import extended_typing as xtyping
 from gt4py.eve.extended_typing import Self
 from gt4py.next import (
-    allocators as next_allocators,
+    _allocators as next_allocators,
     backend as next_backend,
     common,
     constructors,
     field_utils,
+    utils as gt_utils,
 )
 from gt4py.next.ffront import decorator
 from gt4py.next.type_system import type_specifications as ts, type_translation
@@ -55,7 +56,6 @@
     mesh_descriptor,
 )
 
-from gt4py.next import utils as gt_utils
 
 # mypy does not accept [IDim, ...] as a type
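
The `requires_jax` marker registered in `pyproject.toml` still needs a collection hook to take effect; a hypothetical `conftest.py` sketch of the usual pattern (the repository's actual skip logic may differ):

```python
import importlib.util

import pytest


def pytest_collection_modifyitems(config, items):
    # Skip `requires_jax`-marked tests when jax is not importable.
    if importlib.util.find_spec("jax") is None:
        skip_jax = pytest.mark.skip(reason="'jax' package is not installed")
        for item in items:
            if "requires_jax" in item.keywords:
                item.add_marker(skip_jax)
```
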
diff --git a/tests/next_tests/integration_tests/feature_tests/dace/test_orchestration.py b/tests/next_tests/integration_tests/feature_tests/dace/test_orchestration.py
index 08904c06f3..a5ce949e04 100644
--- a/tests/next_tests/integration_tests/feature_tests/dace/test_orchestration.py
+++ b/tests/next_tests/integration_tests/feature_tests/dace/test_orchestration.py
@@ -6,11 +6,13 @@
 # Please, refer to the LICENSE file in the root directory.
 # SPDX-License-Identifier: BSD-3-Clause
 
+from typing import Optional
+
 import numpy as np
 import pytest
 
 import gt4py.next as gtx
-from gt4py.next import allocators as gtx_allocators, common as gtx_common
+from gt4py.next import _allocators as gtx_allocators, common as gtx_common
 
 from next_tests.integration_tests import cases
 from next_tests.integration_tests.cases import cartesian_case, unstructured_case
diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/ffront_test_utils.py b/tests/next_tests/integration_tests/feature_tests/ffront_tests/ffront_test_utils.py
index 1147f4bc3e..c83c433545 100644
--- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/ffront_test_utils.py
+++ b/tests/next_tests/integration_tests/feature_tests/ffront_tests/ffront_test_utils.py
@@ -7,14 +7,14 @@
 # SPDX-License-Identifier: BSD-3-Clause
 
 import types
-from typing import Any, Protocol, TypeVar
+from typing import Protocol, TypeVar
 
 import numpy as np
 import pytest
 
 import gt4py.next as gtx
 from gt4py._core import definitions as core_defs
-from gt4py.next import backend as next_backend, common, allocators as next_allocators
+from gt4py.next import _allocators as next_allocators, backend as next_backend, common
 from gt4py.next.ffront import decorator
 
 import next_tests
@@ -57,6 +57,9 @@ def __gt_allocator__(
     pytest.param(
         next_tests.definitions.EmbeddedIds.CUPY_EXECUTION, marks=pytest.mark.requires_gpu
     ),
+    pytest.param(
+        next_tests.definitions.EmbeddedIds.JAX_EXECUTION, marks=pytest.mark.requires_jax
+    ),
     pytest.param(
         next_tests.definitions.OptionalProgramBackendId.DACE_CPU,
         marks=pytest.mark.requires_dace,
diff --git a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_program.py b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_program.py
index f1cb8ffb17..57607fefc3 100644
--- a/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_program.py
+++ b/tests/next_tests/integration_tests/feature_tests/ffront_tests/test_program.py
@@ -53,6 +53,7 @@ def test_identity_fo_execution(cartesian_case, identity_def):
 
 
 @pytest.mark.uses_cartesian_shift
+@pytest.mark.slices_out_argument
 def test_shift_by_one_execution(cartesian_case):
     @gtx.field_operator
     def shift_by_one(in_field: cases.IFloatField) -> cases.IFloatField:
@@ -95,6 +96,7 @@ def test_double_copy_execution(cartesian_case, double_copy_program_def):
     )
 
 
+@pytest.mark.slices_out_argument
 def test_copy_restricted_execution(cartesian_case, copy_restrict_program_def):
     copy_restrict_program = gtx.program(copy_restrict_program_def, backend=cartesian_case.backend)
 
@@ -154,6 +156,7 @@ def prog(
     assert np.allclose((a.asnumpy(), b.asnumpy()), (out_a.asnumpy(), out_b.asnumpy()))
 
 
+@pytest.mark.slices_out_argument
 def test_tuple_program_return_constructed_inside_with_slicing(cartesian_case):
     @gtx.field_operator
     def pack_tuple(
diff --git a/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_ffront_fvm_nabla.py b/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_ffront_fvm_nabla.py
index 6c6ca7e4bc..5c914ecede 100644
--- a/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_ffront_fvm_nabla.py
+++ b/tests/next_tests/integration_tests/multi_feature_tests/ffront_tests/test_ffront_fvm_nabla.py
@@ -11,10 +11,11 @@
 import numpy as np
 import pytest
 
+
 pytest.importorskip("atlas4py")
 
 from gt4py import next as gtx
-from gt4py.next import allocators, neighbor_sum
+from gt4py.next import _allocators, neighbor_sum
 from gt4py.next.iterator import atlas_utils
 
 from next_tests.integration_tests.feature_tests.ffront_tests.ffront_test_utils import (
diff --git a/tests/next_tests/unit_tests/embedded_tests/test_nd_array_field.py b/tests/next_tests/unit_tests/embedded_tests/test_nd_array_field.py
index 9dde5bb40a..af5dd03102 100644
--- a/tests/next_tests/unit_tests/embedded_tests/test_nd_array_field.py
+++ b/tests/next_tests/unit_tests/embedded_tests/test_nd_array_field.py
@@ -6,6 +6,7 @@
 # Please, refer to the LICENSE file in the root directory.
 # SPDX-License-Identifier: BSD-3-Clause
 
+import copy
 import math
 import operator
 from typing import Callable, Iterable, Optional
@@ -261,6 +262,13 @@ def test_as_scalar(nd_array_implementation):
     assert isinstance(result, np.float32)
 
 
+@pytest.mark.parametrize("copy", [copy.copy, copy.deepcopy])
+def test_copy(copy, nd_array_implementation):
+    testee = _make_field_or_scalar([[0, 1], [2, 3]], nd_array_implementation)
+    result = copy(testee)
+    assert np.array_equal(testee.ndarray, result.ndarray)
+
+
 def product_nd_array_implementation_params():
     for xp1 in nd_array_field._nd_array_implementations:
         for xp2 in nd_array_field._nd_array_implementations:
diff --git a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace.py b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace.py
index 62d88d9f0a..4ef69ec429 100644
--- a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace.py
+++ b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/dace_tests/test_dace.py
@@ -173,13 +173,13 @@ def verify_testee(offset_provider):
     )
     mock_fast_call.assert_called_once()
 
-    if gtx.allocators.is_field_allocator_for(
+    if gtx._allocators.is_field_allocator_for(
         unstructured_case.backend.allocator, core_defs.DeviceType.CPU
     ):
         offset_provider = unstructured_case.offset_provider
     else:
-        assert gtx.allocators.is_field_allocator_for(
-            unstructured_case.backend.allocator, gtx.allocators.CUPY_DEVICE
+        assert gtx._allocators.is_field_allocator_for(
+            unstructured_case.backend.allocator, gtx._allocators.CUPY_DEVICE
         )
         import cupy as cp
diff --git a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/test_gtfn.py b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/test_gtfn.py
index 3d82dd8ee5..3df9d11015 100644
--- a/tests/next_tests/unit_tests/program_processor_tests/runners_tests/test_gtfn.py
+++ b/tests/next_tests/unit_tests/program_processor_tests/runners_tests/test_gtfn.py
@@ -20,7 +20,7 @@
 """
 
 import gt4py._core.definitions as core_defs
-from gt4py.next import allocators, config
+from gt4py.next import _allocators, config
 from gt4py.next.iterator import transforms
 from gt4py.next.iterator.transforms import global_tmps
 from gt4py.next.otf import workflow
@@ -40,8 +40,8 @@ def test_backend_factory_trait_device():
     assert cpu_version.executor.decoration.keywords["device"] is core_defs.DeviceType.CPU
     assert gpu_version.executor.decoration.keywords["device"] is core_defs.DeviceType.CUDA
 
-    assert allocators.is_field_allocator_for(cpu_version.allocator, core_defs.DeviceType.CPU)
-    assert allocators.is_field_allocator_for(gpu_version.allocator, core_defs.DeviceType.CUDA)
+    assert _allocators.is_field_allocator_for(cpu_version.allocator, core_defs.DeviceType.CPU)
+    assert _allocators.is_field_allocator_for(gpu_version.allocator, core_defs.DeviceType.CUDA)
 
 
 def test_backend_factory_trait_cached():
diff --git a/tests/next_tests/unit_tests/test_allocators.py b/tests/next_tests/unit_tests/test_allocators.py
index d3001779e1..1aeff1d3ec 100644
--- a/tests/next_tests/unit_tests/test_allocators.py
+++ b/tests/next_tests/unit_tests/test_allocators.py
@@ -12,7 +12,7 @@
 import pytest
 
 import gt4py._core.definitions as core_defs
-import gt4py.next.allocators as next_allocators
+import gt4py.next._allocators as next_allocators
 import gt4py.next.common as common
 import gt4py.storage.allocators as core_allocators
 
@@ -26,7 +26,7 @@ def __gt_allocate__(
         dtype: core_defs.DType[core_defs.ScalarT],
         device_id: int = 0,
         aligned_index: Optional[Sequence[common.NamedIndex]] = None,
-    ) -> core_allocators.TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]:
+    ) -> core_defs.NDArrayObject:
         pass
 
 
@@ -108,7 +108,7 @@ def test_get_allocator():
 
 
 def test_horizontal_first_layout_mapper():
-    from gt4py.next.allocators import horizontal_first_layout_mapper
+    from gt4py.next._allocators import horizontal_first_layout_mapper
 
     # Test with only horizontal dimensions
     dims = [
@@ -152,7 +152,7 @@ def test_allocate(self):
 
 
 def test_allocate():
-    from gt4py.next.allocators import StandardCPUFieldBufferAllocator, allocate
+    from gt4py.next._allocators import StandardCPUFieldBufferAllocator, allocate
 
     I = common.Dimension("I")
     J = common.Dimension("J")
@@ -161,27 +161,25 @@ def test_allocate():
 
     # Test with a explicit field allocator
     allocator = StandardCPUFieldBufferAllocator()
-    tensor_buffer = allocate(domain, dtype, allocator=allocator)
+    tensor_buffer = allocate(domain=domain, dtype=dtype, allocator=allocator)
     assert tensor_buffer.shape == domain.shape
     assert tensor_buffer.dtype == dtype
-    assert tensor_buffer.device == core_defs.Device(core_defs.DeviceType.CPU, 0)
 
     # Test with a device
     device = core_defs.Device(core_defs.DeviceType.CPU, 0)
-    tensor_buffer = allocate(domain, dtype, device=device)
+    tensor_buffer = allocate(domain=domain, dtype=dtype, device=device)
     assert tensor_buffer.shape == domain.shape
     assert tensor_buffer.dtype == dtype
-    assert tensor_buffer.device == core_defs.Device(core_defs.DeviceType.CPU, 0)
 
     # Test with both allocator and device
     with pytest.raises(ValueError, match="are incompatible"):
         allocate(
-            domain,
-            dtype,
+            domain=domain,
+            dtype=dtype,
             allocator=allocator,
             device=core_defs.Device(core_defs.DeviceType.CUDA, 0),
         )
 
     # Test with no device or allocator
     with pytest.raises(ValueError, match="No 'device' or 'allocator' specified"):
-        allocate(domain, dtype)
+        allocate(domain=domain, dtype=dtype)
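
A short usage sketch of the now keyword-only `allocate` entry point exercised by the updated test above (hedged: the `common.domain({...: int})` and `core_defs.dtype(float)` spellings are assumptions about the helper APIs, not taken from the patch):

```python
import gt4py._core.definitions as core_defs
import gt4py.next.common as common
from gt4py.next._allocators import StandardCPUFieldBufferAllocator, allocate

domain = common.domain({common.Dimension("I"): 5})
buffer = allocate(
    domain=domain,
    dtype=core_defs.dtype(float),
    allocator=StandardCPUFieldBufferAllocator(),
    device=core_defs.Device(core_defs.DeviceType.CPU, 0),
)
buffer[...] = 0.0  # the result is now a writable ndarray, not a TensorBuffer
```
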
diff --git a/tests/next_tests/unit_tests/test_allocators.py b/tests/next_tests/unit_tests/test_allocators.py
index d3001779e1..1aeff1d3ec 100644
--- a/tests/next_tests/unit_tests/test_allocators.py
+++ b/tests/next_tests/unit_tests/test_allocators.py
@@ -12,7 +12,7 @@
 import pytest
 
 import gt4py._core.definitions as core_defs
-import gt4py.next.allocators as next_allocators
+import gt4py.next._allocators as next_allocators
 import gt4py.next.common as common
 import gt4py.storage.allocators as core_allocators
@@ -26,7 +26,7 @@ def __gt_allocate__(
         dtype: core_defs.DType[core_defs.ScalarT],
         device_id: int = 0,
         aligned_index: Optional[Sequence[common.NamedIndex]] = None,
-    ) -> core_allocators.TensorBuffer[core_defs.DeviceTypeT, core_defs.ScalarT]:
+    ) -> core_defs.NDArrayObject:
         pass
@@ -108,7 +108,7 @@ def test_get_allocator():
 
 
 def test_horizontal_first_layout_mapper():
-    from gt4py.next.allocators import horizontal_first_layout_mapper
+    from gt4py.next._allocators import horizontal_first_layout_mapper
 
     # Test with only horizontal dimensions
     dims = [
@@ -152,7 +152,7 @@ def test_allocate(self):
 
 
 def test_allocate():
-    from gt4py.next.allocators import StandardCPUFieldBufferAllocator, allocate
+    from gt4py.next._allocators import StandardCPUFieldBufferAllocator, allocate
 
     I = common.Dimension("I")
     J = common.Dimension("J")
@@ -161,27 +161,25 @@ def test_allocate():
 
     # Test with a explicit field allocator
     allocator = StandardCPUFieldBufferAllocator()
-    tensor_buffer = allocate(domain, dtype, allocator=allocator)
+    tensor_buffer = allocate(domain=domain, dtype=dtype, allocator=allocator)
     assert tensor_buffer.shape == domain.shape
     assert tensor_buffer.dtype == dtype
-    assert tensor_buffer.device == core_defs.Device(core_defs.DeviceType.CPU, 0)
 
     # Test with a device
     device = core_defs.Device(core_defs.DeviceType.CPU, 0)
-    tensor_buffer = allocate(domain, dtype, device=device)
+    tensor_buffer = allocate(domain=domain, dtype=dtype, device=device)
     assert tensor_buffer.shape == domain.shape
     assert tensor_buffer.dtype == dtype
-    assert tensor_buffer.device == core_defs.Device(core_defs.DeviceType.CPU, 0)
 
     # Test with both allocator and device
     with pytest.raises(ValueError, match="are incompatible"):
         allocate(
-            domain,
-            dtype,
+            domain=domain,
+            dtype=dtype,
            allocator=allocator,
             device=core_defs.Device(core_defs.DeviceType.CUDA, 0),
         )
 
     # Test with no device or allocator
     with pytest.raises(ValueError, match="No 'device' or 'allocator' specified"):
-        allocate(domain, dtype)
+        allocate(domain=domain, dtype=dtype)
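Note: the test changes above suggest `allocate` now expects keyword arguments and no longer exposes a `.device` attribute on the returned buffer. A minimal usage sketch of the new calling convention, assembled from the calls exercised in this diff (assuming a CPU-only environment):

    import numpy as np

    import gt4py._core.definitions as core_defs
    import gt4py.next.common as common
    from gt4py.next._allocators import StandardCPUFieldBufferAllocator, allocate

    I = common.Dimension("I")
    domain = common.Domain(dims=(I,), ranges=(common.UnitRange(0, 4),))
    dtype = core_defs.dtype(np.float32)

    # keyword-only call; the old positional form allocate(domain, dtype, ...) is gone
    buf = allocate(domain=domain, dtype=dtype, allocator=StandardCPUFieldBufferAllocator())
    assert buf.shape == domain.shape
    assert buf.dtype == dtype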
diff --git a/tests/next_tests/unit_tests/test_constructors.py b/tests/next_tests/unit_tests/test_constructors.py
index 0998ab8eab..7e592a687d 100644
--- a/tests/next_tests/unit_tests/test_constructors.py
+++ b/tests/next_tests/unit_tests/test_constructors.py
@@ -6,12 +6,28 @@
 # Please, refer to the LICENSE file in the root directory.
 # SPDX-License-Identifier: BSD-3-Clause
 
+import copy
+from types import ModuleType
+from typing import Any
+
 import numpy as np
+
+
+try:
+    import cupy as cp
+except ImportError:
+    cp = None
+
+try:
+    import jax.numpy as jnp
+except ImportError:
+    jnp = None
+
 import pytest
 
 from gt4py import next as gtx
 from gt4py._core import definitions as core_defs
-from gt4py.next import allocators as next_allocators, common
+from gt4py.next import _allocators as next_allocators, common
 
 
 I = gtx.Dimension("I")
@@ -21,16 +37,49 @@
 sizes = {I: 10, J: 10, K: 10}
 
 
-# TODO: parametrize with gpu backend and compare with cupy array
-@pytest.mark.parametrize(
-    "allocator, device",
-    [
-        [next_allocators.StandardCPUFieldBufferAllocator(), None],
-        [None, core_defs.Device(core_defs.DeviceType.CPU, 0)],
-    ],
-)
-def test_empty(allocator, device):
-    ref = np.empty([sizes[I], sizes[J]]).astype(gtx.float32)
+def _pretty_print(val):
+    if val is None:
+        return "None"
+    if isinstance(val, ModuleType):
+        return val.__name__
+    return val.__class__.__name__
+
+
+def _pretty_print_allocator_device_namespace(val: tuple[Any, Any, Any]):
+    return f"allocator={_pretty_print(val[0])}-device={_pretty_print(val[1])}-ref_namespace={_pretty_print(val[2])}"
+
+
+def allocator_device_refnamespace_params():
+    for v in [
+        [next_allocators.StandardCPUFieldBufferAllocator(), None, np],
+        [None, core_defs.Device(core_defs.DeviceType.CPU, 0), np],
+        [np, None, np],
+    ]:
+        yield pytest.param(
+            v,
+            id=_pretty_print_allocator_device_namespace(v),
+        )
+    for v in [
+        [next_allocators.StandardGPUFieldBufferAllocator(), None, cp],
+        [None, core_defs.Device(core_defs.DeviceType.CUDA, 0), cp],  # TODO: CUDA or HIP...
+    ]:
+        yield pytest.param(
+            v, id=_pretty_print_allocator_device_namespace(v), marks=pytest.mark.requires_gpu
+        )
+    for v in [[jnp, None, jnp]]:
+        yield pytest.param(
+            v, id=_pretty_print_allocator_device_namespace(v), marks=pytest.mark.requires_jax
+        )
+
+
+@pytest.fixture(params=allocator_device_refnamespace_params())
+def allocator_device_refnamespace(request):
+    return request.param
+
+
+def test_empty(allocator_device_refnamespace):
+    allocator, device, xp = allocator_device_refnamespace
+    ref = xp.empty([sizes[I], sizes[J]]).astype(gtx.float32)
     a = gtx.empty(
         domain={I: range(sizes[I]), J: range(sizes[J])},
         dtype=core_defs.dtype(np.float32),
@@ -40,15 +89,8 @@ def test_empty(allocator, device):
     assert a.shape == ref.shape
 
 
-# TODO: parametrize with gpu backend and compare with cupy array
-@pytest.mark.parametrize(
-    "allocator, device",
-    [
-        [next_allocators.StandardCPUFieldBufferAllocator(), None],
-        [None, core_defs.Device(core_defs.DeviceType.CPU, 0)],
-    ],
-)
-def test_zeros(allocator, device):
+def test_zeros(allocator_device_refnamespace):
+    allocator, device, xp = allocator_device_refnamespace
     a = gtx.zeros(
         common.Domain(
             dims=(I, J), ranges=(common.UnitRange(0, sizes[I]), common.UnitRange(0, sizes[J]))
@@ -57,40 +99,26 @@
         allocator=allocator,
         device=device,
     )
-    ref = np.zeros((sizes[I], sizes[J])).astype(gtx.float32)
+    ref = xp.zeros((sizes[I], sizes[J])).astype(gtx.float32)
 
-    assert np.array_equal(a.ndarray, ref)
+    assert xp.array_equal(a.ndarray, ref)
 
 
-# TODO: parametrize with gpu backend and compare with cupy array
-@pytest.mark.parametrize(
-    "allocator, device",
-    [
-        [next_allocators.StandardCPUFieldBufferAllocator(), None],
-        [None, core_defs.Device(core_defs.DeviceType.CPU, 0)],
-    ],
-)
-def test_ones(allocator, device):
+def test_ones(allocator_device_refnamespace):
+    allocator, device, xp = allocator_device_refnamespace
     a = gtx.ones(
         common.Domain(dims=(I, J), ranges=(common.UnitRange(0, 10), common.UnitRange(0, 10))),
         dtype=core_defs.dtype(np.float32),
         allocator=allocator,
         device=device,
     )
-    ref = np.ones((sizes[I], sizes[J])).astype(gtx.float32)
+    ref = xp.ones((sizes[I], sizes[J])).astype(gtx.float32)
 
-    assert np.array_equal(a.ndarray, ref)
+    assert xp.array_equal(a.ndarray, ref)
 
 
-# TODO: parametrize with gpu backend and compare with cupy array
-@pytest.mark.parametrize(
-    "allocator, device",
-    [
-        [next_allocators.StandardCPUFieldBufferAllocator(), None],
-        [None, core_defs.Device(core_defs.DeviceType.CPU, 0)],
-    ],
-)
-def test_full(allocator, device):
+def test_full(allocator_device_refnamespace):
+    allocator, device, xp = allocator_device_refnamespace
     a = gtx.full(
         domain={I: range(sizes[I] - 2), J: (sizes[J] - 2)},
         fill_value=42.0,
@@ -98,9 +126,21 @@
         allocator=allocator,
         device=device,
     )
-    ref = np.full((sizes[I] - 2, sizes[J] - 2), 42.0).astype(gtx.float32)
+    ref = xp.full((sizes[I] - 2, sizes[J] - 2), 42.0).astype(gtx.float32)
+
+    assert xp.array_equal(a.ndarray, ref)
 
-    assert np.array_equal(a.ndarray, ref)
+
+def test_copy():
+    """Ensure data AND layout are preserved."""
+
+    testee = gtx.as_field([I, J], np.random.rand(sizes[I], sizes[J]))
+    result = copy.deepcopy(testee)
+    assert testee.ndarray.strides == result.ndarray.strides
+    assert (
+        result.ndarray.strides != result.ndarray.copy().strides
+    )  # sanity check for this test: make sure our allocator doesn't produce C-contiguous strides
+    assert np.array_equal(testee.ndarray, result.ndarray)
 
 
 def test_as_field():
@@ -144,9 +184,6 @@ def test_field_wrong_origin():
     with pytest.raises(ValueError, match=(r"Origin keys {'J'} not in domain")):
         gtx.as_field([I], np.random.rand(sizes[I]).astype(gtx.float32), origin={"J": 0})
 
-    with pytest.raises(ValueError, match=(r"Cannot specify origin for domain I")):
-        gtx.as_field("I", np.random.rand(sizes[J]).astype(gtx.float32), origin={"J": 0})
-
 
 @pytest.mark.xfail(reason="aligned_index not supported yet")
 def test_aligned_index():
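Note: the new `allocator_device_refnamespace` fixture yields `(allocator, device, reference namespace)` triples so each constructor test compares against the matching array library (numpy, cupy, or jax.numpy). A stripped-down, standalone sketch of the pattern, with hypothetical names, showing how `pytest.param(..., id=..., marks=...)` gives each triple a readable test id:

    import numpy as np
    import pytest


    def _params():
        # one CPU triple; real code adds GPU/jax triples with skip markers
        yield pytest.param((None, None, np), id="allocator=None-device=None-ref_namespace=numpy")


    @pytest.fixture(params=_params())
    def triple(request):
        return request.param


    def test_example(triple):
        _, _, xp = triple
        assert xp.array_equal(xp.zeros(3), xp.zeros(3))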
diff --git a/tests/storage_tests/unit_tests/test_interface.py b/tests/storage_tests/unit_tests/test_interface.py
index ba7bc2aaef..038b09aa4c 100644
--- a/tests/storage_tests/unit_tests/test_interface.py
+++ b/tests/storage_tests/unit_tests/test_interface.py
@@ -121,16 +121,7 @@ def test_allocate_cpu(param_dict):
     shape = param_dict["shape"]
     layout_map = param_dict["layout_order"]
 
-    raw_buffer, field = allocate_cpu(shape, layout_map, dtype, alignment_bytes, aligned_index)
-
-    # check that memory of field is contained in raw_buffer
-    np_byte_bounds = (
-        np.byte_bounds if hasattr(np, "byte_bounds") else np.lib.array_utils.byte_bounds
-    )
-    assert (
-        np_byte_bounds(field)[0] >= np_byte_bounds(raw_buffer)[0]
-        and np_byte_bounds(field)[1] <= np_byte_bounds(raw_buffer)[1]
-    )
+    field = allocate_cpu(shape, layout_map, dtype, alignment_bytes, aligned_index)
 
     # check if the first compute-domain point in the last dimension is aligned for 100 random "columns"
     import random
@@ -185,17 +176,7 @@ def test_allocate_gpu(param_dict):
     aligned_index = param_dict["aligned_index"]
     shape = param_dict["shape"]
     layout_map = param_dict["layout_order"]
-    device_raw_buffer, device_field = allocate_gpu(
-        shape, layout_map, dtype, alignment_bytes, aligned_index
-    )
-
-    # Would like to check device_field.base against device_raw_buffer but
-    # as_strided returns an ndarray where device_field.base is set to None.
-    # Instead, check that the memory of field is contained in raws buffer
-    assert (
-        device_field.data.ptr >= device_raw_buffer.data.ptr
-        and device_field[-1:].data.ptr <= device_raw_buffer[-1:].data.ptr
-    )
+    device_field = allocate_gpu(shape, layout_map, dtype, alignment_bytes, aligned_index)
 
     # check if the first compute-domain point in the last dimension is aligned for 100 random "columns"
     import random
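Note: with `allocate_cpu`/`allocate_gpu` no longer returning the raw buffer, the byte-bounds containment checks are dropped and only the alignment check of the first compute-domain point remains. A hedged sketch of what such an alignment check computes, using a plain numpy array as a stand-in for the allocated field (the `is_aligned` helper is ours; a generic `np.empty` buffer is not guaranteed to satisfy it, hence no assert):

    import random

    import numpy as np


    def is_aligned(field: np.ndarray, index: tuple, alignment_bytes: int) -> bool:
        # byte address of field[index], from the base pointer plus strides
        addr = field.ctypes.data + sum(i * s for i, s in zip(index, field.strides))
        return addr % alignment_bytes == 0


    field = np.empty((8, 8, 16))  # stand-in; the real test uses allocate_cpu(...)
    for _ in range(100):
        # random "column": random leading indices, aligned point in the last dimension
        column = tuple(random.randint(0, s - 1) for s in field.shape[:-1]) + (0,)
        print(is_aligned(field, column, 64))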