Skip to content

Commit

Permalink
Revert "[Datasets] Fix ndarray representation of single-element ragge…
Browse files Browse the repository at this point in the history
…d tensor slices. (ray-project#30514)" (ray-project#30709)

This reverts commit 36aebcb.

Reverts ray-project#30514

This is causing linux://python/ray/data:tests/test_transform_pyarrow to fail.
  • Loading branch information
stephanie-wang authored Nov 28, 2022
1 parent 79783bd commit 579770a
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 45 deletions.
9 changes: 1 addition & 8 deletions python/ray/air/tests/test_tensor_extension.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,14 +178,7 @@ def test_arrow_variable_shaped_tensor_array_slice():
slice(0, 3),
]
for slice_ in slices:
ata_slice = ata[slice_]
ata_slice_np = ata_slice.to_numpy()
arr_slice = arr[slice_]
# Check for equivalent dtypes and shapes.
assert ata_slice_np.dtype == arr_slice.dtype
assert ata_slice_np.shape == arr_slice.shape
# Iteration over tensor array slices triggers NumPy conversion.
for o, e in zip(ata_slice, arr_slice):
for o, e in zip(ata[slice_], arr[slice_]):
np.testing.assert_array_equal(o, e)


Expand Down
12 changes: 4 additions & 8 deletions python/ray/air/util/tensor_extensions/arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@
import numpy as np
import pyarrow as pa

from ray.air.util.tensor_extensions.utils import (
_is_ndarray_variable_shaped_tensor,
_create_strict_ragged_ndarray,
)
from ray.air.util.tensor_extensions.utils import _is_ndarray_variable_shaped_tensor
from ray._private.utils import _get_pyarrow_version
from ray.util.annotations import PublicAPI

Expand Down Expand Up @@ -667,11 +664,10 @@ def from_numpy(
np_data_buffer = np.concatenate(raveled)
dtype = np_data_buffer.dtype
if dtype.type is np.object_:
types_and_shapes = [(f"dtype={a.dtype}", f"shape={a.shape}") for a in arr]
raise ValueError(
"ArrowVariableShapedTensorArray only supports heterogeneous-shaped "
"tensor collections, not arbitrarily nested ragged tensors. Got "
f"arrays: {types_and_shapes}"
"tensor collections, not arbitrarily nested ragged tensors. Got: "
f"{arr}"
)
pa_dtype = pa.from_numpy_dtype(dtype)
if dtype.type is np.bool_:
Expand Down Expand Up @@ -724,7 +720,7 @@ def _to_numpy(self, index: Optional[int] = None, zero_copy_only: bool = False):
arrs = [self._to_numpy(i, zero_copy_only) for i in range(len(self))]
# Return ragged NumPy ndarray in the ndarray of ndarray pointers
# representation.
return _create_strict_ragged_ndarray(arrs)
return np.array(arrs, dtype=object)
data = self.storage.field("data")
shapes = self.storage.field("shape")
value_type = data.type.value_type
Expand Down
16 changes: 9 additions & 7 deletions python/ray/air/util/tensor_extensions/pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,7 @@
from pandas.core.indexers import check_array_indexer, validate_indices
from pandas.io.formats.format import ExtensionArrayFormatter

from ray.air.util.tensor_extensions.utils import (
_is_ndarray_variable_shaped_tensor,
_create_strict_ragged_ndarray,
)
from ray.air.util.tensor_extensions.utils import _is_ndarray_variable_shaped_tensor
from ray.util.annotations import PublicAPI

try:
Expand Down Expand Up @@ -1425,7 +1422,9 @@ def _is_boolean(self):


def _create_possibly_ragged_ndarray(
values: Union[np.ndarray, ABCSeries, Sequence[np.ndarray]]
values: Union[
np.ndarray, ABCSeries, Sequence[Union[np.ndarray, TensorArrayElement]]
]
) -> np.ndarray:
"""
Create a possibly ragged ndarray.
Expand All @@ -1439,8 +1438,11 @@ def _create_possibly_ragged_ndarray(
return np.array(values, copy=False)
except ValueError as e:
if "could not broadcast input array from shape" in str(e):
# Fall back to strictly creating a ragged ndarray.
return _create_strict_ragged_ndarray(values)
# Create an empty object-dtyped 1D array.
arr = np.empty(len(values), dtype=object)
# Try to fill the 1D array of pointers with the (ragged) tensors.
arr[:] = list(values)
return arr
else:
# Re-raise original error if the failure wasn't a broadcast error.
raise e from None
Expand Down
22 changes: 0 additions & 22 deletions python/ray/air/util/tensor_extensions/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
from typing import Any

import numpy as np


Expand All @@ -22,23 +20,3 @@ def _is_ndarray_variable_shaped_tensor(arr: np.ndarray) -> bool:
if a.shape != shape:
return True
return True


def _create_strict_ragged_ndarray(values: Any) -> np.ndarray:
"""Create a ragged ndarray; the representation will be ragged (1D array of
subndarray pointers) even if it's possible to represent it as a non-ragged ndarray.
"""
# Use the create-empty-and-fill method. This avoids the following pitfalls of the
# np.array constructor - np.array(values, dtype=object):
# 1. It will fail to construct an ndarray if the first element dimension is
# uniform, e.g. for imagery whose first element dimension is the channel.
# 2. It will construct the wrong representation for a single-row column (i.e. unit
# outer dimension). Namely, it will consolidate it into a single multi-dimensional
# ndarray rather than a 1D array of subndarray pointers, resulting in the single
# row not being well-typed (having object dtype).

# Create an empty object-dtyped 1D array.
arr = np.empty(len(values), dtype=object)
# Try to fill the 1D array of pointers with the (ragged) tensors.
arr[:] = list(values)
return arr

0 comments on commit 579770a

Please sign in to comment.