Skip to content

Commit

Permalink
refactor: change Content._getitem_range to use explicit indices (#2220)
Browse files Browse the repository at this point in the history

* wip: use `unknown_length`

* refactor: remove last vestiges of `XXX_shape_item`

* refactor: move `unknown_length` to new submodule

* fix: ensure forms with `unknown_length` serialise to JSON

* chore: appease pylint

* fix: revert `length=unset` change to `RecordArray.__init__`

* refactor: use defined `str()` of `unknown_length`

* feat!: change API for `_getitem_range`

* docs: improve docstring

* refactor: remove unused code

* refactor: add type hints

* fix: support `content` with unknown lengths

* refactor: make type relationship explicit

* fix: properly normalise slice

* refactor: add `IndexType` distinction

* wip: fix slice normalisation

* fix: proper slice handling

* refactor: use US spelling

* refactor: move slice expansion to nplike

* fix: ufuncs that return tuples

* test: add tests for slicing

* fix: support slice length

* fix: bugs in length detection

* refactor: use `IndexType` instead of `SupportsIndex`
  • Loading branch information
agoose77 authored Feb 9, 2023
1 parent cb19001 commit 69df2f2
Show file tree
Hide file tree
Showing 20 changed files with 364 additions and 191 deletions.
2 changes: 1 addition & 1 deletion src/awkward/_connect/numba/arrayview.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ def __init__(self, type, behavior, lookup, pos, start, stop, fields):

def toarray(self):
    """Materialize this view as a wrapped array.

    Rebuilds the layout from the view's type, lookup, position and fields,
    restricts it to the range between ``self.start`` and ``self.stop``, and
    wraps the result with the view's behavior.
    """
    layout = self.type.tolayout(self.lookup, self.pos, self.fields)
    # `_getitem_range` takes explicit start/stop indices rather than a
    # `slice` object, so the bounds are passed through directly.
    sliced = layout._getitem_range(self.start, self.stop)
    return ak._util.wrap(sliced, self.behavior)


Expand Down
34 changes: 24 additions & 10 deletions src/awkward/_nplikes/array_module.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
from __future__ import annotations

import math

import numpy

import awkward as ak
from awkward._nplikes.numpylike import (
ArrayLike,
NumpyLike,
NumpyMetadata,
)
from awkward._nplikes.numpylike import ArrayLike, IndexType, NumpyLike, NumpyMetadata
from awkward._nplikes.shape import ShapeItem, unknown_length
from awkward.typing import Final, Literal, SupportsInt
from awkward.typing import Final, Literal

np = NumpyMetadata.instance()

Expand Down Expand Up @@ -134,21 +132,37 @@ def reshape(
else:
return result

def shape_item_as_index(self, x1: ShapeItem) -> int:
    """Convert a shape item into an index value.

    Array-module nplikes cannot represent unknown lengths, so
    ``unknown_length`` is rejected and plain integers pass through unchanged.

    Args:
        x1: shape item to convert.

    Returns:
        ``x1`` itself when it is an ``int``.

    Raises:
        The error built by ``ak._errors.wrap_error`` around a ``TypeError``
        when ``x1`` is ``unknown_length`` or is not an ``int``.
    """
    if x1 is unknown_length:
        raise ak._errors.wrap_error(
            TypeError("array module nplikes do not support unknown lengths")
        )
    elif isinstance(x1, int):
        return x1
    else:
        raise ak._errors.wrap_error(
            TypeError(f"expected None or int type, received {x1}")
        )

def index_as_shape_item(self, x1: IndexType) -> int:
    """Convert an index value into a shape item by coercing it with ``int``.

    Args:
        x1: index value; anything accepted by ``int()``.

    Returns:
        The index as a built-in ``int``.
    """
    return int(x1)

def derive_slice_for_length(
    self, slice_: slice, length: ShapeItem
) -> tuple[IndexType, IndexType, IndexType, ShapeItem]:
    """Resolve a slice against a layout of the given length.

    Args:
        slice_: normalized slice object
        length: length of layout

    Returns:
        A tuple ``(start, stop, step, slice_length)``. The first three items
        are the result of ``slice_.indices(length)``, i.e. indices normalized
        to fit the length of the array and suitable for `_getitem_range`
        (if step == 1). ``slice_length`` is the number of items the slice
        selects, and is never negative.
    """
    start, stop, step = slice_.indices(length)
    # Count the selected items with integer arithmetic. Taking the ceiling of
    # the float quotient `(stop - start) / step` yields a negative length for
    # empty slices (e.g. `slice(5, 2)`) and is inexact for very large bounds.
    slice_length = len(range(start, stop, step))
    return start, stop, step, slice_length

def nonzero(self, x: ArrayLike) -> tuple[ArrayLike, ...]:
    """Delegate to the backing array module's ``nonzero`` and return its result."""
    index_arrays = self._module.nonzero(x)
    return index_arrays

Expand Down Expand Up @@ -209,7 +223,7 @@ def unpackbits(
) -> ArrayLike:
return self._module.unpackbits(x, axis=axis, count=count, bitorder=bitorder)

def broadcast_to(self, x: ArrayLike, shape: tuple[ShapeItem, ...]) -> ArrayLike:
    """Delegate to the backing array module's ``broadcast_to``.

    Args:
        x: array to broadcast.
        shape: target shape, passed through unchanged.
    """
    return self._module.broadcast_to(x, shape)

############################ ufuncs
Expand Down
24 changes: 20 additions & 4 deletions src/awkward/_nplikes/numpylike.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,19 @@

import numpy

from awkward._nplikes.shape import ShapeItem
from awkward._nplikes.shape import ShapeItem, unknown_length
from awkward._singleton import Singleton
from awkward.typing import (
Literal,
Protocol,
Self,
SupportsIndex,
# noqa: F401
TypeAlias,
overload,
)

IndexType: TypeAlias = "int | ArrayLike"


class ArrayLike(Protocol):
@property
Expand Down Expand Up @@ -294,12 +296,26 @@ def broadcast_arrays(self, *arrays: ArrayLike) -> list[ArrayLike]:
def broadcast_to(self, x: ArrayLike, shape: tuple[ShapeItem, ...]) -> ArrayLike:
...

@overload
def shape_item_as_index(self, x1: int) -> int:
...

@overload
def shape_item_as_index(self, x1: type[unknown_length]) -> ArrayLike:
...

@abstractmethod
def shape_item_as_index(self, x1):
...

@abstractmethod
def shape_item_as_index(self, x1: ShapeItem):
def index_as_shape_item(self, x1: IndexType) -> ShapeItem:
...

@abstractmethod
def index_as_shape_item(self, x1) -> ShapeItem:
def derive_slice_for_length(
self, slice_: slice, length: ShapeItem
) -> tuple[IndexType, IndexType, IndexType, ShapeItem]:
...

@abstractmethod
Expand Down
8 changes: 2 additions & 6 deletions src/awkward/_nplikes/shape.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
from __future__ import annotations

from awkward.typing import (
Self,
SupportsInt, # noqa: F401
TypeAlias,
)
from awkward.typing import Self, TypeAlias

ShapeItem: TypeAlias = "SupportsInt | _UnknownLength"
ShapeItem: TypeAlias = "int | _UnknownLength"


class _UnknownLength:
Expand Down
137 changes: 95 additions & 42 deletions src/awkward/_nplikes/typetracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,10 @@

import awkward as ak
from awkward._errors import wrap_error
from awkward._nplikes.numpylike import ArrayLike, NumpyLike, NumpyMetadata
from awkward._nplikes.numpylike import ArrayLike, IndexType, NumpyLike, NumpyMetadata
from awkward._nplikes.shape import ShapeItem, unknown_length
from awkward._util import NDArrayOperatorsMixin, is_non_string_like_sequence
from awkward.typing import (
Any,
Final,
Literal,
Self,
SupportsIndex,
SupportsInt,
TypeVar,
)
from awkward.typing import Any, Final, Literal, Self, SupportsIndex, TypeVar

np = NumpyMetadata.instance()

Expand Down Expand Up @@ -204,17 +196,6 @@ def typetracer_with_report(form, forget_length=True):
return layout, report


def _length_after_slice(slice, original_length):
start, stop, step = slice.indices(original_length)
assert step != 0

if (step > 0 and stop - start > 0) or (step < 0 and stop - start < 0):
d, m = divmod(abs(start - stop), abs(step))
return d + (1 if m != 0 else 0)
else:
return 0


class TypeTracerArray(NDArrayOperatorsMixin, ArrayLike):
_dtype: numpy.dtype
_shape: tuple[ShapeItem, ...]
Expand Down Expand Up @@ -389,17 +370,6 @@ def __len__(self):
)
)

def _resolve_slice_length(self, length, slice_):
    """Return the number of items `slice_` selects from a dimension.

    Args:
        length: length of the dimension, or ``unknown_length``.
        slice_: slice applied to the dimension.

    Returns:
        ``unknown_length`` when the dimension length or any of the slice's
        start/stop/step is unknown; otherwise the exact, non-negative item
        count.
    """
    if length is unknown_length:
        return unknown_length
    elif any(
        is_unknown_scalar(x) for x in (slice_.start, slice_.stop, slice_.step)
    ):
        return unknown_length
    else:
        start, stop, step = slice_.indices(length)
        # `(stop - start) // step` under-counts when the span is not a
        # multiple of the step and goes negative for empty slices; counting
        # the equivalent range is exact and never exceeds `length`.
        return len(range(start, stop, step))

def __getitem__(
self,
key: SupportsIndex
Expand Down Expand Up @@ -493,7 +463,12 @@ def __getitem__(
previous_item_is_basic = False
# Slice
elif isinstance(item, slice):
slice_length = self._resolve_slice_length(dimension_length, item)
(
start,
stop,
step,
slice_length,
) = self.nplike.derive_slice_for_length(item, dimension_length)
result_shape_parts.append((slice_length,))
previous_item_is_basic = True
# Integer
Expand Down Expand Up @@ -597,7 +572,15 @@ def _apply_ufunc(self, ufunc, *inputs):
placeholders = [numpy.empty(0, x.dtype) for x in broadcasted]

result = ufunc(*placeholders)
return TypeTracerArray._new(result.dtype, shape=broadcasted[0].shape)
if isinstance(result, numpy.ndarray):
return TypeTracerArray._new(result.dtype, shape=broadcasted[0].shape)
elif isinstance(result, tuple):
return (
TypeTracerArray._new(x.dtype, shape=b.shape)
for x, b in zip(result, broadcasted)
)
else:
raise wrap_error(TypeError)

def to_rectilinear(self, array, *args, **kwargs):
try_touch_shape(array)
Expand Down Expand Up @@ -811,25 +794,95 @@ def promote_scalar(self, obj) -> TypeTracerArray:
else:
raise wrap_error(TypeError(f"expected scalar type, received {obj}"))

def shape_item_as_index(self, x1: ShapeItem) -> IndexType:
    """Convert a shape item into an index value.

    Args:
        x1: shape item to convert.

    Returns:
        A new zero-dimensional ``int64`` ``TypeTracerArray`` when ``x1`` is
        ``unknown_length``; ``x1`` itself when it is an ``int``.

    Raises:
        The error built by ``wrap_error`` around a ``TypeError`` for any
        other input.
    """
    if x1 is unknown_length:
        return TypeTracerArray._new(np.int64, shape=())
    elif isinstance(x1, int):
        return x1
    else:
        raise wrap_error(TypeError(f"expected None or int type, received {x1}"))

def index_as_shape_item(self, x1: IndexType) -> ShapeItem:
    """Convert an index value into a shape item.

    Args:
        x1: index value to convert.

    Returns:
        ``unknown_length`` when ``x1`` is an unknown scalar with an integer
        dtype; otherwise ``int(x1)``.
    """
    if is_unknown_scalar(x1) and np.issubdtype(x1.dtype, np.integer):
        return unknown_length
    else:
        return int(x1)

def broadcast_shapes(
self, *shapes: tuple[SupportsInt, ...]
) -> tuple[SupportsInt, ...]:
def derive_slice_for_length(
    self, slice_: slice, length: ShapeItem
) -> tuple[IndexType, IndexType, IndexType, ShapeItem]:
    """Resolve a slice against a layout whose length may be unknown.

    Args:
        slice_: normalized slice object
        length: length of layout

    Returns:
        A tuple ``(start, stop, step, slice_length)`` of indices into a
        layout, suitable for `_getitem_range` (if step == 1).

        If ``length`` is ``unknown_length``, ``start`` and ``stop`` are the
        unknown index produced by ``shape_item_as_index``, ``step`` is
        returned exactly as given (possibly ``None``), and ``slice_length``
        is ``unknown_length``.

        Otherwise ``None`` entries are filled in, negative indices are offset
        by the length, and known bounds are clamped the way
        ``slice.indices`` clamps them. Unknown ``start``/``stop`` values are
        passed through untouched, and ``slice_length`` is unknown whenever
        the arithmetic on them is.
    """
    start = slice_.start
    stop = slice_.stop
    step = slice_.step

    # Unknown lengths mean that the slice index is unknown
    length_scalar = self.shape_item_as_index(length)
    if length is unknown_length:
        return length_scalar, length_scalar, step, length

    # Normalize `None` values
    if step is None:
        step = 1

    step_is_unknown = is_unknown_scalar(step)

    # Bounds that explicit indices are clamped into. For a negative step the
    # valid range is shifted down by one, so that `start` can address the
    # last item and `stop` can sit one before the first item. An unknown
    # step has no known sign; the forward bounds are used in that case.
    if step_is_unknown or step > 0:
        lower, upper = 0, length_scalar
    else:
        lower, upper = -1, length_scalar - 1

    if start is None:
        # `step` is unknown → `start` is unknown
        if step_is_unknown:
            start = step
        elif step < 0:
            start = upper
        else:
            start = lower
    elif not is_unknown_scalar(start):
        # Normalize negative integers
        if start < 0:
            start = start + length_scalar
        # Clamp values into length bounds
        start = min(max(start, lower), upper)

    if stop is None:
        # `step` is unknown → `stop` is unknown
        if step_is_unknown:
            stop = step
        elif step < 0:
            stop = lower
        else:
            stop = upper
    elif not is_unknown_scalar(stop):
        # Normalize negative integers
        if stop < 0:
            stop = stop + length_scalar
        # Clamp values into length bounds
        stop = min(max(stop, lower), upper)

    # Compute the length of the slice for downstream use
    slice_length, remainder = divmod((stop - start), step)
    if not is_unknown_scalar(slice_length):
        # Take ceiling of division
        if remainder != 0:
            slice_length += 1

        # A slice that moves away from `stop` selects nothing
        slice_length = max(0, slice_length)

    return start, stop, step, self.index_as_shape_item(slice_length)

def broadcast_shapes(self, *shapes: tuple[ShapeItem, ...]) -> tuple[ShapeItem, ...]:
ndim = max([len(s) for s in shapes], default=0)
result: list[SupportsInt] = [1] * ndim
result: list[ShapeItem] = [1] * ndim

for shape in shapes:
# Right broadcasting
Expand Down Expand Up @@ -882,7 +935,7 @@ def broadcast_arrays(self, *arrays: ArrayLike) -> list[TypeTracerArray]:
return [TypeTracerArray._new(x.dtype, shape=shape) for x in all_arrays]

def broadcast_to(
    self, x: ArrayLike, shape: tuple[ShapeItem, ...]
) -> TypeTracerArray:
    """Placeholder: always raises a wrapped ``NotImplementedError``."""
    raise ak._errors.wrap_error(NotImplementedError)

Expand Down Expand Up @@ -943,7 +996,7 @@ def nonzero(self, x: ArrayLike) -> tuple[TypeTracerArray, ...]:

def unique_values(self, x: ArrayLike) -> TypeTracerArray:
    """Return a one-dimensional placeholder for the unique values of ``x``.

    The data of ``x`` is marked as touched. The result keeps the dtype of
    ``x``; its single dimension has unknown length.
    """
    try_touch_data(x)
    # Shape items are `int | _UnknownLength`, so the unknown dimension is
    # spelled with the `unknown_length` sentinel rather than `None`.
    return TypeTracerArray._new(x.dtype, shape=(unknown_length,))

def concat(self, arrays, *, axis: int | None = 0) -> TypeTracerArray:
if axis is None:
Expand Down
Loading

0 comments on commit 69df2f2

Please sign in to comment.