Skip to content

Commit

Permalink
Merge pull request #375 from rtosholdings/latest-v1.17.0
Browse files Browse the repository at this point in the history
v1.17.0
  • Loading branch information
rtosholdings-bot authored Apr 23, 2024
2 parents 4fa7152 + de3e6a3 commit 032583b
Show file tree
Hide file tree
Showing 16 changed files with 808 additions and 381 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
![](https://riptable.readthedocs.io/en/stable/_static/riptable_logo.PNG)

An open-source, 64-bit Python analytics engine for high-performance data analysis with
multithreading support. Riptable supports Python 3.10 through 3.11 on 64-bit Linux and
multithreading support. Riptable supports Python 3.10 through 3.12 on 64-bit Linux and
Windows.

Similar to Pandas and based on NumPy, Riptable optimizes analyzing large volumes of data
Expand Down
2 changes: 1 addition & 1 deletion conda_recipe/conda_build_config.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
python:
- 3.11
- 3.12
2 changes: 1 addition & 1 deletion conda_recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ requirements:
- pandas >=1.0,<3.0
- python
- python-dateutil
- riptide_cpp >=1.17.0,<2 # run with any (compatible) version in this range
- riptide_cpp >=1.19.0,<2 # run with any (compatible) version in this range
- typing-extensions >=4.9.0

about:
Expand Down
71 changes: 62 additions & 9 deletions dev_tools/_docstring_config.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,65 @@
import riptable
import contextlib

# Standardized riptable configuration settings applied when doing docstring validation.

# Standardize on these display settings when executing examples
riptable.Display.FORCE_REPR = True # Don't auto-detect console dimensions, just use CONSOLE_X/Y
riptable.Display.options.COL_MAX = 1_000_000 # display all Dataset columns (COL_ALL is incomplete)
riptable.Display.options.E_MAX = 100_000_000 # render up to 100MM before using scientific notation
riptable.Display.options.P_THRESHOLD = 0 # truncate small decimals, rather than scientific notation
riptable.Display.options.NUMBER_SEPARATOR = True # put commas in numbers
riptable.Display.options.HEAD_ROWS = 3 # number of leading rows shown in truncated output
riptable.Display.options.TAIL_ROWS = 3 # number of trailing rows shown in truncated output
def _setup_display_config():
    """Initialize display config settings.

    Any option that can be modified should be set here, even when set to its
    default value, so that a reset restores a fully known state.
    """
    # Don't auto-detect console dimensions; rely on CONSOLE_X/Y instead.
    riptable.Display.FORCE_REPR = True
    # All assignments below mutate the one shared options object.
    opts = riptable.Display.options
    opts.CONSOLE_X = 150
    opts.COL_MAX = 1_000_000  # display all Dataset columns (COL_ALL is incomplete)
    opts.E_MAX = 100_000_000  # render up to 100MM before switching to scientific notation
    opts.P_THRESHOLD = 0  # truncate small decimals rather than using scientific notation
    opts.NUMBER_SEPARATOR = True  # put commas in numbers
    opts.HEAD_ROWS = 3
    opts.TAIL_ROWS = 3
    opts.ROW_ALL = False
    opts.COL_ALL = False
    opts.MAX_STRING_WIDTH = 15


def setup_init_config():
    """Initialize every config group (currently just display settings).

    Intended to run a single time, at module import.
    """
    _setup_display_config()


class ScopedExampleSetup(contextlib.AbstractContextManager):
"""Context manager to clean up after any changes made during example setup."""

_CLEANUP_CALLBACKS = []

@staticmethod
def add_cleanup_callback(fn):
ScopedExampleSetup._CLEANUP_CALLBACKS.append(fn)

def __enter__(self) -> None:
return super().__enter__()

def __exit__(self, exc_type, exc_value, traceback) -> bool | None:
callbacks = ScopedExampleSetup._CLEANUP_CALLBACKS
ScopedExampleSetup._CLEANUP_CALLBACKS = []
for callback in callbacks:
callback()
return super().__exit__(exc_type, exc_value, traceback)


def setup_for_examples(*configs: str):
    """Apply the named config setups for an example.

    Configs are applied in order. Any modification made here must be undone by
    registering a cleanup task with ScopedExampleSetup.

    Parameters
    ----------
    configs : str
        Names of config setups to apply. Currently only ``"struct-display"``
        is recognized.

    Raises
    ------
    NotImplementedError
        If an unknown config name is passed.
    """
    for config in configs:
        if config == "struct-display":
            # Wider console and more visible head/tail rows for Struct examples.
            riptable.Display.options.CONSOLE_X = 120
            riptable.Display.options.HEAD_ROWS = 15
            riptable.Display.options.TAIL_ROWS = 15
            ScopedExampleSetup.add_cleanup_callback(_setup_display_config)  # reset all display configs.

        else:
            raise NotImplementedError(f"Unknown config, {config}")


# Initialize all config globally. Runs once, as a side effect of importing this module.
setup_init_config()
8 changes: 0 additions & 8 deletions dev_tools/docstring_xfails.txt
Original file line number Diff line number Diff line change
Expand Up @@ -273,18 +273,14 @@ riptable.rt_datetime.DateTimeCommon.tz_offset
riptable.rt_datetime.DateTimeCommon.year
riptable.rt_datetime.DateTimeCommon.yyyymmdd
riptable.rt_datetime.DateTimeNano.cut_time
riptable.rt_datetime.DateTimeNano.diff
riptable.rt_datetime.DateTimeNano.display_convert_func
riptable.rt_datetime.DateTimeNano.fill_invalid
riptable.rt_datetime.DateTimeNano.get_scalar
riptable.rt_datetime.DateTimeNano.hstack
riptable.rt_datetime.DateTimeNano.info
riptable.rt_datetime.DateTimeNano.isfinite
riptable.rt_datetime.DateTimeNano.isnotfinite
riptable.rt_datetime.DateTimeNano.newclassfrominstance
riptable.rt_datetime.DateTimeNano.random
riptable.rt_datetime.DateTimeNano.random_invalid
riptable.rt_datetime.DateTimeNano.resample
riptable.rt_datetime.DateTimeNano.shift
riptable.rt_datetime.DateTimeNano.to_arrow
riptable.rt_datetime.DateTimeNanoScalar
Expand Down Expand Up @@ -771,10 +767,6 @@ riptable.rt_str.FAString.substr_char_stop
riptable.rt_str.FAString.upper
riptable.rt_str.FAString.upper_inplace
riptable.rt_struct.Struct
riptable.rt_struct.Struct._A
riptable.rt_struct.Struct._G
riptable.rt_struct.Struct._H
riptable.rt_struct.Struct._V
riptable.rt_struct.Struct.all
riptable.rt_struct.Struct.any
riptable.rt_struct.Struct.apply_schema
Expand Down
67 changes: 36 additions & 31 deletions dev_tools/validate_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@
# With template backend, matplotlib plots nothing
matplotlib.use("template")

# Apply riptable docstring configuration for examples.
from _docstring_config import *
# Riptable docstring configuration setup for examples.
import _docstring_config


ERROR_MSGS = {
Expand Down Expand Up @@ -101,6 +101,10 @@
"rt": riptable,
}

ATTRIBS_CONTEXT = {
"setup_for_examples": _docstring_config.setup_for_examples,
}


def riptable_error(code, **kwargs):
"""
Expand Down Expand Up @@ -210,10 +214,11 @@ def examples_errors(self):
error_msgs = ""
current_dir = set(os.listdir())
tempdir = pathlib.Path("tempdir") # special reserved directory for temporary files; will be deleted per test
for test in finder.find(self.raw_doc, self.name, globs=IMPORT_CONTEXT):
for test in finder.find(self.raw_doc, self.name, globs=dict(**IMPORT_CONTEXT, **ATTRIBS_CONTEXT)):
tempdir.mkdir()
f = io.StringIO()
failed_examples, total_examples = runner.run(test, out=f.write)
with _docstring_config.ScopedExampleSetup():
failed_examples, total_examples = runner.run(test, out=f.write)
if failed_examples:
error_msgs += f.getvalue()
shutil.rmtree(tempdir)
Expand Down Expand Up @@ -319,11 +324,11 @@ def non_hyphenated_array_like(self):

def riptable_validate(
func_name: str,
errors: typing.Optional(list[str]) = None,
not_errors: typing.Optional(list[str]) = None,
flake8_errors: typing.Optional(list[str]) = None,
flake8_not_errors: typing.Optional(list[str]) = None,
xfails: typing.Optional(list[str]) = None,
errors: typing.Optional[list[str]] = None,
not_errors: typing.Optional[list[str]] = None,
flake8_errors: typing.Optional[list[str]] = None,
flake8_not_errors: typing.Optional[list[str]] = None,
xfails: typing.Optional[list[str]] = None,
verbose: bool = False,
):
"""
Expand Down Expand Up @@ -513,8 +518,8 @@ def is_default_excluded(fullname: str) -> bool:

def is_included(
fullname: str,
includes: typing.Optional(list[str]) = None,
excludes: typing.Optional(list[str]) = None,
includes: typing.Optional[list[str]] = None,
excludes: typing.Optional[list[str]] = None,
) -> bool:
"""Indicates whether the name should be included in validation."""

Expand All @@ -537,14 +542,14 @@ def validate_all(
match: str,
not_match: str = None,
names_from: str = NAMES_FROM_OPTS[0],
errors: typing.Optional(list[str]) = None,
not_errors: typing.Optional(list[str]) = None,
flake8_errors: typing.Optional(list[str]) = None,
flake8_not_errors: typing.Optional(list[str]) = None,
errors: typing.Optional[list[str]] = None,
not_errors: typing.Optional[list[str]] = None,
flake8_errors: typing.Optional[list[str]] = None,
flake8_not_errors: typing.Optional[list[str]] = None,
ignore_deprecated: bool = False,
includes: typing.Optional(list[str]) = None,
excludes: typing.Optional(list[str]) = None,
xfails: typing.Optional(list[str]) = None,
includes: typing.Optional[list[str]] = None,
excludes: typing.Optional[list[str]] = None,
xfails: typing.Optional[list[str]] = None,
verbose: int = 0,
) -> dict:
"""
Expand Down Expand Up @@ -650,17 +655,17 @@ def print_validate_all_results(
match: str,
not_match: str = None,
names_from: str = NAMES_FROM_OPTS[0],
errors: typing.Optional(list[str]) = None,
not_errors: typing.Optional(list[str]) = None,
flake8_errors: typing.Optional(list[str]) = None,
flake8_not_errors: typing.Optional(list[str]) = None,
errors: typing.Optional[list[str]] = None,
not_errors: typing.Optional[list[str]] = None,
flake8_errors: typing.Optional[list[str]] = None,
flake8_not_errors: typing.Optional[list[str]] = None,
out_format: str = OUT_FORMAT_OPTS[0],
ignore_deprecated: bool = False,
includes: typing.Optional(list[str]) = None,
excludes: typing.Optional(list[str]) = None,
xfails: typing.Optional(list[str]) = None,
includes: typing.Optional[list[str]] = None,
excludes: typing.Optional[list[str]] = None,
xfails: typing.Optional[list[str]] = None,
outfile: typing.IO = sys.stdout,
outfailsfile: typing.Optional(typing.IO) = None,
outfailsfile: typing.Optional[typing.IO] = None,
verbose: int = 0,
):
if out_format not in OUT_FORMAT_OPTS:
Expand Down Expand Up @@ -713,11 +718,11 @@ def print_validate_all_results(

def print_validate_one_results(
func_name: str,
errors: typing.Optional(list[str]) = None,
not_errors: typing.Optional(list[str]) = None,
flake8_errors: typing.Optional(list[str]) = None,
flake8_not_errors: typing.Optional(list[str]) = None,
xfails: typing.Optional(list[str]) = None,
errors: typing.Optional[list[str]] = None,
not_errors: typing.Optional[list[str]] = None,
flake8_errors: typing.Optional[list[str]] = None,
flake8_not_errors: typing.Optional[list[str]] = None,
xfails: typing.Optional[list[str]] = None,
outfile: typing.IO = sys.stdout,
verbose: int = 0,
):
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@ dependencies = [
"numpy >=1.23",
"pandas >=1.0,<3.0",
"python-dateutil",
"riptide_cpp >=1.17.0,<2",
"riptide_cpp >=1.19.0,<2",
"typing-extensions >=4.9.0",
]
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"License :: OSI Approved :: BSD License",
"Operating System :: OS Independent",
]
Expand Down
55 changes: 36 additions & 19 deletions riptable/rt_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2462,9 +2462,16 @@ def fill_backward(self, *args, limit: int = 0, fill_val=None, inplace: bool = Fa
# ------------------------------------------------------------
def isfiltered(self) -> FastArray:
"""
True where bin == 0.
Only applies to categoricals with base index 1, otherwise returns all False.
Different than invalid category.
Returns a boolean array of whether each category value is filtered.
For base-0 categoricals, return all False.
For base-1 categoricals, returns True where bin == 0.
For dict-based categoricals, returns True for values that don't exist in the provided mapping.
Returns
-------
out: FastArray
FastArray of bools.
See Also
--------
Expand All @@ -2473,8 +2480,11 @@ def isfiltered(self) -> FastArray:
"""
if self.base_index == 1:
return self._fa == 0
else:
return zeros(len(self), dtype=bool)

if self.base_index is None:
return self._fa.isin(self._grouping._enum.code_array, invert=True)

return zeros(len(self), dtype=bool)

# ------------------------------------------------------------
def set_name(self, name) -> Categorical:
Expand Down Expand Up @@ -4876,29 +4886,36 @@ def ilastkey(self):
@property
def unique_count(self):
"""
Number of unique values in the categorical.
It is necessary for every groupby operation.
Number of unique values in the :py:class:`~.rt_categorical.Categorical`.
Notes
-----
For categoricals in dict / enum mode that have generated their grouping object, this
will reflect the number of unique values that `occur` in the non-unique values. Empty
bins will not be included in the count.
This property is used for every groupby operation.
For :py:class:`~.rt_categorical.Categorical` objects constructed from dictionaries or
:py:class:`~enum.IntEnum` objects, the returned count includes unique invalid values from the
underlying array. Otherwise, invalid values are not counted.
See Also
--------
:py:meth:`.rt_categorical.Categorical.nunique` : Number of unique values in the :py:class:`~.rt_categorical.Categorical`.
:py:meth:`.rt_groupbyops.GroupByOps.count_uniques` : Count the unique values for each group.
"""
return self.grouping.unique_count

# ------------------------------------------------------------
def nunique(self):
"""
Number of unique values that occur in the Categorical.
Does not include invalids. Not the same as the length of possible uniques.
Number of unique values in the :py:class:`~.rt_categorical.Categorical`.
Not the same as the length of possible uniques.
Categoricals based on dictionary mapping / enum will return unique count including all possibly
invalid values from underlying array.
For :py:class:`~.rt_categorical.Categorical` objects constructed from dictionaries or
:py:class:`~enum.IntEnum` objects, the returned count includes unique invalid values from the
underlying array. Otherwise, invalid values are not counted.
See Also
--------
Categorical.unique_count
:py:attr:`.rt_categorical.Categorical.unique_count` : Number of unique values in the :py:class:`~.rt_categorical.Categorical`.
:py:meth:`.rt_groupbyops.GroupByOps.count_uniques` : Count the unique values for each group.
"""
un = unique(self._fa, sorted=False)
count = len(un)
Expand Down Expand Up @@ -6330,7 +6347,7 @@ def __del__(self):
# python has trouble deleting objects with circular references
if hasattr(self, "_categories_wrap"):
del self._categories_wrap
self._grouping = None
del self._grouping

# ------------------------------------------------------------
@classmethod
Expand Down Expand Up @@ -6504,7 +6521,7 @@ def column_name(arg):

return value

if np.isscalar(example_res) & ~transform: # userfunc is a scalar function
if np.isscalar(example_res) and not transform: # userfunc is a scalar function
res = self._scalar_compiled_numba_apply(iGroup, iFirstGroup, nCountGroup, userfunc, args)

res_ds = TypeRegister.Dataset(self.gb_keychain.gbkeys)
Expand Down
Loading

0 comments on commit 032583b

Please sign in to comment.