Skip to content

Commit

Permalink
Merge pull request #375 from rtosholdings/latest-v1.17.0
Browse files Browse the repository at this point in the history
v1.17.0
  • Loading branch information
rtosholdings-bot authored Apr 23, 2024
2 parents 4fa7152 + de3e6a3 commit 032583b
Show file tree
Hide file tree
Showing 16 changed files with 808 additions and 381 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
![](https://riptable.readthedocs.io/en/stable/_static/riptable_logo.PNG)

An open-source, 64-bit Python analytics engine for high-performance data analysis with
multithreading support. Riptable supports Python 3.10 through 3.11 on 64-bit Linux and
multithreading support. Riptable supports Python 3.10 through 3.12 on 64-bit Linux and
Windows.

Similar to Pandas and based on NumPy, Riptable optimizes analyzing large volumes of data
Expand Down
2 changes: 1 addition & 1 deletion conda_recipe/conda_build_config.yaml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
python:
- 3.11
- 3.12
2 changes: 1 addition & 1 deletion conda_recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ requirements:
- pandas >=1.0,<3.0
- python
- python-dateutil
- riptide_cpp >=1.17.0,<2 # run with any (compatible) version in this range
- riptide_cpp >=1.19.0,<2 # run with any (compatible) version in this range
- typing-extensions >=4.9.0

about:
Expand Down
71 changes: 62 additions & 9 deletions dev_tools/_docstring_config.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,65 @@
import riptable
import contextlib

# Standardized riptable configuration settings applied when doing docstring validation.

# Standardize on these display settings when executing examples
riptable.Display.FORCE_REPR = True # Don't auto-detect console dimensions, just use CONSOLE_X/Y
riptable.Display.options.COL_MAX = 1_000_000 # display all Dataset columns (COL_ALL is incomplete)
riptable.Display.options.E_MAX = 100_000_000 # render up to 100MM before using scientific notation
riptable.Display.options.P_THRESHOLD = 0 # truncate small decimals, rather than scientific notation
riptable.Display.options.NUMBER_SEPARATOR = True # put commas in numbers
riptable.Display.options.HEAD_ROWS = 3 # number of leading rows shown in truncated output
riptable.Display.options.TAIL_ROWS = 3 # number of trailing rows shown in truncated output
def _setup_display_config():
    """Initialize display config settings.

    Any option that can be modified should be set here, even when set to its
    default value, so that a reset restores a fully known state.
    """
    # Don't auto-detect console dimensions; rely on CONSOLE_X/Y instead.
    riptable.Display.FORCE_REPR = True
    # All assignments below mutate the one shared options object.
    opts = riptable.Display.options
    opts.CONSOLE_X = 150
    opts.COL_MAX = 1_000_000  # display all Dataset columns (COL_ALL is incomplete)
    opts.E_MAX = 100_000_000  # render up to 100MM before switching to scientific notation
    opts.P_THRESHOLD = 0  # truncate small decimals rather than using scientific notation
    opts.NUMBER_SEPARATOR = True  # put commas in numbers
    opts.HEAD_ROWS = 3
    opts.TAIL_ROWS = 3
    opts.ROW_ALL = False
    opts.COL_ALL = False
    opts.MAX_STRING_WIDTH = 15


def setup_init_config():
    """Initialize every config group (currently just display settings).

    Intended to run a single time, at module import.
    """
    _setup_display_config()


class ScopedExampleSetup(contextlib.AbstractContextManager):
"""Context manager to clean up after any changes made during example setup."""

_CLEANUP_CALLBACKS = []

@staticmethod
def add_cleanup_callback(fn):
ScopedExampleSetup._CLEANUP_CALLBACKS.append(fn)

def __enter__(self) -> None:
return super().__enter__()

def __exit__(self, exc_type, exc_value, traceback) -> bool | None:
callbacks = ScopedExampleSetup._CLEANUP_CALLBACKS
ScopedExampleSetup._CLEANUP_CALLBACKS = []
for callback in callbacks:
callback()
return super().__exit__(exc_type, exc_value, traceback)


def setup_for_examples(*configs: str):
    """Apply the named config setups for an example.

    Configs are applied in order. Any modification made here must be undone by
    registering a cleanup task with ScopedExampleSetup.

    Parameters
    ----------
    configs : str
        Names of config setups to apply. Currently only ``"struct-display"``
        is recognized.

    Raises
    ------
    NotImplementedError
        If an unknown config name is passed.
    """
    for config in configs:
        if config == "struct-display":
            # Wider console and more visible head/tail rows for Struct examples.
            riptable.Display.options.CONSOLE_X = 120
            riptable.Display.options.HEAD_ROWS = 15
            riptable.Display.options.TAIL_ROWS = 15
            ScopedExampleSetup.add_cleanup_callback(_setup_display_config)  # reset all display configs.

        else:
            raise NotImplementedError(f"Unknown config, {config}")


# Initialize all config globally. Runs once, as a side effect of importing this module.
setup_init_config()
8 changes: 0 additions & 8 deletions dev_tools/docstring_xfails.txt
Original file line number Diff line number Diff line change
Expand Up @@ -273,18 +273,14 @@ riptable.rt_datetime.DateTimeCommon.tz_offset
riptable.rt_datetime.DateTimeCommon.year
riptable.rt_datetime.DateTimeCommon.yyyymmdd
riptable.rt_datetime.DateTimeNano.cut_time
riptable.rt_datetime.DateTimeNano.diff
riptable.rt_datetime.DateTimeNano.display_convert_func
riptable.rt_datetime.DateTimeNano.fill_invalid
riptable.rt_datetime.DateTimeNano.get_scalar
riptable.rt_datetime.DateTimeNano.hstack
riptable.rt_datetime.DateTimeNano.info
riptable.rt_datetime.DateTimeNano.isfinite
riptable.rt_datetime.DateTimeNano.isnotfinite
riptable.rt_datetime.DateTimeNano.newclassfrominstance
riptable.rt_datetime.DateTimeNano.random
riptable.rt_datetime.DateTimeNano.random_invalid
riptable.rt_datetime.DateTimeNano.resample
riptable.rt_datetime.DateTimeNano.shift
riptable.rt_datetime.DateTimeNano.to_arrow
riptable.rt_datetime.DateTimeNanoScalar
Expand Down Expand Up @@ -771,10 +767,6 @@ riptable.rt_str.FAString.substr_char_stop
riptable.rt_str.FAString.upper
riptable.rt_str.FAString.upper_inplace
riptable.rt_struct.Struct
riptable.rt_struct.Struct._A
riptable.rt_struct.Struct._G
riptable.rt_struct.Struct._H
riptable.rt_struct.Struct._V
riptable.rt_struct.Struct.all
riptable.rt_struct.Struct.any
riptable.rt_struct.Struct.apply_schema
Expand Down
67 changes: 36 additions & 31 deletions dev_tools/validate_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@
# With template backend, matplotlib plots nothing
matplotlib.use("template")

# Apply riptable docstring configuration for examples.
from _docstring_config import *
# Riptable docstring configuration setup for examples.
import _docstring_config


ERROR_MSGS = {
Expand Down Expand Up @@ -101,6 +101,10 @@
"rt": riptable,
}

ATTRIBS_CONTEXT = {
"setup_for_examples": _docstring_config.setup_for_examples,
}


def riptable_error(code, **kwargs):
"""
Expand Down Expand Up @@ -210,10 +214,11 @@ def examples_errors(self):
error_msgs = ""
current_dir = set(os.listdir())
tempdir = pathlib.Path("tempdir") # special reserved directory for temporary files; will be deleted per test
for test in finder.find(self.raw_doc, self.name, globs=IMPORT_CONTEXT):
for test in finder.find(self.raw_doc, self.name, globs=dict(**IMPORT_CONTEXT, **ATTRIBS_CONTEXT)):
tempdir.mkdir()
f = io.StringIO()
failed_examples, total_examples = runner.run(test, out=f.write)
with _docstring_config.ScopedExampleSetup():
failed_examples, total_examples = runner.run(test, out=f.write)
if failed_examples:
error_msgs += f.getvalue()
shutil.rmtree(tempdir)
Expand Down Expand Up @@ -319,11 +324,11 @@ def non_hyphenated_array_like(self):

def riptable_validate(
func_name: str,
errors: typing.Optional(list[str]) = None,
not_errors: typing.Optional(list[str]) = None,
flake8_errors: typing.Optional(list[str]) = None,
flake8_not_errors: typing.Optional(list[str]) = None,
xfails: typing.Optional(list[str]) = None,
errors: typing.Optional[list[str]] = None,
not_errors: typing.Optional[list[str]] = None,
flake8_errors: typing.Optional[list[str]] = None,
flake8_not_errors: typing.Optional[list[str]] = None,
xfails: typing.Optional[list[str]] = None,
verbose: bool = False,
):
"""
Expand Down Expand Up @@ -513,8 +518,8 @@ def is_default_excluded(fullname: str) -> bool:

def is_included(
fullname: str,
includes: typing.Optional(list[str]) = None,
excludes: typing.Optional(list[str]) = None,
includes: typing.Optional[list[str]] = None,
excludes: typing.Optional[list[str]] = None,
) -> bool:
"""Indicates whether the name should be included in validation."""

Expand All @@ -537,14 +542,14 @@ def validate_all(
match: str,
not_match: str = None,
names_from: str = NAMES_FROM_OPTS[0],
errors: typing.Optional(list[str]) = None,
not_errors: typing.Optional(list[str]) = None,
flake8_errors: typing.Optional(list[str]) = None,
flake8_not_errors: typing.Optional(list[str]) = None,
errors: typing.Optional[list[str]] = None,
not_errors: typing.Optional[list[str]] = None,
flake8_errors: typing.Optional[list[str]] = None,
flake8_not_errors: typing.Optional[list[str]] = None,
ignore_deprecated: bool = False,
includes: typing.Optional(list[str]) = None,
excludes: typing.Optional(list[str]) = None,
xfails: typing.Optional(list[str]) = None,
includes: typing.Optional[list[str]] = None,
excludes: typing.Optional[list[str]] = None,
xfails: typing.Optional[list[str]] = None,
verbose: int = 0,
) -> dict:
"""
Expand Down Expand Up @@ -650,17 +655,17 @@ def print_validate_all_results(
match: str,
not_match: str = None,
names_from: str = NAMES_FROM_OPTS[0],
errors: typing.Optional(list[str]) = None,
not_errors: typing.Optional(list[str]) = None,
flake8_errors: typing.Optional(list[str]) = None,
flake8_not_errors: typing.Optional(list[str]) = None,
errors: typing.Optional[list[str]] = None,
not_errors: typing.Optional[list[str]] = None,
flake8_errors: typing.Optional[list[str]] = None,
flake8_not_errors: typing.Optional[list[str]] = None,
out_format: str = OUT_FORMAT_OPTS[0],
ignore_deprecated: bool = False,
includes: typing.Optional(list[str]) = None,
excludes: typing.Optional(list[str]) = None,
xfails: typing.Optional(list[str]) = None,
includes: typing.Optional[list[str]] = None,
excludes: typing.Optional[list[str]] = None,
xfails: typing.Optional[list[str]] = None,
outfile: typing.IO = sys.stdout,
outfailsfile: typing.Optional(typing.IO) = None,
outfailsfile: typing.Optional[typing.IO] = None,
verbose: int = 0,
):
if out_format not in OUT_FORMAT_OPTS:
Expand Down Expand Up @@ -713,11 +718,11 @@ def print_validate_all_results(

def print_validate_one_results(
func_name: str,
errors: typing.Optional(list[str]) = None,
not_errors: typing.Optional(list[str]) = None,
flake8_errors: typing.Optional(list[str]) = None,
flake8_not_errors: typing.Optional(list[str]) = None,
xfails: typing.Optional(list[str]) = None,
errors: typing.Optional[list[str]] = None,
not_errors: typing.Optional[list[str]] = None,
flake8_errors: typing.Optional[list[str]] = None,
flake8_not_errors: typing.Optional[list[str]] = None,
xfails: typing.Optional[list[str]] = None,
outfile: typing.IO = sys.stdout,
verbose: int = 0,
):
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@ dependencies = [
"numpy >=1.23",
"pandas >=1.0,<3.0",
"python-dateutil",
"riptide_cpp >=1.17.0,<2",
"riptide_cpp >=1.19.0,<2",
"typing-extensions >=4.9.0",
]
classifiers = [
"Development Status :: 4 - Beta",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"License :: OSI Approved :: BSD License",
"Operating System :: OS Independent",
]
Expand Down
55 changes: 36 additions & 19 deletions riptable/rt_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2462,9 +2462,16 @@ def fill_backward(self, *args, limit: int = 0, fill_val=None, inplace: bool = Fa
# ------------------------------------------------------------
def isfiltered(self) -> FastArray:
"""
True where bin == 0.
Only applies to categoricals with base index 1, otherwise returns all False.
Different than invalid category.
Returns a boolean array of whether each category value is filtered.
For base-0 categoricals, return all False.
For base-1 categoricals, returns True where bin == 0.
For dict-based categoricals, returns True for values that don't exist in the provided mapping.
Returns
-------
out: FastArray
FastArray of bools.
See Also
--------
Expand All @@ -2473,8 +2480,11 @@ def isfiltered(self) -> FastArray:
"""
if self.base_index == 1:
return self._fa == 0
else:
return zeros(len(self), dtype=bool)

if self.base_index is None:
return self._fa.isin(self._grouping._enum.code_array, invert=True)

return zeros(len(self), dtype=bool)

# ------------------------------------------------------------
def set_name(self, name) -> Categorical:
Expand Down Expand Up @@ -4876,29 +4886,36 @@ def ilastkey(self):
@property
def unique_count(self):
"""
Number of unique values in the categorical.
It is necessary for every groupby operation.
Number of unique values in the :py:class:`~.rt_categorical.Categorical`.
Notes
-----
For categoricals in dict / enum mode that have generated their grouping object, this
will reflect the number of unique values that `occur` in the non-unique values. Empty
bins will not be included in the count.
This property is used for every groupby operation.
For :py:class:`~.rt_categorical.Categorical` objects constructed from dictionaries or
:py:class:`~enum.IntEnum` objects, the returned count includes unique invalid values from the
underlying array. Otherwise, invalid values are not counted.
See Also
--------
:py:meth:`.rt_categorical.Categorical.nunique` : Number of unique values in the :py:class:`~.rt_categorical.Categorical`.
:py:meth:`.rt_groupbyops.GroupByOps.count_uniques` : Count the unique values for each group.
"""
return self.grouping.unique_count

# ------------------------------------------------------------
def nunique(self):
"""
Number of unique values that occur in the Categorical.
Does not include invalids. Not the same as the length of possible uniques.
Number of unique values in the :py:class:`~.rt_categorical.Categorical`.
Not the same as the length of possible uniques.
Categoricals based on dictionary mapping / enum will return unique count including all possibly
invalid values from underlying array.
For :py:class:`~.rt_categorical.Categorical` objects constructed from dictionaries or
:py:class:`~enum.IntEnum` objects, the returned count includes unique invalid values from the
underlying array. Otherwise, invalid values are not counted.
See Also
--------
Categorical.unique_count
:py:attr:`.rt_categorical.Categorical.unique_count` : Number of unique values in the :py:class:`~.rt_categorical.Categorical`.
:py:meth:`.rt_groupbyops.GroupByOps.count_uniques` : Count the unique values for each group.
"""
un = unique(self._fa, sorted=False)
count = len(un)
Expand Down Expand Up @@ -6330,7 +6347,7 @@ def __del__(self):
# python has trouble deleting objects with circular references
if hasattr(self, "_categories_wrap"):
del self._categories_wrap
self._grouping = None
del self._grouping

# ------------------------------------------------------------
@classmethod
Expand Down Expand Up @@ -6504,7 +6521,7 @@ def column_name(arg):

return value

if np.isscalar(example_res) & ~transform: # userfunc is a scalar function
if np.isscalar(example_res) and not transform: # userfunc is a scalar function
res = self._scalar_compiled_numba_apply(iGroup, iFirstGroup, nCountGroup, userfunc, args)

res_ds = TypeRegister.Dataset(self.gb_keychain.gbkeys)
Expand Down
Loading

0 comments on commit 032583b

Please sign in to comment.