Skip to content

Commit

Permalink
v1.7.0 (#346)
Browse files Browse the repository at this point in the history
* v1.7.0-rc0

* v1.7.0-rc1

---------

Co-authored-by: rtosholdings-bot <rtosholdings-bot@sig.com>
  • Loading branch information
OrestZborowski-SIG and rtosholdings-bot authored May 9, 2023
1 parent 1a06bcb commit 7ab07bf
Show file tree
Hide file tree
Showing 29 changed files with 1,504 additions and 316 deletions.
5 changes: 2 additions & 3 deletions conda_recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@ requirements:
- setuptools_scm
run:
- python
- riptide_cpp >=1.12.1,<2 # run with any (compatible) version in this range
- pandas >=0.24,<2.0
- riptide_cpp >=1.12.2,<2 # run with any (compatible) version in this range
- pandas >=1.0,<3.0
- ansi2html >=1.5.2
- ipykernel
- numpy >=1.22
- numba >=0.56.2
- python-dateutil
Expand Down
22 changes: 14 additions & 8 deletions dev_tools/gen_requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ def is_windows() -> bool:
return platform.system() == "Windows"


def is_python(major: int, minor: int) -> bool:
    """Return True iff the running interpreter is exactly version ``major.minor``."""
    return (sys.version_info.major, sys.version_info.minor) == (major, minor)


_ABSEIL_REQ = "abseil-cpp==20220623.*"
_BENCHMARK_REQ = "benchmark>=1.7,<1.8"
_NUMPY_REQ = "numpy>=1.22"
Expand Down Expand Up @@ -53,10 +58,9 @@ def is_windows() -> bool:
runtime_reqs = [
# No riptide_cpp as that must be handled separately
"ansi2html>=1.5.2",
"ipykernel",
"numba>=0.56.2",
_NUMPY_REQ,
"pandas>=0.24,<2.0",
"pandas>=1.0,<3.0",
"python-dateutil",
_TBB_REQ,
]
Expand All @@ -72,7 +76,8 @@ def is_windows() -> bool:
"bokeh",
"bottleneck",
"hypothesis",
"ipython",
"ipykernel",
"ipython<8.13" if is_python(3, 8) else "ipython",
"matplotlib",
"nose",
"pyarrow",
Expand All @@ -92,6 +97,11 @@ def is_windows() -> bool:
+ tests_reqs
)

# Black formatting requirements.
black_reqs = [
"black==22.*",
]

# Docstrings validation requirements.
# Validation requires complete riptable for iteration and evaluating examples.
docstrings_reqs = (
Expand All @@ -100,15 +110,11 @@ def is_windows() -> bool:
"tomli",
]
+ flake8_reqs
+ black_reqs
+ runtime_reqs
+ tests_reqs
)

# Black formatting requirements.
black_reqs = [
"black==22.*",
]

# Pydocstyle doc style requirements.
pydocstyle_reqs = [
"pydocstyle==6.*",
Expand Down
46 changes: 44 additions & 2 deletions dev_tools/validate_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
"EX98": "flake8 error {error_code}: {error_message}{times_happening}",
"EX97": "Do not import {imported_library}, as it is imported automatically for the examples",
"EX96": "flake8 warning {error_code}: {error_message}{times_happening}",
"EX95": "black format error:\n{error_message}",
}

OUT_FORMAT_OPTS = "default", "json", "actions"
Expand Down Expand Up @@ -236,6 +237,9 @@ def validate_pep8(self):
file.close()
cmd = ["python", "-m", "flake8", "--quiet", "--statistics", fname]
response = subprocess.run(cmd, capture_output=True, text=True)
if response.stderr:
stderr = response.stderr.strip("\n")
error_messages.append(f"1 ERROR {stderr}")
stdout = response.stdout
stdout = stdout.replace(fname, "")
messages = stdout.strip("\n")
Expand All @@ -248,6 +252,34 @@ def validate_pep8(self):
error_count, error_code, message = error_message.split(maxsplit=2)
yield error_code, message, int(error_count)

def validate_format(self):
    """Check the docstring examples' source code against ``black`` formatting.

    Writes the collected example source to a temporary file, runs
    ``python -m black --quiet --diff`` on it, and yields one message per
    problem found (black's stderr output, and/or the formatting diff).

    Yields
    ------
    str
        An error or diff message, with the temp file path replaced by
        ``<example>`` so diagnostics point at the docstring example.
    """
    if not self.examples:
        return

    content = "".join(self.examples_source_code)

    error_messages = []
    # Create the temp file *before* entering the try block: if mkstemp
    # itself raised inside the try, `fname` would be unbound in the
    # finally clause and os.remove(fname) would raise NameError,
    # masking the original error.
    fd, fname = tempfile.mkstemp(prefix="val-", suffix=".py")
    try:
        # Context manager guarantees the fd is closed (and flushed)
        # before black reads the file.
        with os.fdopen(fd, mode="w", encoding="utf-8") as file:
            file.write(content)
        cmd = ["python", "-m", "black", "--quiet", "--diff", fname]
        response = subprocess.run(cmd, capture_output=True, text=True)
        if response.stderr:
            error_messages.append(response.stderr.strip("\n"))
        # Replace the temp file path so messages reference the example.
        messages = response.stdout.replace(fname, "<example>").strip("\n")
        if messages:
            error_messages.append(messages)
    finally:
        os.remove(fname)

    yield from error_messages

def non_hyphenated_array_like(self):
return "array_like" in self.raw_doc

Expand Down Expand Up @@ -314,7 +346,9 @@ def matches(test: str, matches: list[str]):
times_happening = f" ({error_count} times)" if error_count > 1 else ""
result["errors"].append(
riptable_error(
"EX98" if flake8_errors and matches(error_code, flake8_errors) else "EX96",
"EX98"
if error_code == "ERROR" or (flake8_errors and matches(error_code, flake8_errors))
else "EX96",
error_code=error_code,
error_message=error_message,
times_happening=times_happening,
Expand All @@ -325,6 +359,14 @@ def matches(test: str, matches: list[str]):
if re.search(f"import {wrong_import}\W+", examples_source_code):
result["errors"].append(riptable_error("EX97", imported_library=wrong_import))

for error_message in doc.validate_format():
result["errors"].append(
riptable_error(
"EX95",
error_message=error_message,
)
)

if doc.non_hyphenated_array_like():
result["errors"].append(riptable_error("GL97"))

Expand Down Expand Up @@ -636,7 +678,7 @@ def main():
argparser.add_argument(
"--flake8-errors",
default=None,
help="Comma separated list of flake8 error codes to treat as EX98 errors. Others are treated as warnings.",
help="Comma separated list of flake8 error codes to treat as errors. Others are treated as warnings.",
)
argparser.add_argument(
"--out",
Expand Down
1 change: 1 addition & 0 deletions docs/source/tutorial/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,6 @@ Appendix

tutorial_numpy_rt
tutorial_cat_reduce
tutorial_cat_adv_instantiation


131 changes: 131 additions & 0 deletions docs/source/tutorial/tutorial_cat_adv_instantiation.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@

A Useful Way to Instantiate a Categorical
*****************************************

It can sometimes be useful to instantiate a Categorical with every row set
to a single placeholder category, then fill in the real categories as needed.

For example, let’s say we have a Dataset with a column that has a lot of
categories, and we want to create a new Categorical column that keeps
two of those categories, properly aligned with the rest of the data in
the Dataset, and lumps the other categories into a category called
‘Other.’

Our Dataset, with a column of many categories::

>>> rng = np.random.default_rng(seed=42)
>>> N = 50
>>> ds_buildcat = rt.Dataset({'big_cat': rng.choice(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'], N)})
>>> ds_buildcat
# big_cat
--- -------
0 D
1 I
2 A
3 I
4 F
5 B
6 D
7 F
8 D
9 B
10 G
11 G
12 B
13 C
14 C
... ...
35 I
36 J
37 D
38 C
39 J
40 G
41 C
42 G
43 F
44 J
45 C
46 J
47 J
48 B
49 B

We create our ‘small’ Categorical instantiated with 3s, which fills the
column with the ‘Other’ category::

>>> ds_buildcat.small_cat = rt.Cat(rt.full(ds_buildcat.shape[0], 3), categories=['B', 'D', 'Other'])
>>> ds_buildcat
# big_cat small_cat
--- ------- ---------
0 D Other
1 I Other
2 A Other
3 I Other
4 F Other
5 B Other
6 D Other
7 F Other
8 D Other
9 B Other
10 G Other
11 G Other
12 B Other
13 C Other
14 C Other
... ... ...
35 I Other
36 J Other
37 D Other
38 C Other
39 J Other
40 G Other
41 C Other
42 G Other
43 F Other
44 J Other
45 C Other
46 J Other
47 J Other
48 B Other
49 B Other

Now we can fill in the aligned ‘B’ and ‘D’ categories::

>>> ds_buildcat.small_cat[ds_buildcat.big_cat == 'B'] = 'B'
>>> ds_buildcat.small_cat[ds_buildcat.big_cat == 'D'] = 'D'
>>> ds_buildcat
# big_cat small_cat
--- ------- ---------
0 D D
1 I Other
2 A Other
3 I Other
4 F Other
5 B B
6 D D
7 F Other
8 D D
9 B B
10 G Other
11 G Other
12 B B
13 C Other
14 C Other
... ... ...
35 I Other
36 J Other
37 D D
38 C Other
39 J Other
40 G Other
41 C Other
42 G Other
43 F Other
44 J Other
45 C Other
46 J Other
47 J Other
48 B B
49 B B
Loading

0 comments on commit 7ab07bf

Please sign in to comment.