Skip to content

Commit

Permalink
v1.7.0 (#346)
Browse files Browse the repository at this point in the history
* v1.7.0-rc0

* v1.7.0-rc1

---------

Co-authored-by: rtosholdings-bot <rtosholdings-bot@sig.com>
  • Loading branch information
OrestZborowski-SIG and rtosholdings-bot authored May 9, 2023
1 parent 1a06bcb commit 7ab07bf
Show file tree
Hide file tree
Showing 29 changed files with 1,504 additions and 316 deletions.
5 changes: 2 additions & 3 deletions conda_recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,9 @@ requirements:
- setuptools_scm
run:
- python
- riptide_cpp >=1.12.1,<2 # run with any (compatible) version in this range
- pandas >=0.24,<2.0
- riptide_cpp >=1.12.2,<2 # run with any (compatible) version in this range
- pandas >=1.0,<3.0
- ansi2html >=1.5.2
- ipykernel
- numpy >=1.22
- numba >=0.56.2
- python-dateutil
Expand Down
22 changes: 14 additions & 8 deletions dev_tools/gen_requirements.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ def is_windows() -> bool:
return platform.system() == "Windows"


def is_python(major: int, minor: int) -> bool:
    """Return True iff the running interpreter is exactly version ``major.minor``."""
    return (sys.version_info.major, sys.version_info.minor) == (major, minor)


_ABSEIL_REQ = "abseil-cpp==20220623.*"
_BENCHMARK_REQ = "benchmark>=1.7,<1.8"
_NUMPY_REQ = "numpy>=1.22"
Expand Down Expand Up @@ -53,10 +58,9 @@ def is_windows() -> bool:
runtime_reqs = [
# No riptide_cpp as that must be handled separately
"ansi2html>=1.5.2",
"ipykernel",
"numba>=0.56.2",
_NUMPY_REQ,
"pandas>=0.24,<2.0",
"pandas>=1.0,<3.0",
"python-dateutil",
_TBB_REQ,
]
Expand All @@ -72,7 +76,8 @@ def is_windows() -> bool:
"bokeh",
"bottleneck",
"hypothesis",
"ipython",
"ipykernel",
"ipython<8.13" if is_python(3, 8) else "ipython",
"matplotlib",
"nose",
"pyarrow",
Expand All @@ -92,6 +97,11 @@ def is_windows() -> bool:
+ tests_reqs
)

# Black formatting requirements.
black_reqs = [
"black==22.*",
]

# Docstrings validation requirements.
# Validation requires complete riptable for iteration and evaluating examples.
docstrings_reqs = (
Expand All @@ -100,15 +110,11 @@ def is_windows() -> bool:
"tomli",
]
+ flake8_reqs
+ black_reqs
+ runtime_reqs
+ tests_reqs
)

# Black formatting requirements.
black_reqs = [
"black==22.*",
]

# Pydocstyle doc style requirements.
pydocstyle_reqs = [
"pydocstyle==6.*",
Expand Down
46 changes: 44 additions & 2 deletions dev_tools/validate_docstrings.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
"EX98": "flake8 error {error_code}: {error_message}{times_happening}",
"EX97": "Do not import {imported_library}, as it is imported automatically for the examples",
"EX96": "flake8 warning {error_code}: {error_message}{times_happening}",
"EX95": "black format error:\n{error_message}",
}

OUT_FORMAT_OPTS = "default", "json", "actions"
Expand Down Expand Up @@ -236,6 +237,9 @@ def validate_pep8(self):
file.close()
cmd = ["python", "-m", "flake8", "--quiet", "--statistics", fname]
response = subprocess.run(cmd, capture_output=True, text=True)
if response.stderr:
stderr = response.stderr.strip("\n")
error_messages.append(f"1 ERROR {stderr}")
stdout = response.stdout
stdout = stdout.replace(fname, "")
messages = stdout.strip("\n")
Expand All @@ -248,6 +252,34 @@ def validate_pep8(self):
error_count, error_code, message = error_message.split(maxsplit=2)
yield error_code, message, int(error_count)

def validate_format(self):
    """Check the docstring examples' source code against ``black`` formatting.

    Writes the collected example source to a temporary file, runs
    ``python -m black --quiet --diff`` on it, and yields one message per
    problem found (black's stderr output, and/or the formatting diff).

    Yields
    ------
    str
        An error or diff message, with the temp file path replaced by
        ``<example>`` so diagnostics point at the docstring example.
    """
    if not self.examples:
        return

    content = "".join(self.examples_source_code)

    error_messages = []
    # Create the temp file *before* entering the try block: if mkstemp
    # itself raised inside the try, `fname` would be unbound in the
    # finally clause and os.remove(fname) would raise NameError,
    # masking the original error.
    fd, fname = tempfile.mkstemp(prefix="val-", suffix=".py")
    try:
        # Context manager guarantees the fd is closed (and flushed)
        # before black reads the file.
        with os.fdopen(fd, mode="w", encoding="utf-8") as file:
            file.write(content)
        cmd = ["python", "-m", "black", "--quiet", "--diff", fname]
        response = subprocess.run(cmd, capture_output=True, text=True)
        if response.stderr:
            error_messages.append(response.stderr.strip("\n"))
        # Replace the temp file path so messages reference the example.
        messages = response.stdout.replace(fname, "<example>").strip("\n")
        if messages:
            error_messages.append(messages)
    finally:
        os.remove(fname)

    yield from error_messages

def non_hyphenated_array_like(self):
return "array_like" in self.raw_doc

Expand Down Expand Up @@ -314,7 +346,9 @@ def matches(test: str, matches: list[str]):
times_happening = f" ({error_count} times)" if error_count > 1 else ""
result["errors"].append(
riptable_error(
"EX98" if flake8_errors and matches(error_code, flake8_errors) else "EX96",
"EX98"
if error_code == "ERROR" or (flake8_errors and matches(error_code, flake8_errors))
else "EX96",
error_code=error_code,
error_message=error_message,
times_happening=times_happening,
Expand All @@ -325,6 +359,14 @@ def matches(test: str, matches: list[str]):
if re.search(f"import {wrong_import}\W+", examples_source_code):
result["errors"].append(riptable_error("EX97", imported_library=wrong_import))

for error_message in doc.validate_format():
result["errors"].append(
riptable_error(
"EX95",
error_message=error_message,
)
)

if doc.non_hyphenated_array_like():
result["errors"].append(riptable_error("GL97"))

Expand Down Expand Up @@ -636,7 +678,7 @@ def main():
argparser.add_argument(
"--flake8-errors",
default=None,
help="Comma separated list of flake8 error codes to treat as EX98 errors. Others are treated as warnings.",
help="Comma separated list of flake8 error codes to treat as errors. Others are treated as warnings.",
)
argparser.add_argument(
"--out",
Expand Down
1 change: 1 addition & 0 deletions docs/source/tutorial/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,6 @@ Appendix

tutorial_numpy_rt
tutorial_cat_reduce
tutorial_cat_adv_instantiation


131 changes: 131 additions & 0 deletions docs/source/tutorial/tutorial_cat_adv_instantiation.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@

A Useful Way to Instantiate a Categorical
*****************************************

It can sometimes be useful to instantiate a Categorical with every row set
to a single placeholder category, then fill in the real categories as needed.

For example, let’s say we have a Dataset with a column that has a lot of
categories, and we want to create a new Categorical column that keeps
two of those categories, properly aligned with the rest of the data in
the Dataset, and lumps the other categories into a category called
‘Other.’

Our Dataset, with a column of many categories::

>>> rng = np.random.default_rng(seed=42)
>>> N = 50
>>> ds_buildcat = rt.Dataset({'big_cat': rng.choice(['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'], N)})
>>> ds_buildcat
# big_cat
--- -------
0 D
1 I
2 A
3 I
4 F
5 B
6 D
7 F
8 D
9 B
10 G
11 G
12 B
13 C
14 C
... ...
35 I
36 J
37 D
38 C
39 J
40 G
41 C
42 G
43 F
44 J
45 C
46 J
47 J
48 B
49 B

We create our ‘small’ Categorical instantiated with 3s, which fills the
column with the ‘Other’ category::

>>> ds_buildcat.small_cat = rt.Cat(rt.full(ds_buildcat.shape[0], 3), categories=['B', 'D', 'Other'])
>>> ds_buildcat
# big_cat small_cat
--- ------- ---------
0 D Other
1 I Other
2 A Other
3 I Other
4 F Other
5 B Other
6 D Other
7 F Other
8 D Other
9 B Other
10 G Other
11 G Other
12 B Other
13 C Other
14 C Other
... ... ...
35 I Other
36 J Other
37 D Other
38 C Other
39 J Other
40 G Other
41 C Other
42 G Other
43 F Other
44 J Other
45 C Other
46 J Other
47 J Other
48 B Other
49 B Other

Now we can fill in the aligned ‘B’ and ‘D’ categories::

>>> ds_buildcat.small_cat[ds_buildcat.big_cat == 'B'] = 'B'
>>> ds_buildcat.small_cat[ds_buildcat.big_cat == 'D'] = 'D'
>>> ds_buildcat
# big_cat small_cat
--- ------- ---------
0 D D
1 I Other
2 A Other
3 I Other
4 F Other
5 B B
6 D D
7 F Other
8 D D
9 B B
10 G Other
11 G Other
12 B B
13 C Other
14 C Other
... ... ...
35 I Other
36 J Other
37 D D
38 C Other
39 J Other
40 G Other
41 C Other
42 G Other
43 F Other
44 J Other
45 C Other
46 J Other
47 J Other
48 B B
49 B B
Loading

0 comments on commit 7ab07bf

Please sign in to comment.