diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 339e0b5c39a86..4654c6be921b4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -189,7 +189,8 @@ repos: hooks: - id: codespell types_or: [python, rst, markdown] - files: ^pandas/core/ + files: ^pandas/ + exclude: ^pandas/tests/ - repo: https://github.com/MarcoGorelli/no-string-hints rev: v0.1.5 hooks: diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 549a3c8e4a681..a28b119854bb7 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -679,7 +679,7 @@ def makeCustomDataframe( # 4-level multindex on rows with names provided, 2-level multindex # on columns with default labels and default names. >> a=makeCustomDataframe(5,3,r_idx_nlevels=4, - r_idx_names=["FEE","FI","FO","FAM"], + r_idx_names=["FEE","FIH","FOH","FUM"], c_idx_nlevels=2) >> a=mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4) diff --git a/pandas/_version.py b/pandas/_version.py index 14c2b5c6e7603..fbec4a694d721 100644 --- a/pandas/_version.py +++ b/pandas/_version.py @@ -293,7 +293,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # TAG-NUM-gHEX mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: - # unparseable. Maybe git-describe is misbehaving? + # unparsable. Maybe git-describe is misbehaving? pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py index 2d253d93295dd..233e58d14adf1 100644 --- a/pandas/io/clipboard/__init__.py +++ b/pandas/io/clipboard/__init__.py @@ -271,12 +271,12 @@ def copy_dev_clipboard(text): if "\r" in text: warnings.warn("Pyperclip cannot handle \\r characters on Cygwin.") - with open("/dev/clipboard", "wt") as fo: - fo.write(text) + with open("/dev/clipboard", "wt") as fd: + fd.write(text) def paste_dev_clipboard() -> str: - with open("/dev/clipboard") as fo: - content = fo.read() + with open("/dev/clipboard") as fd: + content = fd.read() return content return copy_dev_clipboard, paste_dev_clipboard diff --git a/pandas/io/common.py b/pandas/io/common.py index 998e8b63f8336..57ae46a421fbb 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -292,7 +292,7 @@ def _get_filepath_or_buffer( # urlopen function defined elsewhere in this module import urllib.request - # assuming storage_options is to be interpretted as headers + # assuming storage_options is to be interpreted as headers req_info = urllib.request.Request(filepath_or_buffer, headers=storage_options) with urlopen(req_info) as req: content_encoding = req.headers.get("Content-Encoding", None) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 4fca057976277..11974d25d72d3 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -190,7 +190,7 @@ * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call result 'foo' - If a column or index contains an unparseable date, the entire column or + If a column or index contains an unparsable date, the entire column or index will be returned unaltered as an object data type. If you don`t want to parse some cells as date just change their type in Excel to "Text". For non-standard datetime parsing, use ``pd.to_datetime`` after ``pd.read_excel``. diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 1394a78dcb1a5..05d94366e6623 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -919,7 +919,7 @@ class DataFrameRenderer: Parameters ---------- fmt : DataFrameFormatter - Formatter with the formating options. + Formatter with the formatting options. """ def __init__(self, fmt: DataFrameFormatter): diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index 9693008abcf7f..b1675fa5c5375 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -683,7 +683,7 @@ def _gen_columns(self) -> Iterator[str]: def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]: """ - Create mapping between datatypes and their number of occurences. + Create mapping between datatypes and their number of occurrences. """ # groupby dtype.name to collect e.g. Categorical columns return df.dtypes.value_counts().groupby(lambda x: x.name).sum() diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 03e65029fb021..f86a1b13273e1 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1697,7 +1697,7 @@ def from_custom_template(cls, searchpath, name): """ loader = jinja2.ChoiceLoader([jinja2.FileSystemLoader(searchpath), cls.loader]) - # mypy doesnt like dynamically-defined class + # mypy doesn't like dynamically-defined classes # error: Variable "cls" is not valid as a type [valid-type] # error: Invalid base class "cls" [misc] class MyStyler(cls): # type:ignore[valid-type,misc] diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index bff13ec188b0e..8dcc9fa490635 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -162,25 +162,25 @@ def _json_normalize( Examples -------- >>> data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}}, - ... {'name': {'given': 'Mose', 'family': 'Regner'}}, + ... {'name': {'given': 'Mark', 'family': 'Regner'}}, ... {'id': 2, 'name': 'Faye Raker'}] >>> pd.json_normalize(data) id name.first name.last name.given name.family name 0 1.0 Coleen Volk NaN NaN NaN - 1 NaN NaN NaN Mose Regner NaN + 1 NaN NaN NaN Mark Regner NaN 2 2.0 NaN NaN NaN NaN Faye Raker >>> data = [{'id': 1, ... 'name': "Cole Volk", ... 'fitness': {'height': 130, 'weight': 60}}, - ... {'name': "Mose Reg", + ... {'name': "Mark Reg", ... 'fitness': {'height': 130, 'weight': 60}}, ... {'id': 2, 'name': 'Faye Raker', ... 'fitness': {'height': 130, 'weight': 60}}] >>> pd.json_normalize(data, max_level=0) id name fitness 0 1.0 Cole Volk {'height': 130, 'weight': 60} - 1 NaN Mose Reg {'height': 130, 'weight': 60} + 1 NaN Mark Reg {'height': 130, 'weight': 60} 2 2.0 Faye Raker {'height': 130, 'weight': 60} Normalizes nested data up to level 1. @@ -188,14 +188,14 @@ def _json_normalize( >>> data = [{'id': 1, ... 'name': "Cole Volk", ... 'fitness': {'height': 130, 'weight': 60}}, - ... {'name': "Mose Reg", + ... {'name': "Mark Reg", ... 'fitness': {'height': 130, 'weight': 60}}, ... {'id': 2, 'name': 'Faye Raker', ... 'fitness': {'height': 130, 'weight': 60}}] >>> pd.json_normalize(data, max_level=1) id name fitness.height fitness.weight 0 1.0 Cole Volk 130 60 - 1 NaN Mose Reg 130 60 + 1 NaN Mark Reg 130 60 2 2.0 Faye Raker 130 60 >>> data = [{'state': 'Florida', diff --git a/setup.cfg b/setup.cfg index 440fb790b1ace..a6d636704664e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -68,6 +68,7 @@ junit_family=xunit2 [codespell] ignore-words-list=ba,blocs,coo,hist,nd,ser +ignore-regex=https://(\w+\.)+ [coverage:run] branch = False