From 368643683298b4f33843c1e93e4d907feff5ca32 Mon Sep 17 00:00:00 2001 From: zitorelova Date: Mon, 18 Jan 2021 17:52:27 -0800 Subject: [PATCH 1/6] Fix codespell misspellings --- .pre-commit-config.yaml | 3 ++- pandas/_version.py | 2 +- pandas/io/common.py | 2 +- pandas/io/excel/_base.py | 2 +- pandas/io/formats/format.py | 2 +- pandas/io/formats/info.py | 2 +- pandas/io/formats/style.py | 2 +- 7 files changed, 8 insertions(+), 7 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 88d18e3e230c6..ae36b58888438 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -184,4 +184,5 @@ repos: hooks: - id: codespell types_or: [python, rst, markdown] - files: ^pandas/core/ + files: ^pandas/ + exclude: ^pandas/tests/ diff --git a/pandas/_version.py b/pandas/_version.py index 14c2b5c6e7603..fbec4a694d721 100644 --- a/pandas/_version.py +++ b/pandas/_version.py @@ -293,7 +293,7 @@ def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command): # TAG-NUM-gHEX mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) if not mo: - # unparseable. Maybe git-describe is misbehaving? + # unparsable. Maybe git-describe is misbehaving? pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out return pieces diff --git a/pandas/io/common.py b/pandas/io/common.py index 47811d47e7238..9335988affa69 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -291,7 +291,7 @@ def _get_filepath_or_buffer( # urlopen function defined elsewhere in this module import urllib.request - # assuming storage_options is to be interpretted as headers + # assuming storage_options is to be interpreted as headers req_info = urllib.request.Request(filepath_or_buffer, headers=storage_options) with urlopen(req_info) as req: content_encoding = req.headers.get("Content-Encoding", None) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 962ba2c7f9ef7..753d865ecbda8 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -188,7 +188,7 @@ * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call result 'foo' - If a column or index contains an unparseable date, the entire column or + If a column or index contains an unparsable date, the entire column or index will be returned unaltered as an object data type. If you don`t want to parse some cells as date just change their type in Excel to "Text". For non-standard datetime parsing, use ``pd.to_datetime`` after ``pd.read_excel``. diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 2c17551a7c3b9..ebe8d976835a0 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -919,7 +919,7 @@ class DataFrameRenderer: Parameters ---------- fmt : DataFrameFormatter - Formatter with the formating options. + Formatter with the formatting options. """ def __init__(self, fmt: DataFrameFormatter): diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index 9693008abcf7f..b1675fa5c5375 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -683,7 +683,7 @@ def _gen_columns(self) -> Iterator[str]: def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]: """ - Create mapping between datatypes and their number of occurences. + Create mapping between datatypes and their number of occurrences. """ # groupby dtype.name to collect e.g. Categorical columns return df.dtypes.value_counts().groupby(lambda x: x.name).sum() diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 782562f455607..acadc8387e554 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1697,7 +1697,7 @@ def from_custom_template(cls, searchpath, name): """ loader = jinja2.ChoiceLoader([jinja2.FileSystemLoader(searchpath), cls.loader]) - # mypy doesnt like dynamically-defined class + # mypy doesn't like dynamically-defined classes # error: Variable "cls" is not valid as a type [valid-type] # error: Invalid base class "cls" [misc] class MyStyler(cls): # type:ignore[valid-type,misc] From b51cb8e0a8d1453ec63b78819db8860bcaa43cf9 Mon Sep 17 00:00:00 2001 From: zitorelova Date: Mon, 18 Jan 2021 18:03:25 -0800 Subject: [PATCH 2/6] Add to codespell ignore-words-list --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 440fb790b1ace..4f032ef06749d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -67,7 +67,7 @@ filterwarnings = junit_family=xunit2 [codespell] -ignore-words-list=ba,blocs,coo,hist,nd,ser +ignore-words-list=ba,blocs,coo,hist,nd,ser,mose,fo,ist [coverage:run] branch = False From 99499337b1845e897be7d0db1644af01d9c8942d Mon Sep 17 00:00:00 2001 From: zitorelova Date: Mon, 18 Jan 2021 18:07:04 -0800 Subject: [PATCH 3/6] Revert changes to pre-commit-config.yaml --- .pre-commit-config.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ae36b58888438..88d18e3e230c6 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -184,5 +184,4 @@ repos: hooks: - id: codespell types_or: [python, rst, markdown] - files: ^pandas/ - exclude: ^pandas/tests/ + files: ^pandas/core/ From e29c0aba776dccd7e6dc568c33d467256de5b710 Mon Sep 17 00:00:00 2001 From: zitorelova Date: Tue, 19 Jan 2021 13:00:10 -0800 Subject: [PATCH 4/6] Resolve mose and fo misspellings --- pandas/_testing/__init__.py | 2 +- pandas/io/clipboard/__init__.py | 8 ++++---- pandas/io/json/_normalize.py | 12 ++++++------ setup.cfg | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 549a3c8e4a681..a28b119854bb7 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -679,7 +679,7 @@ def makeCustomDataframe( # 4-level multindex on rows with names provided, 2-level multindex # on columns with default labels and default names. >> a=makeCustomDataframe(5,3,r_idx_nlevels=4, - r_idx_names=["FEE","FI","FO","FAM"], + r_idx_names=["FEE","FIH","FOH","FUM"], c_idx_nlevels=2) >> a=mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4) diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py index 2d253d93295dd..233e58d14adf1 100644 --- a/pandas/io/clipboard/__init__.py +++ b/pandas/io/clipboard/__init__.py @@ -271,12 +271,12 @@ def copy_dev_clipboard(text): if "\r" in text: warnings.warn("Pyperclip cannot handle \\r characters on Cygwin.") - with open("/dev/clipboard", "wt") as fo: - fo.write(text) + with open("/dev/clipboard", "wt") as fd: + fd.write(text) def paste_dev_clipboard() -> str: - with open("/dev/clipboard") as fo: - content = fo.read() + with open("/dev/clipboard") as fd: + content = fd.read() return content return copy_dev_clipboard, paste_dev_clipboard diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index bff13ec188b0e..8dcc9fa490635 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -162,25 +162,25 @@ def _json_normalize( Examples -------- >>> data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}}, - ... {'name': {'given': 'Mose', 'family': 'Regner'}}, + ... {'name': {'given': 'Mark', 'family': 'Regner'}}, ... {'id': 2, 'name': 'Faye Raker'}] >>> pd.json_normalize(data) id name.first name.last name.given name.family name 0 1.0 Coleen Volk NaN NaN NaN - 1 NaN NaN NaN Mose Regner NaN + 1 NaN NaN NaN Mark Regner NaN 2 2.0 NaN NaN NaN NaN Faye Raker >>> data = [{'id': 1, ... 'name': "Cole Volk", ... 'fitness': {'height': 130, 'weight': 60}}, - ... {'name': "Mose Reg", + ... {'name': "Mark Reg", ... 'fitness': {'height': 130, 'weight': 60}}, ... {'id': 2, 'name': 'Faye Raker', ... 'fitness': {'height': 130, 'weight': 60}}] >>> pd.json_normalize(data, max_level=0) id name fitness 0 1.0 Cole Volk {'height': 130, 'weight': 60} - 1 NaN Mose Reg {'height': 130, 'weight': 60} + 1 NaN Mark Reg {'height': 130, 'weight': 60} 2 2.0 Faye Raker {'height': 130, 'weight': 60} Normalizes nested data up to level 1. @@ -188,14 +188,14 @@ def _json_normalize( >>> data = [{'id': 1, ... 'name': "Cole Volk", ... 'fitness': {'height': 130, 'weight': 60}}, - ... {'name': "Mose Reg", + ... {'name': "Mark Reg", ... 'fitness': {'height': 130, 'weight': 60}}, ... {'id': 2, 'name': 'Faye Raker', ... 'fitness': {'height': 130, 'weight': 60}}] >>> pd.json_normalize(data, max_level=1) id name fitness.height fitness.weight 0 1.0 Cole Volk 130 60 - 1 NaN Mose Reg 130 60 + 1 NaN Mark Reg 130 60 2 2.0 Faye Raker 130 60 >>> data = [{'state': 'Florida', diff --git a/setup.cfg b/setup.cfg index 4f032ef06749d..13c631dd47b57 100644 --- a/setup.cfg +++ b/setup.cfg @@ -67,7 +67,7 @@ filterwarnings = junit_family=xunit2 [codespell] -ignore-words-list=ba,blocs,coo,hist,nd,ser,mose,fo,ist +ignore-words-list=ba,blocs,coo,hist,nd,ser,ist [coverage:run] branch = False From 4a99fa8299e1996d914c9746b423f58389232894 Mon Sep 17 00:00:00 2001 From: zitorelova Date: Wed, 20 Jan 2021 15:35:57 -0800 Subject: [PATCH 5/6] Ignore URLs in codespell --- setup.cfg | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 13c631dd47b57..a6d636704664e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -67,7 +67,8 @@ filterwarnings = junit_family=xunit2 [codespell] -ignore-words-list=ba,blocs,coo,hist,nd,ser,ist +ignore-words-list=ba,blocs,coo,hist,nd,ser +ignore-regex=https://(\w+\.)+ [coverage:run] branch = False From 367a4eafae7389101984089a734c28b3cef32e5f Mon Sep 17 00:00:00 2001 From: zitorelova Date: Wed, 20 Jan 2021 15:36:38 -0800 Subject: [PATCH 6/6] Run codespell everywhere except test suite --- .pre-commit-config.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 88d18e3e230c6..ae36b58888438 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -184,4 +184,5 @@ repos: hooks: - id: codespell types_or: [python, rst, markdown] - files: ^pandas/core/ + files: ^pandas/ + exclude: ^pandas/tests/