diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 60b4206932c94a..3d1bba9e6bbcad 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -89,7 +89,6 @@ fi
 
 ### DOCSTRINGS ###
 if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
 
-    MSG='Validate docstrings (GL03, GL04, GL05, GL06, GL07, GL09, GL10, SS01, SS02, SS04, SS05, PR03, PR04, PR05, PR06, PR10, EX04, RT01, RT04, RT05, SA02, SA03)' ; echo $MSG
     $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=GL03,GL04,GL05,GL06,GL07,GL09,GL10,SS02,SS04,SS05,PR03,PR04,PR05,PR10,EX04,RT01,RT04,RT05,SA02,SA03
 
diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst
index 4bf5d7bead0a6a..b421b9bb8ca496 100644
--- a/doc/source/whatsnew/v1.3.3.rst
+++ b/doc/source/whatsnew/v1.3.3.rst
@@ -1,6 +1,6 @@
 .. _whatsnew_133:
 
-What's new in 1.3.3 (September ??, 2021)
+What's new in 1.3.3 (September 12, 2021)
 ----------------------------------------
 
 These are the changes in pandas 1.3.3. See :ref:`release` for a full changelog
@@ -15,7 +15,6 @@ including other versions of pandas.
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - Fixed regression in :class:`DataFrame` constructor failing to broadcast for defined :class:`Index` and len one list of :class:`Timestamp` (:issue:`42810`)
-- Performance regression in :meth:`core.window.ewm.ExponentialMovingWindow.mean` (:issue:`42333`)
 - Fixed regression in :meth:`.GroupBy.agg` incorrectly raising in some cases (:issue:`42390`)
 - Fixed regression in :meth:`.GroupBy.apply` where ``nan`` values were dropped even with ``dropna=False`` (:issue:`43205`)
 - Fixed regression in :meth:`.GroupBy.quantile` which was failing with ``pandas.NA`` (:issue:`42849`)
@@ -29,8 +28,8 @@ Fixed regressions
 - Fixed regression in :meth:`DataFrame.corr` where Kendall correlation would produce incorrect results for columns with repeated values (:issue:`43401`)
 - Fixed regression in :meth:`DataFrame.groupby` where aggregation on columns with object types dropped results on those columns (:issue:`42395`, :issue:`43108`)
 - Fixed regression in :meth:`Series.fillna` raising ``TypeError`` when filling ``float`` ``Series`` with list-like fill value having a dtype which couldn't cast lostlessly (like ``float32`` filled with ``float64``) (:issue:`43424`)
-- Fixed regression in :func:`read_csv` throwing an ``AttributeError`` when the file handle is an ``tempfile.SpooledTemporaryFile`` object (:issue:`43439`)
--
+- Fixed regression in :func:`read_csv` raising ``AttributeError`` when the file handle is a ``tempfile.SpooledTemporaryFile`` object (:issue:`43439`)
+- Fixed performance regression in :meth:`core.window.ewm.ExponentialMovingWindow.mean` (:issue:`42333`)
 
 .. ---------------------------------------------------------------------------
 
@@ -39,8 +38,6 @@ Fixed regressions
 Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
 - Performance improvement for :meth:`DataFrame.__setitem__` when the key or value is not a :class:`DataFrame`, or key is not list-like (:issue:`43274`)
--
--
 
 .. ---------------------------------------------------------------------------
 
@@ -48,17 +45,7 @@ Performance improvements
 
 Bug fixes
 ~~~~~~~~~
-- Bug in :meth:`.DataFrameGroupBy.agg` and :meth:`.DataFrameGroupBy.transform` with ``engine="numba"`` where ``index`` data was not being correctly passed into ``func`` (:issue:`43133`)
--
-
-.. ---------------------------------------------------------------------------
-
-.. _whatsnew_133.other:
-
-Other
-~~~~~
--
--
+- Fixed bug in :meth:`.DataFrameGroupBy.agg` and :meth:`.DataFrameGroupBy.transform` with ``engine="numba"`` where ``index`` data was not being correctly passed into ``func`` (:issue:`43133`)
 
 .. ---------------------------------------------------------------------------
 
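To make the :issue:`43439` entry above concrete, here is a minimal sketch of the previously failing call. This is illustrative only (not part of the patch) and assumes pandas >= 1.3.3:

    import tempfile

    import pandas as pd

    # GH 43439: passing a SpooledTemporaryFile handle to read_csv used to raise
    # AttributeError; with the regression fixed it parses like any other handle.
    with tempfile.SpooledTemporaryFile(max_size=1024, mode="w+") as handle:
        handle.write("a,b\n1,2\n3,4\n")
        handle.seek(0)
        df = pd.read_csv(handle)

    print(df)
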
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 7b9997e8f0bd6a..328499a4ae98e9 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -107,6 +107,7 @@ Other enhancements
 - :meth:`DataFrame.to_stata` and :meth:`StataWriter` now accept the keyword only argument ``value_labels`` to save labels for non-categorical columns
 - Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` ignored imaginary component for complex numbers (:issue:`17927`)
 - Add :meth:`Series.str.removeprefix` and :meth:`Series.str.removesuffix` introduced in Python 3.9 to remove pre-/suffixes from string-type :class:`Series` (:issue:`36944`)
+- Attempting to write into a file in a missing parent directory with :meth:`DataFrame.to_csv`, :meth:`DataFrame.to_html`, :meth:`DataFrame.to_excel`, :meth:`DataFrame.to_feather`, :meth:`DataFrame.to_parquet`, :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_json`, :meth:`DataFrame.to_pickle`, and :meth:`DataFrame.to_xml` now explicitly mentions the missing parent directory; the same is true for the :class:`Series` counterparts (:issue:`24306`)
 
 .. ---------------------------------------------------------------------------
 
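A quick illustration of the enhancement added above (a sketch, not part of the patch; the explicit message comes from the new ``check_parent_directory`` helper further down in this diff):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3]})

    # "missing_folder" does not exist, so the writer now raises an explicit
    # OSError naming the missing parent directory.
    try:
        df.to_csv("missing_folder/out.csv")
    except OSError as err:
        print(err)  # Cannot save file into a non-existent directory: 'missing_folder'
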
diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in
index ce665ca8121317..c6e65f8b961871 100644
--- a/pandas/_libs/sparse_op_helper.pxi.in
+++ b/pandas/_libs/sparse_op_helper.pxi.in
@@ -301,9 +301,4 @@ cpdef sparse_{{opname}}_{{dtype}}({{dtype}}_t[:] x,
     else:
         raise NotImplementedError
 
-
-cpdef sparse_fill_{{opname}}_{{dtype}}({{dtype}}_t xfill,
-                                       {{dtype}}_t yfill):
-    return {{(opname, 'xfill', 'yfill', dtype) | get_op}}
-
 {{endfor}}
diff --git a/pandas/_libs/util.pxd b/pandas/_libs/util.pxd
index bd1e21b0d86655..be22fc368c28f5 100644
--- a/pandas/_libs/util.pxd
+++ b/pandas/_libs/util.pxd
@@ -1,19 +1,8 @@
 cimport numpy as cnp
-from numpy cimport ndarray
 
 from pandas._libs.tslibs.util cimport *
 
 
-cdef extern from "numpy/ndarraytypes.h":
-    void PyArray_CLEARFLAGS(ndarray arr, int flags) nogil
-
-
-cdef extern from "numpy/arrayobject.h":
-    enum:
-        NPY_ARRAY_C_CONTIGUOUS
-        NPY_ARRAY_F_CONTIGUOUS
-
-
 cdef extern from "src/headers/stdint.h":
     enum: UINT8_MAX
     enum: UINT16_MAX
@@ -42,10 +31,3 @@ ctypedef fused numeric:
     cnp.float32_t
     cnp.float64_t
-
-
-cdef inline void set_array_not_contiguous(ndarray ao) nogil:
-    # Numpy>=1.8-compliant equivalent to:
-    # ao->flags &= ~(NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS);
-    PyArray_CLEARFLAGS(ao,
-                       (NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS))
diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 40837ccad6ac8b..088c44334495cb 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -449,7 +449,7 @@ def __ne__(self, other: Any) -> ArrayLike:  # type: ignore[override]
 
     def to_numpy(
         self,
-        dtype: Dtype | None = None,
+        dtype: npt.DTypeLike | None = None,
         copy: bool = False,
         na_value=lib.no_default,
     ) -> np.ndarray:
@@ -478,12 +478,7 @@ def to_numpy(
         -------
         numpy.ndarray
         """
-        # error: Argument "dtype" to "asarray" has incompatible type
-        # "Union[ExtensionDtype, str, dtype[Any], Type[str], Type[float], Type[int],
-        # Type[complex], Type[bool], Type[object], None]"; expected "Union[dtype[Any],
-        # None, type, _SupportsDType, str, Union[Tuple[Any, int], Tuple[Any, Union[int,
-        # Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]"
-        result = np.asarray(self, dtype=dtype)  # type: ignore[arg-type]
+        result = np.asarray(self, dtype=dtype)
         if copy or na_value is not lib.no_default:
             result = result.copy()
         if na_value is not lib.no_default:
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 877babe4f18e85..e40d8b74c768c9 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -224,12 +224,9 @@ def __len__(self) -> int:
     def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
         return type(self)(~self._data, self._mask.copy())
 
-    # error: Argument 1 of "to_numpy" is incompatible with supertype "ExtensionArray";
-    # supertype defines the argument type as "Union[ExtensionDtype, str, dtype[Any],
-    # Type[str], Type[float], Type[int], Type[complex], Type[bool], Type[object], None]"
-    def to_numpy(  # type: ignore[override]
+    def to_numpy(
         self,
-        dtype: NpDtype | None = None,
+        dtype: npt.DTypeLike | None = None,
         copy: bool = False,
         na_value: Scalar = lib.no_default,
     ) -> np.ndarray:
diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
index ec7bd132832d13..410497d61c98b8 100644
--- a/pandas/core/arrays/numpy_.py
+++ b/pandas/core/arrays/numpy_.py
@@ -10,6 +10,7 @@
     Dtype,
     NpDtype,
     Scalar,
+    npt,
 )
 from pandas.compat.numpy import function as nv
 
@@ -365,12 +366,9 @@ def skew(
     # ------------------------------------------------------------------------
     # Additional Methods
 
-    # error: Argument 1 of "to_numpy" is incompatible with supertype "ExtensionArray";
-    # supertype defines the argument type as "Union[ExtensionDtype, str, dtype[Any],
-    # Type[str], Type[float], Type[int], Type[complex], Type[bool], Type[object], None]"
-    def to_numpy(  # type: ignore[override]
+    def to_numpy(
         self,
-        dtype: NpDtype | None = None,
+        dtype: npt.DTypeLike | None = None,
         copy: bool = False,
         na_value=lib.no_default,
     ) -> np.ndarray:
diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py
index 4be7f4eb0c521c..9411d3535e06ff 100644
--- a/pandas/core/arrays/string_arrow.py
+++ b/pandas/core/arrays/string_arrow.py
@@ -24,6 +24,7 @@
     Scalar,
     ScalarIndexer,
     SequenceIndexer,
+    npt,
 )
 from pandas.compat import (
     pa_version_under1p0,
@@ -199,12 +200,9 @@ def __arrow_array__(self, type=None):
         """Convert myself to a pyarrow Array or ChunkedArray."""
         return self._data
 
-    # error: Argument 1 of "to_numpy" is incompatible with supertype "ExtensionArray";
-    # supertype defines the argument type as "Union[ExtensionDtype, str, dtype[Any],
-    # Type[str], Type[float], Type[int], Type[complex], Type[bool], Type[object], None]"
-    def to_numpy(  # type: ignore[override]
+    def to_numpy(
         self,
-        dtype: NpDtype | None = None,
+        dtype: npt.DTypeLike | None = None,
         copy: bool = False,
         na_value=lib.no_default,
     ) -> np.ndarray:
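The switch from ``Dtype``/``NpDtype`` to ``npt.DTypeLike`` in the ``to_numpy`` signatures above is a typing-only change; runtime behavior is unchanged. A small sketch (illustrative only) of the dtype-like arguments the new annotation is meant to accept:

    import numpy as np
    import pandas as pd

    ser = pd.Series([1, 2, None], dtype="Int64")

    # Strings, np.dtype instances, and scalar types are all numpy "dtype-like".
    a = ser.to_numpy(dtype="float64", na_value=np.nan)
    b = ser.to_numpy(dtype=np.dtype("float64"), na_value=np.nan)
    c = ser.to_numpy(dtype=float, na_value=np.nan)

    assert a.dtype == b.dtype == c.dtype == np.dtype("float64")
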
diff --git a/pandas/core/base.py b/pandas/core/base.py
index c7a707fd5cd6ed..048831a74d1a5a 100644
--- a/pandas/core/base.py
+++ b/pandas/core/base.py
@@ -516,16 +516,8 @@ def to_numpy(
         """
         if is_extension_array_dtype(self.dtype):
             # error: Too many arguments for "to_numpy" of "ExtensionArray"
-
-            # error: Argument 1 to "to_numpy" of "ExtensionArray" has incompatible type
-            # "Optional[Union[dtype[Any], None, type, _SupportsDType[dtype[Any]], str,
-            # Union[Tuple[Any, int], Tuple[Any, Union[SupportsIndex,
-            # Sequence[SupportsIndex]]], List[Any], _DTypeDict, Tuple[Any, Any]]]]";
-            # expected "Optional[Union[ExtensionDtype, Union[str, dtype[Any]],
-            # Type[str], Type[float], Type[int], Type[complex], Type[bool],
-            # Type[object]]]"
             return self.array.to_numpy(  # type: ignore[call-arg]
-                dtype, copy=copy, na_value=na_value, **kwargs  # type: ignore[arg-type]
+                dtype, copy=copy, na_value=na_value, **kwargs
             )
         elif kwargs:
             bad_keys = list(kwargs.keys())[0]
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 469e0e3901721a..28c44ff64c3e15 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -792,24 +792,6 @@ def count(self) -> Series:
         )
         return self._reindex_output(result, fill_value=0)
 
-    def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None):
-        """Calculate pct_change of each value to previous entry in group"""
-        # TODO: Remove this conditional when #23918 is fixed
-        if freq:
-            return self.apply(
-                lambda x: x.pct_change(
-                    periods=periods, fill_method=fill_method, limit=limit, freq=freq
-                )
-            )
-        if fill_method is None:  # GH30463
-            fill_method = "pad"
-            limit = 0
-        filled = getattr(self, fill_method)(limit=limit)
-        fill_grp = filled.groupby(self.grouper.codes)
-        shifted = fill_grp.shift(periods=periods, freq=freq)
-
-        return (filled / shifted) - 1
-
     @doc(Series.nlargest)
     def nlargest(self, n: int = 5, keep: str = "first"):
         f = partial(Series.nlargest, n=n, keep=keep)
@@ -1086,14 +1068,10 @@ def _aggregate_item_by_item(self, func, *args, **kwargs) -> DataFrame:
         # test_resample_apply_product
         obj = self._obj_with_exclusions
 
-        result: dict[int | str, NDFrame] = {}
-        for i, item in enumerate(obj):
-            ser = obj.iloc[:, i]
-            colg = SeriesGroupBy(
-                ser, selection=item, grouper=self.grouper, exclusions=self.exclusions
-            )
+        result: dict[int, NDFrame] = {}
 
-            result[i] = colg.aggregate(func, *args, **kwargs)
+        for i, (item, sgb) in enumerate(self._iterate_column_groupbys(obj)):
+            result[i] = sgb.aggregate(func, *args, **kwargs)
 
         res_df = self.obj._constructor(result)
         res_df.columns = obj.columns
@@ -1168,11 +1146,7 @@ def _wrap_applied_output_series(
         applied_index = self._selected_obj._get_axis(self.axis)
         singular_series = len(values) == 1 and applied_index.nlevels == 1
 
-        # assign the name to this series
         if singular_series:
-            keys = self.grouper.group_keys_seq
-            values[0].name = keys[0]
-
             # GH2893
             # we have series in the values array, we want to
             # produce a series:
@@ -1372,14 +1346,7 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame:
         # gets here with non-unique columns
         output = {}
         inds = []
-        for i, col in enumerate(obj):
-            subset = obj.iloc[:, i]
-            sgb = SeriesGroupBy(
-                subset,
-                selection=col,
-                grouper=self.grouper,
-                exclusions=self.exclusions,
-            )
+        for i, (colname, sgb) in enumerate(self._iterate_column_groupbys(obj)):
             try:
                 output[i] = sgb.transform(wrapper)
             except TypeError:
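The ``SeriesGroupBy.pct_change`` override removed above duplicated the shared ``GroupBy.pct_change`` logic (the next file marks that shared method ``@final``). The shared path is essentially "forward-fill within the group, shift within the group, divide"; a toy sketch of that equivalence, illustrative only:

    import pandas as pd

    df = pd.DataFrame({"key": ["a", "a", "b", "b"], "val": [1.0, 2.0, 4.0, 6.0]})
    gb = df.groupby("key")["val"]

    # pct_change within groups == ffill within group, shift within group, divide
    filled = gb.ffill()
    shifted = filled.groupby(df["key"]).shift(periods=1)
    manual = (filled / shifted) - 1

    pd.testing.assert_series_equal(manual, gb.pct_change())
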
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 256276c3c91e31..a969b9d0ee1609 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -1037,7 +1037,7 @@ def reset_identity(values):
 
         if self.as_index:
             # possible MI return case
-            group_keys = self.grouper.group_keys_seq
+            group_keys = self.grouper.result_index
             group_levels = self.grouper.levels
             group_names = self.grouper.names
 
@@ -3236,6 +3236,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
         )
         return res
 
+    @final
     @Substitution(name="groupby")
     @Appender(_common_see_also)
     def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, axis=0):
@@ -3247,6 +3248,7 @@ def pct_change(self, periods=1, fill_method="pad", limit=None, freq=None, axis=0
         Series or DataFrame
             Percentage changes within each group.
         """
+        # TODO: Remove this conditional for SeriesGroupBy when GH#23918 is fixed
         if freq is not None or axis != 0:
             return self.apply(
                 lambda x: x.pct_change(
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index e3fcff1557ca95..da0f8d2549a8b2 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -228,6 +228,11 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
         # expected "ndarray")
         return self.values  # type: ignore[return-value]
 
+    def values_for_json(self) -> np.ndarray:
+        # Incompatible return value type (got "Union[ndarray[Any, Any],
+        # ExtensionArray]", expected "ndarray[Any, Any]")
+        return self.values  # type: ignore[return-value]
+
     @final
     @cache_readonly
     def fill_value(self):
@@ -1375,6 +1380,9 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
         # TODO(EA2D): reshape not needed with 2D EAs
         return np.asarray(values).reshape(self.shape)
 
+    def values_for_json(self) -> np.ndarray:
+        return np.asarray(self.values)
+
     def interpolate(
         self, method="pad", axis=0, inplace=False, limit=None, fill_value=None, **kwargs
     ):
@@ -1805,6 +1813,11 @@ class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
     is_numeric = False
     values: DatetimeArray | TimedeltaArray
 
+    def values_for_json(self) -> np.ndarray:
+        # special casing datetimetz to avoid conversion through
+        # object dtype
+        return self.values._ndarray
+
 
 class DatetimeTZBlock(DatetimeLikeBlock):
     """implement a datetime64 block with a tz attribute"""
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
index a9894ab5acf237..fa5f41d373e432 100644
--- a/pandas/core/internals/managers.py
+++ b/pandas/core/internals/managers.py
@@ -998,24 +998,25 @@ def column_arrays(self) -> list[np.ndarray]:
         """
         Used in the JSON C code to access column arrays.
-        This optimizes compared to using `iget_values` by converting each
-        block.values to a np.ndarray only once up front
         """
-        # special casing datetimetz to avoid conversion through object dtype
-        arrays = [
-            blk.values._ndarray
-            if isinstance(blk, DatetimeTZBlock)
-            else np.asarray(blk.values)
-            for blk in self.blocks
-        ]
-        result = []
-        for i in range(len(self.items)):
-            arr = arrays[self.blknos[i]]
-            if arr.ndim == 2:
-                values = arr[self.blklocs[i]]
+        # This is an optimized equivalent to
+        # result = [self.iget_values(i) for i in range(len(self.items))]
+        result: list[np.ndarray | None] = [None] * len(self.items)
+
+        for blk in self.blocks:
+            mgr_locs = blk._mgr_locs
+            values = blk.values_for_json()
+            if values.ndim == 1:
+                # TODO(EA2D): special casing not needed with 2D EAs
+                result[mgr_locs[0]] = values
+
             else:
-                values = arr
-            result.append(values)
-        return result
+                for i, loc in enumerate(mgr_locs):
+                    result[loc] = values[i]
+
+        # error: Incompatible return value type (got "List[None]",
+        # expected "List[ndarray[Any, Any]]")
+        return result  # type: ignore[return-value]
 
     def iset(self, loc: int | slice | np.ndarray, value: ArrayLike):
         """
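``values_for_json`` gives each block type its own hook for the JSON writer, so ``column_arrays`` above no longer special-cases ``DatetimeTZBlock`` inline. A quick way to see the dispatch once this change is in place (internal API, illustrative only):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(
        {
            "ts": pd.date_range("2021-01-01", periods=3, tz="UTC"),
            "x": np.arange(3.0),
        }
    )

    # The tz-aware block hands back its datetime64 backing array directly,
    # avoiding a round-trip through object dtype when serializing to JSON.
    for blk in df._mgr.blocks:
        print(type(blk).__name__, blk.values_for_json().dtype)
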
This optimizes compared to using `iget_values` by converting each - block.values to a np.ndarray only once up front """ - # special casing datetimetz to avoid conversion through object dtype - arrays = [ - blk.values._ndarray - if isinstance(blk, DatetimeTZBlock) - else np.asarray(blk.values) - for blk in self.blocks - ] - result = [] - for i in range(len(self.items)): - arr = arrays[self.blknos[i]] - if arr.ndim == 2: - values = arr[self.blklocs[i]] + # This is an optimized equivalent to + # result = [self.iget_values(i) for i in range(len(self.items))] + result: list[np.ndarray | None] = [None] * len(self.items) + + for blk in self.blocks: + mgr_locs = blk._mgr_locs + values = blk.values_for_json() + if values.ndim == 1: + # TODO(EA2D): special casing not needed with 2D EAs + result[mgr_locs[0]] = values + else: - values = arr - result.append(values) - return result + for i, loc in enumerate(mgr_locs): + result[loc] = values[i] + + # error: Incompatible return value type (got "List[None]", + # expected "List[ndarray[Any, Any]]") + return result # type: ignore[return-value] def iset(self, loc: int | slice | np.ndarray, value: ArrayLike): """ diff --git a/pandas/io/common.py b/pandas/io/common.py index 5c5b9c65b8abd1..46be1f9bb09b2e 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -17,6 +17,7 @@ ) import mmap import os +from pathlib import Path import tempfile from typing import ( IO, @@ -520,6 +521,21 @@ def infer_compression( raise ValueError(msg) +def check_parent_directory(path: Path | str) -> None: + """ + Check if parent directory of a file exists, raise OSError if it does not + + Parameters + ---------- + path: Path or str + Path to check parent directory of + + """ + parent = Path(path).parent + if not parent.is_dir(): + raise OSError(fr"Cannot save file into a non-existent directory: '{parent}'") + + def get_handle( path_or_buf: FilePathOrBuffer, mode: str, @@ -632,6 +648,10 @@ def get_handle( compression_args = dict(ioargs.compression) compression = compression_args.pop("method") + # Only for write methods + if "r" not in mode and is_path: + check_parent_directory(str(handle)) + if compression: # compression libraries do not like an explicit text-mode ioargs.mode = ioargs.mode.replace("t", "") diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 3fd3d84f901616..d636838d21d0e2 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -96,7 +96,10 @@ from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.core.reshape.concat import concat -from pandas.io.common import stringify_path +from pandas.io.common import ( + check_parent_directory, + stringify_path, +) from pandas.io.formats.printing import ( adjoin, justify, @@ -1147,6 +1150,7 @@ def get_buffer(buf: FilePathOrBuffer[str] | None, encoding: str | None = None): if hasattr(buf, "write"): yield buf elif isinstance(buf, str): + check_parent_directory(str(buf)) with open(buf, "w", encoding=encoding, newline="") as f: # GH#30034 open instead of codecs.open prevents a file leak # if we have an invalid encoding argument. 
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index 3fd3d84f901616..d636838d21d0e2 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -96,7 +96,10 @@
 from pandas.core.indexes.timedeltas import TimedeltaIndex
 from pandas.core.reshape.concat import concat
 
-from pandas.io.common import stringify_path
+from pandas.io.common import (
+    check_parent_directory,
+    stringify_path,
+)
 from pandas.io.formats.printing import (
     adjoin,
     justify,
@@ -1147,6 +1150,7 @@ def get_buffer(buf: FilePathOrBuffer[str] | None, encoding: str | None = None):
     if hasattr(buf, "write"):
         yield buf
     elif isinstance(buf, str):
+        check_parent_directory(str(buf))
         with open(buf, "w", encoding=encoding, newline="") as f:
             # GH#30034 open instead of codecs.open prevents a file leak
             # if we have an invalid encoding argument.
diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py
index f40fb8cba34351..0debe39eb294dd 100644
--- a/pandas/tests/groupby/test_timegrouper.py
+++ b/pandas/tests/groupby/test_timegrouper.py
@@ -24,16 +24,11 @@
 
 
 @pytest.fixture
-def groupby_with_truncated_bingrouper():
+def frame_for_truncated_bingrouper():
     """
-    GroupBy object such that gb.grouper is a BinGrouper and
-    len(gb.grouper.result_index) < len(gb.grouper.group_keys_seq)
-
-    Aggregations on this groupby should have
-
-    dti = date_range("2013-09-01", "2013-10-01", freq="5D", name="Date")
-
-    As either the index or an index level.
+    DataFrame used by groupby_with_truncated_bingrouper, made into
+    a separate fixture for easier re-use in
+    test_groupby_apply_timegrouper_with_nat_apply_squeeze
     """
     df = DataFrame(
         {
@@ -48,6 +43,22 @@ def groupby_with_truncated_bingrouper():
             ],
         }
     )
+    return df
+
+
+@pytest.fixture
+def groupby_with_truncated_bingrouper(frame_for_truncated_bingrouper):
+    """
+    GroupBy object such that gb.grouper is a BinGrouper and
+    len(gb.grouper.result_index) < len(gb.grouper.group_keys_seq)
+
+    Aggregations on this groupby should have
+
+    dti = date_range("2013-09-01", "2013-10-01", freq="5D", name="Date")
+
+    As either the index or an index level.
+    """
+    df = frame_for_truncated_bingrouper
 
     tdg = Grouper(key="Date", freq="5D")
     gb = df.groupby(tdg)
@@ -847,3 +858,31 @@ def test_groupby_apply_timegrouper_with_nat_scalar_returns(
         )
 
         tm.assert_series_equal(res, expected)
+
+    def test_groupby_apply_timegrouper_with_nat_apply_squeeze(
+        self, frame_for_truncated_bingrouper
+    ):
+        df = frame_for_truncated_bingrouper
+
+        # We need to create a GroupBy object with only one non-NaT group,
+        # so use a huge freq so that all non-NaT dates will be grouped together
+        tdg = Grouper(key="Date", freq="100Y")
+
+        with tm.assert_produces_warning(FutureWarning, match="`squeeze` parameter"):
+            gb = df.groupby(tdg, squeeze=True)
+
+        # check that we will go through the singular_series path
+        # in _wrap_applied_output_series
+        assert gb.ngroups == 1
+        assert gb._selected_obj._get_axis(gb.axis).nlevels == 1
+
+        # function that returns a Series
+        res = gb.apply(lambda x: x["Quantity"] * 2)
+
+        key = Timestamp("2013-12-31")
+        ordering = df["Date"].sort_values().dropna().index
+        mi = MultiIndex.from_product([[key], ordering], names=["Date", None])
+
+        ex_values = df["Quantity"].take(ordering).values * 2
+        expected = Series(ex_values, index=mi, name="Quantity")
+        tm.assert_series_equal(res, expected)
diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py
index fc834c7acf39fa..ad0b25d26d6f61 100644
--- a/pandas/tests/io/test_common.py
+++ b/pandas/tests/io/test_common.py
@@ -227,6 +227,33 @@ def test_read_non_existent(self, reader, module, error_class, fn_ext):
         ):
             reader(path)
 
+    @pytest.mark.parametrize(
+        "method, module, error_class, fn_ext",
+        [
+            (pd.DataFrame.to_csv, "os", OSError, "csv"),
+            (pd.DataFrame.to_html, "os", OSError, "html"),
+            (pd.DataFrame.to_excel, "xlrd", OSError, "xlsx"),
+            (pd.DataFrame.to_feather, "pyarrow", OSError, "feather"),
+            (pd.DataFrame.to_parquet, "pyarrow", OSError, "parquet"),
+            (pd.DataFrame.to_stata, "os", OSError, "dta"),
+            (pd.DataFrame.to_json, "os", OSError, "json"),
+            (pd.DataFrame.to_pickle, "os", OSError, "pickle"),
+        ],
+    )
+    # NOTE: Missing parent directory for pd.DataFrame.to_hdf is handled by PyTables
+    def test_write_missing_parent_directory(self, method, module, error_class, fn_ext):
+        pytest.importorskip(module)
+
+        dummy_frame = pd.DataFrame({"a": [1, 2, 3], "b": [2, 3, 4], "c": [3, 4, 5]})
+
+        path = os.path.join(HERE, "data", "missing_folder", "does_not_exist." + fn_ext)
+
+        with pytest.raises(
+            error_class,
+            match=r"Cannot save file into a non-existent directory: .*missing_folder",
+        ):
+            method(dummy_frame, path)
+
     @pytest.mark.parametrize(
         "reader, module, error_class, fn_ext",
         [
diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py
index 4f4815b9008ad4..b8d146c597d2c4 100644
--- a/pandas/tests/io/xml/test_to_xml.py
+++ b/pandas/tests/io/xml/test_to_xml.py
@@ -202,10 +202,13 @@ def test_str_output(datapath, parser):
 
 
 def test_wrong_file_path(parser):
+    path = "/my/fake/path/output.xml"
+
     with pytest.raises(
-        FileNotFoundError, match=("No such file or directory|没有那个文件或目录")
+        OSError,
+        match=(r"Cannot save file into a non-existent directory: .*path"),
     ):
-        geom_df.to_xml("/my/fake/path/output.xml", parser=parser)
+        geom_df.to_xml(path, parser=parser)
 
 
 # INDEX