From 614e3af3a5802dd75fff73a9ceeebbcc8132e547 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 5 Jan 2019 08:52:43 -0600 Subject: [PATCH] Array api docs (#24626) --- doc/source/10min.rst | 2 +- doc/source/api/arrays.rst | 401 +++++++++++++++++++++++++++++++ doc/source/api/index.rst | 2 +- doc/source/api/scalars.rst | 204 ---------------- doc/source/api/series.rst | 162 ++++++------- doc/source/basics.rst | 2 +- doc/source/categorical.rst | 2 +- doc/source/comparison_with_r.rst | 2 +- doc/source/groupby.rst | 2 +- doc/source/whatsnew/v0.15.0.rst | 4 +- pandas/core/arrays/array_.py | 14 +- pandas/core/arrays/sparse.py | 18 +- pandas/core/frame.py | 10 +- pandas/core/generic.py | 12 +- 14 files changed, 505 insertions(+), 332 deletions(-) create mode 100644 doc/source/api/arrays.rst diff --git a/doc/source/10min.rst b/doc/source/10min.rst index a7557e6e1d1c24..972b562cfebba4 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -657,7 +657,7 @@ Categoricals ------------ pandas can include categorical data in a ``DataFrame``. For full docs, see the -:ref:`categorical introduction <categorical>` and the :ref:`API documentation <api.categorical>`. +:ref:`categorical introduction <categorical>` and the :ref:`API documentation <api.arrays.categorical>`. .. ipython:: python diff --git a/doc/source/api/arrays.rst b/doc/source/api/arrays.rst new file mode 100644 index 00000000000000..d8ce2ab7bf73e0 --- /dev/null +++ b/doc/source/api/arrays.rst @@ -0,0 +1,401 @@ +{{ header }} + +.. _api.arrays: + +============= +Pandas Arrays +============= + +.. currentmodule:: pandas + +For most data types, pandas uses NumPy arrays as the concrete +objects contained within a :class:`Index`, :class:`Series`, or +:class:`DataFrame`. + +For some data types, pandas extends NumPy's type system. 
+ +=================== ========================= ================== ============================= +Kind of Data Pandas Data Type Scalar Array +=================== ========================= ================== ============================= +TZ-aware datetime :class:`DatetimeTZDtype` :class:`Timestamp` :ref:`api.arrays.datetime` +Timedeltas (none) :class:`Timedelta` :ref:`api.arrays.timedelta` +Period (time spans) :class:`PeriodDtype` :class:`Period` :ref:`api.arrays.period` +Intervals :class:`IntervalDtype` :class:`Interval` :ref:`api.arrays.interval` +Nullable Integer :class:`Int64Dtype`, ... (none) :ref:`api.arrays.integer_na` +Categorical :class:`CategoricalDtype` (none) :ref:`api.arrays.categorical` +Sparse :class:`SparseDtype` (none) :ref:`api.arrays.sparse` +=================== ========================= ================== ============================= + +Pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`). +The top-level :meth:`array` method can be used to create a new array, which may be +stored in a :class:`Series`, :class:`Index`, or as a column in a :class:`DataFrame`. + +.. autosummary:: + :toctree: generated/ + + array + +.. _api.arrays.datetime: + +Datetime Data +------------- + +NumPy cannot natively represent timezone-aware datetimes. Pandas supports this +with the :class:`arrays.DatetimeArray` extension array, which can hold timezone-naive +or timezone-aware values. + +:class:`Timestamp`, a subclass of :class:`datetime.datetime`, is pandas' +scalar type for timezone-naive or timezone-aware datetime data. + +.. autosummary:: + :toctree: generated/ + + Timestamp + +Properties +~~~~~~~~~~ +.. 
autosummary:: + :toctree: generated/ + + Timestamp.asm8 + Timestamp.day + Timestamp.dayofweek + Timestamp.dayofyear + Timestamp.days_in_month + Timestamp.daysinmonth + Timestamp.fold + Timestamp.hour + Timestamp.is_leap_year + Timestamp.is_month_end + Timestamp.is_month_start + Timestamp.is_quarter_end + Timestamp.is_quarter_start + Timestamp.is_year_end + Timestamp.is_year_start + Timestamp.max + Timestamp.microsecond + Timestamp.min + Timestamp.minute + Timestamp.month + Timestamp.nanosecond + Timestamp.quarter + Timestamp.resolution + Timestamp.second + Timestamp.tz + Timestamp.tzinfo + Timestamp.value + Timestamp.week + Timestamp.weekofyear + Timestamp.year + +Methods +~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Timestamp.astimezone + Timestamp.ceil + Timestamp.combine + Timestamp.ctime + Timestamp.date + Timestamp.day_name + Timestamp.dst + Timestamp.floor + Timestamp.freq + Timestamp.freqstr + Timestamp.fromordinal + Timestamp.fromtimestamp + Timestamp.isocalendar + Timestamp.isoformat + Timestamp.isoweekday + Timestamp.month_name + Timestamp.normalize + Timestamp.now + Timestamp.replace + Timestamp.round + Timestamp.strftime + Timestamp.strptime + Timestamp.time + Timestamp.timestamp + Timestamp.timetuple + Timestamp.timetz + Timestamp.to_datetime64 + Timestamp.to_julian_date + Timestamp.to_period + Timestamp.to_pydatetime + Timestamp.today + Timestamp.toordinal + Timestamp.tz_convert + Timestamp.tz_localize + Timestamp.tzname + Timestamp.utcfromtimestamp + Timestamp.utcnow + Timestamp.utcoffset + Timestamp.utctimetuple + Timestamp.weekday + +A collection of timestamps may be stored in a :class:`arrays.DatetimeArray`. +For timezone-aware data, the ``.dtype`` of a ``DatetimeArray`` is a +:class:`DatetimeTZDtype`. For timezone-naive data, ``np.dtype("datetime64[ns]")`` +is used. + +If the data are tz-aware, then every value in the array must have the same timezone. + +.. 
autosummary:: + :toctree: generated/ + + arrays.DatetimeArray + DatetimeTZDtype + +.. _api.arrays.timedelta: + +Timedelta Data +-------------- + +NumPy can natively represent timedeltas. Pandas provides :class:`Timedelta` +for symmetry with :class:`Timestamp`. + +.. autosummary:: + :toctree: generated/ + + Timedelta + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Timedelta.asm8 + Timedelta.components + Timedelta.days + Timedelta.delta + Timedelta.freq + Timedelta.is_populated + Timedelta.max + Timedelta.microseconds + Timedelta.min + Timedelta.nanoseconds + Timedelta.resolution + Timedelta.seconds + Timedelta.value + Timedelta.view + +Methods +~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Timedelta.ceil + Timedelta.floor + Timedelta.isoformat + Timedelta.round + Timedelta.to_pytimedelta + Timedelta.to_timedelta64 + Timedelta.total_seconds + +A collection of timedeltas may be stored in a :class:`arrays.TimedeltaArray`. + +.. autosummary:: + :toctree: generated/ + + arrays.TimedeltaArray + +.. _api.arrays.period: + +Timespan Data +------------- + +Pandas represents spans of time as :class:`Period` objects. + +Period +------ +.. autosummary:: + :toctree: generated/ + + Period + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Period.day + Period.dayofweek + Period.dayofyear + Period.days_in_month + Period.daysinmonth + Period.end_time + Period.freq + Period.freqstr + Period.hour + Period.is_leap_year + Period.minute + Period.month + Period.ordinal + Period.quarter + Period.qyear + Period.second + Period.start_time + Period.week + Period.weekday + Period.weekofyear + Period.year + +Methods +~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Period.asfreq + Period.now + Period.strftime + Period.to_timestamp + +A collection of periods may be stored in a :class:`arrays.PeriodArray`. +Every period in a ``PeriodArray`` must have the same ``freq``. + +.. 
autosummary:: + :toctree: generated/ + + arrays.PeriodArray + PeriodDtype + +.. _api.arrays.interval: + +Interval Data +------------- + +Arbitrary intervals can be represented as :class:`Interval` objects. + +.. autosummary:: + :toctree: generated/ + + Interval + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Interval.closed + Interval.closed_left + Interval.closed_right + Interval.left + Interval.length + Interval.mid + Interval.open_left + Interval.open_right + Interval.overlaps + Interval.right + +A collection of intervals may be stored in an :class:`IntervalArray`. + +.. autosummary:: + :toctree: generated/ + + IntervalArray + IntervalDtype + +.. _api.arrays.integer_na: + +Nullable Integer +---------------- + +:class:`numpy.ndarray` cannot natively represent integer data with missing values. +Pandas provides this through :class:`arrays.IntegerArray`. + +.. autosummary:: + :toctree: generated/ + + arrays.IntegerArray + Int8Dtype + Int16Dtype + Int32Dtype + Int64Dtype + UInt8Dtype + UInt16Dtype + UInt32Dtype + UInt64Dtype + +.. _api.arrays.categorical: + +Categorical Data +---------------- + +Pandas defines a custom data type for representing data that can take only a +limited, fixed set of values. The dtype of a ``Categorical`` can be described by +a :class:`pandas.api.types.CategoricalDtype`. + +.. autosummary:: + :toctree: generated/ + :template: autosummary/class_without_autosummary.rst + + api.types.CategoricalDtype + +.. autosummary:: + :toctree: generated/ + + api.types.CategoricalDtype.categories + api.types.CategoricalDtype.ordered + +Categorical data can be stored in a :class:`pandas.Categorical` + +.. autosummary:: + :toctree: generated/ + :template: autosummary/class_without_autosummary.rst + + Categorical + +The alternative :meth:`Categorical.from_codes` constructor can be used when you +have the categories and integer codes already: + +.. 
autosummary:: + :toctree: generated/ + + Categorical.from_codes + +The dtype information is available on the ``Categorical`` + +.. autosummary:: + :toctree: generated/ + + Categorical.dtype + Categorical.categories + Categorical.ordered + Categorical.codes + +``np.asarray(categorical)`` works by implementing the array interface. Be aware, that this converts +the Categorical back to a NumPy array, so categories and order information is not preserved! + +.. autosummary:: + :toctree: generated/ + + Categorical.__array__ + +A ``Categorical`` can be stored in a ``Series`` or ``DataFrame``. +To create a Series of dtype ``category``, use ``cat = s.astype(dtype)`` or +``Series(..., dtype=dtype)`` where ``dtype`` is either + +* the string ``'category'`` +* an instance of :class:`~pandas.api.types.CategoricalDtype`. + +If the Series is of dtype ``CategoricalDtype``, ``Series.cat`` can be used to change the categorical +data. See :ref:`api.series.cat` for more. + +.. _api.arrays.sparse: + +Sparse Data +----------- + +Data where a single value is repeated many times (e.g. ``0`` or ``NaN``) may +be stored efficiently as a :class:`SparseArray`. + +.. autosummary:: + :toctree: generated/ + + SparseArray + SparseDtype + +The ``Series.sparse`` accessor may be used to access sparse-specific attributes +and methods if the :class:`Series` contains sparse values. See +:ref:`api.series.sparse` for more. diff --git a/doc/source/api/index.rst b/doc/source/api/index.rst index 0bd89fc826a211..e4d118e278128f 100644 --- a/doc/source/api/index.rst +++ b/doc/source/api/index.rst @@ -26,9 +26,9 @@ public functions related to data types in pandas. general_functions series frame + arrays panel indexing - scalars offset_frequency window groupby diff --git a/doc/source/api/scalars.rst b/doc/source/api/scalars.rst index 662a4d5a8fcfe5..e69de29bb2d1d6 100644 --- a/doc/source/api/scalars.rst +++ b/doc/source/api/scalars.rst @@ -1,204 +0,0 @@ -{{ header }} - -.. 
_api.scalars: - -======= -Scalars -======= -.. currentmodule:: pandas - -Period ------- -.. autosummary:: - :toctree: generated/ - - Period - -Properties -~~~~~~~~~~ -.. autosummary:: - :toctree: generated/ - - Period.day - Period.dayofweek - Period.dayofyear - Period.days_in_month - Period.daysinmonth - Period.end_time - Period.freq - Period.freqstr - Period.hour - Period.is_leap_year - Period.minute - Period.month - Period.ordinal - Period.quarter - Period.qyear - Period.second - Period.start_time - Period.week - Period.weekday - Period.weekofyear - Period.year - -Methods -~~~~~~~ -.. autosummary:: - :toctree: generated/ - - Period.asfreq - Period.now - Period.strftime - Period.to_timestamp - -Timestamp ---------- -.. autosummary:: - :toctree: generated/ - - Timestamp - -Properties -~~~~~~~~~~ -.. autosummary:: - :toctree: generated/ - - Timestamp.asm8 - Timestamp.day - Timestamp.dayofweek - Timestamp.dayofyear - Timestamp.days_in_month - Timestamp.daysinmonth - Timestamp.fold - Timestamp.hour - Timestamp.is_leap_year - Timestamp.is_month_end - Timestamp.is_month_start - Timestamp.is_quarter_end - Timestamp.is_quarter_start - Timestamp.is_year_end - Timestamp.is_year_start - Timestamp.max - Timestamp.microsecond - Timestamp.min - Timestamp.minute - Timestamp.month - Timestamp.nanosecond - Timestamp.quarter - Timestamp.resolution - Timestamp.second - Timestamp.tz - Timestamp.tzinfo - Timestamp.value - Timestamp.week - Timestamp.weekofyear - Timestamp.year - -Methods -~~~~~~~ -.. 
autosummary:: - :toctree: generated/ - - Timestamp.astimezone - Timestamp.ceil - Timestamp.combine - Timestamp.ctime - Timestamp.date - Timestamp.day_name - Timestamp.dst - Timestamp.floor - Timestamp.freq - Timestamp.freqstr - Timestamp.fromordinal - Timestamp.fromtimestamp - Timestamp.isocalendar - Timestamp.isoformat - Timestamp.isoweekday - Timestamp.month_name - Timestamp.normalize - Timestamp.now - Timestamp.replace - Timestamp.round - Timestamp.strftime - Timestamp.strptime - Timestamp.time - Timestamp.timestamp - Timestamp.timetuple - Timestamp.timetz - Timestamp.to_datetime64 - Timestamp.to_julian_date - Timestamp.to_period - Timestamp.to_pydatetime - Timestamp.today - Timestamp.toordinal - Timestamp.tz_convert - Timestamp.tz_localize - Timestamp.tzname - Timestamp.utcfromtimestamp - Timestamp.utcnow - Timestamp.utcoffset - Timestamp.utctimetuple - Timestamp.weekday - -Interval --------- -.. autosummary:: - :toctree: generated/ - - Interval - -Properties -~~~~~~~~~~ -.. autosummary:: - :toctree: generated/ - - Interval.closed - Interval.closed_left - Interval.closed_right - Interval.left - Interval.length - Interval.mid - Interval.open_left - Interval.open_right - Interval.overlaps - Interval.right - -Timedelta ---------- -.. autosummary:: - :toctree: generated/ - - Timedelta - -Properties -~~~~~~~~~~ -.. autosummary:: - :toctree: generated/ - - Timedelta.asm8 - Timedelta.components - Timedelta.days - Timedelta.delta - Timedelta.freq - Timedelta.is_populated - Timedelta.max - Timedelta.microseconds - Timedelta.min - Timedelta.nanoseconds - Timedelta.resolution - Timedelta.seconds - Timedelta.value - Timedelta.view - -Methods -~~~~~~~ -.. 
autosummary:: - :toctree: generated/ - - Timedelta.ceil - Timedelta.floor - Timedelta.isoformat - Timedelta.round - Timedelta.to_pytimedelta - Timedelta.to_timedelta64 - Timedelta.total_seconds diff --git a/doc/source/api/series.rst b/doc/source/api/series.rst index 8e4c378b9fefe8..1631f04b1c72f2 100644 --- a/doc/source/api/series.rst +++ b/doc/source/api/series.rst @@ -281,14 +281,34 @@ Time series-related Series.tshift Series.slice_shift +Accessors +--------- + +Pandas provides dtype-specific methods under various accessors. +These are separate namespaces within :class:`Series` that only apply +to specific data types. + +=========================== ================================= +Data Type Accessor +=========================== ================================= +Datetime, Timedelta, Period :ref:`dt ` +String :ref:`str ` +Categorical :ref:`cat ` +Sparse :ref:`sparse ` +=========================== ================================= + +.. _api.series.dt: + Datetimelike Properties ------------------------ +~~~~~~~~~~~~~~~~~~~~~~~ + ``Series.dt`` can be used to access the values of the series as datetimelike and return several properties. These can be accessed like ``Series.dt.``. Datetime Properties -~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^ + .. autosummary:: :toctree: generated/ :template: autosummary/accessor_attribute.rst @@ -323,7 +343,8 @@ Datetime Properties Series.dt.freq Datetime Methods -~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^ + .. autosummary:: :toctree: generated/ :template: autosummary/accessor_method.rst @@ -341,7 +362,8 @@ Datetime Methods Series.dt.day_name Period Properties -~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^ + .. autosummary:: :toctree: generated/ :template: autosummary/accessor_attribute.rst @@ -351,7 +373,8 @@ Period Properties Series.dt.end_time Timedelta Properties -~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^ + .. 
autosummary:: :toctree: generated/ :template: autosummary/accessor_attribute.rst @@ -363,7 +386,8 @@ Timedelta Properties Series.dt.components Timedelta Methods -~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^ + .. autosummary:: :toctree: generated/ :template: autosummary/accessor_method.rst @@ -371,8 +395,12 @@ Timedelta Methods Series.dt.to_pytimedelta Series.dt.total_seconds + +.. _api.series.str: + String handling ---------------- +~~~~~~~~~~~~~~~ + ``Series.str`` can be used to access the values of the series as strings and apply several methods to it. These can be accessed like ``Series.str.``. @@ -448,82 +476,13 @@ strings and apply several methods to it. These can be accessed like Series.dt Index.str -.. _api.arrays: - -Arrays ------- -Pandas and third-party libraries can extend NumPy's type system (see :ref:`extending.extension-types`). - -.. autosummary:: - :toctree: generated/ - - array - -.. _api.categorical: - -Categorical -~~~~~~~~~~~ - -Pandas defines a custom data type for representing data that can take only a -limited, fixed set of values. The dtype of a ``Categorical`` can be described by -a :class:`pandas.api.types.CategoricalDtype`. - -.. autosummary:: - :toctree: generated/ - :template: autosummary/class_without_autosummary.rst - - api.types.CategoricalDtype - -.. autosummary:: - :toctree: generated/ - - api.types.CategoricalDtype.categories - api.types.CategoricalDtype.ordered - -Categorical data can be stored in a :class:`pandas.Categorical` - -.. autosummary:: - :toctree: generated/ - :template: autosummary/class_without_autosummary.rst - - Categorical - -The alternative :meth:`Categorical.from_codes` constructor can be used when you -have the categories and integer codes already: - -.. autosummary:: - :toctree: generated/ - - Categorical.from_codes - -The dtype information is available on the ``Categorical`` - -.. autosummary:: - :toctree: generated/ +.. 
_api.series.cat: - Categorical.dtype - Categorical.categories - Categorical.ordered - Categorical.codes - -``np.asarray(categorical)`` works by implementing the array interface. Be aware, that this converts -the Categorical back to a NumPy array, so categories and order information is not preserved! - -.. autosummary:: - :toctree: generated/ - - Categorical.__array__ - -A ``Categorical`` can be stored in a ``Series`` or ``DataFrame``. -To create a Series of dtype ``category``, use ``cat = s.astype(dtype)`` or -``Series(..., dtype=dtype)`` where ``dtype`` is either - -* the string ``'category'`` -* an instance of :class:`~pandas.api.types.CategoricalDtype`. +Categorical Accessor +~~~~~~~~~~~~~~~~~~~~ -If the Series is of dtype ``CategoricalDtype``, ``Series.cat`` can be used to change the categorical -data. This accessor is similar to the ``Series.dt`` or ``Series.str`` and has the -following usable methods and properties: +Categorical-dtype specific methods and attributes are available under +the ``Series.cat`` accessor. .. autosummary:: :toctree: generated/ @@ -546,6 +505,31 @@ following usable methods and properties: Series.cat.as_ordered Series.cat.as_unordered + +.. _api.series.sparse: + +Sparse Accessor +~~~~~~~~~~~~~~~ + +Sparse-dtype specific methods and attributes are provided under the +``Series.sparse`` accessor. + +.. autosummary:: + :toctree: generated/ + :template: autosummary/accessor_attribute.rst + + Series.sparse.npoints + Series.sparse.density + Series.sparse.fill_value + Series.sparse.sp_values + +.. autosummary:: + :toctree: generated/ + + Series.sparse.from_coo + Series.sparse.to_coo + + Plotting -------- ``Series.plot`` is both a callable method and a namespace attribute for @@ -597,25 +581,13 @@ Serialization / IO / Conversion Series.to_clipboard Series.to_latex + Sparse ------ + .. autosummary:: :toctree: generated/ SparseSeries.to_coo SparseSeries.from_coo -.. 
autosummary:: - :toctree: generated/ - :template: autosummary/accessor_attribute.rst - - Series.sparse.npoints - Series.sparse.density - Series.sparse.fill_value - Series.sparse.sp_values - -.. autosummary:: - :toctree: generated/ - - Series.sparse.from_coo - Series.sparse.to_coo diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 73ae26150b9465..13681485d2f692 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1947,7 +1947,7 @@ documentation sections for more on each type. =================== ========================= ================== ============================= ============================= Kind of Data Data Type Scalar Array Documentation =================== ========================= ================== ============================= ============================= -tz-aware datetime :class:`DatetimeArray` :class:`Timestamp` :class:`arrays.DatetimeArray` :ref:`timeseries.timezone` +tz-aware datetime :class:`DatetimeTZDtype` :class:`Timestamp` :class:`arrays.DatetimeArray` :ref:`timeseries.timezone` Categorical :class:`CategoricalDtype` (none) :class:`Categorical` :ref:`categorical` period (time spans) :class:`PeriodDtype` :class:`Period` :class:`arrays.PeriodArray` :ref:`timeseries.periods` sparse :class:`SparseDtype` (none) :class:`arrays.SparseArray` :ref:`sparse` diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index 68e39e68220a74..a6315c548b3827 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -34,7 +34,7 @@ The categorical data type is useful in the following cases: * As a signal to other Python libraries that this column should be treated as a categorical variable (e.g. to use suitable statistical methods or plot types). -See also the :ref:`API docs on categoricals`. +See also the :ref:`API docs on categoricals`. .. 
_categorical.objectcreation: diff --git a/doc/source/comparison_with_r.rst b/doc/source/comparison_with_r.rst index a0143d717105cb..dfd388125708ed 100644 --- a/doc/source/comparison_with_r.rst +++ b/doc/source/comparison_with_r.rst @@ -512,7 +512,7 @@ In pandas this is accomplished with ``pd.cut`` and ``astype("category")``: pd.Series([1, 2, 3, 2, 2, 3]).astype("category") For more details and examples see :ref:`categorical introduction <categorical>` and the -:ref:`API documentation <api.categorical>`. There is also a documentation regarding the +:ref:`API documentation <api.arrays.categorical>`. There is also a documentation regarding the :ref:`differences to R's factor <categorical.rfactor>`. diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index a37aa2644a8050..953f40d1afebea 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -1351,7 +1351,7 @@ important than their content, or as input to an algorithm which only accepts the integer encoding. (For more information about support in pandas for full categorical data, see the :ref:`Categorical introduction <categorical>` and the -:ref:`API documentation <api.categorical>`.) +:ref:`API documentation <api.arrays.categorical>`.) .. ipython:: python diff --git a/doc/source/whatsnew/v0.15.0.rst b/doc/source/whatsnew/v0.15.0.rst index 6f74f0393d1233..420125afd29a45 100644 --- a/doc/source/whatsnew/v0.15.0.rst +++ b/doc/source/whatsnew/v0.15.0.rst @@ -72,7 +72,7 @@ methods to manipulate. Thanks to Jan Schulz for much of this API/implementation. :issue:`8075`, :issue:`8076`, :issue:`8143`, :issue:`8453`, :issue:`8518`). For full docs, see the :ref:`categorical introduction <categorical>` and the -:ref:`API documentation <api.categorical>`. +:ref:`API documentation <api.arrays.categorical>`. .. ipython:: python :okwarning: @@ -101,7 +101,7 @@ For full docs, see the :ref:`categorical introduction <categorical>` and the - The ``Categorical.labels`` attribute was renamed to ``Categorical.codes`` and is read only. If you want to manipulate codes, please use one of the - :ref:`API methods on Categoricals <api.categorical>`. + :ref:`API methods on Categoricals <api.arrays.categorical>`. 
- The ``Categorical.levels`` attribute is renamed to ``Categorical.categories``. diff --git a/pandas/core/arrays/array_.py b/pandas/core/arrays/array_.py index 9b2240eb62906d..32c08e40b80337 100644 --- a/pandas/core/arrays/array_.py +++ b/pandas/core/arrays/array_.py @@ -47,13 +47,13 @@ def array(data, # type: Sequence[object] Currently, pandas will infer an extension dtype for sequences of ============================== ===================================== - scalar type Array Type - ============================= ===================================== - * :class:`pandas.Interval` :class:`pandas.IntervalArray` - * :class:`pandas.Period` :class:`pandas.arrays.PeriodArray` - * :class:`datetime.datetime` :class:`pandas.arrays.DatetimeArray` - * :class:`datetime.timedelta` :class:`pandas.arrays.TimedeltaArray` - ============================= ===================================== + Scalar Type Array Type + ============================== ===================================== + :class:`pandas.Interval` :class:`pandas.IntervalArray` + :class:`pandas.Period` :class:`pandas.arrays.PeriodArray` + :class:`datetime.datetime` :class:`pandas.arrays.DatetimeArray` + :class:`datetime.timedelta` :class:`pandas.arrays.TimedeltaArray` + ============================== ===================================== For all other cases, NumPy's usual inference rules will be used. diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 7c8f58c9a3203c..6114e578dc90fb 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -56,19 +56,19 @@ class SparseDtype(ExtensionDtype): ---------- dtype : str, ExtensionDtype, numpy.dtype, type, default numpy.float64 The dtype of the underlying array storing the non-fill value values. - fill_value : scalar, optional. + fill_value : scalar, optional The scalar value not stored in the SparseArray. By default, this depends on `dtype`. 
- ========== ========== - dtype na_value - ========== ========== - float ``np.nan`` - int ``0`` - bool ``False`` - datetime64 ``pd.NaT`` + =========== ========== + dtype na_value + =========== ========== + float ``np.nan`` + int ``0`` + bool ``False`` + datetime64 ``pd.NaT`` timedelta64 ``pd.NaT`` - ========== ========== + =========== ========== The default value may be overridden by specifying a `fill_value`. """ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a50def73578269..7659f0696008b3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6966,6 +6966,11 @@ def corr(self, method='pearson', min_periods=1): ------- y : DataFrame + See Also + -------- + DataFrame.corrwith + Series.corr + Examples -------- >>> histogram_intersection = lambda a, b: np.minimum(a, b @@ -6976,11 +6981,6 @@ def corr(self, method='pearson', min_periods=1): dogs cats dogs 1.0 0.3 cats 0.3 1.0 - - See Also - ------- - DataFrame.corrwith - Series.corr """ numeric_df = self._get_numeric_data() cols = numeric_df.columns diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d0555bd2e44b1b..b3c14bac91f172 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9223,7 +9223,10 @@ def _tz_convert(ax, tz): def tz_localize(self, tz, axis=0, level=None, copy=True, ambiguous='raise', nonexistent='raise'): """ - Localize tz-naive TimeSeries to target time zone. + Localize tz-naive index of a Series or DataFrame to target time zone. + + This operation localizes the Index. To localize the values in a + timezone-naive Series, use :meth:`Series.dt.tz_localize`. 
Parameters ---------- @@ -9250,10 +9253,9 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, - 'NaT' will return NaT where there are ambiguous times - 'raise' will raise an AmbiguousTimeError if there are ambiguous times - nonexistent : 'shift_forward', 'shift_backward, 'NaT', timedelta, - default 'raise' + nonexistent : str, default 'raise' A nonexistent time does not exist in a particular timezone - where clocks moved forward due to DST. + where clocks moved forward due to DST. Valid values are: - 'shift_forward' will shift the nonexistent time forward to the closest existing time @@ -9268,6 +9270,8 @@ def tz_localize(self, tz, axis=0, level=None, copy=True, Returns ------- + Series or DataFrame + Same type as the input. Raises ------