diff --git a/MANIFEST.in b/MANIFEST.in
index cf6a1835433a4..494ad69efbc56 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,9 +1,4 @@
-include MANIFEST.in
-include LICENSE
include RELEASE.md
-include README.md
-include setup.py
-include pyproject.toml
graft doc
prune doc/build
@@ -16,10 +11,12 @@ global-exclude *.bz2
global-exclude *.csv
global-exclude *.dta
global-exclude *.feather
+global-exclude *.tar
global-exclude *.gz
global-exclude *.h5
global-exclude *.html
global-exclude *.json
+global-exclude *.jsonl
global-exclude *.pickle
global-exclude *.png
global-exclude *.pyc
@@ -40,6 +37,11 @@ global-exclude .DS_Store
global-exclude .git*
global-exclude \#*
+# GH 39321
+# The csv_dir_path fixture checks that this directory exists, so prune the
+# whole directory to keep the related tests from running against the sdist.
+prune pandas/tests/io/parser/data
+
include versioneer.py
include pandas/_version.py
include pandas/io/formats/templates/*.tpl
diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml
index e833ea1f1f398..53ee212360475 100644
--- a/conda.recipe/meta.yaml
+++ b/conda.recipe/meta.yaml
@@ -19,7 +19,7 @@ requirements:
- pip
- cython
- numpy
- - setuptools >=3.3
+ - setuptools >=38.6.0
- python-dateutil >=2.7.3
- pytz
run:
diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
index 06e1af75053d3..1ee8e3401e7f4 100644
--- a/doc/source/getting_started/install.rst
+++ b/doc/source/getting_started/install.rst
@@ -219,7 +219,7 @@ Dependencies
================================================================ ==========================
Package Minimum supported version
================================================================ ==========================
-`setuptools `__ 24.2.0
+`setuptools `__ 38.6.0
`NumPy `__ 1.16.5
`python-dateutil `__ 2.7.3
`pytz `__ 2017.3
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 76bd95c1c5d9d..f6d79cea84839 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -164,6 +164,8 @@ If installed, we now require:
+-----------------+-----------------+----------+---------+
| mypy (dev) | 0.800 | | X |
+-----------------+-----------------+----------+---------+
+| setuptools | 38.6.0 | | X |
++-----------------+-----------------+----------+---------+
For `optional libraries `_ the general recommendation is to use the latest version.
The following table lists the lowest version per library that is currently being tested throughout the development of pandas.
diff --git a/pyproject.toml b/pyproject.toml
index 2b78147e9294d..9f11475234566 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,16 +1,17 @@
[build-system]
# Minimum requirements for the build system to execute.
-# See https://github.com/scipy/scipy/pull/10431 for the AIX issue.
+# See https://github.com/scipy/scipy/pull/12940 for the AIX issue.
requires = [
- "setuptools",
+ "setuptools>=38.6.0",
"wheel",
"Cython>=0.29.21,<3", # Note: sync with setup.py
- "numpy==1.16.5; python_version=='3.7' and platform_system!='AIX'",
- "numpy==1.17.3; python_version=='3.8' and platform_system!='AIX'",
- "numpy==1.16.5; python_version=='3.7' and platform_system=='AIX'",
- "numpy==1.17.3; python_version=='3.8' and platform_system=='AIX'",
+ "numpy==1.16.5; python_version=='3.7'",
+ "numpy==1.17.3; python_version=='3.8'",
"numpy; python_version>='3.9'",
]
+# Uncomment to enable PEP 517 once the versioneer problem is fixed.
+# https://github.com/python-versioneer/python-versioneer/issues/193
+# build-backend = "setuptools.build_meta"
[tool.black]
target-version = ['py37', 'py38']
diff --git a/setup.cfg b/setup.cfg
index a6d636704664e..5093ff81ad17f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,11 +1,65 @@
+[metadata]
+name = pandas
+description = Powerful data structures for data analysis, time series, and statistics
+long_description = file: README.md
+long_description_content_type = text/markdown
+url = https://pandas.pydata.org
+author = The Pandas Development Team
+author_email = pandas-dev@python.org
+license = BSD-3-Clause
+license_file = LICENSE
+platforms = any
+classifiers =
+ Development Status :: 5 - Production/Stable
+ Environment :: Console
+ Intended Audience :: Science/Research
+ License :: OSI Approved :: BSD License
+ Operating System :: OS Independent
+ Programming Language :: Cython
+ Programming Language :: Python
+ Programming Language :: Python :: 3
+ Programming Language :: Python :: 3 :: Only
+ Programming Language :: Python :: 3.7
+ Programming Language :: Python :: 3.8
+ Programming Language :: Python :: 3.9
+ Topic :: Scientific/Engineering
+project_urls =
+ Bug Tracker = https://github.com/pandas-dev/pandas/issues
+ Documentation = https://pandas.pydata.org/pandas-docs/stable
+ Source Code = https://github.com/pandas-dev/pandas
+
+[options]
+packages = find:
+install_requires =
+ numpy>=1.16.5
+ python-dateutil>=2.7.3
+ pytz>=2017.3
+python_requires = >=3.7.1
+include_package_data = True
+zip_safe = False
+
+[options.entry_points]
+pandas_plotting_backends =
+ matplotlib = pandas:plotting._matplotlib
+
+[options.extras_require]
+test =
+ hypothesis>=3.58
+ pytest>=5.0.1
+ pytest-xdist
+
+[options.package_data]
+* = templates/*, _libs/**/*.dll
[build_ext]
-inplace = 1
+inplace = True
+
+[options.packages.find]
+include = pandas, pandas.*
# See the docstring in versioneer.py for instructions. Note that you must
# re-run 'versioneer.py setup' after changing this section, and commit the
# resulting files.
-
[versioneer]
VCS = git
style = pep440
@@ -38,16 +92,16 @@ bootstrap =
import pandas as pd
np # avoiding error when importing again numpy or pandas
pd # (in some cases we want to do it to show users)
-ignore = E203, # space before : (needed for how black formats slicing)
- E402, # module level import not at top of file
- W503, # line break before binary operator
- # Classes/functions in different blocks can generate those errors
- E302, # expected 2 blank lines, found 0
- E305, # expected 2 blank lines after class or function definition, found 0
- # We use semicolon at the end to avoid displaying plot objects
- E703, # statement ends with a semicolon
- E711, # comparison to none should be 'if cond is none:'
-
+ignore =
+ E203, # space before : (needed for how black formats slicing)
+ E402, # module level import not at top of file
+ W503, # line break before binary operator
+ # Classes/functions in different blocks can generate those errors
+ E302, # expected 2 blank lines, found 0
+ E305, # expected 2 blank lines after class or function definition, found 0
+ # We use semicolon at the end to avoid displaying plot objects
+ E703, # statement ends with a semicolon
+ E711, # comparison to none should be 'if cond is none:'
exclude =
doc/source/development/contributing_docstring.rst,
# work around issue of undefined variable warnings
@@ -64,18 +118,18 @@ xfail_strict = True
filterwarnings =
error:Sparse:FutureWarning
error:The SparseArray:FutureWarning
-junit_family=xunit2
+junit_family = xunit2
[codespell]
-ignore-words-list=ba,blocs,coo,hist,nd,ser
-ignore-regex=https://(\w+\.)+
+ignore-words-list = ba,blocs,coo,hist,nd,ser
+ignore-regex = https://(\w+\.)+
[coverage:run]
branch = False
omit =
- */tests/*
- pandas/_typing.py
- pandas/_version.py
+ */tests/*
+ pandas/_typing.py
+ pandas/_version.py
plugins = Cython.Coverage
[coverage:report]
@@ -130,10 +184,10 @@ warn_unused_ignores = True
show_error_codes = True
[mypy-pandas.tests.*]
-check_untyped_defs=False
+check_untyped_defs = False
[mypy-pandas._version]
-check_untyped_defs=False
+check_untyped_defs = False
[mypy-pandas.io.clipboard]
-check_untyped_defs=False
+check_untyped_defs = False
diff --git a/setup.py b/setup.py
index f9c4a1158fee0..34c80925a80a8 100755
--- a/setup.py
+++ b/setup.py
@@ -18,7 +18,7 @@
import sys
import numpy
-from setuptools import Command, Extension, find_packages, setup
+from setuptools import Command, Extension, setup
from setuptools.command.build_ext import build_ext as _build_ext
import versioneer
@@ -34,7 +34,6 @@ def is_platform_mac():
return sys.platform == "darwin"
-min_numpy_ver = "1.16.5"
min_cython_ver = "0.29.21" # note: sync with pyproject.toml
try:
@@ -99,96 +98,6 @@ def build_extensions(self):
super().build_extensions()
-DESCRIPTION = "Powerful data structures for data analysis, time series, and statistics"
-LONG_DESCRIPTION = """
-**pandas** is a Python package that provides fast, flexible, and expressive data
-structures designed to make working with structured (tabular, multidimensional,
-potentially heterogeneous) and time series data both easy and intuitive. It
-aims to be the fundamental high-level building block for doing practical,
-**real world** data analysis in Python. Additionally, it has the broader goal
-of becoming **the most powerful and flexible open source data analysis /
-manipulation tool available in any language**. It is already well on its way
-toward this goal.
-
-pandas is well suited for many different kinds of data:
-
- - Tabular data with heterogeneously-typed columns, as in an SQL table or
- Excel spreadsheet
- - Ordered and unordered (not necessarily fixed-frequency) time series data.
- - Arbitrary matrix data (homogeneously typed or heterogeneous) with row and
- column labels
- - Any other form of observational / statistical data sets. The data actually
- need not be labeled at all to be placed into a pandas data structure
-
-The two primary data structures of pandas, Series (1-dimensional) and DataFrame
-(2-dimensional), handle the vast majority of typical use cases in finance,
-statistics, social science, and many areas of engineering. For R users,
-DataFrame provides everything that R's ``data.frame`` provides and much
-more. pandas is built on top of `NumPy `__ and is
-intended to integrate well within a scientific computing environment with many
-other 3rd party libraries.
-
-Here are just a few of the things that pandas does well:
-
- - Easy handling of **missing data** (represented as NaN) in floating point as
- well as non-floating point data
- - Size mutability: columns can be **inserted and deleted** from DataFrame and
- higher dimensional objects
- - Automatic and explicit **data alignment**: objects can be explicitly
- aligned to a set of labels, or the user can simply ignore the labels and
- let `Series`, `DataFrame`, etc. automatically align the data for you in
- computations
- - Powerful, flexible **group by** functionality to perform
- split-apply-combine operations on data sets, for both aggregating and
- transforming data
- - Make it **easy to convert** ragged, differently-indexed data in other
- Python and NumPy data structures into DataFrame objects
- - Intelligent label-based **slicing**, **fancy indexing**, and **subsetting**
- of large data sets
- - Intuitive **merging** and **joining** data sets
- - Flexible **reshaping** and pivoting of data sets
- - **Hierarchical** labeling of axes (possible to have multiple labels per
- tick)
- - Robust IO tools for loading data from **flat files** (CSV and delimited),
- Excel files, databases, and saving / loading data from the ultrafast **HDF5
- format**
- - **Time series**-specific functionality: date range generation and frequency
- conversion, moving window statistics, date shifting and lagging.
-
-Many of these principles are here to address the shortcomings frequently
-experienced using other languages / scientific research environments. For data
-scientists, working with data is typically divided into multiple stages:
-munging and cleaning data, analyzing / modeling it, then organizing the results
-of the analysis into a form suitable for plotting or tabular display. pandas is
-the ideal tool for all of these tasks.
-"""
-
-DISTNAME = "pandas"
-LICENSE = "BSD"
-AUTHOR = "The PyData Development Team"
-EMAIL = "pydata@googlegroups.com"
-URL = "https://pandas.pydata.org"
-DOWNLOAD_URL = ""
-PROJECT_URLS = {
- "Bug Tracker": "https://github.com/pandas-dev/pandas/issues",
- "Documentation": "https://pandas.pydata.org/pandas-docs/stable/",
- "Source Code": "https://github.com/pandas-dev/pandas",
-}
-CLASSIFIERS = [
- "Development Status :: 5 - Production/Stable",
- "Environment :: Console",
- "Operating System :: OS Independent",
- "Intended Audience :: Science/Research",
- "Programming Language :: Python",
- "Programming Language :: Python :: 3",
- "Programming Language :: Python :: 3.7",
- "Programming Language :: Python :: 3.8",
- "Programming Language :: Python :: 3.9",
- "Programming Language :: Cython",
- "Topic :: Scientific/Engineering",
-]
-
-
class CleanCommand(Command):
"""Custom distutils command to clean the .so and .pyc files."""
@@ -711,51 +620,11 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
# ----------------------------------------------------------------------
-def setup_package():
- setuptools_kwargs = {
- "install_requires": [
- "python-dateutil >= 2.7.3",
- "pytz >= 2017.3",
- f"numpy >= {min_numpy_ver}",
- ],
- "setup_requires": [f"numpy >= {min_numpy_ver}"],
- "zip_safe": False,
- }
-
+if __name__ == "__main__":
+ # Freeze to support parallel compilation when using spawn instead of fork
+ multiprocessing.freeze_support()
setup(
- name=DISTNAME,
- maintainer=AUTHOR,
version=versioneer.get_version(),
- packages=find_packages(include=["pandas", "pandas.*"]),
- package_data={"": ["templates/*", "_libs/**/*.dll"]},
ext_modules=maybe_cythonize(extensions, compiler_directives=directives),
- maintainer_email=EMAIL,
- description=DESCRIPTION,
- license=LICENSE,
cmdclass=cmdclass,
- url=URL,
- download_url=DOWNLOAD_URL,
- project_urls=PROJECT_URLS,
- long_description=LONG_DESCRIPTION,
- classifiers=CLASSIFIERS,
- platforms="any",
- python_requires=">=3.7.1",
- extras_require={
- "test": [
- # sync with setup.cfg minversion & install.rst
- "pytest>=5.0.1",
- "pytest-xdist",
- "hypothesis>=3.58",
- ]
- },
- entry_points={
- "pandas_plotting_backends": ["matplotlib = pandas:plotting._matplotlib"]
- },
- **setuptools_kwargs,
)
-
-
-if __name__ == "__main__":
- # Freeze to support parallel compilation when using spawn instead of fork
- multiprocessing.freeze_support()
- setup_package()