Skip to content

Commit

Permalink
[MRG] PyPy support for all but a couple of estimators (scikit-learn#1…
Browse files Browse the repository at this point in the history
  • Loading branch information
rlamy authored and jnothman committed Jul 20, 2018
1 parent 813d7de commit 5592a2e
Show file tree
Hide file tree
Showing 24 changed files with 186 additions and 34 deletions.
17 changes: 17 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,22 @@ jobs:
path: ~/log.txt
destination: log.txt

pypy3:
docker:
- image: pypy:3-6.0.0
steps:
- restore_cache:
keys:
- pypy3-ccache-{{ .Branch }}
- pypy3-ccache
- checkout
- run: ./build_tools/circle/build_test_pypy.sh
- save_cache:
key: pypy3-ccache-{{ .Branch }}-{{ .BuildNum }}
paths:
- ~/.ccache
- ~/.cache/pip

deploy:
docker:
- image: circleci/python:3.6.1
Expand All @@ -88,6 +104,7 @@ workflows:
jobs:
- python3
- python2
- pypy3
- deploy:
requires:
- python3
30 changes: 30 additions & 0 deletions build_tools/circle/build_test_pypy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env bash
# Build scikit-learn in a PyPy virtualenv and run its test suite.
# Invoked by the CircleCI "pypy3" job.

# Echo every command and abort on the first failing one.
set -x
set -e

# Linear-algebra development libraries, a Fortran compiler, and ccache.
apt-get -yq update
apt-get -yq install libatlas-dev libatlas-base-dev liblapack-dev gfortran ccache

pip install virtualenv

# Prefer pypy3 and fall back to pypy. Fail with an explicit message when
# neither interpreter is on PATH, instead of falling through to a confusing
# "pypy-env/bin/activate: No such file" error further down.
if command -v pypy3; then
    virtualenv -p "$(command -v pypy3)" pypy-env
elif command -v pypy; then
    virtualenv -p "$(command -v pypy)" pypy-env
else
    echo "ERROR: neither pypy3 nor pypy was found on PATH" >&2
    exit 1
fi

source pypy-env/bin/activate

# Log which interpreter the virtualenv actually provides.
python --version
which python

# numpy, Cython and pytest may be resolved from the extra PyPy wheel index.
pip install --extra-index-url https://antocuni.github.io/pypy-wheels/ubuntu numpy==1.14.4 Cython pytest
pip install "scipy>=1.1.0" sphinx numpydoc docutils

# Cap the ccache size, enable compression, and put the ccache directory first
# on PATH so that it takes precedence when the build looks up compilers.
ccache -M 512M
export CCACHE_COMPRESS=1
export PATH=/usr/lib/ccache:$PATH

pip install -e .

make test
10 changes: 10 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,23 @@
# doc/modules/clustering.rst and use sklearn from the local folder rather than
# the one from site-packages.

import platform
from distutils.version import LooseVersion

import pytest
from _pytest.doctest import DoctestItem


def pytest_collection_modifyitems(config, items):

# FeatureHasher is not compatible with PyPy
if platform.python_implementation() == 'PyPy':
skip_marker = pytest.mark.skip(
reason='FeatureHasher is not compatible with PyPy')
for item in items:
if item.name == 'sklearn.feature_extraction.hashing.FeatureHasher':
item.add_marker(skip_marker)

# numpy changed the str/repr formatting of numpy arrays in 1.14. We want to
# run doctests only for numpy >= 1.14.
skip_doctests = True
Expand Down
6 changes: 6 additions & 0 deletions doc/conftest.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import os
from os.path import exists
from os.path import join
import warnings

import numpy as np

from sklearn.utils import IS_PYPY
from sklearn.utils.testing import SkipTest
from sklearn.utils.testing import check_skip_network
from sklearn.datasets import get_data_home
Expand Down Expand Up @@ -56,6 +58,8 @@ def setup_twenty_newsgroups():


def setup_working_with_text_data():
if IS_PYPY and os.environ.get('CI', None):
raise SkipTest('Skipping too slow test with PyPy on CI')
check_skip_network()
cache_path = _pkl_filepath(get_data_home(), CACHE_NAME)
if not exists(cache_path):
Expand Down Expand Up @@ -98,6 +102,8 @@ def pytest_runtest_setup(item):
setup_working_with_text_data()
elif fname.endswith('modules/compose.rst') or is_index:
setup_compose()
elif IS_PYPY and fname.endswith('modules/feature_extraction.rst'):
raise SkipTest('FeatureHasher is not compatible with PyPy')
elif fname.endswith('modules/impute.rst'):
setup_impute()
elif fname.endswith('statistical_inference/unsupervised_learning.rst'):
Expand Down
6 changes: 6 additions & 0 deletions doc/developers/advanced_installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ Scikit-learn requires:
- NumPy (>= 1.8.2),
- SciPy (>= 0.13.3).

.. note::

Installing on PyPy requires PyPy3-v5.10+, NumPy 1.14.0+, and SciPy 1.1.0+.
On PyPy, only the pip-based installation instructions apply.


Building Scikit-learn also requires

- Cython >=0.23
Expand Down
2 changes: 1 addition & 1 deletion doc/developers/contributing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ and Cython optimizations.

* Travis is used for testing on Linux platforms
* Appveyor is used for testing on Windows platforms
* CircleCI is used to build the docs for viewing
* CircleCI is used to build the docs for viewing and for testing with PyPy on Linux

Please note that if one of the following markers appear in the latest commit
message, the following actions are taken.
Expand Down
10 changes: 4 additions & 6 deletions doc/faq.rst
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,10 @@ careful choice of algorithms.
Do you support PyPy?
--------------------

In case you didn't know, `PyPy <http://pypy.org/>`_ is the new, fast,
just-in-time compiling Python implementation. We don't support it.
When the `NumPy support <http://buildbot.pypy.org/numpy-status/latest.html>`_
in PyPy is complete or near-complete, and SciPy is ported over as well,
we can start thinking of a port.
We use too much of NumPy to work with a partial implementation.
In case you didn't know, `PyPy <http://pypy.org/>`_ is an alternative
Python implementation with a built-in just-in-time compiler. Experimental
support for PyPy3-v5.10+ has been added; it requires NumPy 1.14.0+
and SciPy 1.1.0+.

How do I deal with string data (or trees, graphs...)?
-----------------------------------------------------
Expand Down
6 changes: 6 additions & 0 deletions doc/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ it as ``scikit-learn[alldeps]``. The most common use case for this is in a
application or a Docker image. This option is not intended for manual
installation from the command line.

.. note::

Installing on PyPy requires PyPy3-v5.10+, NumPy 1.14.0+, and
SciPy 1.1.0+.


For installation instructions for more distributions see
:ref:`other distributions <install_by_distribution>`.
For compiling the development version from source, or building the package
Expand Down
6 changes: 6 additions & 0 deletions doc/whats_new/v0.20.rst
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,12 @@ Miscellaneous
:issue:`9101` by :user:`alex-33 <alex-33>`
and :user:`Maskani Filali Mohamed <maskani-moh>`.

- Add almost complete PyPy 3 support. Known unsupported functionalities are
:func:`datasets.load_svmlight_file`, :class:`feature_extraction.FeatureHasher` and
:class:`feature_extraction.text.HashingVectorizer`. Running on PyPy requires
PyPy3-v5.10+, NumPy 1.14.0+, and SciPy 1.1.0+.
:issue:`11010` by :user:`Ronan Lamy <rlamy>` and `Roman Yurchak`_.

Bug fixes
.........

Expand Down
14 changes: 11 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
# Copyright (C) 2007-2009 Cournapeau David <cournape@gmail.com>
# 2010 Fabian Pedregosa <fabian.pedregosa@inria.fr>
# License: 3-clause BSD
descr = """A set of python modules for machine learning and data mining"""

import sys
import os
import platform
import shutil
from distutils.command.clean import clean as Clean
from pkg_resources import parse_version
Expand Down Expand Up @@ -41,8 +41,12 @@

VERSION = sklearn.__version__

SCIPY_MIN_VERSION = '0.13.3'
NUMPY_MIN_VERSION = '1.8.2'
if platform.python_implementation() == 'PyPy':
SCIPY_MIN_VERSION = '1.1.0'
NUMPY_MIN_VERSION = '1.14.0'
else:
SCIPY_MIN_VERSION = '0.13.3'
NUMPY_MIN_VERSION = '1.8.2'


# Optional setuptools features
Expand Down Expand Up @@ -185,6 +189,10 @@ def setup_package():
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.6',
('Programming Language :: Python :: '
'Implementation :: CPython'),
('Programming Language :: Python :: '
'Implementation :: PyPy')
],
cmdclass=cmdclass,
install_requires=[
Expand Down
8 changes: 5 additions & 3 deletions sklearn/datasets/setup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@

import numpy
import os
import platform


def configuration(parent_package='', top_path=None):
Expand All @@ -10,9 +11,10 @@ def configuration(parent_package='', top_path=None):
config.add_data_dir('descr')
config.add_data_dir('images')
config.add_data_dir(os.path.join('tests', 'data'))
config.add_extension('_svmlight_format',
sources=['_svmlight_format.pyx'],
include_dirs=[numpy.get_include()])
if platform.python_implementation() != 'PyPy':
config.add_extension('_svmlight_format',
sources=['_svmlight_format.pyx'],
include_dirs=[numpy.get_include()])
config.add_subpackage('tests')
return config

Expand Down
13 changes: 11 additions & 2 deletions sklearn/datasets/svmlight_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,21 @@
import numpy as np
import scipy.sparse as sp

from ._svmlight_format import _load_svmlight_file
from .. import __version__
from ..externals import six
from ..externals.six import u, b
from ..externals.six.moves import range, zip
from ..utils import check_array
from ..utils import check_array, IS_PYPY

if IS_PYPY:
    # The compiled ``_svmlight_format`` extension is not imported on PyPy.
    # Provide a same-named stand-in that accepts any arguments and raises,
    # so the failure happens at call time with an explanatory message.
    def _load_svmlight_file(*args, **kwargs):
        raise NotImplementedError(
            'load_svmlight_file is currently not '
            'compatible with PyPy (see '
            'https://github.com/scikit-learn/scikit-learn/issues/11543 '
            'for the status updates).')
else:
    from ._svmlight_format import _load_svmlight_file


def load_svmlight_file(f, n_features=None, dtype=np.float64,
Expand Down
9 changes: 7 additions & 2 deletions sklearn/datasets/tests/test_svmlight_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from sklearn.utils.testing import assert_raises
from sklearn.utils.testing import assert_raises_regex
from sklearn.utils.testing import assert_in
from sklearn.utils.testing import fails_if_pypy
from sklearn.utils.fixes import sp_version

import sklearn
Expand All @@ -30,6 +31,8 @@
invalidfile = os.path.join(currdir, "data", "svmlight_invalid.txt")
invalidfile2 = os.path.join(currdir, "data", "svmlight_invalid_order.txt")

pytestmark = fails_if_pypy


def test_load_svmlight_file():
X, y = load_svmlight_file(datafile)
Expand Down Expand Up @@ -119,7 +122,8 @@ def test_load_compressed():
with NamedTemporaryFile(prefix="sklearn-test", suffix=".gz") as tmp:
tmp.close() # necessary under windows
with open(datafile, "rb") as f:
shutil.copyfileobj(f, gzip.open(tmp.name, "wb"))
with gzip.open(tmp.name, "wb") as fh_out:
shutil.copyfileobj(f, fh_out)
Xgz, ygz = load_svmlight_file(tmp.name)
# because we "close" it manually and write to it,
# we need to remove it manually.
Expand All @@ -130,7 +134,8 @@ def test_load_compressed():
with NamedTemporaryFile(prefix="sklearn-test", suffix=".bz2") as tmp:
tmp.close() # necessary under windows
with open(datafile, "rb") as f:
shutil.copyfileobj(f, BZ2File(tmp.name, "wb"))
with BZ2File(tmp.name, "wb") as fh_out:
shutil.copyfileobj(f, fh_out)
Xbz, ybz = load_svmlight_file(tmp.name)
# because we "close" it manually and write to it,
# we need to remove it manually.
Expand Down
8 changes: 5 additions & 3 deletions sklearn/ensemble/gradient_boosting.py
Original file line number Diff line number Diff line change
Expand Up @@ -1336,12 +1336,14 @@ def _resize_state(self):
raise ValueError('resize with smaller n_estimators %d < %d' %
(total_n_estimators, self.estimators_[0]))

self.estimators_.resize((total_n_estimators, self.loss_.K))
self.train_score_.resize(total_n_estimators)
self.estimators_ = np.resize(self.estimators_,
(total_n_estimators, self.loss_.K))
self.train_score_ = np.resize(self.train_score_, total_n_estimators)
if (self.subsample < 1 or hasattr(self, 'oob_improvement_')):
# if do oob resize arrays or create new if not available
if hasattr(self, 'oob_improvement_'):
self.oob_improvement_.resize(total_n_estimators)
self.oob_improvement_ = np.resize(self.oob_improvement_,
total_n_estimators)
else:
self.oob_improvement_ = np.zeros((total_n_estimators,),
dtype=np.float64)
Expand Down
13 changes: 11 additions & 2 deletions sklearn/feature_extraction/hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,18 @@
import numpy as np
import scipy.sparse as sp

from . import _hashing
from ..utils import IS_PYPY
from ..base import BaseEstimator, TransformerMixin

if IS_PYPY:
    # The compiled ``_hashing`` extension is not imported on PyPy. Provide a
    # same-named stand-in that accepts any arguments and raises, so the
    # failure happens at call time with an explanatory message.
    def _hashing_transform(*args, **kwargs):
        raise NotImplementedError(
            'FeatureHasher is not compatible with PyPy (see '
            'https://github.com/scikit-learn/scikit-learn/issues/11540 '
            'for the status updates).')
else:
    from ._hashing import transform as _hashing_transform


def _iteritems(d):
"""Like d.iteritems, but accepts any collections.Mapping."""
Expand Down Expand Up @@ -155,7 +164,7 @@ def transform(self, raw_X):
elif self.input_type == "string":
raw_X = (((f, 1) for f in x) for x in raw_X)
indices, indptr, values = \
_hashing.transform(raw_X, self.n_features, self.dtype,
_hashing_transform(raw_X, self.n_features, self.dtype,
self.alternate_sign)
n_samples = indptr.shape[0] - 1

Expand Down
10 changes: 6 additions & 4 deletions sklearn/feature_extraction/setup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import platform


def configuration(parent_package='', top_path=None):
Expand All @@ -10,10 +11,11 @@ def configuration(parent_package='', top_path=None):
if os.name == 'posix':
libraries.append('m')

config.add_extension('_hashing',
sources=['_hashing.pyx'],
include_dirs=[numpy.get_include()],
libraries=libraries)
if platform.python_implementation() != 'PyPy':
config.add_extension('_hashing',
sources=['_hashing.pyx'],
include_dirs=[numpy.get_include()],
libraries=libraries)
config.add_subpackage("tests")

return config
4 changes: 3 additions & 1 deletion sklearn/feature_extraction/tests/test_feature_hasher.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@

from sklearn.feature_extraction import FeatureHasher
from sklearn.utils.testing import (assert_raises, assert_true, assert_equal,
ignore_warnings)
ignore_warnings, fails_if_pypy)

pytestmark = fails_if_pypy


def test_feature_hasher_dicts():
Expand Down
Loading

0 comments on commit 5592a2e

Please sign in to comment.