Skip to content

Commit

Permalink
Major refactor
Browse files (browse the repository at this point in the history)
  • Loading branch information
daviddiazvico committed Nov 19, 2018
1 parent d27b666 commit 7d4962a
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 46 deletions.
10 changes: 4 additions & 6 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,22 @@ language: python
matrix:
include:
- python: 3.6
- python: 3.7
branches:
only:
- master
install:
- python setup.py install
- conda install -y sphinx
- pip install recommonmark sphinxcontrib-napoleon travis-sphinx
- pip install recommonmark sphinx sphinxcontrib-napoleon travis-sphinx
- curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
- chmod +x ./cc-test-reporter
- ./cc-test-reporter before-build
script:
- python setup.py test
- mkdir docs
- export PYTHONPATH=`pwd`
- sphinx-quickstart -q -p scikit-datasets -a "David Diaz Vico" -v 0.1 -r 0.1.17 -l en --ext-autodoc --ext-viewcode --ext-githubpages --extensions sphinxcontrib.napoleon --no-makefile --no-batchfile docs
- sphinx-quickstart -q -p scikit-datasets -a "David Diaz Vico" -v 0.1 -r 0.1.18 -l en --ext-autodoc --ext-viewcode --ext-githubpages --extensions sphinxcontrib.napoleon --no-makefile --no-batchfile docs
- sphinx-apidoc -o docs/_static/ skdatasets -F -a -l
- travis-sphinx -v build -s docs -n
after_success:
- if [[ "$TRAVIS_PULL_REQUEST" == "false" && "$TRAVIS_PYTHON_VERSION" == "3.7" ]]; then ./cc-test-reporter after-build --debug --coverage-input-type=coverage.py --exit-code $TRAVIS_TEST_RESULT; fi
- travis-sphinx deploy
- if [[ "$TRAVIS_PULL_REQUEST" == "false" && "$TRAVIS_PYTHON_VERSION" == "3.6" ]]; then ./cc-test-reporter after-build --debug --coverage-input-type=coverage.py --exit-code $TRAVIS_TEST_RESULT; fi
- travis-sphinx deploy
23 changes: 12 additions & 11 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,30 +1,31 @@
import sys
"""
@author: David Diaz Vico
@license: MIT
"""

from setuptools import find_packages, setup

needs_pytest = {'pytest', 'test', 'ptr'}.intersection(sys.argv)
pytest_runner = ['pytest-runner'] if needs_pytest else []
setup(name='scikit-datasets',
packages=find_packages(),
version='0.1.17',
version='0.1.18',
description='Scikit-learn-compatible datasets',
long_description=open('README.md', 'r').read(),
author='David Diaz Vico',
author_email='david.diaz.vico@outlook.com',
url='https://github.com/daviddiazvico/scikit-datasets',
download_url='https://github.com/daviddiazvico/scikit-datasets/archive/v0.1.17.tar.gz',
download_url='https://github.com/daviddiazvico/scikit-datasets/archive/v0.1.18.tar.gz',
keywords=['scikit-learn'],
classifiers=['Intended Audience :: Science/Research',
'Topic :: Scientific/Engineering',
'Programming Language :: Python',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7'],
'Programming Language :: Python :: 3.6'],
install_requires=['scikit-learn'],
extras_require={'cran': ['rdata'],
'forex': ['forex_python'],
'keel': ['pandas'],
'keras': ['keras']},
setup_requires=pytest_runner,
tests_require=['pytest-cov'],
test_suite='tests',
)
setup_requires=['pytest-runner'],
tests_require=['coverage', 'forex_python', 'keras', 'pandas', 'pytest',
'pytest-cov', 'rdata', 'tensorflow'],
test_suite='tests')
5 changes: 3 additions & 2 deletions skdatasets/keras.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from keras.datasets import (boston_housing, cifar10, cifar100, fashion_mnist,
imdb, mnist, reuters)
from sklearn.datasets.base import Bunch
from sklearn.model_selection import check_cv


DATASETS = {'boston_housing': boston_housing.load_data,
Expand Down Expand Up @@ -42,5 +43,5 @@ def fetch_keras(name, **kwargs):
n_features = np.prod(X.shape[1:])
X = X.reshape([X.shape[0], n_features]) / X_max
X_test = X_test.reshape([X_test.shape[0], n_features]) / X_max
return Bunch(data=X, target=y, data_test=X_test, target_test=y_test,
DESCR=name)
cv = check_cv(cv=(X, X_test), y=(y, y_test))
return Bunch(data=X, target=y, outer_cv=cv, DESCR=name)
27 changes: 15 additions & 12 deletions skdatasets/libsvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import scipy as sp
from sklearn.datasets import load_svmlight_file, load_svmlight_files
from sklearn.datasets.base import Bunch, get_data_home
from sklearn.model_selection import PredefinedSplit
from sklearn.model_selection import check_cv, PredefinedSplit
from urllib.request import urlretrieve


Expand Down Expand Up @@ -57,30 +57,33 @@ def _load(collection, name, dirname=None):
filename_tr,
filename_val,
filename_t])
cv = PredefinedSplit([item for sublist in [[-1] * X_tr.shape[0], [0] * X_val.shape[0]] for item in sublist])
X = sp.sparse.vstack((X_tr, X_val))
y = np.hstack((y_tr, y_val))
inner_cv = PredefinedSplit([item for sublist in [[-1] * X_tr.shape[0], [0] * X_val.shape[0]] for item in sublist])
outer_cv = check_cv(cv=(X, X_test), y=(y, y_test))
X_remaining = y_remaining = None
elif (filename_tr is not None) and (filename_val is not None):
_, _, X_tr, y_tr, X_val, y_val = load_svmlight_files([filename,
filename_tr,
filename_val])
cv = PredefinedSplit([item for sublist in [[-1] * X_tr.shape[0], [0] * X_val.shape[0]] for item in sublist])
X = sp.sparse.vstack((X_tr, X_val))
y = np.hstack((y_tr, y_val))
X_test = y_test = X_remaining = y_remaining = None
inner_cv = PredefinedSplit([item for sublist in [[-1] * X_tr.shape[0], [0] * X_val.shape[0]] for item in sublist])
outer_cv = X_remaining = y_remaining = None
elif (filename_t is not None) and (filename_r is not None):
X, y, X_test, y_test, X_remaining, y_remaining = load_svmlight_files([filename,
filename_t,
filename_r])
cv = None
inner_cv = None
outer_cv = check_cv(cv=(X, X_test), y=(y, y_test))
elif filename_t is not None:
X, y, X_test, y_test = load_svmlight_files([filename, filename_t])
X_remaining = y_remaining = cv = None
inner_cv = X_remaining = y_remaining = None
outer_cv = check_cv(cv=(X, X_test), y=(y, y_test))
else:
X, y = load_svmlight_file(filename)
X_test = y_test = X_remaining = y_remaining = cv = None
return X, y, X_test, y_test, X_remaining, y_remaining, cv
inner_cv = outer_cv = X_remaining = y_remaining = None
return X, y, inner_cv, outer_cv, X_remaining, y_remaining


def fetch_libsvm(collection, name, data_home=None):
Expand Down Expand Up @@ -111,10 +114,10 @@ def fetch_libsvm(collection, name, data_home=None):
collection, name.replace('/', '-'))
if not os.path.exists(dirname):
os.makedirs(dirname)
X, y, X_test, y_test, X_remaining, y_remaining, cv = _load(collection, name,
X, y, inner_cv, outer_cv, X_remaining, y_remaining = _load(collection, name,
dirname=dirname)
data = Bunch(data=X, target=y, data_test=X_test, target_test=y_test,
inner_cv=cv, data_remaining=X_remaining,
target_remaining=y_remaining, DESCR=name)
data = Bunch(data=X, target=y, inner_cv=inner_cv, outer_cv=outer_cv,
data_remaining=X_remaining, target_remaining=y_remaining,
DESCR=name)
data = Bunch(**{k: v for k, v in data.items() if v is not None})
return data
11 changes: 6 additions & 5 deletions skdatasets/uci.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np
import os
from sklearn.datasets.base import Bunch, get_data_home
from sklearn.model_selection import check_cv
from urllib.request import urlretrieve


Expand Down Expand Up @@ -38,8 +39,9 @@ def _fetch(name, dirname=None):
filename)
urlretrieve(url, filename=filename)
X_test, y_test = _load_csv(filename)
cv = check_cv(cv=(X, X_test), y=(y, y_test))
except:
X_test = y_test = None
cv = None
try:
filename = name + '.names'
url = BASE_URL + '/' + name + '/' + filename
Expand All @@ -54,7 +56,7 @@ def _fetch(name, dirname=None):
urlretrieve(url, filename=filename)
with open(filename) as rst_file:
fdescr = rst_file.read()
return X, y, X_test, y_test, fdescr
return X, y, cv, fdescr


def fetch_uci(name, data_home=None):
Expand All @@ -80,8 +82,7 @@ def fetch_uci(name, data_home=None):
dirname = os.path.join(get_data_home(data_home=data_home), 'uci', name)
if not os.path.exists(dirname):
os.makedirs(dirname)
X, y, X_test, y_test, DESCR = _fetch(name, dirname=dirname)
data = Bunch(data=X, target=y, data_test=X_test, target_test=y_test,
DESCR=DESCR)
X, y, cv, DESCR = _fetch(name, dirname=dirname)
data = Bunch(data=X, target=y, outer_cv=cv, DESCR=DESCR)
data = Bunch(**{k: v for k, v in data.items() if v is not None})
return data
8 changes: 4 additions & 4 deletions tests/test_keras.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@
from skdatasets.keras import fetch_keras


def check(data, shape, test_shape):
def check(data, shape, splits=1):
"""Check dataset properties."""
assert data.data.shape == shape
assert data.target.shape[0] == shape[0]
assert data.data_test.shape == test_shape
assert data.target_test.shape[0] == test_shape[0]
if splits > 1:
assert len(list(data.outer_cv.split())) == splits


def test_keras_mnist():
"""Tests keras MNIST dataset."""
data = fetch_keras('mnist')
check(data, (60000, 28*28), (10000, 28*28))
check(data, (60000, 28*28), 1)
11 changes: 5 additions & 6 deletions tests/test_libsvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,12 @@
from skdatasets.libsvm import fetch_libsvm


def check(data, shape, test_shape=None):
def check(data, shape, splits=1):
"""Check dataset properties."""
assert data.data.shape == shape
assert data.target.shape[0] == shape[0]
if test_shape is not None:
assert data.data_test.shape == test_shape
assert data.target_test.shape[0] == test_shape[0]
if splits > 1:
assert len(list(data.outer_cv.split())) == splits
if hasattr(data, 'inner_cv'):
assert isinstance(data.inner_cv, BaseCrossValidator)

Expand All @@ -30,7 +29,7 @@ def test_fetch_libsvm_australian():
def test_fetch_libsvm_liver_disorders():
"""Tests LIBSVM liver-disorders dataset."""
data = fetch_libsvm(collection='binary', name='liver-disorders')
check(data, (290, 5), test_shape=(145, 5))
check(data, (290, 5), 1)


def test_fetch_libsvm_duke():
Expand All @@ -48,4 +47,4 @@ def test_fetch_libsvm_cod_rna():
def test_fetch_libsvm_satimage():
"""Tests LIBSVM satimage dataset."""
data = fetch_libsvm(collection='multiclass', name='satimage.scale')
check(data, (8870, 36), test_shape=(4435, 36))
check(data, (8870, 36), 1)

0 comments on commit 7d4962a

Please sign in to comment.