diff --git a/setup.py b/setup.py index 66304fdab8702..22528c47b7073 100755 --- a/setup.py +++ b/setup.py @@ -23,7 +23,8 @@ DISTNAME = 'scikit-learn' DESCRIPTION = 'A set of python modules for machine learning and data mining' -LONG_DESCRIPTION = open('README.rst').read() +with open('README.rst') as f: + LONG_DESCRIPTION = f.read() MAINTAINER = 'Andreas Mueller' MAINTAINER_EMAIL = 'amueller@ais.uni-bonn.de' URL = 'http://scikit-learn.org' diff --git a/sklearn/datasets/base.py b/sklearn/datasets/base.py index 844129092a446..358a872d94730 100644 --- a/sklearn/datasets/base.py +++ b/sklearn/datasets/base.py @@ -183,7 +183,10 @@ def load_files(container_path, description=None, categories=None, target = target[indices] if load_content: - data = [open(filename, 'rb').read() for filename in filenames] + data = [] + for filename in filenames: + with open(filename, 'rb') as f: + data.append(f.read()) if encoding is not None: data = [d.decode(encoding, decode_error) for d in data] return Bunch(data=data, @@ -301,7 +304,8 @@ def load_digits(n_class=10): module_path = dirname(__file__) data = np.loadtxt(join(module_path, 'data', 'digits.csv.gz'), delimiter=',') - descr = open(join(module_path, 'descr', 'digits.rst')).read() + with open(join(module_path, 'descr', 'digits.rst')) as f: + descr = f.read() target = data[:, -1] flat_data = data[:, :-1] images = flat_data.view() @@ -402,26 +406,31 @@ def load_boston(): (506, 13) """ module_path = dirname(__file__) - data_file = csv.reader(open(join(module_path, 'data', - 'boston_house_prices.csv'))) - fdescr = open(join(module_path, 'descr', 'boston_house_prices.rst')) - temp = next(data_file) - n_samples = int(temp[0]) - n_features = int(temp[1]) - data = np.empty((n_samples, n_features)) - target = np.empty((n_samples,)) - temp = next(data_file) # names of features - feature_names = np.array(temp) - - for i, d in enumerate(data_file): - data[i] = np.asarray(d[:-1], dtype=np.float) - target[i] = np.asarray(d[-1], dtype=np.float) + + fdescr_name = join(module_path, 'descr', 'boston_house_prices.rst') + with open(fdescr_name) as f: + descr_text = f.read() + + data_file_name = join(module_path, 'data', 'boston_house_prices.csv') + with open(data_file_name) as f: + data_file = csv.reader(f) + temp = next(data_file) + n_samples = int(temp[0]) + n_features = int(temp[1]) + data = np.empty((n_samples, n_features)) + target = np.empty((n_samples,)) + temp = next(data_file) # names of features + feature_names = np.array(temp) + + for i, d in enumerate(data_file): + data[i] = np.asarray(d[:-1], dtype=np.float) + target[i] = np.asarray(d[-1], dtype=np.float) return Bunch(data=data, target=target, # last column is target value feature_names=feature_names[:-1], - DESCR=fdescr.read()) + DESCR=descr_text) def load_sample_images(): diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py index 7e71d261d206a..5dc0b5d8cb845 100644 --- a/sklearn/datasets/tests/test_svmlight_format.py +++ b/sklearn/datasets/tests/test_svmlight_format.py @@ -113,17 +113,19 @@ def test_load_compressed(): with NamedTemporaryFile(prefix="sklearn-test", suffix=".gz") as tmp: tmp.close() # necessary under windows - shutil.copyfileobj(open(datafile, "rb"), gzip.open(tmp.name, "wb")) - Xgz, ygz = load_svmlight_file(tmp.name) - assert_array_equal(X.toarray(), Xgz.toarray()) - assert_array_equal(y, ygz) + with open(datafile, "rb") as f: + shutil.copyfileobj(f, gzip.open(tmp.name, "wb")) + Xgz, ygz = load_svmlight_file(tmp.name) + assert_array_equal(X.toarray(), Xgz.toarray()) + assert_array_equal(y, ygz) with NamedTemporaryFile(prefix="sklearn-test", suffix=".bz2") as tmp: tmp.close() # necessary under windows - shutil.copyfileobj(open(datafile, "rb"), BZ2File(tmp.name, "wb")) - Xbz, ybz = load_svmlight_file(tmp.name) - assert_array_equal(X.toarray(), Xbz.toarray()) - assert_array_equal(y, ybz) + with open(datafile, "rb") as f: + shutil.copyfileobj(f, BZ2File(tmp.name, "wb")) + Xbz, ybz = load_svmlight_file(tmp.name) + assert_array_equal(X.toarray(), Xbz.toarray()) + assert_array_equal(y, ybz) @raises(ValueError) diff --git a/sklearn/datasets/twenty_newsgroups.py b/sklearn/datasets/twenty_newsgroups.py index 9e2d84fdc4e48..c325aa9c5204a 100644 --- a/sklearn/datasets/twenty_newsgroups.py +++ b/sklearn/datasets/twenty_newsgroups.py @@ -201,7 +201,8 @@ def fetch_20newsgroups(data_home=None, subset='train', categories=None, cache = None if os.path.exists(cache_path): try: - compressed_content = open(cache_path, 'rb').read() + with open(cache_path, 'rb') as f: + compressed_content = f.read() uncompressed_content = codecs.decode( compressed_content, 'zlib_codec') cache = pickle.loads(uncompressed_content) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 632c28dcc5bee..53ca77244dda5 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -189,7 +189,8 @@ def test_configure(): # Blas/Atlas development headers warnings.simplefilter('ignore', UserWarning) if PY3: - exec(open('setup.py').read(), dict(__name__='__main__')) + with open('setup.py') as f: + exec(f.read(), dict(__name__='__main__')) else: execfile('setup.py', dict(__name__='__main__')) finally: