API: Raise FileNotFoundError for nonexistent files (#14116)

For a nonexistent file, raise the more specific FileNotFoundError (available on Python >= 3.3) in read_csv, read_table, and read_hdf. Python 2.x has no FileNotFoundError, so pandas.compat aliases the name to IOError there. Closes gh-14086.
pandas-dev · Sep 1, 2016 · 58199c5 · 58199c5
1 parent 306e647
commit 58199c5
Show file tree

Hide file tree

Showing 6 changed files with 16 additions and 6 deletions.
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -455,6 +455,7 @@ API changes
 - ``Timestamp.to_pydatetime`` will issue a ``UserWarning`` when ``warn=True``, and the instance has a non-zero number of nanoseconds (:issue:`14101`)
 - ``Panel.to_sparse`` will raise a ``NotImplementedError`` exception when called (:issue:`13778`)
 - ``Index.reshape`` will raise a ``NotImplementedError`` exception when called (:issue:`12882`)
+- ``pd.read_csv()``, ``pd.read_table()``, and ``pd.read_hdf()`` raise the builtin ``FileNotFoundError`` exception for Python 3.x when called on a nonexistent file, and this is back-ported as ``IOError`` in Python 2.x (:issue:`14086`)
 - Non-convertible dates in an excel date column will be returned without conversion and the column will be ``object`` dtype, rather than raising an exception  (:issue:`10001`)
 - ``eval``'s upcasting rules for ``float32`` types have been updated to be more consistent with NumPy's rules.  New behavior will not upcast to ``float64`` if you multiply a pandas ``float32`` object by a scalar float64. (:issue:`12388`)
 - An ``UnsupportedFunctionCall`` error is now raised if NumPy ufuncs like ``np.mean`` are called on groupby or resample objects (:issue:`12811`)

diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
@@ -107,6 +107,10 @@ def signature(f):
     long = int
     unichr = chr
 
+    # This was introduced in Python 3.3, but we don't support
+    # Python 3.x < 3.4, so checking PY3 is safe.
+    FileNotFoundError = FileNotFoundError
+
     # list-producing versions of the major Python iterating functions
     def lrange(*args, **kwargs):
         return list(range(*args, **kwargs))
@@ -125,6 +129,8 @@ def lfilter(*args, **kwargs):
     import re
     _name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$")
 
+    FileNotFoundError = IOError
+
     def isidentifier(s, dotted=False):
         return bool(_name_re.match(s))
 

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -322,7 +322,8 @@ def read_hdf(path_or_buf, key=None, **kwargs):
             exists = False
 
         if not exists:
-            raise IOError('File %s does not exist' % path_or_buf)
+            raise compat.FileNotFoundError(
+                'File %s does not exist' % path_or_buf)
 
         # can't auto open/close if we are using an iterator
         # so delegate to the iterator

diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py
@@ -654,9 +654,10 @@ def test_file(self):
         tm.assert_frame_equal(url_table, local_table)
 
     def test_nonexistent_path(self):
-        # don't segfault pls #2428
+        # gh-2428: reading a nonexistent path must not segfault
+        # gh-14086: raise more helpful FileNotFoundError
         path = '%s.csv' % tm.rands(10)
-        self.assertRaises(IOError, self.read_csv, path)
+        self.assertRaises(compat.FileNotFoundError, self.read_csv, path)
 
     def test_missing_trailing_delimiters(self):
         data = """A,B,C,D

diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
@@ -339,7 +339,8 @@ def test_api(self):
 
         # File path doesn't exist
         path = ""
-        self.assertRaises(IOError, read_hdf, path, 'df')
+        self.assertRaises(compat.FileNotFoundError,
+                          read_hdf, path, 'df')
 
     def test_api_default_format(self):
 

diff --git a/pandas/parser.pyx b/pandas/parser.pyx
@@ -34,6 +34,7 @@ import numpy as np
 cimport util
 
 import pandas.lib as lib
+import pandas.compat as compat
 from pandas.types.common import (is_categorical_dtype, CategoricalDtype,
                                  is_integer_dtype, is_float_dtype,
                                  is_bool_dtype, is_object_dtype,
@@ -631,7 +632,6 @@ cdef class TextReader:
                     raise ValueError('Multiple files found in compressed '
                                      'zip file %s', str(zip_names))
             elif self.compression == 'xz':
-                from pandas import compat
                 lzma = compat.import_lzma()
 
                 if isinstance(source, basestring):
@@ -663,7 +663,7 @@ cdef class TextReader:
 
             if ptr == NULL:
                 if not os.path.exists(source):
-                    raise IOError('File %s does not exist' % source)
+                    raise compat.FileNotFoundError('File %s does not exist' % source)
                 raise IOError('Initializing from file failed')
 
             self.parser.source = ptr