diff --git a/doc/source/io.rst b/doc/source/io.rst index ae04996b4fddf..4199f161501ec 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4537,7 +4537,7 @@ See the documentation for `pyarrow `__ and .. note:: These engines are very similar and should read/write nearly identical parquet format files. - Currently ``pyarrow`` does not support timedelta data, and ``fastparquet`` does not support timezone aware datetimes (they are coerced to UTC). + Currently ``pyarrow`` does not support timedelta data, while ``fastparquet>=0.1.4`` supports timezone aware datetimes. These libraries differ by having different underlying dependencies (``fastparquet`` by using ``numba``, while ``pyarrow`` uses a c-library). .. ipython:: python diff --git a/pandas/core/internals.py b/pandas/core/internals.py index c2d3d0852384c..ec884035fe0c4 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -224,12 +224,17 @@ def make_block_scalar(self, values): """ return ScalarBlock(values) - def make_block_same_class(self, values, placement=None, ndim=None): + def make_block_same_class(self, values, placement=None, ndim=None, + dtype=None): """ Wrap given values in a block of same type as self. 
""" + if dtype is not None: + # issue 19431 fastparquet is passing this + warnings.warn("dtype argument is deprecated, will be removed " + "in a future release.", FutureWarning) if placement is None: placement = self.mgr_locs return make_block(values, placement=placement, ndim=ndim, - klass=self.__class__) + klass=self.__class__, dtype=dtype) def __unicode__(self): diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 57884e9816ed3..f17306b8b52f9 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -285,6 +285,13 @@ def test_delete(self): with pytest.raises(Exception): newb.delete(3) + def test_make_block_same_class(self): + # issue 19431 + block = create_block('M8[ns, US/Eastern]', [3]) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + block.make_block_same_class(block.values, dtype=block.values.dtype) + class TestDatetimeBlock(object): diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 6c172c80514e7..11cbea8ce6331 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -71,6 +71,15 @@ def fp(): return 'fastparquet' +@pytest.fixture +def fp_lt_014(): + if not _HAVE_FASTPARQUET: + pytest.skip("fastparquet is not installed") + if LooseVersion(fastparquet.__version__) >= LooseVersion('0.1.4'): + pytest.skip("fastparquet is >= 0.1.4") + return 'fastparquet' + + @pytest.fixture def df_compat(): return pd.DataFrame({'A': [1, 2, 3], 'B': 'foo'}) @@ -435,8 +444,10 @@ def test_basic(self, fp, df_full): df = df_full # additional supported types for fastparquet + if LooseVersion(fastparquet.__version__) >= LooseVersion('0.1.4'): + df['datetime_tz'] = pd.date_range('20130101', periods=3, + tz='US/Eastern') df['timedelta'] = pd.timedelta_range('1 day', periods=3) - check_round_trip(df, fp) @pytest.mark.skip(reason="not supported") @@ -468,14 +479,15 @@ def test_categorical(self, fp): df = 
pd.DataFrame({'a': pd.Categorical(list('abc'))}) check_round_trip(df, fp) - def test_datetime_tz(self, fp): - # doesn't preserve tz + def test_datetime_tz(self, fp_lt_014): + + # fastparquet<0.1.4 doesn't preserve tz df = pd.DataFrame({'a': pd.date_range('20130101', periods=3, tz='US/Eastern')}) - # warns on the coercion with catch_warnings(record=True): - check_round_trip(df, fp, expected=df.astype('datetime64[ns]')) + check_round_trip(df, fp_lt_014, + expected=df.astype('datetime64[ns]')) def test_filter_row_groups(self, fp): d = {'a': list(range(0, 3))}