Skip to content

Commit

Permalink
TST: Avoid time dependency in GCS zip test (#44552)
Browse files Browse the repository at this point in the history
* TST: Avoid time dependency in GCS zip test

* Use context manager
  • Loading branch information
mroeschke authored Nov 22, 2021
1 parent 3a25cb9 commit 23176c0
Showing 1 changed file with 16 additions and 8 deletions.
24 changes: 16 additions & 8 deletions pandas/tests/io/test_gcs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from io import BytesIO
import os
import zipfile

import numpy as np
import pytest
Expand Down Expand Up @@ -88,16 +89,23 @@ def test_to_read_gcs(gcs_buffer, format):
tm.assert_frame_equal(df1, df2)


def assert_equal_zip_safe(result: bytes, expected: bytes, compression: str):
    """
    Assert that two compressed payloads are equivalent.

    For zip compression, only compare the CRC-32 checksum of the file contents
    to avoid checking the time-dependent last-modified timestamp which
    in some CI builds is off-by-one
    See https://en.wikipedia.org/wiki/ZIP_(file_format)#File_headers

    For every other compression the raw bytes must match exactly.

    Parameters
    ----------
    result : bytes
        The payload produced by the code under test.
    expected : bytes
        The reference payload.
    compression : str
        Compression method used to produce both payloads; only ``"zip"``
        triggers the checksum-based comparison.

    Raises
    ------
    AssertionError
        If the payloads differ (for zip: a different number of archive
        members, or any member whose CRC-32 differs).
    """
    if compression == "zip":
        # Only compare the CRC checksum of the file contents
        with zipfile.ZipFile(BytesIO(result)) as res, zipfile.ZipFile(
            BytesIO(expected)
        ) as exp:
            res_infos = res.infolist()
            exp_infos = exp.infolist()
            # zip() below stops at the shorter list, so archives with a
            # different number of members must be rejected explicitly.
            assert len(res_infos) == len(exp_infos)
            for res_info, exp_info in zip(res_infos, exp_infos):
                assert res_info.CRC == exp_info.CRC
    else:
        assert result == expected


@td.skip_if_no("gcsfs")
Expand Down Expand Up @@ -126,7 +134,7 @@ def test_to_csv_compression_encoding_gcs(gcs_buffer, compression_only, encoding)
df.to_csv(path_gcs, compression=compression, encoding=encoding)
res = gcs_buffer.getvalue()
expected = buffer.getvalue()
assert_equal_zip_safe(res, expected)
assert_equal_zip_safe(res, expected, compression_only)

read_df = read_csv(
path_gcs, index_col=0, compression=compression_only, encoding=encoding
Expand All @@ -142,7 +150,7 @@ def test_to_csv_compression_encoding_gcs(gcs_buffer, compression_only, encoding)

res = gcs_buffer.getvalue()
expected = buffer.getvalue()
assert_equal_zip_safe(res, expected)
assert_equal_zip_safe(res, expected, compression_only)

read_df = read_csv(path_gcs, index_col=0, compression="infer", encoding=encoding)
tm.assert_frame_equal(df, read_df)
Expand Down

0 comments on commit 23176c0

Please sign in to comment.