From 44986f27b9cfe2231c23aa610608ea7c1602a01c Mon Sep 17 00:00:00 2001 From: Vladimir Solomatin Date: Tue, 23 Jul 2019 20:19:32 +0300 Subject: [PATCH] Initial version --- .drone.yml | 28 ++++++++++++ .gitignore | 66 ++++++++++++++++++++++++++++ LICENSE | 116 +++++++++++++++++++++++++++++++++++++++++++++++++ MANIFEST.in | 1 + README.rst | 39 +++++++++++++++++ gzip_stream.py | 87 +++++++++++++++++++++++++++++++++++++ setup.py | 45 +++++++++++++++++++ tests.py | 22 ++++++++++ tox.ini | 41 +++++++++++++++++ 9 files changed, 445 insertions(+) create mode 100644 .drone.yml create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 MANIFEST.in create mode 100644 README.rst create mode 100644 gzip_stream.py create mode 100644 setup.py create mode 100644 tests.py create mode 100644 tox.ini diff --git a/.drone.yml b/.drone.yml new file mode 100644 index 0000000..4019e8b --- /dev/null +++ b/.drone.yml @@ -0,0 +1,28 @@ +kind: pipeline +name: default + +steps: + - name: linter + image: snakepacker/python:all + commands: + - tox -e lint + + - name: python 3.5 + image: snakepacker/python:all + commands: + - tox -e py35 + + - name: python 3.6 + image: snakepacker/python:all + commands: + - tox -e py36 + + - name: python 3.7 + image: snakepacker/python:all + commands: + - tox -e py37 + + - name: python 3.8 + image: snakepacker/python:all + commands: + - tox -e py38 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f1ec99e --- /dev/null +++ b/.gitignore @@ -0,0 +1,66 @@ +# Created by .ignore support plugin (hsz.mobi) +### Python template +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, 
so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.pytest_cache/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# mypy +.mypy_cache/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..670154e --- /dev/null +++ b/LICENSE @@ -0,0 +1,116 @@ +CC0 1.0 Universal + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator and +subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for the +purpose of contributing to a commons of creative, cultural and scientific +works ("Commons") that the public can reliably and without fear of later +claims of infringement build upon, modify, incorporate in other works, reuse +and redistribute as freely as possible in any form whatsoever and for any +purposes, including without limitation commercial purposes. These owners may +contribute to the Commons to promote the ideal of a free culture and the +further production of creative, cultural and scientific works, or to gain +reputation or greater distribution for their Work in part through the use and +efforts of others. 
+ +For these and/or other purposes and motivations, and without any expectation +of additional consideration or compensation, the person associating CC0 with a +Work (the "Affirmer"), to the extent that he or she is an owner of Copyright +and Related Rights in the Work, voluntarily elects to apply CC0 to the Work +and publicly distribute the Work under its terms, with knowledge of his or her +Copyright and Related Rights in the Work and the meaning and intended legal +effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not limited +to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, communicate, + and translate a Work; + + ii. moral rights retained by the original author(s) and/or performer(s); + + iii. publicity and privacy rights pertaining to a person's image or likeness + depicted in a Work; + + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + + v. rights protecting the extraction, dissemination, use and reuse of data in + a Work; + + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation thereof, + including any amended or successor version of such directive); and + + vii. other similar, equivalent or corresponding rights throughout the world + based on applicable law or treaty, and any national implementations thereof. + +2. Waiver. 
To the greatest extent permitted by, but not in contravention of, +applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and +unconditionally waives, abandons, and surrenders all of Affirmer's Copyright +and Related Rights and associated claims and causes of action, whether now +known or unknown (including existing as well as future claims and causes of +action), in the Work (i) in all territories worldwide, (ii) for the maximum +duration provided by applicable law or treaty (including future time +extensions), (iii) in any current or future medium and for any number of +copies, and (iv) for any purpose whatsoever, including without limitation +commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes +the Waiver for the benefit of each member of the public at large and to the +detriment of Affirmer's heirs and successors, fully intending that such Waiver +shall not be subject to revocation, rescission, cancellation, termination, or +any other legal or equitable action to disrupt the quiet enjoyment of the Work +by the public as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason be +judged legally invalid or ineffective under applicable law, then the Waiver +shall be preserved to the maximum extent permitted taking into account +Affirmer's express Statement of Purpose. 
In addition, to the extent the Waiver +is so judged Affirmer hereby grants to each affected person a royalty-free, +non transferable, non sublicensable, non exclusive, irrevocable and +unconditional license to exercise Affirmer's Copyright and Related Rights in +the Work (i) in all territories worldwide, (ii) for the maximum duration +provided by applicable law or treaty (including future time extensions), (iii) +in any current or future medium and for any number of copies, and (iv) for any +purpose whatsoever, including without limitation commercial, advertising or +promotional purposes (the "License"). The License shall be deemed effective as +of the date CC0 was applied by Affirmer to the Work. Should any part of the +License for any reason be judged legally invalid or ineffective under +applicable law, such partial invalidity or ineffectiveness shall not +invalidate the remainder of the License, and in such case Affirmer hereby +affirms that he or she will not (i) exercise any of his or her remaining +Copyright and Related Rights in the Work or (ii) assert any associated claims +and causes of action with respect to the Work, in either case contrary to +Affirmer's express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + + b. Affirmer offers the Work as-is and makes no representations or warranties + of any kind concerning the Work, express, implied, statutory or otherwise, + including without limitation warranties of title, merchantability, fitness + for a particular purpose, non infringement, or the absence of latent or + other defects, accuracy, or the present or absence of errors, whether or not + discoverable, all to the greatest extent permissible under applicable law. + + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without limitation + any person's Copyright and Related Rights in the Work. Further, Affirmer + disclaims responsibility for obtaining any necessary consents, permissions + or other rights required for any use of the Work. + + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to this + CC0 or use of the Work. + +For more information, please see +<http://creativecommons.org/publicdomain/zero/1.0/> diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..42a3c62 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +exclude tests.py diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..c78a7a0 --- /dev/null +++ b/README.rst @@ -0,0 +1,39 @@ +=========== +gzip-stream +=========== + +`gzip-stream` is a super-tiny library that helps you compress data with GZIP +on the fly. + +A `GZIPCompressedStream` instance acts like any other stream (in fact, +`GZIPCompressedStream` inherits `io.RawIOBase <https://docs.python.org/3/library/io.html#io.RawIOBase>`_), +but wraps another stream and compresses it on the fly. + +.. code-block:: python + + from gzip_stream import GZIPCompressedStream + from my_upload_lib import MyUploadClient + + upload_client = MyUploadClient() + with open('my_very_big_1tb_file.txt', 'rb') as file_to_upload: + compressed_stream = GZIPCompressedStream( + file_to_upload, + compression_level=7 + ) + upload_client.upload_fileobj(compressed_stream) + +`GZIPCompressedStream` does not read the entire source stream at once: it reads +it chunk by chunk until there is enough compressed output to satisfy the read. + +The module works on Python 3.5 and newer 3.x versions (`~= 3.5`). + +Installation +------------ +.. code-block:: bash + + pip install gzip-stream + + +License +------- +Public Domain: `CC0 1.0 Universal <https://creativecommons.org/publicdomain/zero/1.0/>`_. 
import gzip
import io
from typing import BinaryIO


class GZIPCompressedStream(io.RawIOBase):
    """Readable raw stream yielding the GZIP-compressed bytes of ``stream``.

    The wrapped stream is consumed lazily: source data is read and
    compressed only when a ``read``/``readinto`` call needs more output,
    so the input is processed chunk by chunk.

    :param stream: binary stream to read the uncompressed data from
    :param compression_level: GZIP compression level, from 1 to 9
    :raises ValueError: if ``compression_level`` is outside of 1..9
    """

    def __init__(self, stream: BinaryIO, *, compression_level: int):
        # an explicit check instead of `assert`: assertions are stripped
        # by `python -O`, which would let invalid levels through
        if not 1 <= compression_level <= 9:
            raise ValueError(
                'compression_level must be between 1 and 9, '
                'got {!r}'.format(compression_level)
            )

        super().__init__()

        self._compression_level = compression_level
        self._stream = stream

        # in-memory staging buffer: the compressor writes into it and
        # `readinto` drains it
        self._compressed_stream = io.BytesIO()
        self._compressor = gzip.GzipFile(
            mode='wb',
            fileobj=self._compressed_stream,
            compresslevel=compression_level
        )

        # because of the GZIP header written by `GzipFile.__init__`:
        self._compressed_stream.seek(0)

    @property
    def compression_level(self) -> int:
        """Compression level passed to the constructor."""
        return self._compression_level

    @property
    def stream(self) -> BinaryIO:
        """The wrapped source stream."""
        return self._stream

    def readable(self) -> bool:
        """Report that the stream supports reading."""
        return True

    def _read_compressed_into(self, b: memoryview) -> int:
        """Copy already-compressed bytes from the staging buffer into ``b``.

        :return: number of bytes copied (0 if the staging buffer is drained)
        """
        buf = self._compressed_stream.read(len(b))
        b[:len(buf)] = buf
        return len(buf)

    def readinto(self, b: bytearray) -> int:
        """Fill ``b`` with compressed data and return the byte count.

        Keeps pulling data from the source stream until ``b`` is full or
        the source is exhausted, so a short (or zero) count means the end
        of the compressed stream.

        :param b: writable bytes-like object to fill
        :return: number of bytes written into ``b``
        """
        # view the target as flat unsigned bytes, so that buffers with a
        # different item format (e.g. `array.array('I')`) are measured
        # and filled byte-wise
        view = memoryview(b).cast('B')

        offset = 0
        size = len(view)
        while offset < size:
            offset += self._read_compressed_into(view[offset:])
            if offset < size:
                # the staging buffer is drained at this point
                if self._compressor.closed:
                    # nothing to compress anymore
                    break
                # compress next bytes
                self._read_n_compress(size)

        return offset

    def _read_n_compress(self, size: int):
        """Read up to ``size`` source bytes and compress them.

        The compressed output replaces the contents of the staging
        buffer. When the source returns no data, the compressor is
        closed, which emits the remaining compressed data and the GZIP
        trailer.
        """
        assert size > 0

        data = self._stream.read(size)

        # rewind buffer to the start to free up memory
        # (because anything currently in the buffer should be already
        # streamed off the object)
        self._compressed_stream.seek(0)
        self._compressed_stream.truncate(0)

        if data:
            self._compressor.write(data)
        else:
            # this will write final data (will flush zlib with Z_FINISH)
            self._compressor.close()

        # rewind to the buffer start
        self._compressed_stream.seek(0)

    def __repr__(self) -> str:
        return (
            '{self.__class__.__name__}('
            '{self.stream!r}, '
            'compression_level={self.compression_level!r}'
            ')'
        ).format(self=self)


__all__ = (
    'GZIPCompressedStream',
)
assert decompress(output_stream.read()) == data diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..d22232c --- /dev/null +++ b/tox.ini @@ -0,0 +1,41 @@ +[tox] +envlist = lint,py3{5,6,7,8} + + +[testenv] +deps = + pytest + pytest-cov +commands = + pytest \ + tests.py + +[testenv:lint] +deps = + pylama + +commands = + pylama --options tox.ini \ + gzip_stream.py tests.py setup.py + + +[pytest] +addopts = --cov gzip_stream + --cov-config tox.ini + --verbose + + +[pylama] +linters = mccabe,pycodestyle,pyflakes + +[pylama:pycodestyle] +max_line_length = 80 +show-pep8 = True +show-source = True + + +[coverage:run] +branch = True + +[coverage:report] +show_missing = True