diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..49acf47 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,2 @@ +[DESIGN] +max-attributes=10 diff --git a/.travis.yml b/.travis.yml index 767fce5..5dfa501 100644 --- a/.travis.yml +++ b/.travis.yml @@ -8,5 +8,7 @@ before_install: - sudo apt-get install -y imagemagick install: - pip install -r requirements.txt + - pip install pylint script: + - pylint cgc.py - ./tests.py diff --git a/README.md b/README.md index 002cd66..5b2ad64 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,17 @@ Printable pages of cards with the correct size and pixel density will be created This utility avoids the need to use the "Printable PDFs" provided for some IDC expansions. Ink and paper are not wasted, a person can print the exact cards they want, and this addresses how not every expansion has "Printable PDFs" available. +## Caching + +The cache modes decreases the amount of time to re-process similar images. It was introduced in CGC 1.3.0 and is disabled by default because the cache methods could be unreliable in unknown edge case scenarios. Using cache mode requires to first run CGC at least once. + +* name = A simple cache to see if an image name has already been processed. +* sha512 = A checksum check to see if an image has been modified already. + +``` +$ ./cgc-cli.py --cache name +``` + # Developers Refer to the technical design document for more information about the development of CGC. diff --git a/cgc-cli.py b/cgc-cli.py index a69b621..cf0bab9 100755 --- a/cgc-cli.py +++ b/cgc-cli.py @@ -12,15 +12,19 @@ def main(): log_level_arg = "INFO" tmp_dest_dir_arg = "/tmp/cgc" parser = ArgumentParser() - parser.add_argument("--src", help="the source directory.") - parser.add_argument("--dest", help="the destination directory.") - parser.add_argument("--ppi-height", help="the desired height in inches.", + parser.add_argument("--src", help="the source directory") + parser.add_argument("--dest", help="the destination directory") + parser.add_argument("--ppi-height", help="the desired height in inches", type=int) - parser.add_argument("--ppi-width", help="the desired width in inches.", + parser.add_argument("--ppi-width", help="the desired width in inches", type=int) parser.add_argument("--single", help="convert a single card to a" + \ " printable format.") - parser.add_argument("-v", help="verbose logging.", action="store_true") + parser.add_argument("--cache", help="the cache mode to use: name or sha512", + type=str) + parser.add_argument("-v", help="verbose logging", action="store_true") + parser.add_argument("--version", help="display the CGC version", + action="store_true") args = parser.parse_args() if args.v: @@ -33,6 +37,9 @@ def main(): # to create the necessary directories. cgc = CGC(tmp_dest_dir=tmp_dest_dir_arg, log_level=log_level_arg) + if args.version: + print(cgc.get_version()) + if args.src: cgc.tmp_src_dir = args.src @@ -42,6 +49,16 @@ def main(): if args.ppi_width: cgc.height_physical_width = args.ppi_width + if args.cache: + + if args.cache not in ["name", "sha512"]: + logging.warn("Invalid cache mode specified. Use name or sha512. " + "No cache will be used.") + else: + cgc.cache_mode = args.cache + + # The last argument to process is to see what action should be done + # (processing one or all cards). if args.single: cgc.convert_single(args.single) else: diff --git a/cgc.py b/cgc.py index 925d24d..6283cb0 100755 --- a/cgc.py +++ b/cgc.py @@ -8,6 +8,7 @@ from os import listdir, makedirs from os.path import basename, exists, isdir from math import ceil +from hashlib import sha512 # image processing library from PIL import Image @@ -15,6 +16,8 @@ class CGC: """CGC provides methods for reformatting cards into printable sheets.""" + cgc_version = "1.3.0" + def __init__(self, tmp_dest_dir="/tmp/cgc", height_physical_inches=2.5, width_physical_inches=3.5, log_level="INFO"): """Initialize CGC by creating temporary directories @@ -25,6 +28,7 @@ def __init__(self, tmp_dest_dir="/tmp/cgc", height_physical_inches=2.5, width_physical_inches (int) """ logging.basicConfig(level=log_level) + self.cache_mode = None self.height_physical_inches = height_physical_inches self.width_physical_inches = width_physical_inches self.tmp_src_dir = "/tmp/cards" @@ -46,6 +50,11 @@ def __init__(self, tmp_dest_dir="/tmp/cgc", height_physical_inches=2.5, except IOError as e: logging.critical("Failed to make temporary directories.\n%s", e) + @classmethod + def get_version(cls): + """Returns the CGC version string.""" + return cls.cgc_version + @staticmethod def find_first_image(images_dir): """Locate the first image in a directory. @@ -137,7 +146,7 @@ def convert_rotate(self, image_path_src, image_path_dest, degrees="90"): return True - def image_rotate_by_dimensions(self, image_path): + def convert_rotate_by_dimensions(self, image_path): """Rotate an image only if the width is greater than the height. Args: @@ -175,6 +184,111 @@ def convert_image_density(self, image_path_src, image_path_dest, ppi): return True + @staticmethod + def listdir_full_path(src): + """Return a list of full paths to each file in a directory. + + Args: + src (str): The source directory to look for files in. + + Yields: + list: The full path of each file in a directory. + """ + + for file in listdir(src): + yield src + "/" + file + + def cache_mode_name(self, src_dir=None, dest_dir=None): + """Use a cache by comparing file names from a source and destination + directory. If the file name from the source directory is missing in the + destination then it will be returned. It is assumed that those file + images need to be proccessed. + + Args: + None + + Returns: + list: The full path to each file that is missing in the destination + directory. + """ + + # These variables cannot be assigned as arugments because the "self" + # variable is not available yet during the function initialization. + if src_dir is None: + src_dir = self.tmp_src_dir + + if dest_dir is None: + dest_dir = self.tmp_dir_individual + + dest_full_paths = list(self.listdir_full_path(dest_dir)) + files_cache_invalid = [] + src_file_found = False + + for src_file in listdir(src_dir): + + for dest_full_path in dest_full_paths: + + if dest_full_path.endswith(src_file): + src_file_found = True + + if not src_file_found: + files_cache_invalid.append(src_dir + "/" + src_file) + + logging.debug("Cache is invalid for: %s", files_cache_invalid) + return files_cache_invalid + + def cache_mode_sha512(self, src_dir=None, dest_dir=None): + """Use a cache by comparing SHA512 checksums for each file from a + source and destination directory. If the checksum is the same then + the destination file might not have been processed yet. It is assumed + that those file images need to be proccessed. + + Args: + None + + Returns: + list: The full path to each file that has the same checksum in the + source and destination directory. + """ + + if src_dir is None: + src_dir = self.tmp_src_dir + + if dest_dir is None: + dest_dir = self.tmp_dir_individual + + dest_full_paths = list(self.listdir_full_path(dest_dir)) + files_cache_invalid = [] + src_hash = "" + dest_hash = "" + + for src_file in listdir(src_dir): + src_full_path = src_dir + "/" + src_file + + for dest_full_path in dest_full_paths: + + if dest_full_path.endswith(src_file): + + with open(dest_full_path, "rb") as dest_full_path_file: + dest_hash = sha512(dest_full_path_file.read()).hexdigest() + logging.debug("dest_full_path: %s", dest_full_path) + logging.debug("dest_hash: %s", dest_hash) + + with open(src_full_path, "rb") as src_full_path_file: + src_hash = sha512(src_full_path_file.read()).hexdigest() + logging.debug("src_full_path: %s", src_full_path) + logging.debug("src_hash: %s", src_hash) + + if src_hash == dest_hash: + # Assume that if the file is exactly the same it probably + # needs to be processed. + files_cache_invalid.append(src_full_path) + + if files_cache_invalid != []: + logging.debug("Cache is invalid for: %s", files_cache_invalid) + + return files_cache_invalid + def convert_merge(self, convert_merge_method, image_paths, merged_image_name="out.jpg"): """Merge one or more images either vertically or horizontally. @@ -231,7 +345,7 @@ def convert_single(self, image_path_src, ppi=None): image_path_dest, ppi): return False - if not self.image_rotate_by_dimensions(image_path_dest): + if not self.convert_rotate_by_dimensions(image_path_dest): return False return True @@ -239,7 +353,7 @@ def convert_single(self, image_path_src, ppi=None): def convert_batch_directory(self, images_dir): """Convert an entire directory from a specified path to be a different density and rotate them if needed. (Both the - "convert_image_density" and "image_rotate_by_dimensions" methods + "convert_image_density" and "convert_rotate_by_dimensions" methods are used on each image. Args: @@ -251,9 +365,18 @@ def convert_batch_directory(self, images_dir): first_image = self.find_first_image(images_dir) first_image_info = self.image_info(first_image) ppi = self.calc_ppi(first_image_info) + image_paths_src = [] + + if self.cache_mode == "name": + image_paths_src = self.cache_mode_name() + elif self.cache_mode == "sha512": + image_paths_src = self.cache_mode_sha512() + else: + + for image in listdir(images_dir): + image_paths_src.append(images_dir + "/" + image) - for image in listdir(images_dir): - image_path_src = images_dir + "/" + image + for image_path_src in image_paths_src: if not isdir(image_path_src): self.convert_single(image_path_src, ppi) diff --git a/cgc_tdd.md b/cgc_tdd.md index 77e57f6..0eb1eb4 100644 --- a/cgc_tdd.md +++ b/cgc_tdd.md @@ -21,7 +21,7 @@ The "Card Games Converter" (CGC) is a utility for converting pictures of cards i * image_info = Find the resolution dimensions of an image. * Input * image_path (str) = The full path to an image. - * Output + * Outputs * height (int) * width (int) * calc_ppi = Calculate and return the PPI for an image based on it's height and width. @@ -32,7 +32,7 @@ The "Card Games Converter" (CGC) is a utility for converting pictures of cards i * run_cmd = Execute a shell command. * Input * cmd (list) = A list of a command and arguments for it. - * Ouput + * Ouputs * cmd_return (dict) * rc (int) = Return code. * stdout (str bytes) = Standard output. @@ -42,20 +42,20 @@ The "Card Games Converter" (CGC) is a utility for converting pictures of cards i * image_path (str) = The full image path to use. * Ouput * boolean = If this method was successful. -* image_rotate_by_dimensions = Rotate an image if the width is greater than the height. This allows for stacking of images for a printable page of 8 cards. +* convert_rotate_by_dimensions = Rotate an image if the width is greater than the height. This allows for stacking of images for a printable page of 8 cards. * Input * image_path (src) = The full path to the image. * Ouput * boolean = If this method was successful. * convert_image_density = Convert a single image to a specific physical size density based on the PPI. - * Input + * Inputs * image_path_src (str) = The full path to the source image to convert. * image_path_dest (str) = The full path to the destination image to save as. * ppi (int) = The desired pixels per inch density. * Ouput * boolean = If this method was successful. * convert_merge = Merge one or more images together either vertically or horizontally. - * Input + * Inputs * convert_merge_method (str) = Append the images together in the "vertical" or "horizontal" direction * images_paths (list) = A list of all of the full image paths to append together. * merged_image_name (str) = The full image path where the result will be saved to. @@ -81,6 +81,23 @@ The "Card Games Converter" (CGC) is a utility for converting pictures of cards i * None * Ouput * boolean = If this method was successful. +* cache_mode_check = Check to see what cache back-end should be used and then call it. + * Input + * cache_mode (str) = The cache mode to use: "name" or "sha512". + * Output + * boolean = If this method was successful. +* cache_mode_name = Cache back-end based on file names. + * Inputs + * src_dir (str) = The source directory to scan. + * dest_dir (str) = The destination directory to compare the source against. + * Output + * list = A list of cards that are missing. +* cache_mode_sha512 = Cache back-end based on SHA512 checksums. + * Inputs + * src_dir (str) = The source directory to scan. + * dest_dir (str) = The destination directory to compare the source against. + * Output + * list = A list of cards that are missing or do not have matching SHA512 checksums. # CLI Arguments (cgc-cli) @@ -91,16 +108,16 @@ The "Card Games Converter" (CGC) is a utility for converting pictures of cards i * --ppi-width = The desired width in inches. * --single = Process a single source image instead of an entire directory. * --no-clean = Do not clean up temporary files when complete. -* --cache {name|sha256} = The cache mode to use. Requires the use of `--no-clean`. +* --cache {name|sha512} = The cache mode to use. Requires the use of `--no-clean`. * name = Use the image name to see if a temporary modified image exists. - * sha256 = Use a checksum to see if an image has been modified already. + * sha512 = Use a checksum to see if an image has been modified already. # Milestones * 1.0.0 = All required functions are written and working. * 1.1.0 = Tests are written and all relevant exceptions are added to the code. * 1.2.0 = Programs works as a CLI utility with arguments. -* 1.3.0 = Caching is supported. Processing of individual images can be skipped by comparing the original and processed images. The check can use a name or a SHA256 checksum. +* 1.3.0 = Caching is supported. Processing of individual images can be skipped by comparing the original and processed images. The check can use a name or a SHA512 checksum. * 1.4.0 = Parallel processing is added. * 1.5.0 = Image rotating and density resizing is handled by the Python PIL library instead of the `convert` command. * 1.6.0 = Pip package support. @@ -113,17 +130,37 @@ The "Card Games Converter" (CGC) is a utility for converting pictures of cards i | 1.0.0 | 40 | 20 | | 1.1.0 | 8 | 10 | | 1.2.0 | 8 | 2 | -| 1.3.0 | 4 | | +| 1.3.0 | 4 | 5 | | 1.4.0 | 4 | | | 1.5.0 | 8 | | | 1.6.0 | 4 | | | 2.0.0 | 40 | | +# Cache Benchmarking + +## CGC 1.3.0 + +Fedora 28, Python 3.6.6, ImageMagick 6.9.9.38 + +The Linux kernel I/O cache is first flushed before starting to test to prevent inaccurate and faster-than-expected results. + +Commands: `$ echo 3 | sudo tee /proc/sys/vm/drop_caches && sync && time ./cgc-cli.py $ARGS_CGC` + +| Description | Cache Type | Real Time | +| ----------- | ---------- | --------- | +| 100 cards | none | 0m35.940s | +| 100 cards | name | 0m17.027s | +| 100 cards | sha512 | 0m17.500s | +| 1000 cards | none | 5m54.514s | +| 1000 cards | name | 2m56.743s | +| 1000 cards | sha512 | 2m57.687s | + # Lessons Learned * Methods need to be as small as possible to abide by modular OOP best practices. -* All function inputs and outputs need to be defined in the TDD before creating the program. The TDD serves a purpose of being pseudocode code. The extra time put into planning leads to faster development time. +* All function inputs and outputs need to be defined in the TDD before creating the program, even if a program will be a small personal project. The TDD serves a purpose of being pseudocode code. The extra time put into planning leads to faster development time. * When creating a new method, the related docstrings and a unit test should also be created at the same time. This avoids time wasted on troubleshooting later on. +* Development time estimates should have more buffer time to account for documenting, testing, and unknown unknown issues. # TDD Revision History @@ -157,3 +194,13 @@ The "Card Games Converter" (CGC) is a utility for converting pictures of cards i * Completed milestone `1.2.0`. * Split and rename "convert_batch_individual" into two separate functions: "convert_batch_directory" which now loops over images and converts them using common logic from "convert_single". * Remove unused `--ppi-size` CLI argument. +* 2018-09-19 + * Update function definitions to include methods used for caching. +* 2018-10-06 + * Use SHA512 instead of SHA256 for checksum caching. +* 2018-10-12.1 + * Added cache benchmarking tests and results. + * Added development time considerations to lessons learned. + * Completed milestone `1.3.0`. +* 2018-10-12.2 + * Updated variables names used in cache functions. diff --git a/tests.py b/tests.py index 476d442..90ed6ec 100755 --- a/tests.py +++ b/tests.py @@ -79,12 +79,12 @@ def test_convert_rotate(self): (not return_status): self.assertTrue(False) - def test_image_rotate_by_dimensions(self): + def test_convert_rotate_by_dimensions(self): rotate_image = self.cgc.tmp_dest_dir + "/rotate.jpg" copyfile(self.last_image_card, rotate_image) # Return a list of: height, width rotate_image_original = self.cgc.image_info(rotate_image) - return_status = self.cgc.image_rotate_by_dimensions(rotate_image) + return_status = self.cgc.convert_rotate_by_dimensions(rotate_image) rotate_image_new = self.cgc.image_info(rotate_image) if not return_status: @@ -128,16 +128,27 @@ def test_convert_merge(self): remove(cards_merged_full_path) - def test_convert_single(self): + def test_convert_single(self, cache_mode=None): + + if cache_mode == "name" or cache_mode == "sha512": + self.cgc.cache_mode = cache_mode + test_image_src = self.cgc.tmp_src_dir + "/single.jpg" test_image_dest = self.cgc.tmp_dir_individual + "/single.jpg" copyfile(self.cgc.tmp_src_dir + "/1.jpg", test_image_src) if not self.cgc.convert_single(test_image_src): self.assertTrue(False) + return False remove(test_image_src) remove(test_image_dest) + return True + + def test_convert_single_cache(self): + + for cache_mode in ["name", "sha512"]: + self.assertTrue(self.test_convert_single(cache_mode)) def test_convert_batch_directory(self): return_status = self.cgc.convert_batch_directory(self.cards_source_dir) @@ -163,6 +174,7 @@ def test_convert_batch_append_all(self): elif return_status == False: self.assertTrue(False) + def tearDown(self): rmtree(self.cards_source_dir) rmtree(self.cgc.tmp_dest_dir)