LukeShortCloud · LukeShortCloud · Oct 12, 2018 · Sep 20, 2018 · Sep 20, 2018 · Sep 20, 2018
diff --git a/.pylintrc b/.pylintrc
@@ -0,0 +1,2 @@
+[DESIGN]
+max-attributes=10
diff --git a/.travis.yml b/.travis.yml
@@ -8,5 +8,7 @@ before_install:
   - sudo apt-get install -y imagemagick
 install:
   - pip install -r requirements.txt
+  - pip install pylint
 script:
+  - pylint cgc.py
   - ./tests.py
diff --git a/README.md b/README.md
@@ -55,6 +55,17 @@ Printable pages of cards with the correct size and pixel density will be created
 
 This utility avoids the need to use the "Printable PDFs" provided for some IDC expansions. Ink and paper are not wasted, a person can print the exact cards they want, and this addresses how not every expansion has "Printable PDFs" available.
 
+## Caching
+
+The cache modes decreases the amount of time to re-process similar images. It was introduced in CGC 1.3.0 and is disabled by default because the cache methods could be unreliable in unknown edge case scenarios. Using cache mode requires to first run CGC at least once.
+
+* name = A simple cache to see if an image name has already been processed.
+* sha512 = A checksum check to see if an image has been modified already.
+
+```
+$ ./cgc-cli.py --cache name
+```
+
 # Developers
 
 Refer to the technical design document for more information about the development of CGC.

diff --git a/cgc-cli.py b/cgc-cli.py
@@ -12,15 +12,19 @@ def main():
     log_level_arg = "INFO"
     tmp_dest_dir_arg = "/tmp/cgc"
     parser = ArgumentParser()
-    parser.add_argument("--src", help="the source directory.")
-    parser.add_argument("--dest", help="the destination directory.")
-    parser.add_argument("--ppi-height", help="the desired height in inches.",
+    parser.add_argument("--src", help="the source directory")
+    parser.add_argument("--dest", help="the destination directory")
+    parser.add_argument("--ppi-height", help="the desired height in inches",
                         type=int)
-    parser.add_argument("--ppi-width", help="the desired width in inches.",
+    parser.add_argument("--ppi-width", help="the desired width in inches",
                         type=int)
     parser.add_argument("--single", help="convert a single card to a" + \
                         " printable format.")
-    parser.add_argument("-v", help="verbose logging.", action="store_true")
+    parser.add_argument("--cache", help="the cache mode to use: name or sha512",
+                        type=str)
+    parser.add_argument("-v", help="verbose logging", action="store_true")
+    parser.add_argument("--version", help="display the CGC version",
+                        action="store_true")
     args = parser.parse_args()
 
     if args.v:
@@ -33,6 +37,9 @@ def main():
     # to create the necessary directories.
     cgc = CGC(tmp_dest_dir=tmp_dest_dir_arg, log_level=log_level_arg)
 
+    if args.version:
+        print(cgc.get_version())
+
     if args.src:
         cgc.tmp_src_dir = args.src
 
@@ -42,6 +49,16 @@ def main():
     if args.ppi_width:
         cgc.height_physical_width = args.ppi_width
 
+    if args.cache:
+
+        if args.cache not in ["name", "sha512"]:
+            logging.warn("Invalid cache mode specified. Use name or sha512. "
+                         "No cache will be used.")
+        else:
+            cgc.cache_mode = args.cache
+
+    # The last argument to process is to see what action should be done
+    # (processing one or all cards).
     if args.single:
         cgc.convert_single(args.single)
     else:

diff --git a/cgc.py b/cgc.py
@@ -8,13 +8,16 @@
 from os import listdir, makedirs
 from os.path import basename, exists, isdir
 from math import ceil
+from hashlib import sha512
 # image processing library
 from PIL import Image
 
 
 class CGC:
     """CGC provides methods for reformatting cards into printable sheets."""
 
+    cgc_version = "1.3.0"
+
     def __init__(self, tmp_dest_dir="/tmp/cgc", height_physical_inches=2.5,
                  width_physical_inches=3.5, log_level="INFO"):
         """Initialize CGC by creating temporary directories
@@ -25,6 +28,7 @@ def __init__(self, tmp_dest_dir="/tmp/cgc", height_physical_inches=2.5,
             width_physical_inches (int)
         """
         logging.basicConfig(level=log_level)
+        self.cache_mode = None
         self.height_physical_inches = height_physical_inches
         self.width_physical_inches = width_physical_inches
         self.tmp_src_dir = "/tmp/cards"
@@ -46,6 +50,11 @@ def __init__(self, tmp_dest_dir="/tmp/cgc", height_physical_inches=2.5,
             except IOError as e:
                 logging.critical("Failed to make temporary directories.\n%s", e)
 
+    @classmethod
+    def get_version(cls):
+        """Returns the CGC version string."""
+        return cls.cgc_version
+
     @staticmethod
     def find_first_image(images_dir):
         """Locate the first image in a directory.
@@ -137,7 +146,7 @@ def convert_rotate(self, image_path_src, image_path_dest, degrees="90"):
 
         return True
 
-    def image_rotate_by_dimensions(self, image_path):
+    def convert_rotate_by_dimensions(self, image_path):
         """Rotate an image only if the width is greater than the height.
 
         Args:
@@ -175,6 +184,111 @@ def convert_image_density(self, image_path_src, image_path_dest, ppi):
 
         return True
 
+    @staticmethod
+    def listdir_full_path(src):
+        """Return a list of full paths to each file in a directory.
+
+        Args:
+            src (str): The source directory to look for files in.
+
+        Yields:
+            list: The full path of each file in a directory.
+        """
+
+        for file in listdir(src):
+            yield src + "/" + file
+
+    def cache_mode_name(self, src_dir=None, dest_dir=None):
+        """Use a cache by comparing file names from a source and destination
+        directory. If the file name from the source directory is missing in the
+        destination then it will be returned. It is assumed that those file
+        images need to be proccessed.
+
+        Args:
+            None
+
+        Returns:
+            list: The full path to each file that is missing in the destination
+                  directory.
+        """
+
+        # These variables cannot be assigned as arugments because the "self"
+        # variable is not available yet during the function initialization.
+        if src_dir is None:
+            src_dir = self.tmp_src_dir
+
+        if dest_dir is None:
+            dest_dir = self.tmp_dir_individual
+
+        dest_full_paths = list(self.listdir_full_path(dest_dir))
+        files_cache_invalid = []
+        src_file_found = False
+
+        for src_file in listdir(src_dir):
+
+            for dest_full_path in dest_full_paths:
+
+                if dest_full_path.endswith(src_file):
+                    src_file_found = True
+
+            if not src_file_found:
+                files_cache_invalid.append(src_dir + "/" + src_file)
+
+        logging.debug("Cache is invalid for: %s", files_cache_invalid)
+        return files_cache_invalid
+
+    def cache_mode_sha512(self, src_dir=None, dest_dir=None):
+        """Use a cache by comparing SHA512 checksums for each file from a
+        source and destination directory. If the checksum is the same then
+        the destination file might not have been processed yet. It is assumed
+        that those file images need to be proccessed.
+
+        Args:
+            None
+
+        Returns:
+            list: The full path to each file that has the same checksum in the
+                  source and destination directory.
+        """
+
+        if src_dir is None:
+            src_dir = self.tmp_src_dir
+
+        if dest_dir is None:
+            dest_dir = self.tmp_dir_individual
+
+        dest_full_paths = list(self.listdir_full_path(dest_dir))
+        files_cache_invalid = []
+        src_hash = ""
+        dest_hash = ""
+
+        for src_file in listdir(src_dir):
+            src_full_path = src_dir + "/" + src_file
+
+            for dest_full_path in dest_full_paths:
+
+                if dest_full_path.endswith(src_file):
+
+                    with open(dest_full_path, "rb") as dest_full_path_file:
+                        dest_hash = sha512(dest_full_path_file.read()).hexdigest()
+                        logging.debug("dest_full_path: %s", dest_full_path)
+                        logging.debug("dest_hash: %s", dest_hash)
+
+                    with open(src_full_path, "rb") as src_full_path_file:
+                        src_hash = sha512(src_full_path_file.read()).hexdigest()
+                        logging.debug("src_full_path: %s", src_full_path)
+                        logging.debug("src_hash: %s", src_hash)
+
+                    if src_hash == dest_hash:
+                        # Assume that if the file is exactly the same it probably
+                        # needs to be processed.
+                        files_cache_invalid.append(src_full_path)
+
+        if files_cache_invalid != []:
+            logging.debug("Cache is invalid for: %s", files_cache_invalid)
+
+        return files_cache_invalid
+
     def convert_merge(self, convert_merge_method, image_paths,
                       merged_image_name="out.jpg"):
         """Merge one or more images either vertically or horizontally.
@@ -231,15 +345,15 @@ def convert_single(self, image_path_src, ppi=None):
                                           image_path_dest, ppi):
             return False
 
-        if not self.image_rotate_by_dimensions(image_path_dest):
+        if not self.convert_rotate_by_dimensions(image_path_dest):
             return False
 
         return True
 
     def convert_batch_directory(self, images_dir):
         """Convert an entire directory from a specified path to be
         a different density and rotate them if needed. (Both the
-        "convert_image_density" and "image_rotate_by_dimensions" methods
+        "convert_image_density" and "convert_rotate_by_dimensions" methods
         are used on each image.
 
         Args:
@@ -251,9 +365,18 @@ def convert_batch_directory(self, images_dir):
         first_image = self.find_first_image(images_dir)
         first_image_info = self.image_info(first_image)
         ppi = self.calc_ppi(first_image_info)
+        image_paths_src = []
+
+        if self.cache_mode == "name":
+            image_paths_src = self.cache_mode_name()
+        elif self.cache_mode == "sha512":
+            image_paths_src = self.cache_mode_sha512()
+        else:
+
+            for image in listdir(images_dir):
+                image_paths_src.append(images_dir + "/" + image)
 
-        for image in listdir(images_dir):
-            image_path_src = images_dir + "/" + image
+        for image_path_src in image_paths_src:
 
             if not isdir(image_path_src):
                 self.convert_single(image_path_src, ppi)