Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

speed up tile download with concurrent processing #182

Merged
merged 7 commits into from
Aug 31, 2020
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions label_maker/images.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
# pylint: disable=unused-argument
"""Generate an .npz file containing arrays for training machine learning algorithms"""

import concurrent.futures
from os import makedirs, path as op
from random import shuffle
import time

import numpy as np

from label_maker.utils import get_image_function

def download_images(dest_folder, classes, imagery, ml_type, background_ratio, imagery_offset=False, **kwargs):
def download_images(dest_folder, classes, imagery, ml_type, background_ratio, threadcount, imagery_offset=False, **kwargs):
"""Download satellite images specified by a URL and a label.npz file
Parameters
------------
Expand All @@ -28,6 +30,8 @@ def download_images(dest_folder, classes, imagery, ml_type, background_ratio, im
background_ratio: float
Determines the number of background images to download in single class problems. Ex. A value
of 1 will download an equal number of background images to class images.
threadcount: int
Number of threads to use for faster parallel image download
imagery_offset: list
An optional list of integers representing the number of pixels to offset imagery. Ex. [15, -5] will
move the images 15 pixels right and 5 pixels up relative to the requested tile bounds
Expand Down Expand Up @@ -72,5 +76,9 @@ def class_test(value):
image_function = get_image_function(imagery)
kwargs['imagery_offset'] = imagery_offset

for tile in tiles:
image_function(tile, imagery, tiles_dir, kwargs)
t = time.perf_counter()
with concurrent.futures.ThreadPoolExecutor(max_workers=threadcount) as executor:
{executor.submit(image_function, tile, imagery, tiles_dir, kwargs): tile for tile in tiles}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be a list comprehension instead of a dict comprehension, since the resulting future-to-tile mapping is never used.

executor.shutdown(wait=True)
elapsed_time = time.perf_counter() - t
print(elapsed_time)
9 changes: 7 additions & 2 deletions label_maker/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def parse_args(args):
subparsers.add_parser('download', parents=[pparser], help='', formatter_class=dhf)
l = subparsers.add_parser('labels', parents=[pparser], help='', formatter_class=dhf)
p = subparsers.add_parser('preview', parents=[pparser], help='', formatter_class=dhf)
subparsers.add_parser('images', parents=[pparser], help='', formatter_class=dhf)
i = subparsers.add_parser('images', parents=[pparser], help='', formatter_class=dhf)
subparsers.add_parser('package', parents=[pparser], help='', formatter_class=dhf)

# labels has an optional parameter
Expand All @@ -60,6 +60,10 @@ def parse_args(args):
p.add_argument('-n', '--number', default=5, type=int,
help='number of examples images to create per class')

# images has an optional parameter
i.add_argument('-t', '--threadcount', default=50, type=int,
help= 'thread count to use')

# turn namespace into dictionary
parsed_args = vars(parser.parse_args(args))

Expand Down Expand Up @@ -109,7 +113,8 @@ def cli():
number = args.get('number')
preview(dest_folder=dest_folder, number=number, **config)
elif cmd == 'images':
download_images(dest_folder=dest_folder, **config)
threadcount = args.get('threadcount')
download_images(dest_folder=dest_folder, threadcount=threadcount, **config)
elif cmd == 'package':
package_directory(dest_folder=dest_folder, **config)

Expand Down