This repository has been archived by the owner on Aug 22, 2023. It is now read-only.

Commit

Merge branch 'master' of https://github.com/tbepler/topaz
tbepler committed May 14, 2020
2 parents d0e16d3 + 50edb44 commit 301c7d6
Showing 7 changed files with 205 additions and 166 deletions.
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -1,3 +1,4 @@
include README.md
include topaz/gui/topaz.html
include topaz/pretrained/denoise/*.sav
include topaz/pretrained/detector/*.sav
1 change: 1 addition & 0 deletions README.md
@@ -5,6 +5,7 @@ A pipeline for particle detection in cryo-electron microscopy images using convo
- Improvements to the pretrained denoising models
- Topaz now includes pretrained particle picking models
- Updated tutorials
- Updated GUI to include denoising commands
- Denoising paper preprint is available [here](https://www.biorxiv.org/content/10.1101/838920v1)

## New in v0.2.2
2 changes: 1 addition & 1 deletion setup.py
@@ -1,7 +1,7 @@
from setuptools import setup, find_packages

name = 'topaz-em'
version = '0.2.3'
version = '0.2.4a'

description = 'Particle picking with positive-unlabeled CNNs'
long_description = 'Particle picking software for single particle cryo-electron microscopy using convolutional neural networks and positive-unlabeled learning. Includes methods for micrograph denoising.'
2 changes: 1 addition & 1 deletion topaz/_version.py
@@ -1 +1 @@
__version__ = "0.2.3"
__version__ = "0.2.4a"
21 changes: 14 additions & 7 deletions topaz/commands/denoise.py
@@ -42,6 +42,7 @@ def add_arguments(parser):
parser.add_argument('-b', '--dir-b', nargs='+', help='directory of training images part B')
parser.add_argument('--hdf', help='path to HDF5 file containing training image stack as an alternative to dirA/dirB')
parser.add_argument('--preload', action='store_true', help='preload micrographs into RAM')
parser.add_argument('--holdout', type=float, default=0.1, help='fraction of training micrograph pairs to holdout for validation (default: 0.1)')

parser.add_argument('--lowpass', type=float, default=1, help='lowpass filter micrographs by this amount (in pixels) before applying the denoising filter. uses a hard lowpass filter (i.e. sinc) (default: no lowpass filtering)')
parser.add_argument('--gaussian', type=float, default=0, help='Gaussian filter micrographs with this standard deviation (in pixels) before applying the denoising filter (default: 0)')
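With the new flag, a run along the lines of `topaz denoise -a dirA/ -b dirB/ --holdout 0.2` (paths are placeholders; other training options omitted) holds out 20% of the training image pairs for validation instead of the default 10%.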
@@ -76,7 +77,7 @@ def add_arguments(parser):
from topaz.utils.image import save_image


def make_paired_images_datasets(dir_a, dir_b, crop, random=np.random, preload=False, cutoff=0):
def make_paired_images_datasets(dir_a, dir_b, crop, random=np.random, holdout=0.1, preload=False, cutoff=0):
# train denoising model
# make the dataset
A = []
@@ -87,7 +88,7 @@ def make_paired_images_datasets(dir_a, dir_b, crop, random=np.random, preload=Fa
B.append(dir_b + os.sep + name)

# randomly hold out some image pairs for validation
n = int(0.1*len(A))
n = int(holdout*len(A))
order = random.permutation(len(A))

A_train = []
@@ -110,7 +111,7 @@ def make_paired_images_datasets(dir_a, dir_b, crop, random=np.random, preload=Fa
return dataset_train, dataset_val
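For reference, a minimal standalone sketch of the holdout split these hunks implement. The count `n` and the permutation come straight from the diff; sending the first `n` shuffled items to validation is an assumption, since the assignment lines are collapsed above.

```python
import numpy as np

def split_holdout(items, holdout=0.1, random=np.random):
    n = int(holdout * len(items))           # number of held-out pairs
    order = random.permutation(len(items))  # shuffled indices
    # assumption: the first n shuffled items form the validation set
    val = [items[i] for i in order[:n]]
    train = [items[i] for i in order[n:]]
    return train, val
```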


def make_images_datasets(dir_a, dir_b, crop, random=np.random, cutoff=0):
def make_images_datasets(dir_a, dir_b, crop, random=np.random, holdout=0.1, cutoff=0):
# train denoising model
# make the dataset
paths = []
@@ -122,7 +123,7 @@ def make_images_datasets(dir_a, dir_b, crop, random=np.random, cutoff=0):
paths.append(path)

# randomly hold out some image pairs for validation
n = int(0.1*len(paths))
n = int(holdout*len(paths))
order = random.permutation(len(paths))

path_train = []
@@ -229,7 +230,7 @@ def __getitem__(self, i): # retrieve the i'th image pair
return x


def make_hdf5_datasets(path, paired=True, preload=False, cutoff=0):
def make_hdf5_datasets(path, paired=True, preload=False, holdout=0.1, cutoff=0):

# open the hdf5 dataset
import h5py
@@ -242,7 +243,7 @@ def make_hdf5_datasets(path, paired=True, preload=False, cutoff=0):
N = len(dataset) # number of image pairs
if paired:
N = N//2
n = int(0.1*N)
n = int(holdout*N)
split = 2*(N-n)

if paired:
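The paired branch converts the pair-level holdout into an index over raw images. A small sketch of that arithmetic, assuming (as `split = 2*(N-n)` suggests) that each pair occupies two consecutive images in the stack:

```python
def paired_split_point(num_images, holdout=0.1):
    N = num_images // 2   # number of image pairs in the stack
    n = int(holdout * N)  # pairs held out for validation
    # the first N - n pairs (2*(N - n) raw images) are for training,
    # so validation begins at this raw-stack index
    return 2 * (N - n)

# 100 images = 50 pairs; holding out 10% leaves 45 training pairs,
# so training covers indices [0, 90) and validation [90, 100)
assert paired_split_point(100, holdout=0.1) == 90
```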
@@ -317,6 +318,7 @@ def main(args):
method = args.method
paired = (method == 'noise2noise')
preload = args.preload
holdout = args.holdout # fraction of image pairs to holdout for validation

if args.hdf is None: #use dirA/dirB
crop = args.crop
@@ -331,13 +333,15 @@
if paired:
dataset_train, dataset_val = make_paired_images_datasets(dir_a, dir_b, crop
, random=random
, holdout=holdout
, preload=preload
, cutoff=cutoff
)
else:
dataset_train, dataset_val = make_images_datasets(dir_a, dir_b, crop
, cutoff=cutoff
, random=random)
, random=random
, holdout=holdout)
dset_train.append(dataset_train)
dset_val.append(dataset_val)

@@ -357,6 +361,7 @@ def main(args):
else: # make HDF datasets
dataset_train, dataset_val = make_hdf5_datasets(args.hdf, paired=paired
, cutoff=cutoff
, holdout=holdout
, preload=preload)
shuffle = preload
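As a usage illustration, the wiring above corresponds to a direct call like this sketch (the directory paths and crop size are placeholder values, not from this commit):

```python
import numpy as np

dataset_train, dataset_val = make_paired_images_datasets(
    'data/dirA', 'data/dirB', crop=800,  # placeholder paths and crop size
    random=np.random,
    holdout=0.2,    # hold out 20% of pairs rather than the default 10%
    preload=False,
    cutoff=0,
)
```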

@@ -431,6 +436,8 @@ def main(args):
torch.save(model, path)
if use_cuda:
model.cuda()

models = [model]

else: # load the saved model(s)
models = []
4 changes: 4 additions & 0 deletions topaz/commands/train.py
@@ -266,6 +266,10 @@ def load_data(train_images, train_targets, test_images, test_targets, radius
num_micrographs = sum(len(train_images[k]) for k in train_images.keys())
num_particles = len(train_targets)
report('Loaded {} training micrographs with {} labeled particles'.format(num_micrographs, num_particles))
if num_particles == 0:
print('ERROR: no training particles specified. Check that micrograph names in the particles file match those in the micrographs file/directory.', file=sys.stderr)
raise Exception('No training particles.')


train_images, train_targets = match_images_targets(train_images, train_targets, radius)
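The condition this guard reports usually stems from micrograph-name mismatches between the particles file and the image directory; a toy illustration (all names hypothetical):

```python
# names of micrographs found in the image directory
image_names = {'mic_0001', 'mic_0002'}
# names referenced by the particles file, written with an extra suffix
particle_names = {'mic_0001_DW', 'mic_0002_DW'}

# no particle coordinates match any micrograph, so training would abort
# with the error added above
num_particles = len(particle_names & image_names)
print(num_particles)  # 0
```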

