From a499196b55cd0f70e15bd9e1c117bb29f4d7ea42 Mon Sep 17 00:00:00 2001 From: tbepler Date: Mon, 25 Nov 2019 13:07:37 -0500 Subject: [PATCH 1/5] updated topaz gui --- README.md | 1 + topaz/gui/topaz.html | 340 +++++++++++++++++++++++-------------------- 2 files changed, 184 insertions(+), 157 deletions(-) diff --git a/README.md b/README.md index f40ff99..deba8f6 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ A pipeline for particle detection in cryo-electron microscopy images using convo - Improvements to the pretrained denoising models - Topaz now includes pretrained particle picking models - Updated tutorials +- Updated GUI to include denoising commands - Denoising paper preprint is available [here](https://www.biorxiv.org/content/10.1101/838920v1) ## New in v0.2.2 diff --git a/topaz/gui/topaz.html b/topaz/gui/topaz.html index 8826bc1..6dcf1c6 100644 --- a/topaz/gui/topaz.html +++ b/topaz/gui/topaz.html @@ -201,14 +201,13 @@ $('.denoise_micrographs_inputs').each(function() { $(this).on('keyup', function() { var input7 = $('#input7').val() == '' ? '/path/to/input/images/*.mrc' : $('#input7').val(); - var model7 = $('#model7').val() == '' ? 'L2' : $('#model7').val(); + var model7 = $('#model7').val() == '' ? 'unet' : $('#model7').val(); var device7 = $('#device7').val() == '' ? '0' : $('#device7').val(); var format7 = $('#format7').val() == '' ? 'mrc' : $('#format7').val(); - var bin7 = $('#bin7').val() == '' ? '1' : $('#bin7').val(); var patchsize7 = $('#patchsize7').val() == '' ? '1536' : $('#patchsize7').val(); var patchpadding7 = $('#patchpadding7').val() == '' ? '384' : $('#patchpadding7').val(); - var output7 = $('#output7').val() == '' ? '/path/to/output/folder' : $('#output7').val(); - $('#result7').html("topaz denoise " + input7 + " --model " + model7 + " --device " + device7 + " --format " + format7 + " --bin " + bin7 + " --patch-size " + patchsize7 + " --patch-padding " + patchpadding7 + " --normalize --output " + output7); + var output7 = $('#output7').val() == '' ? '/path/to/output/folder/' : $('#output7').val(); + $('#result7').html("topaz denoise " + input7 + " --model " + model7 + " --device " + device7 + " --format " + format7 + " --patch-size " + patchsize7 + " --patch-padding " + patchpadding7 + " --normalize --output " + output7); }); }); }); @@ -216,29 +215,34 @@ $('.denoise_stack_inputs').each(function() { $(this).on('keyup', function() { var input8 = $('#input8').val() == '' ? '/path/to/particle/stack.mrcs' : $('#input8').val(); - var model8 = $('#model8').val() == '' ? 'L2' : $('#model8').val(); + var model8 = $('#model8').val() == '' ? 'unet' : $('#model8').val(); var device8 = $('#device8').val() == '' ? '0' : $('#device8').val(); var format8 = $('#format8').val() == '' ? 'mrc' : $('#format8').val(); - var bin8 = $('#bin8').val() == '' ? '1' : $('#bin8').val(); var output8 = $('#output8').val() == '' ? '/path/to/denoised/stack.mrcs' : $('#output8').val(); - $('#result8').html("topaz denoise " + input8 + " --model " + model8 + " --device " + device8 + " --format " + format8 + " --bin " + bin8 + " --stack --normalize --output " + output8); + $('#result8').html("topaz denoise " + input8 + " --model " + model8 + " --device " + device8 + " --format " + format8 + " --stack --normalize --output " + output8); }); }); }); - /*$( document ).ready(function() { + $( document ).ready(function() { $('.denoise_training_inputs').each(function() { $(this).on('keyup', function() { - var coordinateinput9 = $('#coordinateinput9').val() == '' ? '/path/to/Topaz/coordinate/file.txt' : $('#coordinateinput9').val(); - var imageroot9 = $('#imageroot9').val() == '' ? '/path/to/full/micrographs/' : $('#imageroot9').val(); - var boxsize9 = $('#boxsize9').val() == '' ? '192' : $('#boxsize9').val(); - var threshold9 = $('#threshold9').val() == '' ? '-9999' : $('#threshold9').val(); - var resize9 = $('#resize9').val() == '' ? '-1' : $('#resize9').val(); - var imageext9 = $('#imageext9').val() == '' ? 'mrc' : $('#imageext9').val(); - var output9 = $('#output9').val() == '' ? '/path/to/output/particles.mrcs' : $('#output9').val(); - $('#result9').html("topaz particle_stack " + coordinateinput9 + " --image-root " + imageroot9 + " --size " + boxsize9 + " --threshold " + threshold9 + " --resize " + resize9 + " --image-ext ." + imageext9 + " --output " + output9); + var dira10 = $('#dira10').val() == '' ? '/path/to/input/images/half1/' : $('#dira10').val(); + var dirb10 = $('#dirb10').val() == '' ? '/path/to/input/images/half2/' : $('#dirb10').val(); + var method10 = $('#method10').val() == '' ? 'noise2noise' : $('#method10').val(); + var arch10 = $('#arch10').val() == '' ? 'unet' : $('#arch10').val(); + var optim10 = $('#optim10').val() == '' ? 'adagrad' : $('#optim10').val(); + var lr10 = $('#lr10').val() == '' ? '0.001' : $('#lr10').val(); + var criteria10 = $('#criteria10').val() == '' ? 'L2' : $('#criteria10').val(); + var crop10 = $('#crop10').val() == '' ? '800' : $('#crop10').val(); + var batchsize10 = $('#batchsize10').val() == '' ? '4' : $('#batchsize10').val(); + var numepochs10 = $('#numepochs10').val() == '' ? '100' : $('#numepochs10').val(); + var device10 = $('#device10').val() == '' ? '0' : $('#device10').val(); + var numworkers10 = $('#numworkers10').val() == '' ? '16' : $('#numworkers10').val(); + var output10 = $('#output10').val() == '' ? '/path/to/output/folder/and/file' : $('#output10').val(); + $('#result10').html("topaz denoise --dir-a " + dira10 + " --dir-b " + dirb10 + " --method " + method10 + " --arch " + arch10 + " --optim " + optim10 + " --lr " + lr10 + " --criteria " + criteria10 + " --crop " + crop10 + " --batch-size " + batchsize10 + " --num-epochs " + numepochs10 + " --device " + device10 + " --num-workers " + numworkers10 + " --save-prefix " + output10); }); }); - });*/ + }); @@ -478,7 +482,7 @@ display: block; background-color: inherit; color: black; - padding: 12px 16px; + padding: 9.75px 16px; width: 100%; border: none; outline: none; @@ -808,6 +812,8 @@ .via_logo { background:url(); display:inline-block; width:250px; height:100px;} + .denoising_logo { background:url(); display:inline-block; width:78px; height:75px;} + .preprocessing_logo { background:url(); display:inline-block; width:54px; height:75px;} .picking_logo { background:url(); display:inline-block; width:76px; height:75px;} @@ -905,6 +911,7 @@
+ @@ -925,13 +932,14 @@

Welcome!

This GUI will allow you to pick training particles and make Topaz commands. Here is the general workflow:

-
-
+
+
+ @@ -939,17 +947,125 @@ + Denoise
images
Normalize &
downsample images
Pick training
particles, or
analyze picks
Train a
neural network
Extract particle
coordinates
-
-

Citation: Bepler, T., Morin, A., Rapp, M., Brasch, J., Shapiro, L., Noble, A.J., Berger, B. (2019). Positive-unlabeled convolutional neural networks for particle picking in cryo-electron micrographs. Nature Methods

+
+

Citations: Bepler, T., Morin, A., Rapp, M., Brasch, J., Shapiro, L., Noble, A.J., Berger, B. (2019). Positive-unlabeled convolutional neural networks for particle picking in cryo-electron micrographs. Nature Methods +

Bepler, T., Noble, A.J., Berger, B. (2019). Topaz-Denoise: general deep denoising models for cryoEM. bioRxiv

+
+
+ +
+

   Topaz-Denoise

+
+ +
Denoise Micrographs +
+

+

Create a command for denoising micrographs.

+ Parameters:            + Must modify + Might modify + Rarely modify +

+
+
+ + + + + +
Input micrographs
Output folder
Model
GPU/CPU device
Format
+
Advanced options
+
+ + + +
Patch size
Patch padding
+
+ +

Command:


+
+ topaz denoise /path/to/input/images/*.mrc --model unet --device 0 --format mrc --patch-size 1536 --patch-padding 384 --normalize --output /path/to/output/folder/ +
+

+
+ +
Denoise Particle Stack +
+

+

Create a command for denoising a particle or micrograph stack.

+ Parameters:            + Must modify + Might modify + Rarely modify +

+
+
+ + + + + +
Input stack
Output stack
Model
GPU/CPU device
Format
+

Command:


+
+ topaz denoise /path/to/particle/stack.mrcs --model unet --device 0 --format mrc --stack --normalize --output /path/to/denoised/stack.mrcs +
+

+
+ +
Train New Denoising Model +
+

+

Create a command for training a Topaz-Denoise model.

+ Parameters:            + Must modify + Might modify + Rarely modify +

+
+
+ + + + + + + +
Input folder #1
Input folder #2
Output files
Model architecture
Training criteria
GPU/CPU device
Number of CPUs
+
Advanced options
+
+ + + + + + + +
Training method
Optimizer
Learning rate
Crop size
Batch size
Number of epochs
+
+ +

Command:


+
+ topaz denoise --dir-a /path/to/input/images/half1/ --dir-b /path/to/input/images/half2/ --method noise2noise --arch unet --optim adagrad --lr 0.001 --criteria L2 --crop 800 --batch-size 4 --num-epochs 100 --device 0 --num-workers 16 --save-prefix /path/to/output/folder/and/file +
+

+


+
+
Analyzing Topaz picks

- • Import your Topaz particle picks as a CSV file and review the picks.

-   â—¦ Press L on your keyboard to show the particle number, then the Up key will toggle the Topaz particle scores. Zoom-in on the image with + to inspect.

+ Import your Topaz particle picks as a CSV file and review the picks.

+   ◦ Press L on your keyboard to show the particle number, then the Up key will toggle the Topaz particle scores. Zoom-in on the image with + to inspect.

Or

- • Load your micrographs and particles in .star format into Relion. Picks may be colored based on Topaz score by using the AutopickFigureOfMerit column in the .star file. + • Load your micrographs and particles in .star format into Relion. Picks may be colored based on Topaz score by using the AutopickFigureOfMerit column in the .star file.

@@ -1594,7 +1714,7 @@

File Not Found

    Topaz Training Command Generator

-

Create a command for training a network using particle picks. +

Create a command for training a model using particle picks.



Parameters:            Must modify Might modify @@ -1604,13 +1724,13 @@

File Not Found


- - + + - + @@ -1684,7 +1804,7 @@

File Not Found

Rarely modify



Training images folder
Training particles
Output
Particle radius
Output
Particle radius
Autoencoder
Number of epochs
GPU/CPU device
Number of CPUs
CNN model
CNN model
Method
Num particles/image
K-fold
- + @@ -1737,9 +1857,9 @@

Convert and/or Threshold Pick Coordinates


Input micrographs
Input micrographs
Model
Output filenames
Particle radius
- - - + + +
Input file
Output file
Threshold
Upscale picks
Downscale picks
Threshold
Upscale picks
Downscale picks


Command:


@@ -1759,9 +1879,9 @@

Extract Particles as an MRC Stack


- + - + --> @@ -1775,100 +1895,6 @@

Extract Particles as an MRC Stack

--> -
Denoise Micrographs -
-

-

Create a command for denoising micrographs using an implementation of Noise2Noise.

- Parameters:            - Must modify - Might modify - Rarely modify -

-
-
Input picks
Input micrographs
Output stack
Output stack
Boxsize
Threshold
Threshold
Image extension
Resize particles
- - - - - - -
Input micrographs
Output folder
Model
GPU/CPU device
Format
Bin micrographs
-
Advanced options
-
- - - -
Patch size
Patch padding
-
- -

Command:


-
- topaz denoise /path/to/input/images/*.mrc --model L2 --device 0 --format mrc --bin 1 --patch-size 1536 --patch-padding 384 --normalize --output /path/to/output/folder -
-
- - -
Denoise Particle Stack -
-

-

Create a command for denoising a particle stack using an implementation of Noise2Noise.

- Parameters:            - Must modify - Might modify - Rarely modify -

-
-
- - - - - - -
Input stack
Output stack
Model
GPU/CPU device
Format
Bin Stack
-

Command:


-
- topaz denoise /path/to/particle/stack.mrcs --model L2 --device 0 --format mrc --bin 1 --stack --normalize --output /path/to/denoised/stack.mrcs -
-

-
- - -
Convert | Threshold Picks

@@ -1882,19 +1908,19 @@

Extract Particles as an MRC Stack


- - - + + +
Input file
Output file
Threshold
Upscale picks
Downscale picks
Threshold
Upscale picks
Downscale picks

Specific filetype options
- - - - - - + + + + + +
Voltage
Detector pixelsize
Magnification
Amplitude contrast
Box size
Image extension
Voltage
Detector pixelsize
Magnification
Amplitude contrast
Box size
Image extension
@@ -1917,9 +1943,9 @@

Extract Particles as an MRC Stack


- + - + From 55ec396ba1ae232769456d67fc62d8a3a34f23e9 Mon Sep 17 00:00:00 2001 From: Tristan Bepler Date: Mon, 25 Nov 2019 13:38:16 -0500 Subject: [PATCH 2/5] manifest updated --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index 46ce159..e747191 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ include README.md include topaz/gui/topaz.html include topaz/pretrained/denoise/*.sav +include topaz/pretrained/detector/*.sav From fcf999920bd4a3a212b5de7a93e61daa5ed93b0c Mon Sep 17 00:00:00 2001 From: Jay Jaewon Yoo <39362841+JayJaewonYoo@users.noreply.github.com> Date: Tue, 3 Dec 2019 14:34:45 -0500 Subject: [PATCH 3/5] Store trained model in models for denoise_image function (#41) --- topaz/commands/denoise.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/topaz/commands/denoise.py b/topaz/commands/denoise.py index 7b1d40b..5a3d1f1 100644 --- a/topaz/commands/denoise.py +++ b/topaz/commands/denoise.py @@ -431,6 +431,8 @@ def main(args): torch.save(model, path) if use_cuda: model.cuda() + + models = [model] else: # load the saved model(s) models = [] From 6ca7cfd47b343d1f614ce16d4d13de633da4fc1e Mon Sep 17 00:00:00 2001 From: Tristan Bepler Date: Tue, 3 Dec 2019 14:46:22 -0500 Subject: [PATCH 4/5] updated denoise training to allow fraction of image pairs used for validation to be specified on the command line --- setup.py | 2 +- topaz/_version.py | 2 +- topaz/commands/denoise.py | 19 ++++++++++++------- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/setup.py b/setup.py index b81d848..41ca3fa 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages name = 'topaz-em' -version = '0.2.3' +version = '0.2.4a' description = 'Particle picking with positive-unlabeled CNNs' long_description = 'Particle picking software for single particle cryo-electron microscopy using convolutional neural networks and positive-unlabeled learning. Includes methods for micrograph denoising.' diff --git a/topaz/_version.py b/topaz/_version.py index d31c31e..eb1b266 100644 --- a/topaz/_version.py +++ b/topaz/_version.py @@ -1 +1 @@ -__version__ = "0.2.3" +__version__ = "0.2.4a" diff --git a/topaz/commands/denoise.py b/topaz/commands/denoise.py index 5a3d1f1..86e2cd5 100644 --- a/topaz/commands/denoise.py +++ b/topaz/commands/denoise.py @@ -42,6 +42,7 @@ def add_arguments(parser): parser.add_argument('-b', '--dir-b', nargs='+', help='directory of training images part B') parser.add_argument('--hdf', help='path to HDF5 file containing training image stack as an alternative to dirA/dirB') parser.add_argument('--preload', action='store_true', help='preload micrographs into RAM') + parser.add_argument('--holdout', type=float, default=0.1, help='fraction of training micrograph pairs to holdout for validation (default: 0.1)') parser.add_argument('--lowpass', type=float, default=1, help='lowpass filter micrographs by this amount (in pixels) before applying the denoising filter. uses a hard lowpass filter (i.e. sinc) (default: no lowpass filtering)') parser.add_argument('--gaussian', type=float, default=0, help='Gaussian filter micrographs with this standard deviation (in pixels) before applying the denoising filter (default: 0)') @@ -76,7 +77,7 @@ def add_arguments(parser): from topaz.utils.image import save_image -def make_paired_images_datasets(dir_a, dir_b, crop, random=np.random, preload=False, cutoff=0): +def make_paired_images_datasets(dir_a, dir_b, crop, random=np.random, holdout=0.1, preload=False, cutoff=0): # train denoising model # make the dataset A = [] @@ -87,7 +88,7 @@ def make_paired_images_datasets(dir_a, dir_b, crop, random=np.random, preload=Fa B.append(dir_b + os.sep + name) # randomly hold out some image pairs for validation - n = int(0.1*len(A)) + n = int(holdout*len(A)) order = random.permutation(len(A)) A_train = [] @@ -110,7 +111,7 @@ def make_paired_images_datasets(dir_a, dir_b, crop, random=np.random, preload=Fa return dataset_train, dataset_val -def make_images_datasets(dir_a, dir_b, crop, random=np.random, cutoff=0): +def make_images_datasets(dir_a, dir_b, crop, random=np.random, holdout=0.1, cutoff=0): # train denoising model # make the dataset paths = [] @@ -122,7 +123,7 @@ def make_images_datasets(dir_a, dir_b, crop, random=np.random, cutoff=0): paths.append(path) # randomly hold out some image pairs for validation - n = int(0.1*len(paths)) + n = int(holdout*len(paths)) order = random.permutation(len(paths)) path_train = [] @@ -229,7 +230,7 @@ def __getitem__(self, i): # retrieve the i'th image pair return x -def make_hdf5_datasets(path, paired=True, preload=False, cutoff=0): +def make_hdf5_datasets(path, paired=True, preload=False, holdout=0.1, cutoff=0): # open the hdf5 dataset import h5py @@ -242,7 +243,7 @@ def make_hdf5_datasets(path, paired=True, preload=False, cutoff=0): N = len(dataset) # number of image pairs if paired: N = N//2 - n = int(0.1*N) + n = int(holdout*N) split = 2*(N-n) if paired: @@ -317,6 +318,7 @@ def main(args): method = args.method paired = (method == 'noise2noise') preload = args.preload + holdout = args.holdout # fraction of image pairs to holdout for validation if args.hdf is None: #use dirA/dirB crop = args.crop @@ -331,13 +333,15 @@ def main(args): if paired: dataset_train, dataset_val = make_paired_images_datasets(dir_a, dir_b, crop , random=random + , holdout=holdout , preload=preload , cutoff=cutoff ) else: dataset_train, dataset_val = make_images_datasets(dir_a, dir_b, crop , cutoff=cutoff - , random=random) + , random=random + , holdout=holdout) dset_train.append(dataset_train) dset_val.append(dataset_val) @@ -357,6 +361,7 @@ def main(args): else: # make HDF datasets dataset_train, dataset_val = make_hdf5_datasets(args.hdf, paired=paired , cutoff=cutoff + , holdout=holdout , preload=preload) shuffle = preload From 50edb44ffd2a3921be530331fc42e9865e8ddbd8 Mon Sep 17 00:00:00 2001 From: tbepler Date: Mon, 10 Feb 2020 12:15:17 -0500 Subject: [PATCH 5/5] added error message when topaz does not detect any training particles in the input --- topaz/commands/train.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/topaz/commands/train.py b/topaz/commands/train.py index 6cad0e8..6df8675 100644 --- a/topaz/commands/train.py +++ b/topaz/commands/train.py @@ -266,6 +266,10 @@ def load_data(train_images, train_targets, test_images, test_targets, radius num_micrographs = sum(len(train_images[k]) for k in train_images.keys()) num_particles = len(train_targets) report('Loaded {} training micrographs with {} labeled particles'.format(num_micrographs, num_particles)) + if num_particles == 0: + print('ERROR: no training particles specified. Check that micrograph names in the particles file match those in the micrographs file/directory.', file=sys.stderr) + raise Exception('No training particles.') + train_images, train_targets = match_images_targets(train_images, train_targets, radius)
Input picks
Input micrographs
Output stack
Output stack
Boxsize
Threshold
Threshold
Image extension
Resize particles