Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tem with install dependencies #47

Merged
merged 12 commits into from
Jan 24, 2022
Merged
86 changes: 76 additions & 10 deletions simSPI/tem.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Wrapper for the TEM Simulator."""
import random
import string
import subprocess
from pathlib import Path

import numpy as np
Expand All @@ -23,7 +24,13 @@ class TEMSimulator:
"""

def __init__(self, path_config, sim_config):
self.output_path_dict = self.generate_path_dict(path_config)

with open(path_config, "r") as stream:
parsed_path_config = yaml.safe_load(stream)

self.output_path_dict = self.generate_path_dict(**parsed_path_config)
self.output_path_dict["local_sim_dir"] = parsed_path_config["local_sim_dir"]

self.sim_dict = self.get_config_from_yaml(sim_config)

self.parameter_dict = io.fill_parameters_dictionary(
Expand Down Expand Up @@ -136,7 +143,7 @@ def classify_input_config(raw_params):
return classified_sim_params

@staticmethod
def generate_path_dict(pdb_file, output_dir=None, mrc_keyword=None):
def generate_path_dict(pdb_file, output_dir=None, mrc_keyword=None, **kwargs):
"""Return the paths to pdb, crd, log, inp, and h5 files as strings.

Parameters
Expand All @@ -147,6 +154,9 @@ def generate_path_dict(pdb_file, output_dir=None, mrc_keyword=None):
Relative path to output directory
mrc_keyword : str, (default = None)
user-specified keyword appended to output files
kwargs
Arbitrary keyword arguments.

Comment on lines +157 to +159
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this? I don't see them passed in the function?

Returns
-------
path_dict : dict of type str to str
Expand Down Expand Up @@ -175,18 +185,49 @@ def generate_path_dict(pdb_file, output_dir=None, mrc_keyword=None):
mrc_keyword = str(Path(pdb_file).stem) + "".join(
random.choices(string.ascii_uppercase + string.digits, k=5)
)
output_file_path = output_dir + mrc_keyword

path_dict["pdb_file"] = pdb_file
path_dict["crd_file"] = output_file_path + ".txt"
path_dict["mrc_file"] = output_file_path + ".mrc"
path_dict["log_file"] = output_file_path + ".log"
path_dict["inp_file"] = output_file_path + ".inp"
path_dict["h5_file"] = output_file_path + ".h5"
path_dict["h5_file_noisy"] = output_file_path + "-noisy.h5"
path_dict["pdb_file"] = str(Path(pdb_file))
path_dict["crd_file"] = str(Path(output_dir, mrc_keyword + ".txt"))
path_dict["mrc_file"] = str(Path(output_dir, mrc_keyword + ".mrc"))
path_dict["log_file"] = str(Path(output_dir, mrc_keyword + ".log"))
path_dict["inp_file"] = str(Path(output_dir, mrc_keyword + ".inp"))
path_dict["h5_file"] = str(Path(output_dir, mrc_keyword + ".h5"))
path_dict["h5_file_noisy"] = str(Path(output_dir, mrc_keyword + "-noisy.h5"))

return path_dict

def run(self, pad=5, export_particles=False):
    """Drive the full simulation pipeline and return the particle stack.

    Writes the crd and inp files, runs the simulator to obtain a micrograph,
    extracts the particles from it, and adds gaussian noise when a
    signal-to-noise setting is configured.

    Parameters
    ----------
    pad : double, (default = 5)
        Pad to be added to maximal dimension of the object read from pdb_file.
        NOTE(review): a reviewer asked whether this is in pixel or physical
        (Angstrom) units; this method does not establish it - TODO confirm.
    export_particles : boolean, (default = False)
        Particle data exported to .h5 if True.

    Returns
    -------
    particles : arr
        Individual particle data extracted from micrograph
    """
    # Simulator inputs must exist on disk before the simulator is launched.
    self.create_crd_file(pad)
    self.write_inp_file()

    micrograph = self.get_image_data()
    particles = self.extract_particles(micrograph, pad=pad)

    # Noise is applied only when the "other" section carries a non-None
    # value for either signal-to-noise key.
    if "other" in self.parameter_dict:
        other_params = self.parameter_dict["other"]
        snr_keys = ("signal_to_noise", "signal_to_noise_db")
        if any(other_params.get(key) is not None for key in snr_keys):
            particles = self.apply_gaussian_noise(particles)

    if export_particles:
        self.export_particle_stack(particles)

    return particles

def create_crd_file(self, pad):
"""Format and write molecular model data to crd_file for use in TEM-simulator.

Expand Down Expand Up @@ -222,6 +263,31 @@ def write_inp_file(self):
inp_file=self.output_path_dict["inp_file"], dict_params=self.parameter_dict
)

def get_image_data(self):
    """Invoke the local simulator and load the micrograph it produces.

    Returns
    -------
    micrograph
        First entry along the leading axis (``data[0, ...]``) of the data
        parsed from the simulator's .mrc output file.

    Raises
    ------
    subprocess.CalledProcessError
        Raised if the simulator command exits with a non-zero code.

    Notes
    -----
    This method requires a local tem_sim installation to run.
    """
    paths = self.output_path_dict

    # Argument list (no shell): simulator executable followed by the inp file.
    command = [str(paths["local_sim_dir"]), str(paths["inp_file"])]
    subprocess.run(command, check=True)

    # Only the first entry along the leading axis is returned.
    return io.mrc2data(paths["mrc_file"])[0, ...]

def extract_particles(self, micrograph, pad):
"""Extract particle data from micrograph.

Expand Down
8 changes: 4 additions & 4 deletions tests/test_files/path_config.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
pdb_dir: './test_files'
micrograph_keyword: '_randomrot'
pdb_keyword: '4v6x'
# absolute paths
pdb_file: './test_files/4v6x.pdb'
mrc_keyword: '_randomrot'
output_dir: './test_files'
simulator_dir: '../../../../TEM-simulator/src/TEM-simulator'
local_sim_dir: '../../../../TEM-simulator/src/TEM-simulator'
31 changes: 31 additions & 0 deletions tests/test_tem.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,3 +246,34 @@ def test_export_particle_stack(sample_class, sample_resources):
sample_class.export_particle_stack(particles)
assert os.path.isfile(sample_class.output_path_dict["h5_file"])
assert os.path.isfile(sample_class.output_path_dict["h5_file_noisy"])


def test_get_image_data(sample_class):
    """Check that the local simulator writes its outputs and returns a micrograph.

    Notes
    -----
    This test requires a local TEM sim installation to run.
    """
    expected_shape = (4092, 5760)

    sample_class.write_inp_file()
    micrograph = sample_class.get_image_data()

    # Both the simulator log and the .mrc output must exist after the run.
    for key in ("log_file", "mrc_file"):
        assert os.path.isfile(sample_class.output_path_dict[key])
    assert micrograph.shape == expected_shape


def test_run(sample_class):
    """Check that run yields a particle stack of the expected shape and exports it.

    Notes
    -----
    This test requires a local TEM sim installation to run.
    """
    expected_shape = (35, 809, 809)

    particles = sample_class.run(export_particles=True)
    assert particles.shape == expected_shape

    # Exporting must produce both the clean and the noisy .h5 files.
    for key in ("h5_file", "h5_file_noisy"):
        assert os.path.isfile(sample_class.output_path_dict[key])