diff --git a/api/security.py b/api/security.py index 7aab1b3b..9c95d86f 100644 --- a/api/security.py +++ b/api/security.py @@ -25,7 +25,8 @@ @cache class CachedContent: - """A static class managing caches proposals/visits for each user. + """ + A static class managing caches proposals/visits for each user. Proposals should be collected when has_expired() returns True. Content can be written (when the cache for the user has expired) and read using the set/get methods. @@ -185,9 +186,22 @@ def ping_configured_connector() -> bool: class ISpyBSafeQuerySet(viewsets.ReadOnlyModelViewSet): + """ + This ISpyBSafeQuerySet, which inherits from the DRF viewsets.ReadOnlyModelViewSet, + is used for all views that need to yield (filter) view objects based on a + user's proposal membership. This requires the view to define the property + "filter_permissions" to enable this class to navigate to the view object's Project + (proposal/visit). + + As the ISpyBSafeQuerySet is based on a ReadOnlyModelViewSet, which only provides + implementations for list() and retrieve() methods, the user will need to provide + "mixins" for any additional methods the view needs to support (PATCH, PUT, DELETE). + """ + def get_queryset(self): """ - Optionally restricts the returned purchases to a given proposals + Restricts the returned records to those that belong to proposals + the user has access to. Without a user only 'open' proposals are returned. """ # The list of proposals this user can have proposal_list = self.get_proposals_for_user(self.request.user) @@ -199,7 +213,7 @@ def get_queryset(self): # Must have a foreign key to a Project for this filter to work. 
# get_q_filter() returns a Q expression for filtering - q_filter = self.get_q_filter(proposal_list) + q_filter = self._get_q_filter(proposal_list) return self.queryset.filter(q_filter).distinct() def _get_open_proposals(self): @@ -267,8 +281,8 @@ def _get_proposals_for_user_from_ispyb(self, user): return cached_prop_ids def _get_proposals_from_connector(self, user, conn): - """Updates the USER_LIST_DICT with the results of a query - and marks it as populated. + """ + Updates the user's proposal cache with the results of a query """ assert user assert conn @@ -327,8 +341,19 @@ def _get_proposals_from_connector(self, user, conn): ) CachedContent.set_content(user.username, prop_id_set) + def user_is_member_of_any_given_proposals(self, user, proposals): + """ + Returns true if the user has access to any proposal in the given + proposals list. Only one needs to match for permission to be granted. + We 'restrict_to_membership' to only consider proposals the user + has explicit membership of. + """ + user_proposals = self.get_proposals_for_user(user, restrict_to_membership=True) + return any(proposal in user_proposals for proposal in proposals) + def get_proposals_for_user(self, user, restrict_to_membership=False): - """Returns a list of proposals that the user has access to. + """ + Returns a list of proposals that the user has access to. If 'restrict_to_membership' is set only those proposals/visits where the user is a member of the visit will be returned. 
Otherwise the 'public' @@ -367,7 +392,7 @@ def get_proposals_for_user(self, user, restrict_to_membership=False): # Return the set() as a list() return list(proposals) - def get_q_filter(self, proposal_list): + def _get_q_filter(self, proposal_list): """Returns a Q expression representing a (potentially complex) table filter.""" if self.filter_permissions: # Q-filter is based on the filter_permissions string diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index 0ccf7555..5db2ef7a 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ -7,20 +7,21 @@ from pathlib import Path from typing import Any, Dict, List, Optional, Tuple +import numpy as np from openpyxl.utils import get_column_letter os.environ.setdefault("DJANGO_SETTINGS_MODULE", "fragalysis.settings") import django django.setup() -from django.conf import settings - -logger = logging.getLogger(__name__) +from django.conf import settings +from django.core.exceptions import MultipleObjectsReturned from django.core.files.base import ContentFile from django.core.files.storage import default_storage +from django.db.models import F, TextField, Value +from django.db.models.expressions import Func from rdkit import Chem -from rdkit.Chem import Crippen, Descriptors from viewer.models import ( Compound, @@ -34,7 +35,13 @@ TextScoreValues, User, ) -from viewer.utils import add_props_to_sdf_molecule, is_url, word_count +from viewer.utils import add_props_to_sdf_molecule, alphanumerator, is_url, word_count + +logger = logging.getLogger(__name__) + + +# maximum distance between corresponding atoms in poses +_DIST_LIMIT = 0.5 def dataType(a_str: str) -> str: @@ -132,6 +139,7 @@ def __init__( version, zfile, zfile_hashvals, + computed_set_name, ): self.user_id = user_id self.sdf_filename = sdf_filename @@ -141,6 +149,7 @@ def __init__( self.version = version self.zfile = zfile self.zfile_hashvals = zfile_hashvals + self.computed_set_name = computed_set_name def process_pdb(self, pdb_code, zfile, 
zfile_hashvals) -> str | None: for key in zfile_hashvals.keys(): @@ -254,41 +263,49 @@ def get_site_observation( return site_obvs - def create_mol(self, inchi, long_inchi=None, name=None) -> Compound: + def create_mol(self, inchi, target, name=None) -> Compound: # check for an existing compound, returning a Compound - if long_inchi: - cpd = Compound.objects.filter(long_inchi=long_inchi) - sanitized_mol = Chem.MolFromInchi(long_inchi, sanitize=True) - else: - cpd = Compound.objects.filter(inchi=inchi) - sanitized_mol = Chem.MolFromInchi(inchi, sanitize=True) - - new_mol = cpd[0] if len(cpd) != 0 else Compound() - new_mol.smiles = Chem.MolToSmiles(sanitized_mol) - new_mol.inchi = inchi - if long_inchi: - new_mol.long_inchi = long_inchi - new_mol.identifier = name - - # descriptors - new_mol.mol_log_p = Crippen.MolLogP(sanitized_mol) - new_mol.mol_wt = float(Chem.rdMolDescriptors.CalcExactMolWt(sanitized_mol)) - new_mol.heavy_atom_count = Chem.Lipinski.HeavyAtomCount(sanitized_mol) - new_mol.heavy_atom_mol_wt = float(Descriptors.HeavyAtomMolWt(sanitized_mol)) - new_mol.nhoh_count = Chem.Lipinski.NHOHCount(sanitized_mol) - new_mol.no_count = Chem.Lipinski.NOCount(sanitized_mol) - new_mol.num_h_acceptors = Chem.Lipinski.NumHAcceptors(sanitized_mol) - new_mol.num_h_donors = Chem.Lipinski.NumHDonors(sanitized_mol) - new_mol.num_het_atoms = Chem.Lipinski.NumHeteroatoms(sanitized_mol) - new_mol.num_rot_bonds = Chem.Lipinski.NumRotatableBonds(sanitized_mol) - new_mol.num_val_electrons = Descriptors.NumValenceElectrons(sanitized_mol) - new_mol.ring_count = Chem.Lipinski.RingCount(sanitized_mol) - new_mol.tpsa = Chem.rdMolDescriptors.CalcTPSA(sanitized_mol) - - # make sure there is an id so inspirations can be added - new_mol.save() - - return new_mol + + sanitized_mol = Chem.MolFromInchi(inchi, sanitize=True) + Chem.RemoveStereochemistry(sanitized_mol) + inchi = Chem.inchi.MolToInchi(sanitized_mol) + inchi_key = Chem.InchiToInchiKey(inchi) + + try: + # NB! 
Max said there could be thousands of compounds per + # target so this distinct() here may become a problem + + # fmt: off + cpd = Compound.objects.filter( + computedmolecule__computed_set__target=target, + ).distinct().get( + inchi_key=inchi_key, + ) + # fmt: on + except Compound.DoesNotExist: + cpd = Compound( + smiles=Chem.MolToSmiles(sanitized_mol), + inchi=inchi, + inchi_key=inchi_key, + current_identifier=name, + ) + cpd.save() + except MultipleObjectsReturned as exc: + # NB! when processing new uploads, Compound is always + # fetched by inchi_key, so this shouldn't ever create + # duplicates. And LHS uploads do not create inchi_keys, + # so under normal operations duplicates should never + # occur. However there's nothing in the db to prevent + # this, so adding a catch clause and writing a meaningful + # message + logger.error( + 'Duplicate compounds for target %s with inchi key %s.', + target.title, + inchi_key, + ) + raise MultipleObjectsReturned from exc + + return cpd def set_props(self, cpd, props, compound_set) -> List[ScoreDescription]: if 'ref_mols' and 'ref_pdb' not in list(props.keys()): @@ -322,13 +339,10 @@ def set_mol( smiles = Chem.MolToSmiles(mol) inchi = Chem.inchi.MolToInchi(mol) molecule_name = mol.GetProp('_Name') - long_inchi = None - if len(inchi) > 255: - long_inchi = inchi - inchi = inchi[:254] + version = mol.GetProp('version') compound: Compound = self.create_mol( - inchi, name=molecule_name, long_inchi=long_inchi + inchi, compound_set.target, name=molecule_name ) insp = mol.GetProp('ref_mols') @@ -353,12 +367,7 @@ def set_mol( 'No matching molecules found for inspiration frag ' + i ) - if qs.count() > 1: - ids = [m.cmpd.id for m in qs] - ind = ids.index(max(ids)) - ref = qs[ind] - elif qs.count() == 1: - ref = qs[0] + ref = qs.order_by('-cmpd_id').first() insp_frags.append(ref) @@ -385,11 +394,60 @@ def set_mol( # Need a ComputedMolecule before saving. # Check if anything exists already... 
- existing_computed_molecules = ComputedMolecule.objects.filter( - molecule_name=molecule_name, smiles=smiles, computed_set=compound_set + + # I think, realistically, I only need to check compound + # fmt: off + qs = ComputedMolecule.objects.filter( + compound=compound, + ).annotate( + # names come in format: + # target_name-sequential number-sequential letter, + # e.g. A71EV2A-1-a, hence grabbing the 3rd column + suffix=Func( + F('name'), + Value('-'), + Value(3), + function='split_part', + output_field=TextField(), + ), ) - computed_molecule: Optional[ComputedMolecule] = None + if qs.exists(): + suffix = next( + alphanumerator(start_from=qs.order_by('-suffix').first().suffix) + ) + else: + suffix = 'a' + + # distinct is run on an indexed field, so shouldn't be a problem + number = ComputedMolecule.objects.filter( + computed_set__target=compound_set.target, + ).values('id').distinct().count() + 1 + # fmt: on + + name = f'v{number}{suffix}' + + existing_computed_molecules = [] + for k in qs: + kmol = Chem.MolFromMolBlock(k.sdf_info) + if kmol: + # find distances between corresponding atoms of the + # two conformers. 
if any one exceeds the _DIST_LIMIT, + # consider it to be a new ComputedMolecule + _, _, atom_map = Chem.rdMolAlign.GetBestAlignmentTransform(mol, kmol) + molconf = mol.GetConformer() + kmolconf = kmol.GetConformer() + small_enough = True + for mol_atom, kmol_atom in atom_map: + molpos = np.array(molconf.GetAtomPosition(mol_atom)) + kmolpos = np.array(kmolconf.GetAtomPosition(kmol_atom)) + distance = np.linalg.norm(molpos - kmolpos) + if distance >= _DIST_LIMIT: + small_enough = False + break + if small_enough: + existing_computed_molecules.append(k) + if len(existing_computed_molecules) == 1: logger.info( 'Using existing ComputedMolecule %s', existing_computed_molecules[0] @@ -400,10 +458,10 @@ def set_mol( for exist in existing_computed_molecules: logger.info('Deleting ComputedMolecule %s', exist) exist.delete() - computed_molecule = ComputedMolecule() - if not computed_molecule: + computed_molecule = ComputedMolecule(name=name) + else: logger.info('Creating new ComputedMolecule') - computed_molecule = ComputedMolecule() + computed_molecule = ComputedMolecule(name=name) if isinstance(ref_so, SiteObservation): code = ref_so.code @@ -414,18 +472,20 @@ def set_mol( pdb_info = ref_so lhs_so = None - assert computed_molecule + # I don't quite understand why the overwrite of existing + # compmol.. but this is how it was, not touching it now + # update: I think it's about updating metadata. 
moving + # name attribute out so it won't get overwritten computed_molecule.compound = compound - computed_molecule.computed_set = compound_set computed_molecule.sdf_info = Chem.MolToMolBlock(mol) computed_molecule.site_observation_code = code computed_molecule.reference_code = code computed_molecule.molecule_name = molecule_name - computed_molecule.name = f"{target}-{computed_molecule.identifier}" computed_molecule.smiles = smiles computed_molecule.pdb = lhs_so # TODO: this is wrong computed_molecule.pdb_info = pdb_info + computed_molecule.version = version # Extract possible reference URL and Rationale # URLs have to be valid URLs and rationals must contain more than one word ref_url: Optional[str] = ( @@ -447,6 +507,8 @@ def set_mol( # Done computed_molecule.save() + compound_set.computed_molecules.add(computed_molecule) + # No update the molecule in the original file... add_props_to_sdf_molecule( sdf_file=filename, @@ -530,50 +592,51 @@ def task(self) -> ComputedSet: ) # Do we have any existing ComputedSets? - # Ones with the same method and upload date? - today: datetime.date = datetime.date.today() - existing_sets: List[ComputedSet] = ComputedSet.objects.filter( - method=truncated_submitter_method, upload_date=today - ).all() - # If so, find the one with the highest ordinal. 
- latest_ordinal: int = 0 - for exiting_set in existing_sets: - assert exiting_set.md_ordinal > 0 - if exiting_set.md_ordinal > latest_ordinal: - latest_ordinal = exiting_set.md_ordinal - if latest_ordinal: - logger.info( - 'Found existing ComputedSets for method "%s" on %s (%d) with ordinal=%d', - truncated_submitter_method, - str(today), - len(existing_sets), - latest_ordinal, + try: + computed_set = ComputedSet.objects.get(name=self.computed_set_name) + # refresh some attributes + computed_set.md_ordinal = F('md_ordinal') + 1 + computed_set.upload_date = datetime.date.today() + computed_set.save() + except ComputedSet.DoesNotExist: + # no, create new + + today: datetime.date = datetime.date.today() + new_ordinal: int = 1 + try: + target = Target.objects.get(title=self.target) + except Target.DoesNotExist as exc: + # probably wrong target name supplied + logger.error('Target %s does not exist', self.target) + raise Target.DoesNotExist from exc + + cs_name: str = ( + f'{truncated_submitter_method}-{str(today)}-' + + f'{get_column_letter(new_ordinal)}' ) - # ordinals are 1-based - new_ordinal: int = latest_ordinal + 1 - - # The computed set "name" consists of the "method", - # today's date and a 2-digit ordinal. The ordinal - # is used to distinguish between computed sets uploaded - # with the same method on the same day. 
- assert new_ordinal > 0 - cs_name: str = f"{truncated_submitter_method}-{str(today)}-{get_column_letter(new_ordinal)}" - logger.info('Creating new ComputedSet "%s"', cs_name) - - computed_set: ComputedSet = ComputedSet() - computed_set.name = cs_name - computed_set.md_ordinal = new_ordinal - computed_set.upload_date = today - computed_set.method = self.submitter_method[: ComputedSet.LENGTH_METHOD] - computed_set.target = Target.objects.get(title=self.target) - computed_set.spec_version = float(self.version.strip('ver_')) - if self.user_id: - computed_set.owner_user = User.objects.get(id=self.user_id) - else: - # The User ID may only be None if AUTHENTICATE_UPLOAD is False. - # Here the ComputedSet owner will take on a default (anonymous) value. - assert settings.AUTHENTICATE_UPLOAD is False - computed_set.save() + logger.info('Creating new ComputedSet "%s"', cs_name) + + computed_set = ComputedSet( + name=cs_name, + md_ordinal=new_ordinal, + upload_date=today, + method=self.submitter_method[: ComputedSet.LENGTH_METHOD], + target=target, + spec_version=float(self.version.strip('ver_')), + ) + if self.user_id: + try: + computed_set.owner_user = User.objects.get(id=self.user_id) + except User.DoesNotExist as exc: + logger.error('User %s does not exist', self.user_id) + raise User.DoesNotExist from exc + + else: + # The User ID may only be None if AUTHENTICATE_UPLOAD is False. + # Here the ComputedSet owner will take on a default (anonymous) value. + assert settings.AUTHENTICATE_UPLOAD is False + + computed_set.save() # check compound set folder exists. 
cmp_set_folder = os.path.join( diff --git a/viewer/filters.py b/viewer/filters.py index d5e62e03..d0df524a 100644 --- a/viewer/filters.py +++ b/viewer/filters.py @@ -1,3 +1,5 @@ +import logging + import django_filters from django_filters import rest_framework as filters @@ -12,6 +14,8 @@ XtalformSite, ) +logger = logging.getLogger(__name__) + class SnapshotFilter(filters.FilterSet): session_project = django_filters.CharFilter( diff --git a/viewer/migrations/0056_compound_inchi_key.py b/viewer/migrations/0056_compound_inchi_key.py new file mode 100644 index 00000000..d0591072 --- /dev/null +++ b/viewer/migrations/0056_compound_inchi_key.py @@ -0,0 +1,18 @@ +# Generated by Django 3.2.25 on 2024-06-11 13:10 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('viewer', '0055_merge_20240516_1003'), + ] + + operations = [ + migrations.AddField( + model_name='compound', + name='inchi_key', + field=models.CharField(blank=True, db_index=True, max_length=80), + ), + ] diff --git a/viewer/migrations/0057_auto_20240612_1348.py b/viewer/migrations/0057_auto_20240612_1348.py new file mode 100644 index 00000000..1dd8e5ac --- /dev/null +++ b/viewer/migrations/0057_auto_20240612_1348.py @@ -0,0 +1,23 @@ +# Generated by Django 3.2.25 on 2024-06-12 13:48 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('viewer', '0056_compound_inchi_key'), + ] + + operations = [ + migrations.AddField( + model_name='computedmolecule', + name='version', + field=models.PositiveSmallIntegerField(default=1), + ), + migrations.AlterField( + model_name='compound', + name='inchi_key', + field=models.CharField(blank=True, db_index=True, max_length=27), + ), + ] diff --git a/viewer/migrations/0058_auto_20240614_1016.py b/viewer/migrations/0058_auto_20240614_1016.py new file mode 100644 index 00000000..8e296ed6 --- /dev/null +++ b/viewer/migrations/0058_auto_20240614_1016.py @@ -0,0 +1,35 
@@ +# Generated by Django 3.2.25 on 2024-06-14 10:16 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('viewer', '0057_auto_20240612_1348'), + ] + + operations = [ + migrations.CreateModel( + name='ComputedSetComputedMolecule', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('computed_molecule', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='viewer.computedmolecule')), + ('computed_set', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='viewer.computedset')), + ], + ), + migrations.AddConstraint( + model_name='computedsetcomputedmolecule', + constraint=models.UniqueConstraint(fields=('computed_set', 'computed_molecule'), name='unique_computedsetcomputedmolecule'), + ), + migrations.RemoveField( + model_name='computedmolecule', + name='computed_set', + ), + migrations.AddField( + model_name='computedset', + name='computed_molecules', + field=models.ManyToManyField(related_name='computed_set', through='viewer.ComputedSetComputedMolecule', to='viewer.ComputedMolecule'), + ), + ] diff --git a/viewer/models.py b/viewer/models.py index 0d38914f..85678ad0 100644 --- a/viewer/models.py +++ b/viewer/models.py @@ -123,7 +123,12 @@ def __str__(self) -> str: return f"{self.title}" def __repr__(self) -> str: - return "" % (self.id, self.title, self.project_id) + return "" % ( + self.id, + self.title, + self.display_name, + self.project_id, + ) class ExperimentUpload(models.Model): @@ -256,6 +261,7 @@ class Compound(models.Model): ) description = models.TextField(blank=True, null=True) comments = models.TextField(blank=True, null=True) + inchi_key = models.CharField(db_index=True, max_length=27, blank=True) objects = models.Manager() filter_manager = CompoundDataManager() @@ -946,6 +952,12 @@ class ComputedSet(models.Model): upload_datetime = models.DateTimeField( null=True, 
help_text="The datetime the upload was completed" ) + computed_molecules = models.ManyToManyField( + "ComputedMolecule", + through="ComputedSetComputedMolecule", + through_fields=("computed_set", "computed_molecule"), + related_name="computed_set", + ) def __str__(self) -> str: target_title: str = self.target.title if self.target else "None" @@ -973,7 +985,6 @@ class ComputedMolecule(models.Model): null=True, blank=True, ) - computed_set = models.ForeignKey(ComputedSet, on_delete=models.CASCADE) name = models.CharField( max_length=50, help_text="A combination of Target and Identifier" ) @@ -1016,6 +1027,7 @@ class ComputedMolecule(models.Model): max_length=255, help_text="Link to pdb file; user-uploaded pdb or pdb.experiment.pdb_info", ) + version = models.PositiveSmallIntegerField(null=False, default=1) def __str__(self) -> str: return f"{self.smiles}" @@ -1030,6 +1042,24 @@ def __repr__(self) -> str: ) +class ComputedSetComputedMolecule(models.Model): + computed_set = models.ForeignKey(ComputedSet, null=False, on_delete=models.CASCADE) + computed_molecule = models.ForeignKey( + ComputedMolecule, null=False, on_delete=models.CASCADE + ) + + class Meta: + constraints = [ + models.UniqueConstraint( + fields=[ + "computed_set", + "computed_molecule", + ], + name="unique_computedsetcomputedmolecule", + ), + ] + + class ScoreDescription(models.Model): """The names and descriptions of scores that the user uploads with each computed set molecule.""" diff --git a/viewer/permissions.py b/viewer/permissions.py new file mode 100644 index 00000000..95c975e6 --- /dev/null +++ b/viewer/permissions.py @@ -0,0 +1,54 @@ +from rest_framework import permissions +from rest_framework.exceptions import PermissionDenied + +from api.security import ISpyBSafeQuerySet + +_ISPYB_SAFE_QUERY_SET = ISpyBSafeQuerySet() + + +class IsObjectProposalMember(permissions.BasePermission): + """ + Custom permissions to only allow write-access to objects (changes) by users + who are members of the 
+ object's proposals. This permissions class should be used + in any view that needs to restrict object modifications to users who are members of + at least one of the object's proposals. This class can be used for objects that + either have one proposal or many. + + If the object has no proposals, the user is granted access. + """ + + def has_object_permission(self, request, view, obj): + # Here we check that the user has access to any proposal the object belongs to. + # Firstly, users must be authenticated + if not request.user.is_authenticated: + return False + # Protect ourselves from views that do not (oddly) + # have a property called 'filter_permissions'... + if not hasattr(view, "filter_permissions"): + raise AttributeError( + "The view object must define a 'filter_permissions' property" + ) + # The object's proposal records (one or many) can be obtained via + # the view's 'filter_permissions' property. A standard + # django property reference, e.g. 'target__project_id'. + object_proposals = [] + attr_value = getattr(obj, view.filter_permissions) + if attr_value.__class__.__name__ == "ManyRelatedManager": + # Potential for many proposals... + object_proposals = [p.title for p in attr_value.all()] + else: + # Only one proposal... + object_proposals = [attr_value.title] + # Now we have the proposals the object belongs to + # has the user been associated (in ISpyB) with any of them? + if ( + object_proposals + and not _ISPYB_SAFE_QUERY_SET.user_is_member_of_any_given_proposals( + user=request.user, proposals=object_proposals + ) + ): + raise PermissionDenied( + detail="Your authority to access this object has not been given" + ) + # User is a member of at least one of the object's proposals... 
+ return True diff --git a/viewer/target_loader.py b/viewer/target_loader.py index f0c01bb8..8a3cbb15 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1,11 +1,9 @@ import contextlib import functools import hashlib -import itertools import logging import math import os -import string import tarfile import uuid from collections.abc import Callable @@ -13,7 +11,7 @@ from enum import Enum from pathlib import Path from tempfile import TemporaryDirectory -from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple, TypeVar +from typing import Any, Dict, Iterable, List, Optional, Tuple, TypeVar import yaml from celery import Task @@ -46,6 +44,7 @@ XtalformQuatAssembly, XtalformSite, ) +from viewer.utils import alphanumerator logger = logging.getLogger(__name__) @@ -277,29 +276,6 @@ def calculate_sha256(filepath) -> str: return sha256_hash.hexdigest() -def alphanumerator(start_from: str = "") -> Generator[str, None, None]: - """Return alphabetic generator (A, B .. AA, AB...) starting from a specified point.""" - - # since product requries finite maximum return string length set - # to 10 characters. 
that should be enough for fragalysis (and to - # cause database issues) - generator = ( - "".join(word) - for word in itertools.chain.from_iterable( - itertools.product(string.ascii_lowercase, repeat=i) for i in range(1, 11) - ) - ) - - # Drop values until the starting point is reached - if start_from is not None and start_from != '': - start_from = start_from.lower() - generator = itertools.dropwhile(lambda x: x != start_from, generator) # type: ignore[assignment] - # and drop one more, then it starts from after the start from as it should - _ = next(generator) - - return generator - - def strip_version(s: str, separator: str = "/") -> Tuple[str, int]: # format something like XX01ZVNS2B-x0673/B/501/1 # remove tailing '1' diff --git a/viewer/tasks.py b/viewer/tasks.py index cd360587..2274fdd4 100644 --- a/viewer/tasks.py +++ b/viewer/tasks.py @@ -86,6 +86,7 @@ def process_compound_set(validate_output): logger.warning('process_compound_set() EXIT params=%s (not validated)', params) return process_stage, validate_dict, validated + computed_set_name = params.get('update', None) submitter_name, submitter_method, blank_version = blank_mol_vals(params['sdf']) zfile, zfile_hashvals = PdbOps().run(params) @@ -100,6 +101,7 @@ def process_compound_set(validate_output): version=blank_version, zfile=zfile, zfile_hashvals=zfile_hashvals, + computed_set_name=computed_set_name, ) compound_set = save_mols.task() @@ -186,6 +188,7 @@ def validate_compound_set(task_params): 'sdf': sdf_file, 'target': target, 'pdb_zip': zfile, + 'update': update, } # Protect ourselves from an empty, blank or missing SD file. diff --git a/viewer/utils.py b/viewer/utils.py index b7b72feb..e170a0ee 100644 --- a/viewer/utils.py +++ b/viewer/utils.py @@ -4,13 +4,15 @@ Collection of technical methods tidied up in one location. 
""" import fnmatch +import itertools import json import logging import os import shutil +import string import tempfile from pathlib import Path -from typing import Dict, Optional +from typing import Dict, Generator, Optional from urllib.parse import urlparse from django.conf import settings @@ -405,3 +407,26 @@ def restore_curated_tags(filename: str) -> None: except IntegrityError as exc: logger.error(exc) + + +def alphanumerator(start_from: str = "") -> Generator[str, None, None]: + """Return alphabetic generator (A, B .. AA, AB...) starting from a specified point.""" + + # since product requries finite maximum return string length set + # to 10 characters. that should be enough for fragalysis (and to + # cause database issues) + generator = ( + "".join(word) + for word in itertools.chain.from_iterable( + itertools.product(string.ascii_lowercase, repeat=i) for i in range(1, 11) + ) + ) + + # Drop values until the starting point is reached + if start_from is not None and start_from != '': + start_from = start_from.lower() + generator = itertools.dropwhile(lambda x: x != start_from, generator) # type: ignore[assignment] + # and drop one more, then it starts from after the start from as it should + _ = next(generator) + + return generator diff --git a/viewer/views.py b/viewer/views.py index 93d6b12c..f1b58e35 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -24,7 +24,7 @@ from django.shortcuts import get_object_or_404, redirect, render from django.urls import reverse from django.views import View -from rest_framework import permissions, status, viewsets +from rest_framework import mixins, permissions, status, viewsets from rest_framework.exceptions import ParseError from rest_framework.parsers import BaseParser from rest_framework.response import Response @@ -35,6 +35,7 @@ from api.utils import get_highlighted_diffs, get_params, pretty_request from service_status.models import Service from viewer import filters, models, serializers +from viewer.permissions 
import IsObjectProposalMember from viewer.squonk2_agent import ( AccessParams, CommonParams, @@ -83,6 +84,107 @@ _SQ2A: Squonk2Agent = get_squonk2_agent() +# --------------------------- +# ENTRYPOINT FOR THE FRONTEND +# --------------------------- + + +def react(request): + """We "START HERE". This is the first API call that the front-end calls.""" + + discourse_api_key = settings.DISCOURSE_API_KEY + + # Start building the context that will be passed to the template + context = {'legacy_url': settings.LEGACY_URL} + + # Is the Squonk2 Agent configured? + logger.info("Checking whether Squonk2 is configured...") + sq2_rv = _SQ2A.configured() + if sq2_rv.success: + logger.info("Squonk2 is configured") + context['squonk_available'] = 'true' + else: + logger.info("Squonk2 is NOT configured") + context['squonk_available'] = 'false' + + context['discourse_available'] = 'true' if discourse_api_key else 'false' + user = request.user + if user.is_authenticated: + context['discourse_host'] = '' + context['user_present_on_discourse'] = 'false' + # If user is authenticated and a discourse api key is available, then check discourse to + # see if user is set up and set up flag in context. + if discourse_api_key: + context['discourse_host'] = settings.DISCOURSE_HOST + _, _, user_id = check_discourse_user(user) + if user_id: + context['user_present_on_discourse'] = 'true' + + # User is authenticated, so if Squonk can be called + # return the Squonk UI URL + # so the f/e knows where to go. 
+ context['squonk_ui_url'] = '' + if sq2_rv.success and check_squonk_active(request): + context['squonk_ui_url'] = _SQ2A.get_ui_url() + + return render(request, "viewer/react_temp.html", context) + + +def save_pdb_zip(pdb_file): + zf = zipfile.ZipFile(pdb_file) + zip_lst = zf.namelist() + zfile = {} + zfile_hashvals: Dict[str, str] = {} + print(zip_lst) + for filename in zip_lst: + # only handle pdb files + if filename.split('.')[-1] == 'pdb': + f = filename.split('/')[0] + save_path = os.path.join(settings.MEDIA_ROOT, 'tmp', f) + if default_storage.exists(f): + rand_str = uuid.uuid4().hex + pdb_path = default_storage.save( + save_path.replace('.pdb', f'-{rand_str}.pdb'), + ContentFile(zf.read(filename)), + ) + # Test if Protein object already exists + # code = filename.split('/')[-1].replace('.pdb', '') + # test_pdb_code = filename.split('/')[-1].replace('.pdb', '') + # test_prot_objs = Protein.objects.filter(code=test_pdb_code) + # + # if len(test_prot_objs) > 0: + # # make a unique pdb code as not to overwrite existing object + # rand_str = uuid.uuid4().hex + # test_pdb_code = f'{code}#{rand_str}' + # zfile_hashvals[code] = rand_str + # + # fn = test_pdb_code + '.pdb' + # + # pdb_path = default_storage.save('tmp/' + fn, + # ContentFile(zf.read(filename))) + else: + pdb_path = default_storage.save( + save_path, ContentFile(zf.read(filename)) + ) + test_pdb_code = pdb_path.split('/')[-1].replace('.pdb', '') + zfile[test_pdb_code] = pdb_path + + # Close the zip file + if zf: + zf.close() + + return zfile, zfile_hashvals + + +def save_tmp_file(myfile): + """Save file in temporary location for validation/upload processing""" + + name = myfile.name + path = default_storage.save('tmp/' + name, ContentFile(myfile.read())) + tmp_file = str(os.path.join(settings.MEDIA_ROOT, path)) + + return tmp_file + class CompoundIdentifierTypeView(viewsets.ModelViewSet): queryset = models.CompoundIdentifierType.objects.all() @@ -190,30 +292,29 @@ class ProjectView(ISpyBSafeQuerySet): 
filter_permissions = "" -class TargetView(ISpyBSafeQuerySet): - """Targets (api/targets)""" - +class TargetView(mixins.UpdateModelMixin, ISpyBSafeQuerySet): queryset = models.Target.objects.filter() serializer_class = serializers.TargetSerializer filter_permissions = "project_id" filterset_fields = ("title",) + permission_classes = [IsObjectProposalMember] def patch(self, request, pk): try: target = self.queryset.get(pk=pk) except models.Target.DoesNotExist: + msg = f"Target pk={pk} does not exist" + logger.warning(msg) return Response( - {"message": f"Target pk={pk} does not exist"}, + {"message": msg}, status=status.HTTP_404_NOT_FOUND, ) serializer = self.serializer_class(target, data=request.data, partial=True) if serializer.is_valid(): - logger.debug("serializer data: %s", serializer.validated_data) - serializer.save() + _ = serializer.save() return Response(serializer.data, status=status.HTTP_200_OK) else: - logger.debug("serializer error: %s", serializer.errors) return Response( {"message": "wrong parameters"}, status=status.HTTP_400_BAD_REQUEST ) @@ -228,109 +329,6 @@ class CompoundView(ISpyBSafeQuerySet): filterset_class = filters.CompoundFilter -def react(request): - """We "START HERE". This is the first API call that the front-end calls.""" - - discourse_api_key = settings.DISCOURSE_API_KEY - - context = {} - - # Legacy URL (a n optional prior stack) - # May be blank ('') - context['legacy_url'] = settings.LEGACY_URL - - # Is the Squonk2 Agent configured? 
- logger.info("Checking whether Squonk2 is configured...") - sq2_rv = _SQ2A.configured() - if sq2_rv.success: - logger.info("Squonk2 is configured") - context['squonk_available'] = 'true' - else: - logger.info("Squonk2 is NOT configured") - context['squonk_available'] = 'false' - - if discourse_api_key: - context['discourse_available'] = 'true' - else: - context['discourse_available'] = 'false' - - user = request.user - if user.is_authenticated: - context['discourse_host'] = '' - context['user_present_on_discourse'] = 'false' - # If user is authenticated and a discourse api key is available, then check discourse to - # see if user is set up and set up flag in context. - if discourse_api_key: - context['discourse_host'] = settings.DISCOURSE_HOST - _, _, user_id = check_discourse_user(user) - if user_id: - context['user_present_on_discourse'] = 'true' - - # If user is authenticated Squonk can be called then return the Squonk host - # so the Frontend can navigate to it - context['squonk_ui_url'] = '' - if sq2_rv.success and check_squonk_active(request): - context['squonk_ui_url'] = _SQ2A.get_ui_url() - - return render(request, "viewer/react_temp.html", context) - - -def save_pdb_zip(pdb_file): - zf = zipfile.ZipFile(pdb_file) - zip_lst = zf.namelist() - zfile = {} - zfile_hashvals: Dict[str, str] = {} - print(zip_lst) - for filename in zip_lst: - # only handle pdb files - if filename.split('.')[-1] == 'pdb': - f = filename.split('/')[0] - save_path = os.path.join(settings.MEDIA_ROOT, 'tmp', f) - if default_storage.exists(f): - rand_str = uuid.uuid4().hex - pdb_path = default_storage.save( - save_path.replace('.pdb', f'-{rand_str}.pdb'), - ContentFile(zf.read(filename)), - ) - # Test if Protein object already exists - # code = filename.split('/')[-1].replace('.pdb', '') - # test_pdb_code = filename.split('/')[-1].replace('.pdb', '') - # test_prot_objs = Protein.objects.filter(code=test_pdb_code) - # - # if len(test_prot_objs) > 0: - # # make a unique pdb code as not to 
overwrite existing object - # rand_str = uuid.uuid4().hex - # test_pdb_code = f'{code}#{rand_str}' - # zfile_hashvals[code] = rand_str - # - # fn = test_pdb_code + '.pdb' - # - # pdb_path = default_storage.save('tmp/' + fn, - # ContentFile(zf.read(filename))) - else: - pdb_path = default_storage.save( - save_path, ContentFile(zf.read(filename)) - ) - test_pdb_code = pdb_path.split('/')[-1].replace('.pdb', '') - zfile[test_pdb_code] = pdb_path - - # Close the zip file - if zf: - zf.close() - - return zfile, zfile_hashvals - - -def save_tmp_file(myfile): - """Save file in temporary location for validation/upload processing""" - - name = myfile.name - path = default_storage.save('tmp/' + name, ContentFile(myfile.read())) - tmp_file = str(os.path.join(settings.MEDIA_ROOT, path)) - - return tmp_file - - class UploadCSet(APIView): """Render and control viewer/upload-cset.html - a page allowing upload of computed sets. Validation and upload tasks are defined in `viewer.compound_set_upload`, `viewer.sdf_check` and `viewer.tasks` and the task @@ -459,7 +457,6 @@ def post(self, request): request.session[_SESSION_ERROR], ) return redirect('viewer:upload_cset') - # You cannot validate or upload a set # unless the user is part of the Target's project (proposal) # even if the target is 'open'. 
@@ -571,7 +568,34 @@ def post(self, request): assert selected_set written_sdf_filename = selected_set.written_sdf_filename selected_set_name = selected_set.name + + # related objects: + # - ComputedSetComputedMolecule + # - ComputedMolecule + # - NumericalScoreValues + # - TextScoreValues + # - ComputedMolecule_computed_inspirations + # - Compound + + # all but ComputedMolecule are handled automatically + # but (because of the m2m), have to delete those + # separately + + # select ComputedMolecule objects that are in this set + # and not in any other sets + # fmt: off + selected_set.computed_molecules.exclude( + pk__in=models.ComputedMolecule.objects.filter( + computed_set__in=models.ComputedSet.objects.filter( + target=selected_set.target, + ).exclude( + pk=selected_set.pk, + ), + ), + ).delete() + # fmt: on selected_set.delete() + # ...and the original (expected) file if os.path.isfile(written_sdf_filename): os.remove(written_sdf_filename) @@ -987,6 +1011,8 @@ class SessionProjectsView(viewsets.ModelViewSet): """ queryset = models.SessionProject.objects.filter() + filter_permissions = "target__project_id" + filterset_fields = '__all__' def get_serializer_class(self): """Determine which serializer to use based on whether the request is a GET or a POST, PUT or PATCH request @@ -1003,9 +1029,6 @@ def get_serializer_class(self): # (POST, PUT, PATCH) return serializers.SessionProjectWriteSerializer - filter_permissions = "target_id__project_id" - filterset_fields = '__all__' - class SessionActionsView(viewsets.ModelViewSet): """View to retrieve information about actions relating to sessions_project (GET).