From 6afddf9fe5d5caebe68550685518ee4a28cdc9ed Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Wed, 10 Apr 2024 14:43:20 +0100 Subject: [PATCH] stashing Changes so far: - removed endpoint FirstAssemblyView - moved the functionality to template_protein field in TargetSerializer - removed TargetMoleculesSerializer - removed sequences field from TargetSerializer This is a result of Boris' comment on GitHub (https://github.com/m2ms/fragalysis-frontend/issues/1373#issuecomment-2047417958) where he said the field isn't used and the template_protein field is not used. Looking at the code where this may be used revealed that TargetMoleculesSerializer can be removed as well. NB! they're not removed-removed right now, only commented out. This commit can be used to restore the code. --- api/urls.py | 12 +- viewer/serializers.py | 634 ++++++++++++++++++++++++------------------ viewer/views.py | 32 +-- 3 files changed, 378 insertions(+), 300 deletions(-) diff --git a/api/urls.py b/api/urls.py index c2c84a39..2199fe11 100644 --- a/api/urls.py +++ b/api/urls.py @@ -88,9 +88,9 @@ viewer_views.SessionProjectTagView, basename='session_project_tag', ) -router.register( - "target_molecules", viewer_views.TargetMoleculesView, basename='target_molecules' -) +# router.register( +# "target_molecules", viewer_views.TargetMoleculesView, basename='target_molecules' +# ) # Download a zip file of the requested contents router.register( @@ -133,9 +133,9 @@ router.register("job_callback", viewer_views.JobCallBackView, basename='job_callback') router.register("job_config", viewer_views.JobConfigView, basename='job_config') router.register("job_override", viewer_views.JobOverrideView, basename='job_override') -router.register( - "first_assembly", viewer_views.FirstAssemblyView, basename='first_assembly' -) +# router.register( +# "first_assembly", viewer_views.FirstAssemblyView, basename='first_assembly' +# ) from rest_framework import response, schemas diff --git a/viewer/serializers.py 
b/viewer/serializers.py index 9e2ee6a2..205f947e 100644 --- a/viewer/serializers.py +++ b/viewer/serializers.py @@ -1,13 +1,15 @@ import logging -import os + +# import os from pathlib import Path from urllib.parse import urljoin import yaml from django.conf import settings from django.contrib.auth.models import User -from django.contrib.postgres.aggregates import ArrayAgg -from django.db.models import F + +# from django.contrib.postgres.aggregates import ArrayAgg +# from django.db.models import F from frag.network.decorate import get_3d_vects_for_mol, get_vect_indices_for_mol from frag.network.query import get_full_graph from rdkit import Chem @@ -16,8 +18,10 @@ from api.security import ISpyBSafeQuerySet from api.utils import draw_mol, validate_tas -from scoring.models import SiteObservationGroup + +# from scoring.models import SiteObservationGroup from viewer import models +from viewer.target_loader import XTALFORMS_FILE from viewer.target_set_upload import sanitize_mol from viewer.utils import get_https_host @@ -32,98 +36,98 @@ class Meta: fields = "__all__" -def get_protein_sequences(pdb_block): - sequence_list = [] - aa = { - 'CYS': 'C', - 'ASP': 'D', - 'SER': 'S', - 'GLN': 'Q', - 'LYS': 'K', - 'ILE': 'I', - 'PRO': 'P', - 'THR': 'T', - 'PHE': 'F', - 'ASN': 'N', - 'GLY': 'G', - 'HIS': 'H', - 'LEU': 'L', - 'ARG': 'R', - 'TRP': 'W', - 'ALA': 'A', - 'VAL': 'V', - 'GLU': 'E', - 'TYR': 'Y', - 'MET': 'M', - } - - current_chain = 'A' - current_sequence = '' - current_number = 0 - - for line in pdb_block.split('\n'): - if line[0:4] == 'ATOM': - residue = line[17:20].strip() - chain = line[21].strip() - n = int(line[22:26].strip()) - - if chain != current_chain: - chain_dict = {'chain': current_chain, 'sequence': current_sequence} - sequence_list.append(chain_dict) - current_sequence = '' - current_chain = chain - if not n == current_number: - if n == current_number + 1: - try: - seqres = aa[residue] - except: - seqres = 'X' - current_sequence += seqres - else: - if not 
current_number == 0: - gap = n - current_number - gap_str = '' - for _ in range(gap): - gap_str += 'X' - current_sequence += gap_str - current_number = n - - if not sequence_list: - chain_dict = {'chain': current_chain, 'sequence': current_sequence} - sequence_list.append(chain_dict) - - return sequence_list - - -def protein_sequences(obj): - """Common enabler code for Target-related serializers""" - proteins = models.SiteObservation.filter_manager.by_target(target=obj) - protein_file = None - for protein in proteins: - if protein.apo_file: - if not os.path.isfile(protein.apo_file.path): - continue - protein_file = protein.apo_file - break - if not protein_file: - return [{'chain': '', 'sequence': ''}] - - protein_file.open(mode='r') - pdb_block = protein_file.read() - protein_file.close() - - sequences = get_protein_sequences(pdb_block) - return sequences - - -def template_protein(obj): - """Common enabler code for Target-related serializers""" - - proteins = models.SiteObservation.filter_manager.by_target(target=obj) - for protein in proteins: - if protein.apo_file: - return protein.apo_file.url - return "NOT AVAILABLE" +# def get_protein_sequences(pdb_block): +# sequence_list = [] +# aa = { +# 'CYS': 'C', +# 'ASP': 'D', +# 'SER': 'S', +# 'GLN': 'Q', +# 'LYS': 'K', +# 'ILE': 'I', +# 'PRO': 'P', +# 'THR': 'T', +# 'PHE': 'F', +# 'ASN': 'N', +# 'GLY': 'G', +# 'HIS': 'H', +# 'LEU': 'L', +# 'ARG': 'R', +# 'TRP': 'W', +# 'ALA': 'A', +# 'VAL': 'V', +# 'GLU': 'E', +# 'TYR': 'Y', +# 'MET': 'M', +# } + +# current_chain = 'A' +# current_sequence = '' +# current_number = 0 + +# for line in pdb_block.split('\n'): +# if line[0:4] == 'ATOM': +# residue = line[17:20].strip() +# chain = line[21].strip() +# n = int(line[22:26].strip()) + +# if chain != current_chain: +# chain_dict = {'chain': current_chain, 'sequence': current_sequence} +# sequence_list.append(chain_dict) +# current_sequence = '' +# current_chain = chain +# if not n == current_number: +# if n == current_number + 
1: +# try: +# seqres = aa[residue] +# except: +# seqres = 'X' +# current_sequence += seqres +# else: +# if not current_number == 0: +# gap = n - current_number +# gap_str = '' +# for _ in range(gap): +# gap_str += 'X' +# current_sequence += gap_str +# current_number = n + +# if not sequence_list: +# chain_dict = {'chain': current_chain, 'sequence': current_sequence} +# sequence_list.append(chain_dict) + +# return sequence_list + + +# def protein_sequences(obj): +# """Common enabler code for Target-related serializers""" +# proteins = models.SiteObservation.filter_manager.by_target(target=obj) +# protein_file = None +# for protein in proteins: +# if protein.apo_file: +# if not os.path.isfile(protein.apo_file.path): +# continue +# protein_file = protein.apo_file +# break +# if not protein_file: +# return [{'chain': '', 'sequence': ''}] + +# protein_file.open(mode='r') +# pdb_block = protein_file.read() +# protein_file.close() + +# sequences = get_protein_sequences(pdb_block) +# return sequences + + +# def template_protein(obj): +# """Common enabler code for Target-related serializers""" + +# proteins = models.SiteObservation.filter_manager.by_target(target=obj) +# for protein in proteins: +# if protein.apo_file: +# return protein.apo_file.url +# return "NOT AVAILABLE" class CompoundIdentifierTypeSerializer(serializers.ModelSerializer): @@ -142,10 +146,80 @@ class TargetSerializer(serializers.ModelSerializer): template_protein = serializers.SerializerMethodField() zip_archive = serializers.SerializerMethodField() metadata = serializers.SerializerMethodField() - sequences = serializers.SerializerMethodField() + # sequences = serializers.SerializerMethodField() def get_template_protein(self, obj): - return template_protein(obj) + exp_upload = ( + models.ExperimentUpload.objects.filter( + target=obj, + ) + .order_by('-commit_datetime') + .first() + ) + + yaml_path = ( + Path(settings.MEDIA_ROOT) + .joinpath(settings.TARGET_LOADER_MEDIA_DIRECTORY) + 
.joinpath(exp_upload.task_id) + ) + + # add unpacked zip directory + yaml_path = [d for d in list(yaml_path.glob("*")) if d.is_dir()][0] + + # add upload_[d] dir + yaml_path = next(yaml_path.glob("upload_*")) + + # last components of path, need for reconstruction later + comps = yaml_path.parts[-2:] + + # and the file itself + yaml_path = yaml_path.joinpath(XTALFORMS_FILE) + logger.debug("assemblies path: %s", yaml_path) + if yaml_path.is_file(): + with open(yaml_path, "r", encoding="utf-8") as file: + contents = yaml.safe_load(file) + try: + assemblies = contents["assemblies"] + except KeyError: + logger.error("No 'assemblies' section in '%s'", XTALFORMS_FILE) + return '' + + try: + first = list(assemblies.values())[0] + except IndexError: + logger.error("No assemblies in 'assemblies' section") + return '' + + try: + reference = first["reference"] + except KeyError: + logger.error("No assemblies in 'assemblies' section") + return '' + + ref_path = ( + Path(settings.TARGET_LOADER_MEDIA_DIRECTORY) + .joinpath(exp_upload.task_id) + .joinpath(comps[0]) + .joinpath(comps[1]) + .joinpath("crystallographic_files") + .joinpath(reference) + .joinpath(f"{reference}.pdb") + ) + logger.debug('ref_path: %s', ref_path) + if Path(settings.MEDIA_ROOT).joinpath(ref_path).is_file(): + request = self.context.get('request', None) + if request is not None: + return request.build_absolute_uri( + Path(settings.MEDIA_URL).joinpath(ref_path) + ) + else: + return '' + else: + logger.error("Reference pdb file doesn't exist") + return '' + else: + logger.error("'%s' missing", XTALFORMS_FILE) + return '' def get_zip_archive(self, obj): # The if-check is because the filefield in target has null=True. 
@@ -159,8 +233,8 @@ def get_metadata(self, obj): return urljoin(get_https_host(self.context["request"]), obj.metadata.url) return - def get_sequences(self, obj): - return protein_sequences(obj) + # def get_sequences(self, obj): + # return protein_sequences(obj) class Meta: model = models.Target @@ -175,7 +249,7 @@ class Meta: "metadata", "zip_archive", "upload_status", - "sequences", + # "sequences", ) extra_kwargs = { "id": {"read_only": True}, @@ -186,7 +260,7 @@ class Meta: "metadata": {"read_only": True}, "zip_archive": {"read_only": True}, "upload_status": {"read_only": True}, - "sequences": {"read_only": True}, + # "sequences": {"read_only": True}, } @@ -733,181 +807,187 @@ class Meta: fields = '__all__' -class FirstAssemblySerializer(serializers.ModelSerializer): - target = serializers.IntegerField() - pdb_file = serializers.SerializerMethodField() - - def get_pdb_file(self, obj): - exp_upload = ( - models.ExperimentUpload.objects.filter( - target__id=obj.target, - ) - .order_by('-commit_datetime') - .first() - ) - - yaml_path = ( - Path(settings.MEDIA_ROOT) - .joinpath(settings.TARGET_LOADER_MEDIA_DIRECTORY) - .joinpath(exp_upload.task_id) - ) - - # add unpacked zip directory - yaml_path = [d for d in list(yaml_path.glob("*")) if d.is_dir()][0] - - # add upload_[d] dir - yaml_path = next(yaml_path.glob("upload_*")) - - # last components of path, need for reconstruction later - comps = yaml_path.parts[-2:] - - # and the file itself - yaml_path = yaml_path.joinpath('assemblies.yaml') - logger.debug("assemblies path: %s", yaml_path) - if yaml_path.is_file(): - with open(yaml_path, "r", encoding="utf-8") as file: - contents = yaml.safe_load(file) - try: - assemblies = contents["assemblies"] - except KeyError: - logger.error("No 'assemblies' section in 'assemblies.yaml'") - return '' - - try: - first = list(assemblies.values())[0] - except IndexError: - logger.error("No assemblies in 'assemblies' section") - return '' - - try: - reference = first["reference"] - 
except KeyError: - logger.error("No assemblies in 'assemblies' section") - return '' - - ref_path = ( - Path(settings.TARGET_LOADER_MEDIA_DIRECTORY) - .joinpath(exp_upload.task_id) - .joinpath(comps[0]) - .joinpath(comps[1]) - .joinpath("crystallographic_files") - .joinpath(reference) - .joinpath(f"{reference}.pdb") - ) - logger.debug('ref_path: %s', ref_path) - if Path(settings.MEDIA_ROOT).joinpath(ref_path).is_file(): - return str(ref_path) - else: - logger.error("Reference pdb file doesn't exist") - return '' - else: - logger.error("'assemblies.yaml' missing") - return '' - - class Meta: - model = models.QuatAssembly - fields = ('target', 'pdb_file') - - -class TargetMoleculesSerializer(serializers.ModelSerializer): - template_protein = serializers.SerializerMethodField() - zip_archive = serializers.SerializerMethodField() - metadata = serializers.SerializerMethodField() - sequences = serializers.SerializerMethodField() - molecules = serializers.SerializerMethodField() - tags_info = serializers.SerializerMethodField() - tag_categories = serializers.SerializerMethodField() - - def get_template_protein(self, obj): - return template_protein(obj) - - def get_zip_archive(self, obj): - # The if-check is because the filefield in target has null=True. - # Note that this link will not work on local - if hasattr(obj, 'zip_archive') and obj.zip_archive.name: - return urljoin(get_https_host(self.context["request"]), obj.zip_archive.url) - return - - def get_metadata(self, obj): - if hasattr(obj, 'metadata') and obj.metadata.name: - return urljoin(get_https_host(self.context["request"]), obj.metadata.url) - return - - def get_sequences(self, obj): - return protein_sequences(obj) - - def get_molecules(self, obj): - mols = models.SiteObservation.objects.filter( - experiment__experiment_upload__target__id=obj.id - ).annotate( - # NB! some of the fields are just renamed here, avoiding - # that would simplify things here and remove some lines of - # code. 
but that means front-end code needs to know about - # the changes - protein_code=F('code'), - molecule_protein=F('experiment__pdb_info'), - sdf_info=F('ligand_mol_file'), - lig_id=F('seq_id'), - tags_set=ArrayAgg("siteobservationtag__pk"), - ) - fields = [ - "id", - "smiles", - "cmpd", - "protein_code", - "molecule_protein", - "lig_id", - "chain_id", - "sdf_info", - "tags_set", - ] - - logger.debug("%s", mols) - logger.debug("%s", mols.values(*fields)) - - molecules = [ - {'data': k, 'tags_set': k['tags_set']} for k in mols.values(*fields) - ] - - return molecules - - def get_tags_info(self, obj): - tags = models.SiteObservationTag.objects.filter(target_id=obj.id) - tags_info = [] - for tag in tags: - tag_data = models.SiteObservationTag.objects.filter(id=tag.id).values() - tag_coords = SiteObservationGroup.objects.filter( - id=tag.mol_group_id - ).values('x_com', 'y_com', 'z_com') - tag_dict = {'data': tag_data, 'coords': tag_coords} - tags_info.append(tag_dict) - - return tags_info - - def get_tag_categories(self, obj): - tag_categories = ( - models.TagCategory.objects.filter(siteobservationtag__target_id=obj.id) - .distinct() - .values() - ) - return tag_categories - - class Meta: - model = models.Target - fields = ( - "id", - "title", - "project_id", - "default_squonk_project", - "template_protein", - "metadata", - "zip_archive", - "upload_status", - "sequences", - "molecules", - "tags_info", - "tag_categories", - ) +# class FirstAssemblySerializer(serializers.ModelSerializer): +# target = serializers.IntegerField() +# pdb_file = serializers.SerializerMethodField() + +# def get_pdb_file(self, obj): +# exp_upload = ( +# models.ExperimentUpload.objects.filter( +# target__id=obj.target, +# ) +# .order_by('-commit_datetime') +# .first() +# ) + +# yaml_path = ( +# Path(settings.MEDIA_ROOT) +# .joinpath(settings.TARGET_LOADER_MEDIA_DIRECTORY) +# .joinpath(exp_upload.task_id) +# ) + +# # add unpacked zip directory +# yaml_path = [d for d in list(yaml_path.glob("*")) 
if d.is_dir()][0] + +# # add upload_[d] dir +# yaml_path = next(yaml_path.glob("upload_*")) + +# # last components of path, need for reconstruction later +# comps = yaml_path.parts[-2:] + +# # and the file itself +# yaml_path = yaml_path.joinpath(XTALFORMS_FILE) +# logger.debug("assemblies path: %s", yaml_path) +# if yaml_path.is_file(): +# with open(yaml_path, "r", encoding="utf-8") as file: +# contents = yaml.safe_load(file) +# try: +# assemblies = contents["assemblies"] +# except KeyError: +# logger.error("No 'assemblies' section in '%s'", XTALFORMS_FILE) +# return '' + +# try: +# first = list(assemblies.values())[0] +# except IndexError: +# logger.error("No assemblies in 'assemblies' section") +# return '' + +# try: +# reference = first["reference"] +# except KeyError: +# logger.error("No assemblies in 'assemblies' section") +# return '' + +# ref_path = ( +# Path(settings.TARGET_LOADER_MEDIA_DIRECTORY) +# .joinpath(exp_upload.task_id) +# .joinpath(comps[0]) +# .joinpath(comps[1]) +# .joinpath("crystallographic_files") +# .joinpath(reference) +# .joinpath(f"{reference}.pdb") +# ) +# logger.debug('ref_path: %s', ref_path) +# if Path(settings.MEDIA_ROOT).joinpath(ref_path).is_file(): +# request = self.context.get('request', None) +# if request is not None: +# return request.build_absolute_uri( +# Path(settings.MEDIA_URL).joinpath(ref_path) +# ) +# else: +# return '' +# else: +# logger.error("Reference pdb file doesn't exist") +# return '' +# else: +# logger.error("'%s' missing", XTALFORMS_FILE) +# return '' + +# class Meta: +# model = models.QuatAssembly +# fields = ('target', 'pdb_file') + + +# class TargetMoleculesSerializer(serializers.ModelSerializer): +# template_protein = serializers.SerializerMethodField() +# zip_archive = serializers.SerializerMethodField() +# metadata = serializers.SerializerMethodField() +# sequences = serializers.SerializerMethodField() +# molecules = serializers.SerializerMethodField() +# tags_info = serializers.SerializerMethodField() 
+# tag_categories = serializers.SerializerMethodField() + +# def get_template_protein(self, obj): +# return template_protein(obj) + +# def get_zip_archive(self, obj): +# # The if-check is because the filefield in target has null=True. +# # Note that this link will not work on local +# if hasattr(obj, 'zip_archive') and obj.zip_archive.name: +# return urljoin(get_https_host(self.context["request"]), obj.zip_archive.url) +# return + +# def get_metadata(self, obj): +# if hasattr(obj, 'metadata') and obj.metadata.name: +# return urljoin(get_https_host(self.context["request"]), obj.metadata.url) +# return + +# def get_sequences(self, obj): +# return protein_sequences(obj) + +# def get_molecules(self, obj): +# mols = models.SiteObservation.objects.filter( +# experiment__experiment_upload__target__id=obj.id +# ).annotate( +# # NB! some of the fields are just renamed here, avoiding +# # that would simplify things here and remove some lines of +# # code. but that means front-end code needs to know about +# # the changes +# protein_code=F('code'), +# molecule_protein=F('experiment__pdb_info'), +# sdf_info=F('ligand_mol_file'), +# lig_id=F('seq_id'), +# tags_set=ArrayAgg("siteobservationtag__pk"), +# ) +# fields = [ +# "id", +# "smiles", +# "cmpd", +# "protein_code", +# "molecule_protein", +# "lig_id", +# "chain_id", +# "sdf_info", +# "tags_set", +# ] + +# logger.debug("%s", mols) +# logger.debug("%s", mols.values(*fields)) + +# molecules = [ +# {'data': k, 'tags_set': k['tags_set']} for k in mols.values(*fields) +# ] + +# return molecules + +# def get_tags_info(self, obj): +# tags = models.SiteObservationTag.objects.filter(target_id=obj.id) +# tags_info = [] +# for tag in tags: +# tag_data = models.SiteObservationTag.objects.filter(id=tag.id).values() +# tag_coords = SiteObservationGroup.objects.filter( +# id=tag.mol_group_id +# ).values('x_com', 'y_com', 'z_com') +# tag_dict = {'data': tag_data, 'coords': tag_coords} +# tags_info.append(tag_dict) + +# return tags_info + +# 
def get_tag_categories(self, obj): +# tag_categories = ( +# models.TagCategory.objects.filter(siteobservationtag__target_id=obj.id) +# .distinct() +# .values() +# ) +# return tag_categories + +# class Meta: +# model = models.Target +# fields = ( +# "id", +# "title", +# "project_id", +# "default_squonk_project", +# "template_protein", +# "metadata", +# "zip_archive", +# "upload_status", +# "sequences", +# "molecules", +# "tags_info", +# "tag_categories", +# ) class DownloadStructuresSerializer(serializers.Serializer): diff --git a/viewer/views.py b/viewer/views.py index 857c83f3..edff517a 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -1368,16 +1368,16 @@ class SessionProjectTagView(viewsets.ModelViewSet): filterset_fields = ('id', 'tag', 'category', 'target', 'session_projects') -class TargetMoleculesView(ISpyBSafeQuerySet): - """Retrieve all Molecules and Tag information relating - to a Target. The idea is that a single call can return all target related - information needed by the React front end in a single call. - """ +# class TargetMoleculesView(ISpyBSafeQuerySet): +# """Retrieve all Molecules and Tag information relating +# to a Target. The idea is that a single call can return all target related +# information needed by the React front end in a single call. 
+# """ - queryset = models.Target.objects.all() - serializer_class = serializers.TargetMoleculesSerializer - filter_permissions = "project_id" - filterset_fields = ("title",) +# queryset = models.Target.objects.all() +# serializer_class = serializers.TargetMoleculesSerializer +# filter_permissions = "project_id" +# filterset_fields = ("title",) class DownloadStructures(ISpyBSafeQuerySet): @@ -1712,14 +1712,12 @@ class XtalformSites(viewsets.ModelViewSet): http_method_names = ('get',) -class FirstAssemblyView(viewsets.ModelViewSet): - queryset = models.QuatAssembly.filter_manager.filter_qs() - serializer_class = serializers.FirstAssemblySerializer - permission_class = [permissions.IsAuthenticated] - filterset_class = filters.AssemblyFilter - # filterset_fields = ("target", "project") - # filter_permissions = "target__project_id" - http_method_names = ('get',) +# class FirstAssemblyView(viewsets.ModelViewSet): +# queryset = models.QuatAssembly.filter_manager.filter_qs() +# serializer_class = serializers.FirstAssemblySerializer +# permission_class = [permissions.IsAuthenticated] +# filterset_class = filters.AssemblyFilter +# http_method_names = ('get',) class JobFileTransferView(viewsets.ModelViewSet):