Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add ids to export #745

Merged
merged 19 commits into from
Jan 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion hawc/apps/animal/exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,15 @@ def _get_header_row(self):

def _get_data_rows(self):
rows = []
identifiers_df = Study.identifiers_df(self.queryset, "animal_group__experiment__study_id")
for obj in self.queryset:
ser = obj.get_json(json_encode=False)
row = []
row.extend(Study.flat_complete_data_row(ser["animal_group"]["experiment"]["study"]))
row.extend(
Study.flat_complete_data_row(
ser["animal_group"]["experiment"]["study"], identifiers_df
)
)
row.extend(models.Experiment.flat_complete_data_row(ser["animal_group"]["experiment"]))
row.extend(models.AnimalGroup.flat_complete_data_row(ser["animal_group"]))
ser_dosing_regime = ser["animal_group"]["dosing_regime"]
Expand Down
5 changes: 4 additions & 1 deletion hawc/apps/epi/exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,13 @@ def _get_header_row(self):

def _get_data_rows(self):
rows = []
identifiers_df = Study.identifiers_df(self.queryset, "study_population__study_id")
for obj in self.queryset:
ser = obj.get_json(json_encode=False)
row = []
row.extend(Study.flat_complete_data_row(ser["study_population"]["study"]))
row.extend(
Study.flat_complete_data_row(ser["study_population"]["study"], identifiers_df)
)
row.extend(models.StudyPopulation.flat_complete_data_row(ser["study_population"]))
row.extend(models.Outcome.flat_complete_data_row(ser))
for res in ser["results"]:
Expand Down
3 changes: 2 additions & 1 deletion hawc/apps/epimeta/exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,11 @@ def _get_header_row(self):

def _get_data_rows(self):
rows = []
identifiers_df = Study.identifiers_df(self.queryset, "protocol__study_id")
for obj in self.queryset:
ser = obj.get_json(json_encode=False)
row = []
row.extend(Study.flat_complete_data_row(ser["protocol"]["study"]))
row.extend(Study.flat_complete_data_row(ser["protocol"]["study"], identifiers_df))
row.extend(models.MetaProtocol.flat_complete_data_row(ser["protocol"]))
row.extend(models.MetaResult.flat_complete_data_row(ser))

Expand Down
7 changes: 6 additions & 1 deletion hawc/apps/epiv2/exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,14 @@ def _get_header_row(self):

def _get_data_rows(self):
rows = []
identifiers_df = Study.identifiers_df(self.queryset, "design__study_id")
for obj in self.queryset:
row = []
row.extend(Study.flat_complete_data_row(obj.design.study.get_json(json_encode=False)))
row.extend(
Study.flat_complete_data_row(
obj.design.study.get_json(json_encode=False), identifiers_df
)
)
row.extend(obj.design.flat_complete_data_row())
row.extend(obj.exposure_level.chemical.flat_complete_data_row())
row.extend(obj.exposure_level.exposure_measurement.flat_complete_data_row())
Expand Down
13 changes: 11 additions & 2 deletions hawc/apps/invitro/exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from ..common.helper import FlatFileExporter
from ..materialized.models import FinalRiskOfBiasScore
from ..study.models import Study


def getDose(ser, tag):
Expand Down Expand Up @@ -36,6 +37,9 @@ def _get_header_row(self):

header = [
"study id",
"study hero_id",
"study pubmed_id",
"study doi",
"study name",
"study identifier",
"study published",
Expand Down Expand Up @@ -107,6 +111,8 @@ def _get_header_row(self):
def _get_data_rows(self):
rows = []

identifiers_df = Study.identifiers_df(self.queryset, "experiment__study_id")

for obj in self.queryset:
ser = obj.get_json(json_encode=False)

Expand All @@ -129,14 +135,17 @@ def _get_data_rows(self):
bm_types = [bm["benchmark"] for bm in ser["benchmarks"]]
bm_values = [bm["value"] for bm in ser["benchmarks"]]

study_id = ser["experiment"]["study"]["id"]
row = [
ser["experiment"]["study"]["id"],
study_id,
identifiers_df["hero_id"].get(study_id),
identifiers_df["pubmed_id"].get(study_id),
identifiers_df["doi"].get(study_id),
ser["experiment"]["study"]["short_citation"],
ser["experiment"]["study"]["study_identifier"],
ser["experiment"]["study"]["published"],
]

study_id = ser["experiment"]["study"]["id"]
study_robs = [
self.rob_data[(study_id, metric_id)] for metric_id in self.rob_headers.keys()
]
Expand Down
9 changes: 3 additions & 6 deletions hawc/apps/lit/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,15 +102,12 @@ def create(self, validated_data):


class IdentifiersSerializer(serializers.ModelSerializer):
def to_representation(self, instance):
ret = super().to_representation(instance)
ret["database"] = instance.get_database_display()
ret["url"] = instance.get_url()
return ret
database = serializers.CharField(source="get_database_display")
url = serializers.CharField(source="get_url")

class Meta:
model = models.Identifiers
fields = "__all__"
fields = ["id", "unique_id", "database", "url"]


class ReferenceTagsSerializer(serializers.ModelSerializer):
Expand Down
1 change: 0 additions & 1 deletion hawc/apps/riskofbias/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ class RiskOfBiasAssessmentViewset(
lookup_value_regex = re_digits

def get_queryset(self):

perms = self.get_obj_perms()
if not perms["edit"]:
return self.model.objects.published(self.assessment)
Expand Down
33 changes: 32 additions & 1 deletion hawc/apps/study/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import logging
import os
from typing import Optional

import pandas as pd
from django.apps import apps
from django.core.exceptions import MultipleObjectsReturned, ObjectDoesNotExist
from django.db import models
Expand Down Expand Up @@ -197,6 +199,9 @@ def get_study_type(self):
def flat_complete_header_row():
return (
"study-id",
"study-hero_id",
"study-pubmed_id",
"study-doi",
"study-url",
"study-short_citation",
"study-full_citation",
Expand All @@ -216,9 +221,19 @@ def flat_complete_header_row():
)

@staticmethod
def flat_complete_data_row(ser):
def flat_complete_data_row(ser, identifiers_df: Optional[pd.DataFrame] = None) -> tuple:
try:
ident_row = (
identifiers_df.loc[ser["id"]] if isinstance(identifiers_df, pd.DataFrame) else None
)
except KeyError:
ident_row = None
return (
ser["id"],
# IDs can come from identifiers data frame if exists, else check study serializer
ident_row.hero_id if ident_row is not None else ser.get("hero_id", None),
ident_row.pubmed_id if ident_row is not None else ser.get("pubmed_id", None),
ident_row.doi if ident_row is not None else ser.get("doi", None),
ser["url"],
ser["short_citation"],
ser["full_citation"],
Expand All @@ -237,6 +252,22 @@ def flat_complete_data_row(ser):
ser["published"],
)

@classmethod
def identifiers_df(cls, qs: models.QuerySet, relation: str) -> pd.DataFrame:
    """Build a data frame of reference identifiers for the studies behind a QuerySet.

    Args:
        qs (models.QuerySet): A QuerySet of a model that has a relation to a study.
        relation (str): The lookup path from the QuerySet's model to the study id;
            it is passed to `values_list` to collect the study ids.

    Returns:
        pd.DataFrame: The frame produced by `Reference.objects.identifiers_dataframe`
            for the matching studies, indexed by its "reference_id" column.
    """
    related_study_ids = qs.values_list(relation, flat=True)
    matching_studies = cls.objects.filter(id__in=related_study_ids)
    return Reference.objects.identifiers_dataframe(matching_studies).set_index("reference_id")

@classmethod
def delete_caches(cls, ids):
SerializerHelper.delete_caches(cls, ids)
Expand Down
17 changes: 17 additions & 0 deletions hawc/apps/study/serializers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Optional, Union

from django.core.exceptions import ObjectDoesNotExist, ValidationError
from django.db import transaction
from rest_framework import exceptions, serializers
Expand Down Expand Up @@ -83,6 +85,21 @@ class VerboseStudySerializer(StudySerializer):
identifiers = IdentifiersSerializer(many=True)
tags = ReferenceTagsSerializer()

def _get_identifier(
self, identifiers: list, key: str, to_int: bool
) -> Optional[Union[int, str]]:
for identifier in identifiers:
if identifier["database"] == key:
value = identifier["unique_id"]
return int(value) if to_int else value

def to_representation(self, instance):
    """Serialize `instance` and add top-level "hero_id", "pubmed_id" and "doi" keys.

    Each added value is looked up in the serialized "identifiers" list via
    `_get_identifier`; the HERO and PubMed ids are converted to int, while the
    DOI is kept as-is.
    """
    data = super().to_representation(instance)
    # (output key, database name to match, convert to int?)
    lookups = (
        ("hero_id", "HERO", True),
        ("pubmed_id", "PubMed", True),
        ("doi", "DOI", False),
    )
    for field, database, as_int in lookups:
        data[field] = self._get_identifier(data["identifiers"], database, as_int)
    return data

def get_riskofbiases(self, study):
    """Return the serialized data for the items in `study.get_final_qs()`."""
    final_qs = study.get_final_qs()
    serializer = FinalRiskOfBiasSerializer(final_qs, many=True)
    return serializer.data

Expand Down
Loading