Skip to content

Commit

Permalink
Long reference export (#1098)
Browse files Browse the repository at this point in the history
* generalize identifier annotation

* create a new long exporter type

* add new option on the download page

* add a test
  • Loading branch information
shapiromatron authored Sep 18, 2024
1 parent 1e1e763 commit b94f08b
Show file tree
Hide file tree
Showing 7 changed files with 120 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ <h2>{{object}} downloads</h2>
<h6 class="mb-1 font-size-regular font-weight-bold">Literature Review</h6>
<div class="list-group my-2">
{% url 'lit:api:assessment-reference-export' assessment.pk as the_url %}
{% include "assessment/fragments/downloads_select.html" with link=the_url format="xlsx" name='references' text="Reference Export" help_text="All references and tags." allow_unpublished=False %}
{% include "assessment/fragments/downloads_select.html" with link=the_url format="xlsx" name='references' text="Reference Export" help_text="All references and tags. Wide format is one row per reference (and one column per tag); long format is one row per reference-tag combination." allow_unpublished=False reference_long=True %}
{% if obj_perms.edit and assessment.literature_settings.conflict_resolution %}
{% url 'lit:api:assessment-user-tag-export' assessment.pk as the_url %}
{% include "assessment/fragments/downloads_select.html" with link=the_url format="xlsx" name='references-usertags' text='User Tag Export' help_text="All references and tags, including user tags. Team members only." allow_unpublished=False %}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<div class="d-flex align-items-center flex-wrap flex-md-nowrap">
<div class="input-group input-group-sm flex-shrink-0 px-2" style="width: fit-content;" role="group" aria-label="Download link button group with format select">
<div class="input-group-prepend">
<a class="btn btn-primary" type="button" id="{{name}}-url" href="{{link}}?format={{format}}{% if allow_unpublished %}&unpublished=false{% endif %}"><i class="fa fa-download" aria-hidden="true"></i>&nbsp;Download</a>
<a class="btn btn-primary" type="button" id="{{name}}-url" href="{{link}}?format={{format}}{% if allow_unpublished %}&unpublished=false{% endif %}{% if reference_long %}&export_format=wide{% endif %}"><i class="fa fa-download" aria-hidden="true"></i>&nbsp;Download</a>
</div>
<select id="{{name}}" class="custom-select" required>
<option selected value="xlsx">xlsx</option>
Expand All @@ -13,6 +13,12 @@
<option value="json">json</option>
<option value="html">html</option>
</select>
{% if reference_long %}
<select class="custom-select reference-format-selector" required>
<option selected value="wide">wide</option>
<option value="long">long</option>
</select>
{% endif %}
{% if allow_unpublished %}
<div class="input-group-append">
<div class="input-group-text">
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,13 @@
}
return false;
});
// Keep the `export_format` query parameter of the download link in sync with
// the value chosen in each reference format selector.
$('.reference-format-selector').each(function () {
    $(this)
        .on('change', function (event) {
            // Look up the anchor underneath the selector's parent element.
            const link = $(event.target).parent().find('a');
            const currentHref = link.attr("href");
            const updatedHref = currentHref.replace(
                /export_format=(\w+)/,
                `export_format=${this.value}`
            );
            link.attr("href", updatedHref);
        })
        // Fire once right away so the href matches the currently selected option.
        .trigger('change');
});
});
</script>
12 changes: 7 additions & 5 deletions hawc/apps/lit/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,11 +205,13 @@ def reference_export(self, request, pk):
raise ValidationError(fs.errors)

tags = models.ReferenceFilterTag.get_all_tags(assessment.id)
Exporter = (
exports.TableBuilderFormat
if request.query_params.get("export_format") == "table-builder"
else exports.ReferenceFlatComplete
)
match request.query_params.get("export_format"):
case "table-builder":
Exporter = exports.TableBuilderFormat
case "long":
Exporter = exports.ReferenceTagLongExport
case _:
Exporter = exports.ReferenceFlatComplete
export = Exporter(
queryset=fs.qs,
filename=f"references-{assessment.name}",
Expand Down
53 changes: 53 additions & 0 deletions hawc/apps/lit/exports.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pandas as pd
from django.db.models import QuerySet
from django.utils.html import strip_tags

from ..common.helper import FlatFileExporter
Expand Down Expand Up @@ -129,3 +131,54 @@ def _get_data_rows(self):
]
for ref in self.queryset
]


def _records_frame(records, columns: list[str]) -> pd.DataFrame:
    """Build a DataFrame from an iterable of dicts, keeping column labels when empty.

    ``pd.DataFrame([])`` has no columns, so any later ``astype``/``merge``/``drop``
    that names a column raises ``KeyError``. When there are no records, return
    an empty frame that carries the expected ``columns`` instead.

    Args:
        records: An iterable of dict-like rows (e.g. a ``.values()`` queryset)
        columns (list[str]): Column labels to use when there are no rows
    """
    rows = list(records)
    if rows:
        return pd.DataFrame(rows)
    return pd.DataFrame(columns=columns)


def _long_export(qs: QuerySet, assessment_id: int) -> pd.DataFrame:
    """
    One row per reference-tag combination, including references with no tag.

    References are left-joined to their tag pairs, so a reference without tags
    still appears once with missing tag columns. An empty reference set, or an
    assessment without any reference-tag pairs, yields a frame with the same
    columns rather than raising.

    Args:
        qs (QuerySet): A Reference queryset
        assessment_id (int): Assessment ID

    Returns:
        pd.DataFrame: reference fields followed by the tag columns.
    """
    ref_fields = ["id", "pubmed_id", "hero_id", "doi", "authors_short", "year", "title"]

    tags = models.ReferenceFilterTag.as_dataframe(assessment_id).rename(
        columns=dict(name="tag_name", nested_name="tag_nested_name")
    )
    refs = (
        _records_frame(
            qs.filter(assessment=assessment_id).with_identifiers().values(*ref_fields),
            ref_fields,
        )
        .rename(columns=dict(authors_short="authors"))
        # nullable integers so missing values do not turn these columns into floats
        .astype(
            dict(
                year=pd.Int64Dtype(),
                pubmed_id=pd.Int64Dtype(),
                hero_id=pd.Int64Dtype(),
            )
        )
    )
    # The column names below are the ones the merges rely on; they are only
    # used as a fallback when there are no pairs at all.
    ref_tags = _records_frame(
        models.Reference.objects.tag_pairs(
            models.Reference.objects.filter(assessment=assessment_id)
        ),
        ["reference_id", "tag_id"],
    )
    tagged = ref_tags.merge(tags, left_on="tag_id", right_on="id", how="left").drop(
        columns=["id", "depth"]
    )
    return (
        # left join keeps references that have no tag pair
        refs.merge(tagged, left_on="id", right_on="reference_id", how="left")
        .drop(columns=["reference_id"])
        .astype(dict(tag_id=pd.Int64Dtype()))
    )


class ReferenceTagLongExport(FlatFileExporter):
    """Exporter whose data frame is the long reference-tag table from ``_long_export``."""

    def build_df(self) -> pd.DataFrame:
        """Return the long-format frame for this exporter's queryset and assessment.

        The assessment is read from the ``assessment`` entry of ``self.kwargs``.
        """
        assessment = self.kwargs["assessment"]
        return _long_export(qs=self.queryset, assessment_id=assessment.id)
49 changes: 30 additions & 19 deletions hawc/apps/lit/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,35 @@ def full_text_search(self, search_text: str):
def in_workflow(self, workflow: "Workflow"):
return self.filter(workflow.reference_filter())

def with_identifiers(self):
    """Annotate the queryset with ``pubmed_id``, ``hero_id`` and ``doi``.

    Each annotation is a subquery selecting at most one ``unique_id`` from the
    ``Identifiers`` rows linked to the outer row for the given database. The
    PubMed and HERO values are cast to integers; the DOI value is not cast.
    """
    Identifiers = apps.get_model("lit", "Identifiers")

    def unique_id_subquery(database):
        # Correlate on the outer row's id and keep a single matching value.
        matches = Identifiers.objects.filter(
            references=models.OuterRef("id"),
            database=database,
        )
        return models.Subquery(matches.values("unique_id")[:1])

    pubmed = Cast(
        unique_id_subquery(constants.ReferenceDatabase.PUBMED),
        models.IntegerField(),
    )
    hero = Cast(
        unique_id_subquery(constants.ReferenceDatabase.HERO),
        models.IntegerField(),
    )
    doi = unique_id_subquery(constants.ReferenceDatabase.DOI)
    return self.annotate(pubmed_id=pubmed, hero_id=hero, doi=doi)


class ReferenceManager(BaseManager):
assessment_relation = "assessment"
Expand Down Expand Up @@ -1045,7 +1074,6 @@ def identifiers_dataframe(self, qs: QuerySet) -> pd.DataFrame:
return df

def heatmap_dataframe(self, assessment_id: int) -> pd.DataFrame:
Identifiers = apps.get_model("lit", "Identifiers")
ReferenceFilterTag = apps.get_model("lit", "ReferenceFilterTag")
ReferenceTags = apps.get_model("lit", "ReferenceTags")

Expand All @@ -1060,26 +1088,9 @@ def heatmap_dataframe(self, assessment_id: int) -> pd.DataFrame:
year="year",
journal="journal",
)
pubmed_qs = models.Subquery(
Identifiers.objects.filter(
references=models.OuterRef("id"), database=constants.ReferenceDatabase.PUBMED
).values("unique_id")[:1]
)
hero_qs = models.Subquery(
Identifiers.objects.filter(
references=models.OuterRef("id"), database=constants.ReferenceDatabase.HERO
).values("unique_id")[:1]
)
doi_qs = models.Subquery(
Identifiers.objects.filter(
references=models.OuterRef("id"), database=constants.ReferenceDatabase.DOI
).values("unique_id")[:1]
)
qs = (
self.filter(assessment_id=assessment_id)
.annotate(pubmed_id=Cast(pubmed_qs, models.IntegerField()))
.annotate(hero_id=Cast(hero_qs, models.IntegerField()))
.annotate(doi=doi_qs)
.with_identifiers()
.values_list(*values.keys())
.order_by("id")
)
Expand Down
14 changes: 14 additions & 0 deletions tests/hawc/apps/lit/test_exports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import pytest

from hawc.apps.assessment.models import Assessment
from hawc.apps.lit import exports, models


@pytest.mark.django_db
class TestReferenceTagLongExport:
    """Tests for the long-format reference/tag exporter."""

    def test_success(self):
        """Building the export for assessment 2 yields a frame of the expected shape."""
        references = models.Reference.objects.filter(assessment_id=2)
        assessment = Assessment.objects.get(id=2)
        exporter = exports.ReferenceTagLongExport(queryset=references, assessment=assessment)
        df = exporter.build_df()
        assert df.shape == (5, 10)

0 comments on commit b94f08b

Please sign in to comment.