Skip to content

Commit

Permalink
Merge pull request #133 from GSA/harvest-email-on-errors
Browse files Browse the repository at this point in the history
Harvest Email Notification Frequency
  • Loading branch information
btylerburton authored Feb 12, 2025
2 parents 1f3bf48 + da1a471 commit 0940f20
Show file tree
Hide file tree
Showing 10 changed files with 92 additions and 5 deletions.
8 changes: 8 additions & 0 deletions app/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,14 @@ class HarvestSourceForm(FlaskForm):
source_type = SelectField(
"Source Type", choices=["document", "waf"], validators=[DataRequired()]
)
notification_frequency = SelectField(
"Notification Frequency",
choices=[
"on_error",
"always",
],
validators=[DataRequired()],
)


class OrganizationForm(FlaskForm):
Expand Down
1 change: 1 addition & 0 deletions app/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ def make_new_source_contract(form):
"frequency": form.frequency.data,
"schema_type": form.schema_type.data,
"source_type": form.source_type.data,
"notification_frequency": form.notification_frequency.data,
}


Expand Down
6 changes: 5 additions & 1 deletion app/static/data/glossary.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,5 +30,9 @@
{
"term": "Record Error",
"definition": "also known as \"non-critical\" errors are ones which occur at the record level after the harvester has determined which operations to run. examples include validation and transformation failure."
},
{
"term": "Notification Frequency",
"definition": "when notification emails are sent out. Choices being \"on_error\", and \"always\"."
}
]
]
8 changes: 8 additions & 0 deletions database/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@ class HarvestSource(db.Model):
jobs = db.relationship(
"HarvestJob", backref="source", cascade="all, delete-orphan", lazy=True
)
notification_frequency = db.Column(
db.Enum(
"on_error",
"always",
name="notification_frequency",
),
nullable=False,
)


class HarvestJob(db.Model):
Expand Down
6 changes: 5 additions & 1 deletion harvester/harvest.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class HarvestSource:
"source_type",
"id", # db guuid
"notification_emails",
"notification_frequency",
],
repr=False,
)
Expand Down Expand Up @@ -418,7 +419,10 @@ def do_report(self) -> None:
self._report = job_status

if hasattr(self, "notification_emails") and self.notification_emails:
self.send_notification_emails(results)
if self.notification_frequency == "always" or (
self.notification_frequency == "on_error" and results["status"]["error"]
):
self.send_notification_emails(results)

def send_notification_emails(self, results: dict) -> None:
"""Send harvest report emails to havest source POCs"""
Expand Down
2 changes: 2 additions & 0 deletions migrations/script.py.mako
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ down_revision = ${repr(down_revision)}
branch_labels = ${repr(branch_labels)}
depends_on = ${repr(depends_on)}


def upgrade():
${upgrades if upgrades else "pass"}


def downgrade():
${downgrades if downgrades else "pass"}
38 changes: 38 additions & 0 deletions migrations/versions/c04b66d4d5b9_add_notification_frequency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""Add notification_frequency
Revision ID: c04b66d4d5b9
Revises:
Create Date: 2025-02-07 09:55:49.013498
"""

import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "c04b66d4d5b9"
down_revision = None
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("harvest_source", schema=None) as batch_op:
batch_op.add_column(
sa.Column(
"notification_frequency",
sa.Enum("on_error", "always", name="notification_frequency"),
nullable=False,
)
)

# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table("harvest_source", schema=None) as batch_op:
batch_op.drop_column("notification_frequency")

# ### end Alembic commands ###
9 changes: 9 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ def source_data_dcatus_2(organization_data: dict) -> dict:
"url": f"{HARVEST_SOURCE_URL}/dcatus/dcatus_2.json",
"schema_type": "dcatus1.1: federal",
"source_type": "document",
"notification_frequency": "always",
}


Expand All @@ -137,6 +138,7 @@ def source_data_dcatus_same_title(organization_data: dict) -> dict:
"url": f"{HARVEST_SOURCE_URL}/dcatus/dcatus_same_title.json",
"schema_type": "dcatus1.1: federal",
"source_type": "document",
"notification_frequency": "always",
}


Expand All @@ -151,6 +153,7 @@ def source_data_waf_csdgm(organization_data: dict) -> dict:
"url": f"{HARVEST_SOURCE_URL}/waf/",
"schema_type": "csdgm",
"source_type": "waf",
"notification_frequency": "always",
}


Expand All @@ -165,6 +168,7 @@ def source_data_waf_iso19115_2(organization_data: dict) -> dict:
"url": f"{HARVEST_SOURCE_URL}/iso_2_waf/",
"schema_type": "iso19115_2",
"source_type": "waf",
"notification_frequency": "always",
}


Expand All @@ -179,6 +183,7 @@ def source_data_dcatus_invalid(organization_data: dict) -> dict:
"url": f"{HARVEST_SOURCE_URL}/dcatus/missing_title.json",
"schema_type": "dcatus1.1: federal",
"source_type": "document",
"notification_frequency": "always",
}


Expand All @@ -193,6 +198,7 @@ def source_data_dcatus_single_record(organization_data: dict) -> dict:
"url": f"{HARVEST_SOURCE_URL}/dcatus/dcatus_single_record.json",
"schema_type": "dcatus1.1: federal",
"source_type": "document",
"notification_frequency": "on_error",
}


Expand All @@ -207,6 +213,7 @@ def source_data_dcatus_single_record_non_federal(organization_data: dict) -> dic
"url": f"{HARVEST_SOURCE_URL}/dcatus/dcatus_single_record_non-federal.json",
"schema_type": "dcatus1.1: non-federal",
"source_type": "document",
"notification_frequency": "always",
}


Expand All @@ -221,6 +228,7 @@ def source_data_dcatus_bad_url(organization_data: dict) -> dict:
"url": f"{HARVEST_SOURCE_URL}/dcatus/bad_url.json",
"schema_type": "dcatus1.1: federal",
"source_type": "document",
"notification_frequency": "always",
}


Expand All @@ -235,6 +243,7 @@ def source_data_dcatus_invalid_records(organization_data) -> dict:
"url": "http://localhost/dcatus/missing_title.json",
"schema_type": "dcatus1.1: federal",
"source_type": "document",
"notification_frequency": "always",
}


Expand Down
3 changes: 2 additions & 1 deletion tests/fixtures.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
"frequency": "daily",
"url": "http://localhost:80/dcatus/dcatus.json",
"schema_type": "dcatus1.1: federal",
"source_type": "document"
"source_type": "document",
"notification_frequency": "always"
}
],
"job": [
Expand Down
16 changes: 14 additions & 2 deletions tests/integration/harvest_job_flows/test_harvest_job_full_flow.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
import os
from unittest.mock import patch
from unittest.mock import MagicMock, patch

from harvester.harvest import harvest_job_starter
from harvester.utils.general_utils import download_file
Expand All @@ -10,8 +10,10 @@

class TestHarvestJobFullFlow:
@patch("harvester.harvest.ckan")
@patch("harvester.harvest.HarvestSource.send_notification_emails")
def test_harvest_single_record_created(
self,
send_notification_emails_mock: MagicMock,
CKANMock,
interface,
organization_data,
Expand All @@ -38,21 +40,25 @@ def test_harvest_single_record_created(
harvest_job = interface.get_harvest_job(job_id)
assert harvest_job.status == "complete"
assert harvest_job.records_added == len(records_to_add)
# assert that send_notification_emails is not called because email has no errors
assert send_notification_emails_mock.called is False

@patch("harvester.harvest.ckan")
@patch("harvester.harvest.HarvestSource.send_notification_emails")
def test_multiple_harvest_jobs(
self,
send_notification_emails_mock: MagicMock,
CKANMock,
interface,
organization_data,
source_data_dcatus,
):

CKANMock.action.package_create.return_value = {"id": 1234}
CKANMock.action.dataset_purge.return_value = {"ok"}
CKANMock.action.package_update.return_value = {"ok"}

interface.add_organization(organization_data)

interface.add_harvest_source(source_data_dcatus)
harvest_job = interface.add_harvest_job(
{
Expand All @@ -69,6 +75,7 @@ def test_multiple_harvest_jobs(
)

assert len(records_from_db) == 7
assert send_notification_emails_mock.called

## create follow-up job
interface.update_harvest_source(
Expand All @@ -90,6 +97,7 @@ def test_multiple_harvest_jobs(
source_data_dcatus["id"]
)

assert send_notification_emails_mock.called
records_from_db_count = interface.get_latest_harvest_records_by_source_orm(
source_data_dcatus["id"],
count=True,
Expand All @@ -99,8 +107,10 @@ def test_multiple_harvest_jobs(

@patch("harvester.harvest.ckan")
@patch("harvester.harvest.download_file")
@patch("harvester.harvest.HarvestSource.send_notification_emails")
def test_harvest_record_errors_reported(
self,
send_notification_emails_mock: MagicMock,
download_file_mock,
CKANMock,
interface,
Expand Down Expand Up @@ -132,6 +142,8 @@ def test_harvest_record_errors_reported(
assert (
interface_errors[0][0].harvest_record_id == job_errors[0].harvest_record_id
)
# assert that send_notification_emails is called because of errors
assert send_notification_emails_mock.called

@patch("harvester.harvest.ckan")
@patch("harvester.utils.ckan_utils.uuid")
Expand Down

1 comment on commit 0940f20

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Title Coverage Tests Skipped Failures Errors Time
Unit tests Coverage 39 0 💤 0 ❌ 0 🔥 1.017s ⏱️
Integration Tests Coverage 72 0 💤 0 ❌ 0 🔥 4.636s ⏱️
Functional Tests Coverage 2 0 💤 0 ❌ 0 🔥 7.533s ⏱️

Please sign in to comment.