Skip to content

Commit

Permalink
Move lost and clearable job deltas to settings, include active unstar…
Browse files Browse the repository at this point in the history
…ted jobs in backlog
  • Loading branch information
davegaeddert committed Jan 18, 2024
1 parent a952311 commit 04b8fcf
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 7 deletions.
32 changes: 32 additions & 0 deletions bolt-jobs/bolt/jobs/admin.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from datetime import timedelta

from bolt.admin import (
AdminModelDetailView,
AdminModelListView,
Expand All @@ -7,10 +9,32 @@
from bolt.admin.cards import Card
from bolt.admin.dates import DatetimeRangeAliases
from bolt.http import HttpResponseRedirect
from bolt.runtime import settings

from .models import Job, JobRequest, JobResult


def _td_format(td_object):
seconds = int(td_object.total_seconds())
periods = [
("year", 60 * 60 * 24 * 365),
("month", 60 * 60 * 24 * 30),
("day", 60 * 60 * 24),
("hour", 60 * 60),
("minute", 60),
("second", 1),
]

strings = []
for period_name, period_seconds in periods:
if seconds > period_seconds:
period_value, seconds = divmod(seconds, period_seconds)
has_s = "s" if period_value > 1 else ""
strings.append("%s %s%s" % (period_value, period_name, has_s))

return ", ".join(strings)


class SuccessfulJobsCard(Card):
title = "Successful Jobs"
text = "View"
Expand Down Expand Up @@ -45,6 +69,10 @@ class LostJobsCard(Card):
title = "Lost Jobs"
text = "View" # TODO make not required - just an icon?

def get_description(self):
    """Return the card text stating how old a job must be to count as lost."""
    # JOBS_LOST_AFTER is a number of seconds; turn it into readable text.
    lost_after = timedelta(seconds=settings.JOBS_LOST_AFTER)
    readable = _td_format(lost_after)
    return f"Jobs are considered lost after {readable}"

def get_number(self):
return (
JobResult.objects.lost()
Expand Down Expand Up @@ -128,6 +156,10 @@ class ListView(AdminModelListView):
allow_global_search = False
default_datetime_range = DatetimeRangeAliases.LAST_7_DAYS

def get_description(self):
    """Return the list-view text stating when jobs are cleared."""
    # JOBS_CLEARABLE_AFTER is a number of seconds; turn it into readable text.
    clearable_after = timedelta(seconds=settings.JOBS_CLEARABLE_AFTER)
    readable = _td_format(clearable_after)
    return f"Jobs are cleared after {readable}"

def get_initial_queryset(self):
queryset = super().get_initial_queryset()
if self.filter == "Successful":
Expand Down
6 changes: 3 additions & 3 deletions bolt-jobs/bolt/jobs/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import click

from bolt.runtime import settings
from bolt.utils import timezone

from .models import Job, JobRequest, JobResult
Expand Down Expand Up @@ -47,9 +48,8 @@ def worker(max_processes, max_jobs_per_process, stats_every):


@cli.command()
@click.option("--older-than", type=int, default=60 * 60 * 24 * 7)
def clear_completed(older_than):
cutoff = timezone.now() - datetime.timedelta(seconds=older_than)
def clear_completed():
cutoff = timezone.now() - datetime.timedelta(seconds=settings.JOBS_CLEARABLE_AFTER)
click.echo(f"Clearing jobs finished before {cutoff}")
results = (
JobResult.objects.exclude(ended_at__isnull=True)
Expand Down
2 changes: 2 additions & 0 deletions bolt-jobs/bolt/jobs/default_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Seconds after which finished jobs may be cleared (one week).
JOBS_CLEARABLE_AFTER: int = 7 * 24 * 60 * 60
# Seconds after creation at which a job is considered lost (six hours).
JOBS_LOST_AFTER: int = 6 * 60 * 60
7 changes: 4 additions & 3 deletions bolt-jobs/bolt/jobs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import uuid

from bolt.db import models, transaction
from bolt.runtime import settings
from bolt.utils import timezone

from .jobs import load_job
Expand Down Expand Up @@ -84,12 +85,12 @@ def convert_to_job(self):

class JobQuerySet(models.QuerySet):
def mark_lost_jobs(self):
# Nothing should be pending after more than a 24 hrs... consider it lost
# Downside to these is that they are mark lost pretty late?
# Lost jobs are jobs that have been pending for too long,
# and probably never going to get picked up by a worker process.
# In theory we could save a timeout per-job and mark them timed-out more quickly,
# but if they're still running, we can't actually send a signal to cancel it...
now = timezone.now()
one_day_ago = now - datetime.timedelta(days=1)
one_day_ago = now - datetime.timedelta(seconds=settings.JOBS_LOST_AFTER)
lost_jobs = self.filter(
created_at__lt=one_day_ago
) # Doesn't matter whether it started or not -- it shouldn't take this long.
Expand Down
5 changes: 4 additions & 1 deletion bolt-jobs/bolt/jobs/workers.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,10 @@ def maybe_check_job_results(self):

def log_stats(self):
num_proccesses = len(self.executor._processes)
num_backlog_jobs = JobRequest.objects.count()
num_backlog_jobs = (
JobRequest.objects.count()
+ Job.objects.filter(started_at__isnull=True).count()
)
if num_backlog_jobs > 0:
# Basically show how many jobs aren't about to be picked
# up in this same tick (so if there's 1, we don't really need to log that as a backlog)
Expand Down

0 comments on commit 04b8fcf

Please sign in to comment.