Skip to content

Commit

Permalink
streamline migration from 0.8.x #397
Browse files Browse the repository at this point in the history
  • Loading branch information
Her Email committed Nov 27, 2023
1 parent 84a02c3 commit f89479c
Show file tree
Hide file tree
Showing 18 changed files with 279 additions and 68 deletions.
4 changes: 4 additions & 0 deletions boofilsic/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,10 @@

# Timeout of requests to Mastodon, in seconds
MASTODON_TIMEOUT = env("NEODB_LOGIN_MASTODON_TIMEOUT", default=10) # type: ignore
# Takahe's outgoing HTTP clients (e.g. webfinger lookups) reuse the Mastodon timeout
TAKAHE_REMOTE_TIMEOUT = MASTODON_TIMEOUT

# Single User-Agent for outgoing requests: NeoDB version plus the site URL
# ("undefined" when SITE_INFO has no "site_url" entry)
NEODB_USER_AGENT = f"NeoDB/{NEODB_VERSION} (+{SITE_INFO.get('site_url', 'undefined')})"
# Takahe's outgoing HTTP clients send the same User-Agent as NeoDB
TAKAHE_USER_AGENT = NEODB_USER_AGENT

# Scope when creating Mastodon apps
# Alternatively, use "read write follow" to avoid re-authorizing when migrating to a future version with more features
Expand Down
2 changes: 1 addition & 1 deletion catalog/sites/discogs.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ def scrape(self):
def get_discogs_data(data_type: str, discogs_id):
if data_type not in ("releases", "masters"):
raise ValueError("data_type can only be in ('releases' or masters')")
user_agent_string = "Neodb/0.1"
user_agent_string = settings.NEODB_USER_AGENT
user_token = settings.DISCOGS_API_KEY
headers = {
"User-Agent": user_agent_string,
Expand Down
5 changes: 4 additions & 1 deletion catalog/sites/fedi.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ class FediverseInstance(AbstractSite):
"Performance": Performance,
"PerformanceProduction": PerformanceProduction,
}
request_header = {"User-Agent": "NeoDB/0.5", "Accept": "application/activity+json"}
request_header = {
"User-Agent": settings.NEODB_USER_AGENT,
"Accept": "application/activity+json",
}

@classmethod
def id_to_url(cls, id_value):
Expand Down
2 changes: 1 addition & 1 deletion catalog/sites/rss.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def parse_feed_from_url(url):
feed = pickle.load(open(_local_response_path + get_mock_file(url), "rb"))
else:
req = urllib.request.Request(url)
req.add_header("User-Agent", "NeoDB/0.5")
req.add_header("User-Agent", settings.NEODB_USER_AGENT)
try:
feed = podcastparser.parse(url, urllib.request.urlopen(req, timeout=3))
except:
Expand Down
6 changes: 3 additions & 3 deletions common/management/commands/cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ def add_arguments(self, parser):

def handle(self, *args, **options):
if options["cancel"]:
JobManager.cancel()
JobManager.cancel_all()
if options["schedule"]:
JobManager.cancel() # cancel previously scheduled jobs if any
JobManager.schedule()
JobManager.cancel_all() # cancel previously scheduled jobs if any
JobManager.schedule_all()
if options["runonce"]:
for job_id in options["runonce"]:
run = JobManager.run(job_id)
Expand Down
1 change: 1 addition & 0 deletions common/management/commands/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def create_site(self, domain, service_domain):
domain=domain,
local=True,
service_domain=service_domain,
state="updated",
notes="NeoDB",
nodeinfo={},
)
Expand Down
10 changes: 5 additions & 5 deletions common/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ def register(cls, target):
return target

@classmethod
def schedule(cls):
def schedule_all(cls):
for j in cls.registry:
j.schedule()

@classmethod
def cancel(cls):
def cancel_all(cls):
for j in cls.registry:
j.cancel()

Expand All @@ -77,11 +77,11 @@ def get_scheduled_job_ids(cls):
return registry.get_job_ids()

@classmethod
def schedule_all(cls):
def reschedule_all(cls):
# TODO rewrite lazy import in a better way
from catalog.jobs import DiscoverGenerator, PodcastUpdater
from mastodon.jobs import MastodonSiteCheck
from users.jobs import MastodonUserSync

cls.cancel()
cls.schedule()
cls.cancel_all()
cls.schedule_all()
16 changes: 10 additions & 6 deletions common/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,15 @@ class Setup:
"""

def create_site(self, domain, service_domain):
TakaheDomain.objects.create(
TakaheDomain.objects.update_or_create(
domain=domain,
local=True,
service_domain=service_domain,
notes="NeoDB",
nodeinfo={},
defaults={
"local": True,
"service_domain": service_domain,
"notes": "NeoDB",
"nodeinfo": {},
"state": "updated",
},
)
TakaheConfig.objects.update_or_create(
key="public_timeline",
Expand Down Expand Up @@ -156,8 +159,9 @@ def run(self):
# Register cron jobs if not yet
if settings.DISABLE_CRON:
logger.info("Cron jobs are disabled.")
JobManager.cancel_all()
else:
JobManager.schedule_all()
JobManager.reschedule_all()

logger.info("Finished post-migration setup.")

Expand Down
6 changes: 4 additions & 2 deletions compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ x-shared:
NEODB_SEARCH_URL: ${NEODB_SEARCH_URL:-typesense://user:eggplant@typesense:8108/catalog}
NEODB_EMAIL_URL:
NEODB_EMAIL_FROM: no-reply@${NEODB_SITE_DOMAIN}
NEODB_FANOUT_LIMIT_DAYS:
TAKAHE_FANOUT_LIMIT_DAYS:
NEODB_DOWNLOADER_PROXY_LIST:
NEODB_DOWNLOADER_BACKUP_PROXY:
NEODB_DOWNLOADER_SAVE_DIR:
Expand All @@ -53,8 +55,8 @@ x-shared:
TAKAHE_MEDIA_BACKEND: local://www/media/
TAKAHE_MEDIA_ROOT: /www/media
TAKAHE_USE_PROXY_HEADERS: true
TAKAHE_STATOR_CONCURRENCY: 4
TAKAHE_STATOR_CONCURRENCY_PER_MODEL: 2
TAKAHE_STATOR_CONCURRENCY: ${TAKAHE_STATOR_CONCURRENCY:-4}
TAKAHE_STATOR_CONCURRENCY_PER_MODEL: ${TAKAHE_STATOR_CONCURRENCY_PER_MODEL:-2}
TAKAHE_DEBUG: ${NEODB_DEBUG:-True}
TAKAHE_VENV: /takahe-venv
SPOTIFY_API_KEY:
Expand Down
14 changes: 14 additions & 0 deletions journal/migrations/0020_shelflogentry_unique_shelf_log_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,14 @@

from django.db import migrations, models

# De-duplicate shelf log entries before the unique constraint is added below:
# for every group of rows sharing (item_id, owner_id, timestamp, shelf_type),
# delete each row for which another row with a greater ctid exists, so only
# the row with the highest ctid in the group survives.
# NOTE(review): `DELETE ... USING` and `ctid` are PostgreSQL-specific — confirm
# no other database backend needs to run this migration.
_sql = """DELETE
FROM journal_shelflogentry a USING journal_shelflogentry b
WHERE a.ctid < b.ctid
AND a.item_id=b.item_id
AND a.owner_id=b.owner_id
AND a.timestamp=b.timestamp
AND a.shelf_type=b.shelf_type"""


class Migration(migrations.Migration):

Expand All @@ -10,6 +18,12 @@ class Migration(migrations.Migration):
]

operations = [
migrations.RunSQL("SET CONSTRAINTS ALL IMMEDIATE;"),
migrations.RunSQL(
sql=_sql,
reverse_sql=migrations.RunSQL.noop,
),
migrations.RunSQL("SET CONSTRAINTS ALL DEFERRED;"),
migrations.AddConstraint(
model_name="shelflogentry",
constraint=models.UniqueConstraint(
Expand Down
2 changes: 1 addition & 1 deletion mastodon/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
# GET
API_SEARCH = "/api/v2/search"

USER_AGENT = f"NeoDB/{settings.NEODB_VERSION} (+{settings.SITE_INFO.get('site_url', 'undefined')})"
USER_AGENT = settings.NEODB_USER_AGENT

get = functools.partial(requests.get, timeout=settings.MASTODON_TIMEOUT)
put = functools.partial(requests.put, timeout=settings.MASTODON_TIMEOUT)
Expand Down
88 changes: 88 additions & 0 deletions takahe/management/commands/backfill_takahe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from django.conf import settings
from django.contrib.contenttypes.models import ContentType
from django.core.management.base import BaseCommand
from django.db.models import Count, F
from loguru import logger
from tqdm import tqdm

from catalog.common import *
from catalog.common.models import *
from catalog.models import *
from journal.models import *
from takahe.utils import *
from users.models import APIdentity
from users.models import User as NeoUser


def content_type_id(cls):
    """Return the primary key of the ContentType row for a journal model class.

    The lookup uses the fixed app label "journal" and the lower-cased class
    name of ``cls`` as the model name.
    """
    model_name = cls.__name__.lower()
    content_type = ContentType.objects.get(app_label="journal", model=model_name)
    return content_type.pk


class Command(BaseCommand):
    """Management command that backfills Takahe posts and likes from journal data.

    ``--post`` walks ShelfMember, Comment and Review pieces in id order and
    hands each one to the matching ``Takahe.post_*`` helper; ``--like`` walks
    Like rows and re-creates each like on the target's latest post.
    """

    help = "Backfill Takahe posts and likes from existing journal pieces"

    def add_arguments(self, parser):
        """Register the command line switches understood by this command."""
        parser.add_argument(
            "--verbose",
            action="store_true",
            help="Stored on the command as self.verbose",
        )
        parser.add_argument(
            "--post",
            action="store_true",
            help="Process ShelfMember, Comment and Review pieces",
        )
        parser.add_argument(
            "--like",
            action="store_true",
            help="Process Like records",
        )
        parser.add_argument(
            "--post-new",
            action="store_true",
            help="Value forwarded to the Takahe.post_* helpers (share as new post)",
        )
        # type=int lets argparse reject non-numeric input with a usage error
        # instead of a ValueError traceback later in handle().
        parser.add_argument(
            "--start",
            default=0,
            type=int,
            help="Only process pieces whose id is >= this value (0 = no lower bound)",
        )
        parser.add_argument(
            "--count",
            default=0,
            type=int,
            help="Estimated total for the progress bar (0 = count the queryset)",
        )

    def process_post(self):
        """Send every ShelfMember, Comment and Review piece to Takahe, in id order."""
        logger.info("Processing posts...")
        qs = Piece.objects.filter(
            polymorphic_ctype__in=[
                content_type_id(ShelfMember),
                content_type_id(Comment),
                content_type_id(Review),
            ]
        ).order_by("id")
        if self.starting_id:
            qs = qs.filter(id__gte=self.starting_id)
        # A caller-supplied estimate avoids an extra COUNT query.
        tracker = tqdm(qs.iterator(), total=self.count_est or qs.count())
        for p in tracker:
            # Show the current id so an interrupted run can be resumed with --start.
            tracker.set_postfix_str(f"{p.id}")
            if p.__class__ == ShelfMember:
                mark = Mark(p.owner, p.item)
                Takahe.post_mark(mark, self.post_new)
            elif p.__class__ == Comment:
                # Only comments on podcast/TV episodes are posted on their own;
                # comments on other items are skipped here.
                # NOTE(review): presumably those are covered by the mark post — confirm.
                if p.item.__class__ in [PodcastEpisode, TVEpisode]:
                    Takahe.post_comment(p, self.post_new)
            elif p.__class__ == Review:
                Takahe.post_review(p, self.post_new)

    def process_like(self):
        """Re-create every Like on the latest post of its target, in id order."""
        logger.info("Processing likes...")
        qs = Like.objects.order_by("id")
        # Stream rows instead of loading the whole table, as process_post does.
        tracker = tqdm(qs.iterator(), total=qs.count())
        for like in tracker:
            post_id = like.target.latest_post_id
            if post_id:
                Takahe.like_post(post_id, like.owner.pk)
            else:
                logger.warning(f"Post not found for like {like.id}")

    def handle(self, *args, **options):
        """Store the parsed options on the command and run the requested passes."""
        self.verbose = options["verbose"]
        self.post_new = options["post_new"]
        # int() is kept so values passed programmatically as strings
        # (bypassing argparse) are still accepted.
        self.starting_id = int(options["start"])
        self.count_est = int(options["count"])

        if options["post"]:
            self.process_post()

        if options["like"]:
            self.process_like()

        self.stdout.write(self.style.SUCCESS("Done."))
17 changes: 12 additions & 5 deletions takahe/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ def fetch_webfinger_url(cls, domain: str) -> str:
based on probing host-meta.
"""
with httpx.Client(
timeout=settings.SETUP.REMOTE_TIMEOUT,
timeout=settings.TAKAHE_REMOTE_TIMEOUT,
headers={"User-Agent": settings.TAKAHE_USER_AGENT},
) as client:
try:
Expand Down Expand Up @@ -565,7 +565,7 @@ def fetch_webfinger(cls, handle: str) -> tuple[str | None, str | None]:

# Go make a Webfinger request
with httpx.Client(
timeout=settings.SETUP.REMOTE_TIMEOUT,
timeout=settings.TAKAHE_REMOTE_TIMEOUT,
headers={"User-Agent": settings.TAKAHE_USER_AGENT},
) as client:
try:
Expand Down Expand Up @@ -1066,6 +1066,7 @@ def edit_local(
attachments: list | None = None,
attachment_attributes: list | None = None,
type_data: dict | None = None,
edited: datetime.datetime | None = None,
):
with transaction.atomic():
# Strip all HTML and apply linebreaks filter
Expand Down Expand Up @@ -1099,6 +1100,12 @@ def edit_local(
self.state_changed = timezone.now()
self.state_next_attempt = None
self.state_locked_until = None
if edited and edited < timezone.now():
self.published = edited
if timezone.now() - edited > datetime.timedelta(
days=settings.FANOUT_LIMIT_DAYS
):
self.state = "edited_fanned_out" # add post quietly if it's old
self.save()

@classmethod
Expand All @@ -1109,13 +1116,13 @@ def mentions_from_content(cls, content, author) -> set[Identity]:
handle = handle.lower()
if "@" in handle:
username, domain = handle.split("@", 1)
local = False
else:
username = handle
domain = author.domain_id
local = author.local
identity = Identity.by_username_and_domain(
username=username,
domain=domain,
fetch=True,
username=username, domain=domain, fetch=True, local=local
)
if identity is not None:
mentions.add(identity)
Expand Down
8 changes: 6 additions & 2 deletions takahe/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,11 @@ def post(
raise ValueError(f"Cannot find post to reply: {reply_to_pk}")
if post:
post.edit_local(
pre_conetent, content, visibility=visibility, type_data=data
pre_conetent,
content,
visibility=visibility,
type_data=data,
edited=post_time,
)
else:
post = Post.create_local(
Expand Down Expand Up @@ -435,7 +439,7 @@ def post_comment(comment, share_as_new_post: bool) -> Post | None:
comment.visibility, user.preference.mastodon_publish_public
)
existing_post = None if share_as_new_post else comment.latest_post
post = Takahe.post( # TODO post as Article?
post = Takahe.post(
comment.owner.pk,
pre_conetent,
content,
Expand Down
Loading

0 comments on commit f89479c

Please sign in to comment.