From 27eb7a6e3e71551599ca2d3023f1f132bcda050b Mon Sep 17 00:00:00 2001
From: Mark Street
Date: Mon, 18 May 2026 14:25:40 +0100
Subject: [PATCH] Move housekeeping into coreapp and cleanup more stuff

---
Review notes (ignored by git am; not part of the commit message):
 * perform_delete() only issues a COUNT query on dry runs.
 * The repeated parent-field Subquery is built by a _parent_value() helper.
 * test_removes_orphan_assemblies no longer assigns to an unsaved instance.
 * Docstrings added; hunk sizes and diffstat updated to match. The blob
   hashes on the "index" lines predate these edits.

 backend/coreapp/housekeeping.py            | 201 ++++++++++++++++
 backend/coreapp/tests/test_housekeeping.py | 259 +++++++++++++++++++++
 backend/housekeeping.py                    | 116 +--------
 3 files changed, 463 insertions(+), 113 deletions(-)
 create mode 100644 backend/coreapp/housekeeping.py
 create mode 100644 backend/coreapp/tests/test_housekeeping.py
 mode change 100755 => 100644 backend/housekeeping.py

diff --git a/backend/coreapp/housekeeping.py b/backend/coreapp/housekeeping.py
new file mode 100644
index 000000000..ec1a882ae
--- /dev/null
+++ b/backend/coreapp/housekeeping.py
@@ -0,0 +1,201 @@
+"""Housekeeping tasks that prune stale or unreferenced coreapp rows.
+
+Every task takes ``(cutoff_datetime, dry_run=False)`` and returns a row count,
+so callers can run them uniformly by iterating ``HOUSEKEEPING_TASKS``.
+"""
+
+import datetime
+from typing import Protocol
+
+from django.apps import apps
+from django.db.models import Exists, F, Manager, Model, OuterRef, Q, QuerySet, Subquery
+
+
+class DjangoModel(Protocol):
+    """Structural type for any model class with an ``objects`` manager."""
+
+    objects: Manager[Model]
+
+
+def get_model(model_name: str) -> type[DjangoModel]:
+    """Look up a ``coreapp`` model class by name through the app registry."""
+    return apps.get_model("coreapp", model_name.capitalize())
+
+
+def perform_delete(qs: QuerySet[Model], dry_run: bool = False) -> int:
+    """Delete the rows matched by ``qs``, or only count them on a dry run.
+
+    On a dry run the number of matching rows is returned and nothing is
+    deleted. Otherwise the total reported by ``QuerySet.delete()`` is
+    returned, which also counts rows removed through cascades.
+    """
+    if dry_run:
+        return qs.count()
+
+    # No COUNT query on real runs: its result was never used on this path.
+    deleted, _ = qs.delete()
+    return deleted
+
+
+def remove_ownerless_scratches(
+    cutoff_datetime: datetime.datetime, dry_run: bool = False
+) -> int:
+    """Remove scratches with no owner that were created before the cutoff."""
+    Scratch = get_model("Scratch")
+
+    to_delete = Scratch.objects.filter(
+        owner__isnull=True, creation_time__lt=cutoff_datetime
+    )
+    return perform_delete(to_delete, dry_run=dry_run)
+
+
+def remove_anonymous_profiles(
+    cutoff_datetime: datetime.datetime, dry_run: bool = False
+) -> int:
+    """Remove user-less profiles created before the cutoff that own no scratch."""
+    Profile = get_model("Profile")
+    Scratch = get_model("Scratch")
+
+    to_delete = Profile.objects.annotate(
+        has_scratch=Exists(Scratch.objects.filter(owner=OuterRef("pk")))
+    ).filter(user__isnull=True, creation_date__lt=cutoff_datetime, has_scratch=False)
+    return perform_delete(to_delete, dry_run=dry_run)
+
+
+def remove_orphan_contexts(
+    cutoff_datetime: datetime.datetime, dry_run: bool = False
+) -> int:
+    """Remove contexts that no scratch references.
+
+    ``cutoff_datetime`` is unused; it is accepted so every task shares one
+    signature.
+    """
+    Context = get_model("Context")
+    Scratch = get_model("Scratch")
+
+    to_delete = Context.objects.annotate(
+        has_scratch=Exists(Scratch.objects.filter(context_fk=OuterRef("pk")))
+    ).filter(has_scratch=False)
+
+    return perform_delete(to_delete, dry_run=dry_run)
+
+
+def remove_orphan_assemblies(
+    cutoff_datetime: datetime.datetime, dry_run: bool = False
+) -> int:
+    """Remove assemblies that no scratch uses as its target.
+
+    ``cutoff_datetime`` is unused; it is accepted so every task shares one
+    signature.
+    """
+    Assembly = get_model("Assembly")
+    Scratch = get_model("Scratch")
+
+    to_delete = Assembly.objects.annotate(
+        has_scratch=Exists(Scratch.objects.filter(target_assembly=OuterRef("pk")))
+    ).filter(has_scratch=False)
+
+    return perform_delete(to_delete, dry_run=dry_run)
+
+
+def remove_orphan_asms(
+    cutoff_datetime: datetime.datetime, dry_run: bool = False
+) -> int:
+    """Remove asm rows that no assembly references as its source.
+
+    ``cutoff_datetime`` is unused; it is accepted so every task shares one
+    signature.
+    """
+    Asm = get_model("Asm")
+    Assembly = get_model("Assembly")
+
+    to_delete = Asm.objects.annotate(
+        has_assembly=Exists(Assembly.objects.filter(source_asm=OuterRef("pk")))
+    ).filter(has_assembly=False)
+
+    return perform_delete(to_delete, dry_run=dry_run)
+
+
+def _parent_value(field_name: str) -> Subquery:
+    """Build a subquery yielding ``field_name`` from the outer row's parent."""
+    Scratch = get_model("Scratch")
+
+    return Subquery(
+        Scratch.objects.filter(pk=OuterRef("parent_id")).values(field_name)[:1]
+    )
+
+
+def unchanged_forks(cutoff_datetime: datetime.datetime) -> QuerySet[Model]:
+    """Select stale leaf forks whose settings still match their parent.
+
+    A scratch matches when it has a parent, has no children, was last updated
+    before ``cutoff_datetime``, and its source code, compiler, compiler flags,
+    diff flags, context and preset all equal the parent's.
+    """
+    Scratch = get_model("Scratch")
+
+    return (
+        Scratch.objects.filter(
+            parent__isnull=False,
+            children__isnull=True,
+            last_updated__lt=cutoff_datetime,
+        )
+        .annotate(
+            parent_source=_parent_value("source_code"),
+            parent_context=_parent_value("context_fk"),
+            parent_compiler=_parent_value("compiler"),
+            parent_compiler_flags=_parent_value("compiler_flags"),
+            parent_diff_flags=_parent_value("diff_flags"),
+            parent_preset=_parent_value("preset"),
+        )
+        .filter(
+            source_code=F("parent_source"),
+            compiler=F("parent_compiler"),
+            compiler_flags=F("parent_compiler_flags"),
+            diff_flags=F("parent_diff_flags"),
+        )
+        # "NULL = NULL" is never true in SQL, so a context or preset that is
+        # unset on both the fork and its parent has to be matched explicitly.
+        .filter(
+            Q(context_fk_id=F("parent_context"))
+            | Q(context_fk__isnull=True, parent_context__isnull=True),
+            Q(preset_id=F("parent_preset"))
+            | Q(preset__isnull=True, parent_preset__isnull=True),
+        )
+    )
+
+
+def remove_unchanged_anonymous_forks(
+    cutoff_datetime: datetime.datetime, dry_run: bool = False
+) -> int:
+    """Remove unchanged forks with no owner or whose owner has no user."""
+    to_delete = unchanged_forks(cutoff_datetime).filter(
+        Q(owner__isnull=True) | Q(owner__user__isnull=True)
+    )
+    return perform_delete(to_delete, dry_run=dry_run)
+
+
+def remove_unchanged_same_author_forks(
+    cutoff_datetime: datetime.datetime, dry_run: bool = False
+) -> int:
+    """Remove unchanged forks whose registered owner also owns the parent."""
+    to_delete = (
+        unchanged_forks(cutoff_datetime)
+        .filter(owner__isnull=False, owner__user__isnull=False)
+        .annotate(parent_owner=_parent_value("owner"))
+        .filter(owner_id=F("parent_owner"))
+    )
+    return perform_delete(to_delete, dry_run=dry_run)
+
+
+# (label, task) pairs, run in this order. Orphan assemblies are pruned before
+# orphan asms so asms released by a deleted assembly go in the same run.
+HOUSEKEEPING_TASKS = [
+    ("Owner-less Scratches", remove_ownerless_scratches),
+    ("Scratch-less Profiles", remove_anonymous_profiles),
+    ("Orphan Contexts", remove_orphan_contexts),
+    ("Orphan Assemblies", remove_orphan_assemblies),
+    ("Orphan Asms", remove_orphan_asms),
+    ("Unchanged Anonymous Forks", remove_unchanged_anonymous_forks),
+    ("Unchanged Same-Author Forks", remove_unchanged_same_author_forks),
+]
diff --git a/backend/coreapp/tests/test_housekeeping.py b/backend/coreapp/tests/test_housekeeping.py
new file mode 100644
index 000000000..8f595b4cf
--- /dev/null
+++ b/backend/coreapp/tests/test_housekeeping.py
@@ -0,0 +1,259 @@
+import datetime
+
+from django.contrib.auth.models import User
+from django.test import TestCase
+
+from coreapp.housekeeping import (
+    remove_anonymous_profiles,
+    remove_orphan_asms,
+    remove_orphan_assemblies,
+    remove_orphan_contexts,
+    remove_ownerless_scratches,
+    remove_unchanged_anonymous_forks,
+    remove_unchanged_same_author_forks,
+)
+from coreapp.models.profile import Profile
+from coreapp.models.scratch import Asm, Assembly, Context, Scratch
+
+
+class HousekeepingTests(TestCase):
+    """Exercise every housekeeping task against small hand-built fixtures."""
+
+    def setUp(self) -> None:
+        self.cutoff_datetime = datetime.datetime.now(datetime.UTC) - datetime.timedelta(
+            days=1
+        )
+        asm = Asm.objects.create(hash="target-asm", data="jr $ra\nnop")
+        self.assembly = Assembly.objects.create(
+            hash="target-assembly",
+            arch="mips",
+            source_asm=asm,
+        )
+        context = Context.get_or_create_from_text("int x;")
+        assert context is not None
+        self.context = context
+        self.foo = self.create_profile("foo")
+        self.bar = self.create_profile("bar")
+
+    def create_assembly(self, key: str, asm: Asm | None = None) -> Assembly:
+        """Create an Assembly, making a fresh source Asm unless one is given."""
+        source_asm = asm or Asm.objects.create(hash=f"{key}-asm", data="jr $ra\nnop")
+        return Assembly.objects.create(
+            hash=f"{key}-assembly",
+            arch="mips",
+            source_asm=source_asm,
+        )
+
+    def create_profile(self, username: str) -> Profile:
+        """Create a Profile backed by a newly created User."""
+        user = User.objects.create_user(username=username)
+        return Profile.objects.create(user=user)
+
+    def create_scratch(
+        self,
+        *,
+        owner: Profile | None,
+        parent: Scratch | None = None,
+        source_code: str = "int func(void) { return 0; }",
+        context: Context | None = None,
+        compiler: str = "dummy",
+        compiler_flags: str = "-O2",
+        diff_flags: list[str] | None = None,
+    ) -> Scratch:
+        """Create a Scratch on self.assembly with the shared default settings."""
+        return Scratch.objects.create(
+            owner=owner,
+            parent=parent,
+            target_assembly=self.assembly,
+            source_code=source_code,
+            context_fk=context if context is not None else self.context,
+            compiler=compiler,
+            compiler_flags=compiler_flags,
+            diff_flags=diff_flags if diff_flags is not None else ["-m"],
+            platform="dummy",
+        )
+
+    def make_old(self, *scratches: Scratch) -> None:
+        """Backdate ``last_updated`` to one second before the cutoff."""
+        Scratch.objects.filter(pk__in=[scratch.pk for scratch in scratches]).update(
+            last_updated=self.cutoff_datetime - datetime.timedelta(seconds=1)
+        )
+
+    def test_removes_ownerless_scratches_created_before_cutoff(self) -> None:
+        old_ownerless_scratch = self.create_scratch(owner=None)
+        old_owned_scratch = self.create_scratch(owner=self.foo)
+        new_ownerless_scratch = self.create_scratch(owner=None)
+        Scratch.objects.filter(
+            pk__in=[old_ownerless_scratch.pk, old_owned_scratch.pk]
+        ).update(creation_time=self.cutoff_datetime - datetime.timedelta(seconds=1))
+
+        deleted = remove_ownerless_scratches(self.cutoff_datetime)
+
+        self.assertEqual(deleted, 1)
+        self.assertFalse(Scratch.objects.filter(pk=old_ownerless_scratch.pk).exists())
+        self.assertTrue(Scratch.objects.filter(pk=old_owned_scratch.pk).exists())
+        self.assertTrue(Scratch.objects.filter(pk=new_ownerless_scratch.pk).exists())
+
+    def test_dry_run_counts_ownerless_scratches_without_deleting(self) -> None:
+        old_ownerless_scratch = self.create_scratch(owner=None)
+        Scratch.objects.filter(pk=old_ownerless_scratch.pk).update(
+            creation_time=self.cutoff_datetime - datetime.timedelta(seconds=1)
+        )
+
+        deleted = remove_ownerless_scratches(self.cutoff_datetime, dry_run=True)
+
+        self.assertEqual(deleted, 1)
+        self.assertTrue(Scratch.objects.filter(pk=old_ownerless_scratch.pk).exists())
+
+    def test_removes_scratchless_anonymous_profiles_created_before_cutoff(self) -> None:
+        old_scratchless_profile = Profile.objects.create(user=None)
+        old_profile_with_scratch = Profile.objects.create(user=None)
+        new_scratchless_profile = Profile.objects.create(user=None)
+        self.create_scratch(owner=old_profile_with_scratch)
+        Profile.objects.filter(
+            pk__in=[old_scratchless_profile.pk, old_profile_with_scratch.pk]
+        ).update(creation_date=self.cutoff_datetime - datetime.timedelta(seconds=1))
+
+        deleted = remove_anonymous_profiles(self.cutoff_datetime)
+
+        self.assertEqual(deleted, 1)
+        self.assertFalse(Profile.objects.filter(pk=old_scratchless_profile.pk).exists())
+        self.assertTrue(Profile.objects.filter(pk=old_profile_with_scratch.pk).exists())
+        self.assertTrue(Profile.objects.filter(pk=new_scratchless_profile.pk).exists())
+        self.assertTrue(Profile.objects.filter(pk=self.foo.pk).exists())
+
+    def test_removes_orphan_contexts(self) -> None:
+        orphan_context = Context.get_or_create_from_text("orphan context")
+        used_context = Context.get_or_create_from_text("used context")
+        assert orphan_context is not None
+        assert used_context is not None
+        self.create_scratch(owner=self.foo, context=used_context)
+
+        deleted = remove_orphan_contexts(self.cutoff_datetime)
+
+        self.assertEqual(deleted, 2)
+        self.assertFalse(Context.objects.filter(pk=orphan_context.pk).exists())
+        self.assertFalse(Context.objects.filter(pk=self.context.pk).exists())
+        self.assertTrue(Context.objects.filter(pk=used_context.pk).exists())
+
+    def test_removes_orphan_assemblies(self) -> None:
+        orphan_assembly = self.create_assembly("orphan")
+        live_assembly = self.create_assembly("live")
+        self.create_scratch(owner=self.foo)
+        # Re-point foo's scratch so the assembly from setUp becomes orphaned.
+        Scratch.objects.filter(owner=self.foo).update(target_assembly=live_assembly)
+
+        deleted = remove_orphan_assemblies(self.cutoff_datetime)
+
+        self.assertEqual(deleted, 2)
+        self.assertFalse(Assembly.objects.filter(pk=orphan_assembly.pk).exists())
+        self.assertFalse(Assembly.objects.filter(pk=self.assembly.pk).exists())
+        self.assertTrue(Assembly.objects.filter(pk=live_assembly.pk).exists())
+
+    def test_removes_orphan_asms(self) -> None:
+        orphan_asm = Asm.objects.create(hash="orphan-asm", data="jr $ra\nnop")
+        used_asm = Asm.objects.create(hash="used-asm", data="jr $ra\nnop")
+        self.create_assembly("used", asm=used_asm)
+
+        deleted = remove_orphan_asms(self.cutoff_datetime)
+
+        self.assertEqual(deleted, 1)
+        self.assertFalse(Asm.objects.filter(pk=orphan_asm.pk).exists())
+        self.assertTrue(Asm.objects.filter(pk=used_asm.pk).exists())
+
+    def test_orphan_asm_cleanup_follows_orphan_assembly_cleanup(self) -> None:
+        orphan_assembly = self.create_assembly("orphan")
+        source_asm = orphan_assembly.source_asm
+        assert source_asm is not None
+
+        deleted_asms_before_assemblies = remove_orphan_asms(self.cutoff_datetime)
+        deleted_assemblies = remove_orphan_assemblies(self.cutoff_datetime)
+        deleted_asms_after_assemblies = remove_orphan_asms(self.cutoff_datetime)
+
+        self.assertEqual(deleted_asms_before_assemblies, 0)
+        self.assertEqual(deleted_assemblies, 2)
+        self.assertEqual(deleted_asms_after_assemblies, 2)
+        self.assertFalse(Assembly.objects.filter(pk=orphan_assembly.pk).exists())
+        self.assertFalse(Asm.objects.filter(pk=source_asm.pk).exists())
+
+    def test_removes_unchanged_same_author_fork(self) -> None:
+        root = self.create_scratch(owner=self.foo)
+        fork = self.create_scratch(owner=self.bar, parent=root)
+        unchanged_same_author_fork = self.create_scratch(owner=self.bar, parent=fork)
+        self.make_old(root, fork, unchanged_same_author_fork)
+
+        deleted = remove_unchanged_same_author_forks(self.cutoff_datetime)
+
+        self.assertEqual(deleted, 1)
+        self.assertTrue(Scratch.objects.filter(pk=root.pk).exists())
+        self.assertTrue(Scratch.objects.filter(pk=fork.pk).exists())
+        self.assertFalse(
+            Scratch.objects.filter(pk=unchanged_same_author_fork.pk).exists()
+        )
+
+    def test_keeps_unchanged_fork_with_different_author(self) -> None:
+        root = self.create_scratch(owner=self.foo)
+        fork = self.create_scratch(owner=self.bar, parent=root)
+        self.make_old(root, fork)
+
+        deleted = remove_unchanged_same_author_forks(self.cutoff_datetime)
+
+        self.assertEqual(deleted, 0)
+        self.assertTrue(Scratch.objects.filter(pk=fork.pk).exists())
+
+    def test_keeps_same_author_fork_with_changes(self) -> None:
+        root = self.create_scratch(owner=self.foo)
+        fork = self.create_scratch(
+            owner=self.foo,
+            parent=root,
+            compiler_flags="-O2 -funroll-loops",
+        )
+        self.make_old(root, fork)
+
+        deleted = remove_unchanged_same_author_forks(self.cutoff_datetime)
+
+        self.assertEqual(deleted, 0)
+        self.assertTrue(Scratch.objects.filter(pk=fork.pk).exists())
+
+    def test_keeps_unchanged_same_author_fork_with_children(self) -> None:
+        root = self.create_scratch(owner=self.foo)
+        fork = self.create_scratch(owner=self.foo, parent=root)
+        child = self.create_scratch(
+            owner=self.foo,
+            parent=fork,
+            source_code="int func(void) { return 1; }",
+        )
+        self.make_old(root, fork, child)
+
+        deleted = remove_unchanged_same_author_forks(self.cutoff_datetime)
+
+        self.assertEqual(deleted, 0)
+        self.assertTrue(Scratch.objects.filter(pk=fork.pk).exists())
+        self.assertTrue(Scratch.objects.filter(pk=child.pk).exists())
+
+    def test_existing_anonymous_fork_cleanup_still_removes_unchanged_forks(
+        self,
+    ) -> None:
+        root = self.create_scratch(owner=self.foo)
+        fork = self.create_scratch(owner=None, parent=root)
+        self.make_old(root, fork)
+
+        deleted = remove_unchanged_anonymous_forks(self.cutoff_datetime)
+
+        self.assertEqual(deleted, 1)
+        self.assertFalse(Scratch.objects.filter(pk=fork.pk).exists())
+
+    def test_keeps_unchanged_anonymous_fork_with_children(self) -> None:
+        root = self.create_scratch(owner=self.foo)
+        fork = self.create_scratch(owner=None, parent=root)
+        child = self.create_scratch(
+            owner=None,
+            parent=fork,
+            source_code="int func(void) { return 1; }",
+        )
+        self.make_old(root, fork, child)
+
+        deleted = remove_unchanged_anonymous_forks(self.cutoff_datetime)
+
+        self.assertEqual(deleted, 0)
+        self.assertTrue(Scratch.objects.filter(pk=fork.pk).exists())
+        self.assertTrue(Scratch.objects.filter(pk=child.pk).exists())
diff --git a/backend/housekeeping.py b/backend/housekeeping.py
old mode 100755
new mode 100644
index 64c96732b..e1403fda8
--- a/backend/housekeeping.py
+++ b/backend/housekeeping.py
@@ -3,115 +3,8 @@
 import argparse
 import datetime
 import os
-from typing import Protocol
 
 import django
-from django.apps import apps
-from django.db.models import Exists, F, Manager, Model, OuterRef, Q, QuerySet, Subquery
-
-
-class DjangoModel(Protocol):
-    objects: Manager[Model]
-
-
-def get_model(model_name: str) -> type[DjangoModel]:
-    return apps.get_model("coreapp", model_name.capitalize())
-
-
-def perform_delete(qs: QuerySet[Model], dry_run: bool = False) -> int:
-    count = qs.count()
-    if dry_run:
-        return count
-
-    deleted, _ = qs.delete()
-    return deleted
-
-
-def remove_ownerless_scratches(
-    cutoff_datetime: datetime.datetime, dry_run: bool = False
-) -> int:
-    Scratch = get_model("Scratch")
-
-    to_delete = Scratch.objects.filter(
-        owner__isnull=True, creation_time__lt=cutoff_datetime
-    )
-    return perform_delete(to_delete, dry_run=dry_run)
-
-
-def remove_anonymous_profiles(
-    cutoff_datetime: datetime.datetime, dry_run: bool = False
-) -> int:
-    Profile = get_model("Profile")
-    Scratch = get_model("Scratch")
-
-    to_delete = Profile.objects.annotate(
-        has_scratch=Exists(Scratch.objects.filter(owner=OuterRef("pk")))
-    ).filter(user__isnull=True, creation_date__lt=cutoff_datetime, has_scratch=False)
-    return perform_delete(to_delete, dry_run=dry_run)
-
-
-def remove_orphan_contexts(
-    cutoff_datetime: datetime.datetime, dry_run: bool = False
-) -> int:
-    Context = get_model("Context")
-    Scratch = get_model("Scratch")
-
-    to_delete = Context.objects.annotate(
-        has_scratch=Exists(Scratch.objects.filter(context_fk=OuterRef("pk")))
-    ).filter(has_scratch=False)
-
-    return perform_delete(to_delete, dry_run=dry_run)
-
-
-def remove_unchanged_anonymous_forks(
-    cutoff_datetime: datetime.datetime, dry_run: bool = False
-) -> int:
-    Scratch = get_model("Scratch")
-
-    to_delete = (
-        Scratch.objects.filter(
-            parent__isnull=False,
-            last_updated__lt=cutoff_datetime,
-        )
-        .filter(Q(owner__isnull=True) | Q(owner__user__isnull=True))
-        .annotate(
-            parent_source=Subquery(
-                Scratch.objects.filter(pk=OuterRef("parent_id")).values("source_code")[
-                    :1
-                ]
-            ),
-            parent_context=Subquery(
-                Scratch.objects.filter(pk=OuterRef("parent_id")).values("context_fk")[
-                    :1
-                ]
-            ),
-            parent_compiler=Subquery(
-                Scratch.objects.filter(pk=OuterRef("parent_id")).values("compiler")[:1]
-            ),
-            parent_compiler_flags=Subquery(
-                Scratch.objects.filter(pk=OuterRef("parent_id")).values(
-                    "compiler_flags"
-                )[:1]
-            ),
-            parent_diff_flags=Subquery(
-                Scratch.objects.filter(pk=OuterRef("parent_id")).values("diff_flags")[
-                    :1
-                ]
-            ),
-            parent_preset=Subquery(
-                Scratch.objects.filter(pk=OuterRef("parent_id")).values("preset")[:1]
-            ),
-        )
-        .filter(
-            source_code=F("parent_source"),
-            context_fk=F("parent_context"),
-            compiler=F("parent_compiler"),
-            compiler_flags=F("parent_compiler_flags"),
-            diff_flags=F("parent_diff_flags"),
-            preset=F("parent_preset"),
-        )
-    )
-    return perform_delete(to_delete, dry_run=dry_run)
 
 
 def main() -> None:
@@ -134,6 +27,8 @@ def main() -> None:
     os.environ.setdefault("DJANGO_SETTINGS_MODULE", "decompme.settings")
     django.setup()
 
+    from coreapp.housekeeping import HOUSEKEEPING_TASKS
+
     dry_run: bool = args.dry_run
     cutoff_days: int = args.cutoff_days
     assert cutoff_days >= 1, "--cutoff-days must be >= 1"
@@ -144,12 +39,7 @@ def main() -> None:
 
     prefix = "[DRYRUN] " if dry_run else ""
 
-    for text, func in [
-        ("Owner-less Scratches", remove_ownerless_scratches),
-        ("Scratch-less Profiles", remove_anonymous_profiles),
-        ("Orphan Contexts", remove_orphan_contexts),
-        ("Unchanged Anonymous Forks", remove_unchanged_anonymous_forks),
-    ]:
+    for text, func in HOUSEKEEPING_TASKS:
         print(f"{prefix}Cleaning up {text}... ", end="")
         deleted = func(cutoff_datetime, dry_run=dry_run)
         if deleted: