From 838e75738942f589c31f3b4987d37f0cd28234af Mon Sep 17 00:00:00 2001 From: Antoni Marroig Campomar Date: Tue, 28 Apr 2026 14:33:12 +0200 Subject: [PATCH 1/3] [IMP] fs_attachment_s3: Optimize clean up storages using mass deletion --- fs_attachment_s3/README.rst | 1 + fs_attachment_s3/models/__init__.py | 1 + fs_attachment_s3/models/fs_file_gc.py | 125 ++++++++++++++++++ fs_attachment_s3/readme/CONTRIBUTORS.md | 1 + .../static/description/index.html | 1 + fs_attachment_s3/tests/__init__.py | 1 + fs_attachment_s3/tests/test_fs_file_gc.py | 77 +++++++++++ 7 files changed, 207 insertions(+) create mode 100644 fs_attachment_s3/models/fs_file_gc.py create mode 100644 fs_attachment_s3/tests/test_fs_file_gc.py diff --git a/fs_attachment_s3/README.rst b/fs_attachment_s3/README.rst index 86c9c546ef..ba96c4fafd 100644 --- a/fs_attachment_s3/README.rst +++ b/fs_attachment_s3/README.rst @@ -155,6 +155,7 @@ Contributors - Laurent Mignon laurent.mignon@acsone.eu (https://www.acsone.eu) - Stéphane Bidoul stephane.bidoul@acsone.eu (https://www.acsone.eu) +- Antoni Marroig amarroig@apsl.net (https://apsl.tech) Other credits ------------- diff --git a/fs_attachment_s3/models/__init__.py b/fs_attachment_s3/models/__init__.py index 45a28cbdca..2ed0909478 100644 --- a/fs_attachment_s3/models/__init__.py +++ b/fs_attachment_s3/models/__init__.py @@ -1,2 +1,3 @@ from . import fs_storage from . import ir_attachment +from . import fs_file_gc diff --git a/fs_attachment_s3/models/fs_file_gc.py b/fs_attachment_s3/models/fs_file_gc.py new file mode 100644 index 0000000000..8baf99594a --- /dev/null +++ b/fs_attachment_s3/models/fs_file_gc.py @@ -0,0 +1,125 @@ +# Copyright 2026 APSL-Nagarro Antoni Marroig +# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl). + +import gc +import logging + +from odoo import models + +_logger = logging.getLogger(__name__) + + +class FsFileGc(models.Model): + _inherit = "fs.file.gc" + + # S3 bulk delete limit is 1000 objects per request + _GC_BATCH_SIZE = 1000 + + def _gc_files_unsafe(self) -> None: + """ + Overrides to optimize S3 storage using bulk deletes and + delegates other storage types to the super() method. + """ + # 1. HEAVY LIFTING: Clean up S3 storages first using mass deletion + self._gc_s3_bulk_delete() + + # 2. DELEGATION: Call super() for any remaining storage types + # Since S3 records have already been removed from fs_file_gc, + # super() will only process remaining types (Filestore, SFTP, etc.) + # without hitting timeouts. + return super()._gc_files_unsafe() + + def _gc_s3_bulk_delete(self): + """ + Specific logic for S3 using Boto3 Batch. + Filters storages in memory since autovacuum_gc is not a stored field. + """ + # Search for all storages and filter by the computed field in Python + all_storages = self.env["fs.storage"].search([]) + # We filter those with autovacuum active and verify it's an S3-based path + storages_to_gc = all_storages.filtered( + lambda s: s.autovacuum_gc and s.is_s3_storage + ) + + for storage in storages_to_gc: + code = storage.code + + try: + # directory_path usually contains the bucket name in S3 configurations + bucket_name = storage.get_directory_path().strip("/") + root_fs = storage._get_root_filesystem() + + s3_client = root_fs.s3 + + if not s3_client or not bucket_name: + _logger.warning( + "GC: Could not retrieve S3 client or Bucket name for %s", code + ) + continue + + _logger.info( + "GC: Starting optimized S3 cleanup for %s in bucket %s", + code, + bucket_name, + ) + + while True: + # Select the next batch of orphaned files + self._cr.execute( + """ + SELECT store_fname + FROM fs_file_gc + WHERE fs_storage_code = %s + AND NOT EXISTS ( + SELECT 1 FROM ir_attachment + WHERE store_fname = fs_file_gc.store_fname + ) + LIMIT %s + """, + (code, self._GC_BATCH_SIZE), + ) + + rows = self._cr.fetchall() + if not rows: + break + + fnames = [row[0] for row in rows] + # Prepare the keys by removing the protocol prefix (e.g., s3://) + objects_to_delete = [{"Key": f.partition("://")[2]} for f in fnames] + + try: + # Perform mass deletion (1 HTTP request per 1000 files) + _logger.info( + "GC: Sending bulk delete request to S3 (%s files)", + len(objects_to_delete), + ) + s3_client.delete_objects( + Bucket=bucket_name, Delete={"Objects": objects_to_delete} + ) + + # Mass delete from database + self._cr.execute( + "DELETE FROM fs_file_gc WHERE store_fname = ANY(%s)", + (fnames,), + ) + + # Commit per batch: releases locks and ensures progress is saved + self._cr.commit() # pylint: disable=invalid-commit + except Exception as e: + _logger.error( + "GC Error: Failed S3 delete_objects for %s: %s", code, e + ) + # We break the loop for this storage + # to prevent DB deletion if S3 fails + break + + # Explicit memory cleanup + gc.collect() + + except Exception as e: + _logger.error( + "GC Error: Could not initialize S3 storage %s: %s", code, e + ) + continue + + _logger.info("GC: Optimized S3 cleanup process finished.") diff --git a/fs_attachment_s3/readme/CONTRIBUTORS.md b/fs_attachment_s3/readme/CONTRIBUTORS.md index 06d49341ab..b738845804 100644 --- a/fs_attachment_s3/readme/CONTRIBUTORS.md +++ b/fs_attachment_s3/readme/CONTRIBUTORS.md @@ -1,2 +1,3 @@ - Laurent Mignon (https://www.acsone.eu) - Stéphane Bidoul (https://www.acsone.eu) +- Antoni Marroig (https://apsl.tech) diff --git a/fs_attachment_s3/static/description/index.html b/fs_attachment_s3/static/description/index.html index b9f0570d73..22f2a073f2 100644 --- a/fs_attachment_s3/static/description/index.html +++ b/fs_attachment_s3/static/description/index.html @@ -505,6 +505,7 @@

Contributors

diff --git a/fs_attachment_s3/tests/__init__.py b/fs_attachment_s3/tests/__init__.py index a788fc4772..7102627b74 100644 --- a/fs_attachment_s3/tests/__init__.py +++ b/fs_attachment_s3/tests/__init__.py @@ -1 +1,2 @@ from . import test_fs_attachment_s3 +from . import test_fs_file_gc diff --git a/fs_attachment_s3/tests/test_fs_file_gc.py b/fs_attachment_s3/tests/test_fs_file_gc.py new file mode 100644 index 0000000000..27b0132f58 --- /dev/null +++ b/fs_attachment_s3/tests/test_fs_file_gc.py @@ -0,0 +1,77 @@ +# Copyright 2026 APSL-Nagarro Antoni Marroig +# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl). + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +from .common import TestFSAttachmentS3Common + + +class TestFsFileGcS3(TestFSAttachmentS3Common): + def setUp(self): + super().setUp() + self.gc_file_model = self.env["fs.file.gc"] + + def _mark_for_gc(self, *store_fnames): + for store_fname in store_fnames: + self.gc_file_model._mark_for_gc(store_fname) + + def test_gc_s3_bulk_delete_removes_orphaned_files(self): + orphan_1 = "s3tst://dir/sub/orphan_1.txt" + orphan_2 = "s3tst://dir/sub/orphan_2.txt" + referenced = self.fake_attachment_s3.store_fname + self._mark_for_gc(orphan_1, orphan_2, referenced) + + s3_client = MagicMock() + root_fs = SimpleNamespace(s3=s3_client) + + storage_class = type(self.s3_backend) + with ( + patch.object( + storage_class, + "is_s3_storage", + new=property(lambda storage: storage.code == self.s3_backend.code), + ), + patch.object(storage_class, "_get_root_filesystem", return_value=root_fs), + patch.object(type(self.env.cr), "commit", return_value=None), + ): + self.gc_file_model._gc_s3_bulk_delete() + + s3_client.delete_objects.assert_called_once() + _, kwargs = s3_client.delete_objects.call_args + self.assertEqual(kwargs["Bucket"], "test-bucket") + self.assertCountEqual( + kwargs["Delete"]["Objects"], + [ + {"Key": "dir/sub/orphan_1.txt"}, + {"Key": "dir/sub/orphan_2.txt"}, + ], + ) + remaining_files = self.gc_file_model.search( + [("store_fname", "in", [orphan_1, orphan_2, referenced])] + ).mapped("store_fname") + self.assertNotIn(orphan_1, remaining_files) + self.assertNotIn(orphan_2, remaining_files) + self.assertIn(referenced, remaining_files) + + def test_gc_s3_bulk_delete_keeps_rows_when_s3_delete_fails(self): + orphan = "s3tst://dir/sub/orphan.txt" + self._mark_for_gc(orphan) + + s3_client = MagicMock() + s3_client.delete_objects.side_effect = Exception("S3 is unavailable") + root_fs = SimpleNamespace(s3=s3_client) + + storage_class = type(self.s3_backend) + with ( + patch.object( + storage_class, + "is_s3_storage", + new=property(lambda storage: storage.code == self.s3_backend.code), + ), + patch.object(storage_class, "_get_root_filesystem", return_value=root_fs), + patch.object(type(self.env.cr), "commit", return_value=None), + ): + self.gc_file_model._gc_s3_bulk_delete() + + self.assertTrue(self.gc_file_model.search_count([("store_fname", "=", orphan)])) From 4c8a4606ffb3e90118b7db471085f4fc43691e02 Mon Sep 17 00:00:00 2001 From: "Laurent Mignon (ACSONE)" Date: Wed, 27 May 2026 00:29:03 +0200 Subject: [PATCH 2/3] [IMP] fs_attachment_s3: bulk delete rely on _list_pending_gc_files More code shared with base implementation to list files --- fs_attachment_s3/models/fs_file_gc.py | 24 ++--------------------- fs_attachment_s3/tests/test_fs_file_gc.py | 3 +++ 2 files changed, 5 insertions(+), 22 deletions(-) diff --git a/fs_attachment_s3/models/fs_file_gc.py b/fs_attachment_s3/models/fs_file_gc.py index 8baf99594a..c849ee14f7 100644 --- a/fs_attachment_s3/models/fs_file_gc.py +++ b/fs_attachment_s3/models/fs_file_gc.py @@ -13,7 +13,7 @@ class FsFileGc(models.Model): _inherit = "fs.file.gc" # S3 bulk delete limit is 1000 objects per request - _GC_BATCH_SIZE = 1000 + _S3_GC_BATCH_SIZE = 1000 def _gc_files_unsafe(self) -> None: """ @@ -63,27 +63,7 @@ def _gc_s3_bulk_delete(self): bucket_name, ) - while True: - # Select the next batch of orphaned files - self._cr.execute( - """ - SELECT store_fname - FROM fs_file_gc - WHERE fs_storage_code = %s - AND NOT EXISTS ( - SELECT 1 FROM ir_attachment - WHERE store_fname = fs_file_gc.store_fname - ) - LIMIT %s - """, - (code, self._GC_BATCH_SIZE), - ) - - rows = self._cr.fetchall() - if not rows: - break - - fnames = [row[0] for row in rows] + while fnames := self._list_pending_gc_files(code, self._GC_BATCH_SIZE): # Prepare the keys by removing the protocol prefix (e.g., s3://) objects_to_delete = [{"Key": f.partition("://")[2]} for f in fnames] diff --git a/fs_attachment_s3/tests/test_fs_file_gc.py b/fs_attachment_s3/tests/test_fs_file_gc.py index 27b0132f58..685ee28902 100644 --- a/fs_attachment_s3/tests/test_fs_file_gc.py +++ b/fs_attachment_s3/tests/test_fs_file_gc.py @@ -4,6 +4,8 @@ from types import SimpleNamespace from unittest.mock import MagicMock, patch +from odoo.tools import mute_logger + from .common import TestFSAttachmentS3Common @@ -71,6 +73,7 @@ def test_gc_s3_bulk_delete_keeps_rows_when_s3_delete_fails(self): ), patch.object(storage_class, "_get_root_filesystem", return_value=root_fs), patch.object(type(self.env.cr), "commit", return_value=None), + mute_logger("odoo.addons.fs_attachment_s3.models.fs_file_gc"), ): self.gc_file_model._gc_s3_bulk_delete() From 6afb36c66971015ed26a2a15b64a0a910586aaad Mon Sep 17 00:00:00 2001 From: "Laurent Mignon (ACSONE)" Date: Wed, 27 May 2026 00:30:57 +0200 Subject: [PATCH 3/3] [DO NOT MERGE] test dependencies --- test-requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test-requirements.txt b/test-requirements.txt index 4ad8e0ecea..0691519621 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1 +1,3 @@ odoo-test-helper + +odoo-addon-fs-attachment @ git+https://github.com/OCA/storage.git@refs/pull/610/head#subdirectory=setup/fs_attachment