diff --git a/fs_attachment_s3/README.rst b/fs_attachment_s3/README.rst index 86c9c546ef..ba96c4fafd 100644 --- a/fs_attachment_s3/README.rst +++ b/fs_attachment_s3/README.rst @@ -155,6 +155,7 @@ Contributors - Laurent Mignon laurent.mignon@acsone.eu (https://www.acsone.eu) - Stéphane Bidoul stephane.bidoul@acsone.eu (https://www.acsone.eu) +- Antoni Marroig amarroig@apsl.net (https://apsl.tech) Other credits ------------- diff --git a/fs_attachment_s3/models/__init__.py b/fs_attachment_s3/models/__init__.py index 45a28cbdca..2ed0909478 100644 --- a/fs_attachment_s3/models/__init__.py +++ b/fs_attachment_s3/models/__init__.py @@ -1,2 +1,3 @@ from . import fs_storage from . import ir_attachment +from . import fs_file_gc diff --git a/fs_attachment_s3/models/fs_file_gc.py b/fs_attachment_s3/models/fs_file_gc.py new file mode 100644 index 0000000000..c849ee14f7 --- /dev/null +++ b/fs_attachment_s3/models/fs_file_gc.py @@ -0,0 +1,105 @@ +# Copyright 2026 APSL-Nagarro Antoni Marroig +# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl). + +import gc +import logging + +from odoo import models + +_logger = logging.getLogger(__name__) + + +class FsFileGc(models.Model): + _inherit = "fs.file.gc" + + # S3 bulk delete limit is 1000 objects per request + _S3_GC_BATCH_SIZE = 1000 + + def _gc_files_unsafe(self) -> None: + """ + Overrides to optimize S3 storage using bulk deletes and + delegates other storage types to the super() method. + """ + # 1. HEAVY LIFTING: Clean up S3 storages first using mass deletion + self._gc_s3_bulk_delete() + + # 2. DELEGATION: Call super() for any remaining storage types + # Since S3 records have already been removed from fs_file_gc, + # super() will only process remaining types (Filestore, SFTP, etc.) + # without hitting timeouts. + return super()._gc_files_unsafe() + + def _gc_s3_bulk_delete(self): + """ + Specific logic for S3 using Boto3 Batch. + Filters storages in memory since autovacuum_gc is not a stored field. + """ + # Search for all storages and filter by the computed field in Python + all_storages = self.env["fs.storage"].search([]) + # We filter those with autovacuum active and verify it's an S3-based path + storages_to_gc = all_storages.filtered( + lambda s: s.autovacuum_gc and s.is_s3_storage + ) + + for storage in storages_to_gc: + code = storage.code + + try: + # directory_path usually contains the bucket name in S3 configurations + bucket_name = storage.get_directory_path().strip("/") + root_fs = storage._get_root_filesystem() + + s3_client = root_fs.s3 + + if not s3_client or not bucket_name: + _logger.warning( + "GC: Could not retrieve S3 client or Bucket name for %s", code + ) + continue + + _logger.info( + "GC: Starting optimized S3 cleanup for %s in bucket %s", + code, + bucket_name, + ) + + while fnames := self._list_pending_gc_files(code, self._GC_BATCH_SIZE): + # Prepare the keys by removing the protocol prefix (e.g., s3://) + objects_to_delete = [{"Key": f.partition("://")[2]} for f in fnames] + + try: + # Perform mass deletion (1 HTTP request per 1000 files) + _logger.info( + "GC: Sending bulk delete request to S3 (%s files)", + len(objects_to_delete), + ) + s3_client.delete_objects( + Bucket=bucket_name, Delete={"Objects": objects_to_delete} + ) + + # Mass delete from database + self._cr.execute( + "DELETE FROM fs_file_gc WHERE store_fname = ANY(%s)", + (fnames,), + ) + + # Commit per batch: releases locks and ensures progress is saved + self._cr.commit() # pylint: disable=invalid-commit + except Exception as e: + _logger.error( + "GC Error: Failed S3 delete_objects for %s: %s", code, e + ) + # We break the loop for this storage + # to prevent DB deletion if S3 fails + break + + # Explicit memory cleanup + gc.collect() + + except Exception as e: + _logger.error( + "GC Error: Could not initialize S3 storage %s: %s", code, e + ) + continue + + _logger.info("GC: Optimized S3 cleanup process finished.") diff --git a/fs_attachment_s3/readme/CONTRIBUTORS.md b/fs_attachment_s3/readme/CONTRIBUTORS.md index 06d49341ab..b738845804 100644 --- a/fs_attachment_s3/readme/CONTRIBUTORS.md +++ b/fs_attachment_s3/readme/CONTRIBUTORS.md @@ -1,2 +1,3 @@ - Laurent Mignon (https://www.acsone.eu) - Stéphane Bidoul (https://www.acsone.eu) +- Antoni Marroig (https://apsl.tech) diff --git a/fs_attachment_s3/static/description/index.html b/fs_attachment_s3/static/description/index.html index b9f0570d73..22f2a073f2 100644 --- a/fs_attachment_s3/static/description/index.html +++ b/fs_attachment_s3/static/description/index.html @@ -505,6 +505,7 @@

Contributors

diff --git a/fs_attachment_s3/tests/__init__.py b/fs_attachment_s3/tests/__init__.py index a788fc4772..7102627b74 100644 --- a/fs_attachment_s3/tests/__init__.py +++ b/fs_attachment_s3/tests/__init__.py @@ -1 +1,2 @@ from . import test_fs_attachment_s3 +from . import test_fs_file_gc diff --git a/fs_attachment_s3/tests/test_fs_file_gc.py b/fs_attachment_s3/tests/test_fs_file_gc.py new file mode 100644 index 0000000000..685ee28902 --- /dev/null +++ b/fs_attachment_s3/tests/test_fs_file_gc.py @@ -0,0 +1,80 @@ +# Copyright 2026 APSL-Nagarro Antoni Marroig +# License AGPL-3.0 or later (https://www.gnu.org/licenses/agpl). + +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +from odoo.tools import mute_logger + +from .common import TestFSAttachmentS3Common + + +class TestFsFileGcS3(TestFSAttachmentS3Common): + def setUp(self): + super().setUp() + self.gc_file_model = self.env["fs.file.gc"] + + def _mark_for_gc(self, *store_fnames): + for store_fname in store_fnames: + self.gc_file_model._mark_for_gc(store_fname) + + def test_gc_s3_bulk_delete_removes_orphaned_files(self): + orphan_1 = "s3tst://dir/sub/orphan_1.txt" + orphan_2 = "s3tst://dir/sub/orphan_2.txt" + referenced = self.fake_attachment_s3.store_fname + self._mark_for_gc(orphan_1, orphan_2, referenced) + + s3_client = MagicMock() + root_fs = SimpleNamespace(s3=s3_client) + + storage_class = type(self.s3_backend) + with ( + patch.object( + storage_class, + "is_s3_storage", + new=property(lambda storage: storage.code == self.s3_backend.code), + ), + patch.object(storage_class, "_get_root_filesystem", return_value=root_fs), + patch.object(type(self.env.cr), "commit", return_value=None), + ): + self.gc_file_model._gc_s3_bulk_delete() + + s3_client.delete_objects.assert_called_once() + _, kwargs = s3_client.delete_objects.call_args + self.assertEqual(kwargs["Bucket"], "test-bucket") + self.assertCountEqual( + kwargs["Delete"]["Objects"], + [ + {"Key": "dir/sub/orphan_1.txt"}, + {"Key": "dir/sub/orphan_2.txt"}, + ], + ) + remaining_files = self.gc_file_model.search( + [("store_fname", "in", [orphan_1, orphan_2, referenced])] + ).mapped("store_fname") + self.assertNotIn(orphan_1, remaining_files) + self.assertNotIn(orphan_2, remaining_files) + self.assertIn(referenced, remaining_files) + + def test_gc_s3_bulk_delete_keeps_rows_when_s3_delete_fails(self): + orphan = "s3tst://dir/sub/orphan.txt" + self._mark_for_gc(orphan) + + s3_client = MagicMock() + s3_client.delete_objects.side_effect = Exception("S3 is unavailable") + root_fs = SimpleNamespace(s3=s3_client) + + storage_class = type(self.s3_backend) + with ( + patch.object( + storage_class, + "is_s3_storage", + new=property(lambda storage: storage.code == self.s3_backend.code), + ), + patch.object(storage_class, "_get_root_filesystem", return_value=root_fs), + patch.object(type(self.env.cr), "commit", return_value=None), + mute_logger("odoo.addons.fs_attachment_s3.models.fs_file_gc"), + ): + self.gc_file_model._gc_s3_bulk_delete() + + self.assertTrue(self.gc_file_model.search_count([("store_fname", "=", orphan)])) diff --git a/test-requirements.txt b/test-requirements.txt index 4ad8e0ecea..0691519621 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1 +1,3 @@ odoo-test-helper + +odoo-addon-fs-attachment @ git+https://github.com/OCA/storage.git@refs/pull/610/head#subdirectory=setup/fs_attachment