From 64042b0612ed9bb0c8e033ccfd4fa15c80e51fe8 Mon Sep 17 00:00:00 2001 From: memleakd <121398829+memleakd@users.noreply.github.com> Date: Wed, 1 Jul 2026 11:19:47 +0300 Subject: [PATCH 1/2] feat(model): add stable chunking by primary key - Add chunkById() and chunkRowsById() for primary-key ordered chunking - Reject incompatible builder state like orderBy(), groupBy(), limit(), offset(), and union() - Document ID-based chunking behavior and constraints - Add live model coverage for stable deletion, soft deletes, early exit, and invalid query shapes Signed-off-by: memleakd <121398829+memleakd@users.noreply.github.com> --- system/BaseModel.php | 29 +++ system/Database/BaseBuilder.php | 10 + system/Model.php | 114 +++++++- tests/system/Models/ChunkByIdModelTest.php | 272 ++++++++++++++++++++ user_guide_src/source/changelogs/v4.8.0.rst | 9 +- user_guide_src/source/models/model.rst | 29 +++ user_guide_src/source/models/model/069.php | 7 + user_guide_src/source/models/model/070.php | 7 + 8 files changed, 470 insertions(+), 7 deletions(-) create mode 100644 tests/system/Models/ChunkByIdModelTest.php create mode 100644 user_guide_src/source/models/model/069.php create mode 100644 user_guide_src/source/models/model/070.php diff --git a/system/BaseModel.php b/system/BaseModel.php index 2a826b9dd235..3a1a4e9d49fb 100644 --- a/system/BaseModel.php +++ b/system/BaseModel.php @@ -593,6 +593,19 @@ abstract public function countAllResults(bool $reset = true, bool $test = false) */ abstract public function chunk(int $size, Closure $userFunc); + /** + * Loops over records in batches ordered by the primary key. + * This method works only with DB calls. 
+ * + * @param Closure(array|object): mixed $userFunc + * + * @return void + * + * @throws DataException + * @throws InvalidArgumentException if $size is not a positive integer or the current query cannot be chunked by ID + */ + abstract public function chunkById(int $size, Closure $userFunc); + /** * Loops over records in batches, allowing you to operate on each chunk at a time. * This method works only with DB calls. @@ -609,6 +622,22 @@ abstract public function chunk(int $size, Closure $userFunc); */ abstract public function chunkRows(int $size, Closure $userFunc); + /** + * Loops over records in batches ordered by the primary key, allowing you to operate on each chunk at a time. + * This method works only with DB calls. + * + * This method calls the `$userFunc` with the chunk, instead of a single record as in `chunkById()`. + * This allows you to operate on multiple records at once, which can be more efficient for certain operations. + * + * @param Closure(list<array<string, mixed>>|list<object>): mixed $userFunc + * + * @return void + * + * @throws DataException + * @throws InvalidArgumentException if $size is not a positive integer or the current query cannot be chunked by ID + */ + abstract public function chunkRowsById(int $size, Closure $userFunc); + /** * Fetches the row of database. * diff --git a/system/Database/BaseBuilder.php b/system/Database/BaseBuilder.php index 787d31d5f0ee..d3a2056e5f2a 100644 --- a/system/Database/BaseBuilder.php +++ b/system/Database/BaseBuilder.php @@ -2034,6 +2034,16 @@ public function offset(int $offset) return $this; } + /** + * Checks if the current query has a LIMIT, OFFSET or UNION clause. + * + * @internal This method is for internal Model use only. + */ + public function hasLimitOffsetOrUnion(): bool + { + return $this->QBLimit !== false || $this->QBOffset !== false || $this->QBUnion !== []; + } + /** * Generates a platform-specific LIMIT clause.
*/ diff --git a/system/Model.php b/system/Model.php index 73a24c35cebf..cea500463e01 100644 --- a/system/Model.php +++ b/system/Model.php @@ -165,6 +165,7 @@ class Model extends BaseModel 'getCompiledInsert', 'getCompiledSelect', 'getCompiledUpdate', + 'hasLimitOffsetOrUnion', ]; public function __construct(?ConnectionInterface $db = null, ?ValidationInterface $validation = null) @@ -599,9 +600,7 @@ private function prepareSoftDeleteQuery(bool $reset): void */ private function iterateChunks(int $size): Generator { - if ($size <= 0) { - throw new InvalidArgumentException('$size must be a positive integer.'); - } + $this->assertValidChunkSize($size); $total = $this->builder()->countAllResults(false); $offset = 0; @@ -626,6 +625,89 @@ private function iterateChunks(int $size): Generator } } + /** + * Iterates over the result set in chunks of the specified size ordered by the primary key. + * + * @param int $size The number of records to retrieve in each chunk. + * + * @return Generator<list<array<string, mixed>>|list<object>> + */ + private function iterateChunksById(int $size): Generator + { + $this->assertValidChunkSize($size); + + if ($this->primaryKey === '') { + throw new InvalidArgumentException('ID-based chunking requires a primary key.'); + } + + $builder = clone $this->builder(); + $qualifiedPrimaryKey = $this->table . '.' . $this->primaryKey; + $lastPrimaryKey = null; + $hasLastPrimaryKey = false; + + if ($builder->QBOrderBy !== []) { + throw new InvalidArgumentException('ID-based chunking cannot be used with orderBy().'); + } + + if ($builder->QBGroupBy !== []) { + throw new InvalidArgumentException('ID-based chunking cannot be used with groupBy().'); + } + + if ($builder->hasLimitOffsetOrUnion()) { + throw new InvalidArgumentException('ID-based chunking cannot be used with limit(), offset() or union().'); + } + + while (true) { + $chunkBuilder = clone $builder; + + if ($this->tempUseSoftDeletes) { + $chunkBuilder->where($this->table . '.' .
$this->deletedField, null); + } + + if ($hasLastPrimaryKey) { + $chunkBuilder->where($qualifiedPrimaryKey . ' >', $lastPrimaryKey); + } + + $rows = $chunkBuilder + ->orderBy($qualifiedPrimaryKey, 'ASC') + ->get($size); + + if (! $rows) { + throw DataException::forEmptyDataset('chunkById'); + } + + $rows = $rows->getResult($this->tempReturnType); + + if ($rows === []) { + return; + } + + $lastPrimaryKey = $this->getIdValue($rows[array_key_last($rows)]); + + if ($lastPrimaryKey === null) { + throw new InvalidArgumentException('The primary key must be selected for ID-based chunking.'); + } + + $hasLastPrimaryKey = true; + + yield $rows; + + if (count($rows) < $size) { + return; + } + } + } + + /** + * Asserts the chunk size is valid. + */ + private function assertValidChunkSize(int $size): void + { + if ($size <= 0) { + throw new InvalidArgumentException('$size must be a positive integer.'); + } + } + /** * {@inheritDoc} */ @@ -640,6 +722,20 @@ public function chunk(int $size, Closure $userFunc) } } + /** + * {@inheritDoc} + */ + public function chunkById(int $size, Closure $userFunc) + { + foreach ($this->iterateChunksById($size) as $rows) { + foreach ($rows as $row) { + if ($userFunc($row) === false) { + return; + } + } + } + } + /** * {@inheritDoc} */ @@ -652,6 +748,18 @@ public function chunkRows(int $size, Closure $userFunc): void } } + /** + * {@inheritDoc} + */ + public function chunkRowsById(int $size, Closure $userFunc): void + { + foreach ($this->iterateChunksById($size) as $rows) { + if ($userFunc($rows) === false) { + return; + } + } + } + /** * Provides a shared instance of the Query Builder. 
* diff --git a/tests/system/Models/ChunkByIdModelTest.php b/tests/system/Models/ChunkByIdModelTest.php new file mode 100644 index 000000000000..a4db31964232 --- /dev/null +++ b/tests/system/Models/ChunkByIdModelTest.php @@ -0,0 +1,272 @@ +<?php + +declare(strict_types=1); + +/** + * This file is part of CodeIgniter 4 framework. + * + * (c) CodeIgniter Foundation <admin@codeigniter.com> + * + * For the full copyright and license information, please view + * the LICENSE file that was distributed with this source code. + */ + +namespace CodeIgniter\Models; + +use CodeIgniter\Database\BaseBuilder; +use CodeIgniter\Exceptions\InvalidArgumentException; +use CodeIgniter\Model; +use PHPUnit\Framework\Attributes\Group; +use Tests\Support\Models\UserModel; + +/** + * @internal + */ +#[Group('DatabaseLive')] +final class ChunkByIdModelTest extends LiveModelTestCase +{ + public function testChunkById(): void + { + $ids = []; + + $this->createModel(UserModel::class)->chunkById(2, static function ($row) use (&$ids): void { + $ids[] = self::userId($row); + }); + + $this->assertSame([1, 2, 3, 4], $ids); + } + + public function testChunkByIdThrowsOnZeroSize(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('$size must be a positive integer.'); + + $this->createModel(UserModel::class)->chunkById(0, static function ($row): void {}); + } + + public function testChunkByIdThrowsOnNegativeSize(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('$size must be a positive integer.'); + + $this->createModel(UserModel::class)->chunkById(-1, static function ($row): void {}); + } + + public function testChunkByIdEarlyExit(): void + { + $ids = []; + + $this->createModel(UserModel::class)->chunkById(2, static function ($row) use (&$ids): bool { + $ids[] = self::userId($row); + + return false; + }); + + $this->assertSame([1], $ids); + } + + public function testChunkByIdRespectsBuilderConditions(): void + { + $ids = []; + + $this->createModel(UserModel::class) + ->where('country', 'US') + ->chunkById(2, static function ($row) use (&$ids):
void { + $ids[] = self::userId($row); + }); + + $this->assertSame([1, 3], $ids); + } + + public function testChunkByIdDoesNotSkipRowsWhenProcessedRowsAreDeleted(): void + { + $ids = []; + + $this->createModel(UserModel::class)->chunkById(2, function ($row) use (&$ids): void { + $id = self::userId($row); + $ids[] = $id; + + if ($id === 1) { + $this->db->table('user')->where('id', 1)->delete(); + } + }); + + $this->assertSame([1, 2, 3, 4], $ids); + } + + public function testChunkByIdPreservesConditionsWhenSameModelDeletesRows(): void + { + $ids = []; + $this->createModel(UserModel::class); + + $this->model + ->where('country', 'US') + ->chunkById(1, function ($row) use (&$ids): void { + $id = self::userId($row); + $ids[] = $id; + + $this->model->delete($id); + }); + + $this->assertSame([1, 3], $ids); + } + + public function testChunkByIdRespectsSoftDeletes(): void + { + $ids = []; + $this->createModel(UserModel::class); + + $this->model->delete(1); + $this->model->chunkById(2, static function ($row) use (&$ids): void { + $ids[] = self::userId($row); + }); + + $this->assertSame([2, 3, 4], $ids); + } + + public function testChunkByIdWithDeleted(): void + { + $ids = []; + $this->createModel(UserModel::class); + + $this->model->delete(1); + $this->model->withDeleted()->chunkById(2, static function ($row) use (&$ids): void { + $ids[] = self::userId($row); + }); + + $this->assertSame([1, 2, 3, 4], $ids); + } + + public function testChunkByIdOnlyDeleted(): void + { + $ids = []; + $this->createModel(UserModel::class); + + $this->model->delete(1); + $this->model->onlyDeleted()->chunkById(2, static function ($row) use (&$ids): void { + $ids[] = self::userId($row); + }); + + $this->assertSame([1], $ids); + } + + public function testChunkByIdThrowsWithOrderBy(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('ID-based chunking cannot be used with orderBy().'); + + $this->createModel(UserModel::class) + ->orderBy('name') + 
->chunkById(2, static function ($row): void {}); + } + + public function testChunkByIdThrowsWithGroupBy(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('ID-based chunking cannot be used with groupBy().'); + + $this->createModel(UserModel::class) + ->groupBy('country') + ->chunkById(2, static function ($row): void {}); + } + + public function testChunkByIdThrowsWithLimit(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('ID-based chunking cannot be used with limit(), offset() or union().'); + + $this->createModel(UserModel::class) + ->limit(2) + ->chunkById(2, static function ($row): void {}); + } + + public function testChunkByIdThrowsWithOffset(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('ID-based chunking cannot be used with limit(), offset() or union().'); + + $this->createModel(UserModel::class) + ->offset(1) + ->chunkById(2, static function ($row): void {}); + } + + public function testChunkByIdThrowsWithUnion(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('ID-based chunking cannot be used with limit(), offset() or union().'); + + $model = $this->createModel(UserModel::class); + $model->builder()->union(static fn (BaseBuilder $builder): BaseBuilder => $builder->from('user')); + $model->chunkById(2, static function ($row): void {}); + } + + public function testChunkByIdThrowsWhenPrimaryKeyIsNotSelected(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('The primary key must be selected for ID-based chunking.'); + + $this->createModel(UserModel::class) + ->select('name') + ->chunkById(2, static function ($row): void {}); + } + + public function testChunkByIdThrowsWithoutPrimaryKey(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('ID-based 
chunking requires a primary key.'); + + $model = new class ($this->db) extends Model { + protected $table = 'user'; + protected $primaryKey = ''; + protected $returnType = 'object'; + }; + + $model->chunkById(2, static function ($row): void {}); + } + + public function testChunkRowsById(): void + { + $chunkCount = 0; + $numRowsInChunk = []; + + $this->createModel(UserModel::class)->chunkRowsById(2, static function ($rows) use (&$chunkCount, &$numRowsInChunk): void { + $chunkCount++; + $numRowsInChunk[] = count($rows); + }); + + $this->assertSame(2, $chunkCount); + $this->assertSame([2, 2], $numRowsInChunk); + } + + public function testChunkRowsByIdEarlyExit(): void + { + $chunkCount = 0; + + $this->createModel(UserModel::class)->chunkRowsById(2, static function ($rows) use (&$chunkCount): bool { + $chunkCount++; + + return false; + }); + + $this->assertSame(1, $chunkCount); + } + + public function testChunkRowsByIdThrowsOnZeroSize(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('$size must be a positive integer.'); + + $this->createModel(UserModel::class)->chunkRowsById(0, static function ($rows): void {}); + } + + private static function userId(array|object $row): int + { + $data = (array) $row; + + if (! array_key_exists('id', $data)) { + self::fail('Expected the row to contain an id value.'); + } + + return (int) $data['id']; + } +} diff --git a/user_guide_src/source/changelogs/v4.8.0.rst b/user_guide_src/source/changelogs/v4.8.0.rst index e145c14cacbc..c44caab7b3cc 100644 --- a/user_guide_src/source/changelogs/v4.8.0.rst +++ b/user_guide_src/source/changelogs/v4.8.0.rst @@ -70,6 +70,7 @@ Method Signature Changes - **Config:** ``CodeIgniter\Config\Services::request()`` no longer accepts any parameter. 
- **Database:** The following methods have had their signatures updated to remove deprecated parameters: - ``CodeIgniter\Database\Forge::_createTable()`` no longer accepts the deprecated ``$ifNotExists`` parameter. The method signature is now ``_createTable(string $table, array $attributes)``. +- **Model:** ``CodeIgniter\BaseModel`` now requires the ``chunkRows()``, ``chunkById()``, and ``chunkRowsById()`` methods. Custom classes extending ``BaseModel`` directly must implement them. Property Scope Changes ====================== @@ -272,8 +273,8 @@ Debug Model ===== -- Added new ``chunkRows()`` method to ``CodeIgniter\Model`` for processing large datasets in smaller chunks. -- Added new ``firstOrInsert()`` method to ``CodeIgniter\Model`` that finds the first row matching the given attributes or inserts a new one. See :ref:`model-first-or-insert`. +- Added ``chunkRows()``, ``chunkById()``, and ``chunkRowsById()`` methods to ``CodeIgniter\Model`` for processing large datasets in chunks. +- Added ``firstOrInsert()`` method to ``CodeIgniter\Model`` that finds the first row matching the given attributes or inserts a new one. See :ref:`model-first-or-insert`. - Added ``$throwOnDisallowedFields`` and ``throwOnDisallowedFields()`` to ``CodeIgniter\Model`` to throw a ``DataException`` when write data contains fields that would otherwise be discarded by ``$allowedFields``. See :ref:`model-throw-on-disallowed-fields`. Libraries @@ -297,8 +298,8 @@ Helpers and Functions object rows (including ``Entity``) in :php:func:`dot_array_search()`, :php:func:`dot_array_has()`, :php:func:`dot_array_only()`, :php:func:`dot_array_except()`, and :php:func:`array_group_by()`. - :php:func:`dot_array_only()` and :php:func:`dot_array_except()` still return arrays. If the source itself is an object, - it is read as an array-like value. Object values inside the source are kept unchanged when selected as a whole or left untouched. 
+ :php:func:`dot_array_only()` and :php:func:`dot_array_except()` still return arrays. If the source itself is an object, + it is read as an array-like value. Object values inside the source are kept unchanged when selected as a whole or left untouched. Partial object paths are returned as arrays. HTTP diff --git a/user_guide_src/source/models/model.rst b/user_guide_src/source/models/model.rst index 648d5b9c9b01..99666a43bdc1 100644 --- a/user_guide_src/source/models/model.rst +++ b/user_guide_src/source/models/model.rst @@ -998,6 +998,35 @@ On the other hand, if you want the entire chunk to be passed to the Closure at o .. literalinclude:: model/064.php +chunkById() +----------- + +.. versionadded:: 4.8.0 + +If you need to update or delete rows while processing them, use ``chunkById()``. +This method retrieves each chunk ordered by the model's primary key instead of using an offset, +so deleting or updating rows already processed will not cause later rows to be skipped. + +.. literalinclude:: model/069.php + +chunkRowsById() +--------------- + +.. versionadded:: 4.8.0 + +If you want the entire ID-based chunk to be passed to the Closure at once, use ``chunkRowsById()``. + +.. literalinclude:: model/070.php + +.. warning:: ID-based chunking needs the model to define a primary key, and that primary key must be selected. + If you customize the selected columns, make sure the primary key is included. + When joining tables, make sure the model table primary key remains available under its normal field name. + These methods manage their own primary key ordering and cannot be used with + ``orderBy()``, ``groupBy()``, ``limit()``, ``offset()`` or ``union()``. + You should also avoid changing primary key values while processing chunks. + Since these methods add their own ``WHERE`` condition to each chunk query, + group any ``orWhere()`` conditions that should be evaluated together. + .. 
_model-events-callbacks: Working with Query Builder diff --git a/user_guide_src/source/models/model/069.php b/user_guide_src/source/models/model/069.php new file mode 100644 index 000000000000..e5f2dd0b5f01 --- /dev/null +++ b/user_guide_src/source/models/model/069.php @@ -0,0 +1,7 @@ +<?php + +$userModel->where('country', 'US') + ->chunkById(100, static function ($data) { + // do something. + // $data is a single row of data. + }); diff --git a/user_guide_src/source/models/model/070.php b/user_guide_src/source/models/model/070.php new file mode 100644 index 000000000000..a6d9c71fee72 --- /dev/null +++ b/user_guide_src/source/models/model/070.php @@ -0,0 +1,7 @@ +<?php + +$userModel->where('country', 'US') + ->chunkRowsById(100, static function ($rows) { + // do something. + // $rows is an array of rows representing a chunk of up to 100 items. + }); From 74b3f2be6cc66b29f193c5f76db1995ac07e14d8 Mon Sep 17 00:00:00 2001 From: memleakd <121398829+memleakd@users.noreply.github.com> Date: Wed, 1 Jul 2026 11:48:08 +0300 Subject: [PATCH 2/2] fix: phpstan Signed-off-by: memleakd <121398829+memleakd@users.noreply.github.com> --- tests/system/Models/ChunkByIdModelTest.php | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/system/Models/ChunkByIdModelTest.php b/tests/system/Models/ChunkByIdModelTest.php index a4db31964232..7c0a97dffaa3 100644 --- a/tests/system/Models/ChunkByIdModelTest.php +++ b/tests/system/Models/ChunkByIdModelTest.php @@ -259,6 +259,9 @@ public function testChunkRowsByIdThrowsOnZeroSize(): void $this->createModel(UserModel::class)->chunkRowsById(0, static function ($rows): void {}); } + /** + * @param array|object $row + */ private static function userId(array|object $row): int { $data = (array) $row;