Skip to content

Commit 64042b0

Browse files
committed
feat(model): add stable chunking by primary key
- Add chunkById() and chunkRowsById() for primary-key ordered chunking
- Reject incompatible builder state like orderBy(), groupBy(), limit(), offset(), and union()
- Document ID-based chunking behavior and constraints
- Add live model coverage for stable deletion, soft deletes, early exit, and invalid query shapes

Signed-off-by: memleakd <121398829+memleakd@users.noreply.github.com>
1 parent 7b7742f commit 64042b0

8 files changed

Lines changed: 470 additions & 7 deletions

File tree

system/BaseModel.php

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,19 @@ abstract public function countAllResults(bool $reset = true, bool $test = false)
593593
*/
594594
abstract public function chunk(int $size, Closure $userFunc);
595595

596+
/**
597+
* Loops over records in batches ordered by the primary key.
598+
* This method works only with DB calls.
*
* Each record is passed to `$userFunc` one at a time. In the Model
* implementation, iteration stops as soon as `$userFunc` returns `false`.
599+
*
* @param int $size Number of records to fetch per batch; must be a positive integer.
600+
* @param Closure(array<string, string>|object): mixed $userFunc
601+
*
602+
* @return void
603+
*
604+
* @throws DataException if a batch query does not return a result (Model implementation)
605+
* @throws InvalidArgumentException if $size is not a positive integer or the current query cannot be chunked by ID
606+
*/
607+
abstract public function chunkById(int $size, Closure $userFunc);
608+
596609
/**
597610
* Loops over records in batches, allowing you to operate on each chunk at a time.
598611
* This method works only with DB calls.
@@ -609,6 +622,22 @@ abstract public function chunk(int $size, Closure $userFunc);
609622
*/
610623
abstract public function chunkRows(int $size, Closure $userFunc);
611624

625+
/**
626+
* Loops over records in batches ordered by the primary key, allowing you to operate on one chunk at a time.
627+
* This method works only with DB calls.
628+
*
629+
* This method calls the `$userFunc` with the chunk, instead of a single record as in `chunkById()`.
630+
* This allows you to operate on multiple records at once, which can be more efficient for certain operations.
* In the Model implementation, iteration stops as soon as `$userFunc` returns `false`.
631+
*
* @param int $size Number of records to fetch per chunk; must be a positive integer.
632+
* @param Closure(list<array<string, string>>|list<object>): mixed $userFunc
633+
*
634+
* @return void
635+
*
636+
* @throws DataException if a chunk query does not return a result (Model implementation)
637+
* @throws InvalidArgumentException if $size is not a positive integer or the current query cannot be chunked by ID
638+
*/
639+
abstract public function chunkRowsById(int $size, Closure $userFunc);
640+
612641
/**
613642
* Fetches the row of database.
614643
*

system/Database/BaseBuilder.php

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2034,6 +2034,16 @@ public function offset(int $offset)
20342034
return $this;
20352035
}
20362036

2037+
/**
2038+
* Checks if the current query has a LIMIT, OFFSET or UNION clause.
2039+
*
2040+
* @internal This method is for internal Model use only.
*
* @return bool True when QBLimit or QBOffset is anything other than false, or QBUnion is not empty.
2041+
*/
2042+
public function hasLimitOffsetOrUnion(): bool
2043+
{
2044+
return $this->QBLimit !== false || $this->QBOffset !== false || $this->QBUnion !== [];
2045+
}
2046+
20372047
/**
20382048
* Generates a platform-specific LIMIT clause.
20392049
*/

system/Model.php

Lines changed: 111 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ class Model extends BaseModel
165165
'getCompiledInsert',
166166
'getCompiledSelect',
167167
'getCompiledUpdate',
168+
'hasLimitOffsetOrUnion',
168169
];
169170

170171
public function __construct(?ConnectionInterface $db = null, ?ValidationInterface $validation = null)
@@ -599,9 +600,7 @@ private function prepareSoftDeleteQuery(bool $reset): void
599600
*/
600601
private function iterateChunks(int $size): Generator
601602
{
602-
if ($size <= 0) {
603-
throw new InvalidArgumentException('$size must be a positive integer.');
604-
}
603+
$this->assertValidChunkSize($size);
605604

606605
$total = $this->builder()->countAllResults(false);
607606
$offset = 0;
@@ -626,6 +625,89 @@ private function iterateChunks(int $size): Generator
626625
}
627626
}
628627

628+
/**
629+
* Iterates over the result set in chunks of the specified size ordered by the primary key.
*
* Instead of paging with an offset, every chunk after the first is restricted to rows whose
* primary key is greater than the primary key of the last row of the previous chunk, and each
* chunk is ordered by the table-qualified primary key ascending.
*
* Because this is a generator, the validation below runs when iteration starts,
* not when the method is called.
630+
*
631+
* @param int $size The number of records to retrieve in each chunk.
632+
*
633+
* @return Generator<list<array<string, string>>|list<object>>
*
* @throws DataException if a chunk query does not return a result
* @throws InvalidArgumentException if $size is not positive, the model has no primary key,
*                                  the builder already has orderBy(), groupBy(), limit(), offset() or union() state,
*                                  or the primary key cannot be read from the fetched rows
634+
*/
635+
private function iterateChunksById(int $size): Generator
636+
{
637+
$this->assertValidChunkSize($size);
638+
639+
if ($this->primaryKey === '') {
640+
throw new InvalidArgumentException('ID-based chunking requires a primary key.');
641+
}
642+
643+
// Work on a copy: the conditions added below are applied to clones, not to the builder returned by builder().
$builder = clone $this->builder();
644+
$qualifiedPrimaryKey = $this->table . '.' . $this->primaryKey;
645+
$lastPrimaryKey = null;
646+
// Separate flag for "a previous chunk exists"; $lastPrimaryKey starts as null and is only set after a fetch.
$hasLastPrimaryKey = false;
647+
648+
// Reject builder state that conflicts with the ORDER BY / row limit this method applies itself.
if ($builder->QBOrderBy !== []) {
649+
throw new InvalidArgumentException('ID-based chunking cannot be used with orderBy().');
650+
}
651+
652+
if ($builder->QBGroupBy !== []) {
653+
throw new InvalidArgumentException('ID-based chunking cannot be used with groupBy().');
654+
}
655+
656+
if ($builder->hasLimitOffsetOrUnion()) {
657+
throw new InvalidArgumentException('ID-based chunking cannot be used with limit(), offset() or union().');
658+
}
659+
660+
while (true) {
661+
// Fresh copy per chunk so the WHERE / ORDER BY added for one chunk do not accumulate into the next.
$chunkBuilder = clone $builder;
662+
663+
// When the tempUseSoftDeletes flag is set, restrict the chunk to rows whose deleted field is null.
if ($this->tempUseSoftDeletes) {
664+
$chunkBuilder->where($this->table . '.' . $this->deletedField, null);
665+
}
666+
667+
// From the second chunk on, continue strictly after the last primary key already yielded.
if ($hasLastPrimaryKey) {
668+
$chunkBuilder->where($qualifiedPrimaryKey . ' >', $lastPrimaryKey);
669+
}
670+
671+
$rows = $chunkBuilder
672+
->orderBy($qualifiedPrimaryKey, 'ASC')
673+
->get($size);
674+
675+
// get() returned a falsy value instead of a result object.
if (! $rows) {
676+
throw DataException::forEmptyDataset('chunkById');
677+
}
678+
679+
$rows = $rows->getResult($this->tempReturnType);
680+
681+
// No rows left: end the generator without yielding an empty chunk.
if ($rows === []) {
682+
return;
683+
}
684+
685+
// The last row of this chunk provides the lower bound for the next chunk.
$lastPrimaryKey = $this->getIdValue($rows[array_key_last($rows)]);
686+
687+
// A null ID is treated as the primary key not being part of the selected columns.
if ($lastPrimaryKey === null) {
688+
throw new InvalidArgumentException('The primary key must be selected for ID-based chunking.');
689+
}
690+
691+
$hasLastPrimaryKey = true;
692+
693+
yield $rows;
694+
695+
// Fewer rows than requested: treat the result set as exhausted and skip the extra query.
if (count($rows) < $size) {
696+
return;
697+
}
698+
}
699+
}
700+
701+
/**
702+
* Asserts the chunk size is valid.
*
* Shared guard used by the chunk iterators so they reject a bad size with the same message.
*
* @param int $size The requested chunk size.
*
* @throws InvalidArgumentException if $size is zero or negative
703+
*/
704+
private function assertValidChunkSize(int $size): void
705+
{
706+
if ($size <= 0) {
707+
throw new InvalidArgumentException('$size must be a positive integer.');
708+
}
709+
}
710+
629711
/**
630712
* {@inheritDoc}
631713
*/
@@ -640,6 +722,20 @@ public function chunk(int $size, Closure $userFunc)
640722
}
641723
}
642724

725+
/**
726+
* {@inheritDoc}
*
* Fetches the records through iterateChunksById() and passes every row of every
* chunk to `$userFunc` individually.
727+
*/
728+
public function chunkById(int $size, Closure $userFunc)
729+
{
730+
foreach ($this->iterateChunksById($size) as $rows) {
731+
foreach ($rows as $row) {
732+
// A strict `false` from the callback ends the whole iteration, not just the current chunk.
if ($userFunc($row) === false) {
733+
return;
734+
}
735+
}
736+
}
737+
}
738+
643739
/**
644740
* {@inheritDoc}
645741
*/
@@ -652,6 +748,18 @@ public function chunkRows(int $size, Closure $userFunc): void
652748
}
653749
}
654750

751+
/**
752+
* {@inheritDoc}
*
* Fetches the records through iterateChunksById() and passes each whole chunk
* to `$userFunc` in a single call.
753+
*/
754+
public function chunkRowsById(int $size, Closure $userFunc): void
755+
{
756+
foreach ($this->iterateChunksById($size) as $rows) {
757+
// A strict `false` from the callback stops the iteration before the next chunk is fetched.
if ($userFunc($rows) === false) {
758+
return;
759+
}
760+
}
761+
}
762+
655763
/**
656764
* Provides a shared instance of the Query Builder.
657765
*

0 commit comments

Comments
 (0)