From 18da7442466fc230906fef24bb68854829fb1340 Mon Sep 17 00:00:00 2001 From: Brandur Date: Fri, 23 May 2025 00:09:01 -0700 Subject: [PATCH] Add `ORDER BY` to `JobSetStateIfRunningMany` to try and fix intermittency Here, try to fix a test intermittency problem that I ran into while trying to fix a different intermittency problem. Here's the error: --- FAIL: TestDriverRiverDatabaseSQLPgx (0.01s) driver_test.go:72: Reusing idle postgres schema "river_2025_05_22t23_56_07_schema_07" [user facing: "river_2025_05_22t23_56_07_schema_07"] after cleaning in 12.716292ms [25 generated] [506 reused] driver_test.go:72: Driver does not support listener; skipping listener tests --- FAIL: TestDriverRiverDatabaseSQLPgx/JobSetStateIfRunningMany_MultipleJobsAtOnce (0.00s) riverdrivertest.go:2663: TestTx using postgres schema: river_2025_05_22t23_56_07_schema_14 riverdrivertest.go:2679: Error Trace: /Users/brandur/Documents/projects/river/internal/riverinternaltest/riverdrivertest/riverdrivertest.go:2679 Error: Not equal: expected: "completed" actual : "retryable" Diff: --- Expected +++ Actual @@ -1,2 +1,2 @@ -(rivertype.JobState) (len=9) "completed" +(rivertype.JobState) (len=9) "retryable" Test: TestDriverRiverDatabaseSQLPgx/JobSetStateIfRunningMany_MultipleJobsAtOnce riverdbtest.go:289: Checked in postgres schema "river_2025_05_22t23_56_07_schema_07"; 8 idle schema(s) [30 generated] [1000 reused] FAIL FAIL github.com/riverqueue/river 22.974s FAIL It stems from these lines in the driver tests: jobsAfter, err := exec.JobSetStateIfRunningMany(ctx, setStateManyParams( riverdriver.JobSetStateCompleted(job1.ID, now, []byte(`{"a":"b"}`)), riverdriver.JobSetStateErrorRetryable(job2.ID, future, makeErrPayload(t, now), nil), riverdriver.JobSetStateCancelled(job3.ID, now, makeErrPayload(t, now), nil), )) require.NoError(t, err) completedJob := jobsAfter[0] require.Equal(t, rivertype.JobStateCompleted, completedJob.State) I think the problem is that when selecting rows to return from `JobSetStateIfRunningMany`. Usually these return in input order, but SQL doesn't guarantee it, so occasionally jobs come back in the wrong order and we get the `retryable` job before the `completed` job. Here, we add an `ORDER BY` on the final `SELECT`. I only add it on one side of the `UNION` which if I'm reading this correctly [1], should be enough. [1] https://www.postgresql.org/message-id/16814.1280268424%40sss.pgh.pa.us --- riverdriver/riverdatabasesql/internal/dbsqlc/river_job.sql.go | 4 +++- riverdriver/riverpgxv5/internal/dbsqlc/river_job.sql | 4 +++- riverdriver/riverpgxv5/internal/dbsqlc/river_job.sql.go | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/riverdriver/riverdatabasesql/internal/dbsqlc/river_job.sql.go b/riverdriver/riverdatabasesql/internal/dbsqlc/river_job.sql.go index de1f981c..38e4f113 100644 --- a/riverdriver/riverdatabasesql/internal/dbsqlc/river_job.sql.go +++ b/riverdriver/riverdatabasesql/internal/dbsqlc/river_job.sql.go @@ -1266,7 +1266,9 @@ WHERE NOT EXISTS ( WHERE updated.id = river_job.id ) UNION ALL -SELECT id, args, attempt, attempted_at, attempted_by, created_at, errors, finalized_at, kind, max_attempts, metadata, priority, queue, state, scheduled_at, tags, unique_key, unique_states FROM updated +SELECT id, args, attempt, attempted_at, attempted_by, created_at, errors, finalized_at, kind, max_attempts, metadata, priority, queue, state, scheduled_at, tags, unique_key, unique_states +FROM updated +ORDER BY id ` type JobSetStateIfRunningManyParams struct { diff --git a/riverdriver/riverpgxv5/internal/dbsqlc/river_job.sql b/riverdriver/riverpgxv5/internal/dbsqlc/river_job.sql index 4edc9efd..46430c39 100644 --- a/riverdriver/riverpgxv5/internal/dbsqlc/river_job.sql +++ b/riverdriver/riverpgxv5/internal/dbsqlc/river_job.sql @@ -576,7 +576,9 @@ WHERE NOT EXISTS ( WHERE updated.id = river_job.id ) UNION ALL -SELECT * FROM updated; +SELECT * +FROM updated +ORDER BY id; -- A generalized update for any property on a job. This brings in a large number -- of parameters and therefore may be more suitable for testing than production. diff --git a/riverdriver/riverpgxv5/internal/dbsqlc/river_job.sql.go b/riverdriver/riverpgxv5/internal/dbsqlc/river_job.sql.go index 4c477fac..d90ec9ad 100644 --- a/riverdriver/riverpgxv5/internal/dbsqlc/river_job.sql.go +++ b/riverdriver/riverpgxv5/internal/dbsqlc/river_job.sql.go @@ -1242,7 +1242,9 @@ WHERE NOT EXISTS ( WHERE updated.id = river_job.id ) UNION ALL -SELECT id, args, attempt, attempted_at, attempted_by, created_at, errors, finalized_at, kind, max_attempts, metadata, priority, queue, state, scheduled_at, tags, unique_key, unique_states FROM updated +SELECT id, args, attempt, attempted_at, attempted_by, created_at, errors, finalized_at, kind, max_attempts, metadata, priority, queue, state, scheduled_at, tags, unique_key, unique_states +FROM updated +ORDER BY id ` type JobSetStateIfRunningManyParams struct {