From ff1b46dfb46b1823087b5a8f018b095f6a5b202e Mon Sep 17 00:00:00 2001
From: Michael Erickson <michae2@cockroachlabs.com>
Date: Fri, 11 Apr 2025 12:09:49 -0700
Subject: [PATCH] sql: move job check back out of txn creating autostats job

In #119383 we moved the find-running-jobs-of-type query into the same
transaction that creates the autostats jobs, in order to avoid a race
between different nodes trying to start an autostats job. In practice,
however, we've seen that this combined read-write transaction suffers
from many retries due to RETRY_SERIALIZABLE errors, when racing
transactions write to system.jobs and then fail to refresh the
find-running-jobs read.

One the one hand, the fact that these transactions fail to commit does
mean fewer rows are visible in system.jobs. On the other hand, each
aborted transaction leaves behind an intent to clean up, so I don't
think the load on system.jobs is any lighter than it was before.

These types of races, where writing a new row depends on the absence of
other rows, is kind of the worst case for optimistic Serializable
isolation. If we could materialize the conflict to a single existing row
that the transaction locks, we'd avoid both the serialization failures
and the race to start a job. But we don't have such a row. (Maybe one
day we could create a "NEXT AUTOSTATS JOB" sentinel row in system.jobs
that the winner locks before claiming it and starting the job? But that
seems like a big change.)

For now, this PR partially reverts #119383 so that find-running-jobs is
back in a separate read-only transaction before creating the job. Using
a read-only transaction to check avoids the serializable errors, at the
cost of sometimes creating too many jobs which will then immediately
fail the repeated check within the job. We think this is going to reduce
the load on system.jobs during times of high contention. Cluster setting
`sql.stats.automatic_job_check_before_creating_job` can be used to get
back the old behavior.

In the future when we allow more than one autostats job, we hope to
improve the situation by changing the mechanism for preventing
concurrent stats collections on the same table. Maybe then we can have
some kind of sentinel row per table to materialize the conflict.

Informs: #108435

Release note: None
---
 pkg/sql/create_stats.go | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/pkg/sql/create_stats.go b/pkg/sql/create_stats.go
index 11d1093d8494..fad491dd383d 100644
--- a/pkg/sql/create_stats.go
+++ b/pkg/sql/create_stats.go
@@ -76,6 +76,13 @@ var nonIndexJSONHistograms = settings.RegisterBoolSetting(
 	false,
 	settings.WithPublic)
 
+var automaticJobCheckBeforeCreatingJob = settings.RegisterBoolSetting(
+	settings.ApplicationLevel,
+	"sql.stats.automatic_job_check_before_creating_job.enabled",
+	"set to true to perform the autostats job check before creating the job, instead of in the same "+
+		"transaction as creating the job",
+	true)
+
 const nonIndexColHistogramBuckets = 2
 
 // StubTableStats generates "stub" statistics for a table which are missing
@@ -148,18 +155,27 @@ func (n *createStatsNode) runJob(ctx context.Context) error {
 	}
 	details := record.Details.(jobspb.CreateStatsDetails)
 
-	if n.Name != jobspb.AutoStatsName && n.Name != jobspb.AutoPartialStatsName {
+	jobCheckBefore := automaticJobCheckBeforeCreatingJob.Get(n.p.ExecCfg().SV())
+	if (n.Name == jobspb.AutoStatsName || n.Name == jobspb.AutoPartialStatsName) && jobCheckBefore {
+		// Don't start the job if there is already a CREATE STATISTICS job running.
+		// (To handle race conditions we check this again after the job starts,
+		// but this check is used to prevent creating a large number of jobs that
+		// immediately fail).
+		if err := checkRunningJobs(
+			ctx, nil /* job */, n.p, n.Name == jobspb.AutoPartialStatsName, n.p.ExecCfg().JobRegistry,
+			details.Table.ID,
+		); err != nil {
+			return err
+		}
+	} else {
 		telemetry.Inc(sqltelemetry.CreateStatisticsUseCounter)
 	}
 
 	var job *jobs.StartableJob
 	jobID := n.p.ExecCfg().JobRegistry.MakeJobID()
 	if err := n.p.ExecCfg().InternalDB.Txn(ctx, func(ctx context.Context, txn isql.Txn) (err error) {
-		if n.Name == jobspb.AutoStatsName || n.Name == jobspb.AutoPartialStatsName {
+		if (n.Name == jobspb.AutoStatsName || n.Name == jobspb.AutoPartialStatsName) && !jobCheckBefore {
 			// Don't start the job if there is already a CREATE STATISTICS job running.
-			// (To handle race conditions we check this again after the job starts,
-			// but this check is used to prevent creating a large number of jobs that
-			// immediately fail).
 			if err := checkRunningJobsInTxn(
 				ctx, n.p.EvalContext().Settings, jobspb.InvalidJobID, txn,
 			); err != nil {