Commit ae4948f

feat(compute): Add perf test for compute startup time breakdown

1 parent 99639c2 commit ae4948f

3 files changed: +240 −194 lines changed
@@ -0,0 +1,240 @@
from __future__ import annotations

import pytest
import requests
from fixtures.benchmark_fixture import MetricReport, NeonBenchmarker
from fixtures.neon_fixtures import NeonEnvBuilder, PgBin


# Just start and measure duration.
#
# This test runs pretty quickly and can be informative when used in combination
# with emulated network delay. Some useful delay commands:
#
# 1. Add 2msec delay to all localhost traffic
#    `sudo tc qdisc add dev lo root handle 1:0 netem delay 2msec`
#
# 2. Test that it works (you should see 4ms ping)
#    `ping localhost`
#
# 3. Revert back to normal
#    `sudo tc qdisc del dev lo root netem`
#
# NOTE: this test might not represent the real startup time because the basebackup
# for a large database might be larger if there's a lot of transaction metadata,
# or safekeepers might need more syncing, or there might be more operations to
# apply during the config step, like more users, databases, or extensions. By
# default we load the extensions 'neon,pg_stat_statements,timescaledb,pg_cron',
# but in this test we only load neon.
def test_compute_startup_simple(
    neon_env_builder: NeonEnvBuilder,
    zenbenchmark: NeonBenchmarker,
):
    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()

    env.create_branch("test_startup")

    endpoint = None

    # We do two iterations so we can see if the second startup is faster. It
    # should be, because the compute node should already be configured with
    # roles, databases, extensions, etc. from the first run.
    for i in range(2):
        # Start
        with zenbenchmark.record_duration(f"{i}_start_and_select"):
            if endpoint:
                endpoint.start()
            else:
                endpoint = env.endpoints.create(
                    "test_startup",
                    # Shared buffers need to be allocated during startup, so they
                    # impact startup time. This is the default value we use for
                    # 1-CPU pods (maybe different for VMs).
                    #
                    # TODO: extensions also contribute to shared memory allocation,
                    # and this test doesn't include all default extensions we
                    # load.
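                    #
                    # (For reference: 262144 buffers at PostgreSQL's default
                    # 8 kB block size is 2 GiB of shared buffers.)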
                    config_lines=["shared_buffers=262144"],
                )
                # Do not skip pg_catalog updates at first start, i.e.
                # imitate 'the first start after project creation'.
                endpoint.respec(skip_pg_catalog_updates=False)
                endpoint.start()
            endpoint.safe_psql("select 1;")

        # Get metrics
        metrics = requests.get(
            f"http://localhost:{endpoint.external_http_port}/metrics.json"
        ).json()
        durations = {
            "wait_for_spec_ms": f"{i}_wait_for_spec",
            "sync_safekeepers_ms": f"{i}_sync_safekeepers",
            "sync_sk_check_ms": f"{i}_sync_sk_check",
            "basebackup_ms": f"{i}_basebackup",
            "start_postgres_ms": f"{i}_start_postgres",
            "config_ms": f"{i}_config",
            "total_startup_ms": f"{i}_total_startup",
        }
        for key, name in durations.items():
            value = metrics[key]
            zenbenchmark.record(name, value, "ms", report=MetricReport.LOWER_IS_BETTER)

        # Check basebackup size makes sense
        basebackup_bytes = metrics["basebackup_bytes"]
        if i > 0:
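            # 100 * 1024 bytes = 100 KiB; the database is still nearly empty,
            # so the second basebackup should be tiny.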
            assert basebackup_bytes < 100 * 1024

        # Stop so we can restart
        endpoint.stop()

        # Imitate optimizations that the console would do for the second start
        endpoint.respec(skip_pg_catalog_updates=True)

# Start and measure duration with huge SLRU segments.
# This test is similar to test_compute_startup_simple, but it creates a huge
# number of transactions and records containing these XIDs. Autovacuum is
# disabled for the table to prevent CLOG truncation.
# TODO: this is a very suspicious test; I doubt that it does what it's supposed
# to do, e.g. these two starts do not make much sense. Looks like it's just
# copy-paste. To be fixed within https://github.com/neondatabase/cloud/issues/8673
@pytest.mark.timeout(1800)
@pytest.mark.parametrize("slru", ["lazy", "eager"])
def test_compute_ondemand_slru_startup(
    slru: str, neon_env_builder: NeonEnvBuilder, zenbenchmark: NeonBenchmarker
):
    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()

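    # With lazy_slru_download enabled, SLRU segments (e.g. the CLOG) are
    # fetched from the pageserver on demand instead of being shipped in the
    # basebackup.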
    lazy_slru_download = "true" if slru == "lazy" else "false"
    tenant, _ = env.create_tenant(
        conf={
            "lazy_slru_download": lazy_slru_download,
        }
    )

    endpoint = env.endpoints.create_start("main", tenant_id=tenant)
    with endpoint.cursor() as cur:
        cur.execute("CREATE TABLE t (pk integer PRIMARY KEY, x integer)")
        cur.execute("ALTER TABLE t SET (autovacuum_enabled = false)")
        cur.execute("INSERT INTO t VALUES (1, 0)")
        cur.execute(
            """
            CREATE PROCEDURE updating() as
            $$
            DECLARE
                i integer;
            BEGIN
                FOR i IN 1..1000000 LOOP
                    UPDATE t SET x = x + 1 WHERE pk=1;
                    COMMIT;
                END LOOP;
            END
            $$ LANGUAGE plpgsql
            """
        )
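        # Each committed UPDATE consumes one XID, so calling updating() burns
        # ~1,000,000 XIDs and inflates the CLOG.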
cur.execute("SET statement_timeout=0")
137+
cur.execute("call updating()")
138+
139+
endpoint.stop()
140+
141+
# We do two iterations so we can see if the second startup is faster. It should
142+
# be because the compute node should already be configured with roles, databases,
143+
# extensions, etc from the first run.
144+
for i in range(2):
145+
# Start
146+
with zenbenchmark.record_duration(f"{slru}_{i}_start"):
147+
endpoint.start()
148+
149+
with zenbenchmark.record_duration(f"{slru}_{i}_select"):
150+
sum = endpoint.safe_psql("select sum(x) from t")[0][0]
151+
assert sum == 1000000
152+
153+
# Get metrics
154+
metrics = requests.get(
155+
f"http://localhost:{endpoint.external_http_port}/metrics.json"
156+
).json()
157+
durations = {
158+
"wait_for_spec_ms": f"{slru}_{i}_wait_for_spec",
159+
"sync_safekeepers_ms": f"{slru}_{i}_sync_safekeepers",
160+
"sync_sk_check_ms": f"{slru}_{i}_sync_sk_check",
161+
"basebackup_ms": f"{slru}_{i}_basebackup",
162+
"start_postgres_ms": f"{slru}_{i}_start_postgres",
163+
"config_ms": f"{slru}_{i}_config",
164+
"total_startup_ms": f"{slru}_{i}_total_startup",
165+
}
166+
for key, name in durations.items():
167+
value = metrics[key]
168+
zenbenchmark.record(name, value, "ms", report=MetricReport.LOWER_IS_BETTER)
169+
170+
basebackup_bytes = metrics["basebackup_bytes"]
171+
zenbenchmark.record(
172+
f"{slru}_{i}_basebackup_bytes",
173+
basebackup_bytes,
174+
"bytes",
175+
report=MetricReport.LOWER_IS_BETTER,
176+
)
177+
178+
# Stop so we can restart
179+
endpoint.stop()
180+
181+
# Imitate optimizations that console would do for the second start
182+
endpoint.respec(skip_pg_catalog_updates=True)
183+
184+
185+
@pytest.mark.timeout(240)
def test_compute_startup_latency(
    neon_env_builder: NeonEnvBuilder,
    pg_bin: PgBin,
    zenbenchmark: NeonBenchmarker,
):
    """
    Do NUM_STARTS 'optimized' starts, i.e. with pg_catalog updates skipped,
    and measure the duration of each step. Report p50, p90, p99 latencies.
    """
    neon_env_builder.num_safekeepers = 3
    env = neon_env_builder.init_start()

    endpoint = env.endpoints.create_start("main")
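    # pgbench init steps: (d)rop, create (t)ables, (G)enerate data server-side,
    # (v)acuum, create (p)rimary keys; scale factor 4 is 400,000 account rows.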
    pg_bin.run_capture(["pgbench", "-i", "-I", "dtGvp", "-s4", endpoint.connstr()])
    endpoint.stop()

    NUM_STARTS = 100

    durations: dict[str, list[int]] = {
        "sync_sk_check_ms": [],
        "sync_safekeepers_ms": [],
        "basebackup_ms": [],
        "start_postgres_ms": [],
        "total_startup_ms": [],
    }

    for _i in range(NUM_STARTS):
        endpoint.start()
        client = endpoint.http_client()
        metrics = client.metrics_json()
        for key in durations.keys():
            value = metrics[key]
            durations[key].append(value)
        endpoint.stop()

    for key in durations.keys():
        durations[key] = sorted(durations[key])
        zenbenchmark.record(
            f"{key}_p50",
            durations[key][len(durations[key]) // 2],
            "ms",
            report=MetricReport.LOWER_IS_BETTER,
        )
        zenbenchmark.record(
            f"{key}_p90",
            durations[key][len(durations[key]) * 9 // 10],
            "ms",
            report=MetricReport.LOWER_IS_BETTER,
        )
        zenbenchmark.record(
            f"{key}_p99",
            durations[key][len(durations[key]) * 99 // 100],
            "ms",
            report=MetricReport.LOWER_IS_BETTER,
        )
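The percentile reporting above picks the nearest rank from the sorted samples
inline. As a minimal sketch (the `percentile` helper is hypothetical, not part
of this commit), the three record calls could be collapsed like this; for
NUM_STARTS = 100 it selects the same indices as the inline arithmetic:

    def percentile(sorted_values: list[int], fraction: float) -> int:
        # Nearest-rank selection from a list that is already sorted ascending.
        index = min(int(len(sorted_values) * fraction), len(sorted_values) - 1)
        return sorted_values[index]

    for key in durations.keys():
        values = sorted(durations[key])
        for fraction, label in [(0.50, "p50"), (0.90, "p90"), (0.99, "p99")]:
            zenbenchmark.record(
                f"{key}_{label}",
                percentile(values, fraction),
                "ms",
                report=MetricReport.LOWER_IS_BETTER,
            )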

test_runner/performance/test_lazy_startup.py

-110 lines changed. This file was deleted.
