
Commit 4c75164

fix(damgr): failover altda->ethda should keep finalizing l2 chain (#23)
* test(altda): add a test to make sure the altda node keeps finalizing even after failover to ethda. Currently it does not, as shown by the failing test TestAltDA_FinalizationAfterEthDAFailover.
* fix(damgr): ethda failover finalization stall bug. Weiwei from Polymer found this bug and proposed a solution. This is an alternative solution which seems simpler, but I'm not 100% sure of its soundness.
* fix: damgr_test doesn't compile
* ci: add op-alt-da tests to ci
* chore: add more logs to damgr and altda_data_source
* docs(altda_test): fix typo
* test(kt-devnet): add node-keeps-finalizing check to the kurtosis test TestFailoverToEthDACalldata. This makes the test take 4-7 minutes now, which is still acceptable.
* style: rename log -> logger
* docs(kt-tests): document the EnclaveServiceClients
* style: breakup long log line
1 parent 8fdeeb1 commit 4c75164
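
The substance of the fix is in op-alt-da/dastate.go and op-alt-da/damgr.go below: State.Prune now returns the inclusion block of the last pruned commitment instead of storing it on State, and the DA manager only advances its finalized head from that value when a commitment was actually pruned. A rough consolidated sketch of the resulting updateFinalizedHead, assembled from the hunks below (debug logging omitted, not a verbatim copy):

func (d *DA) updateFinalizedHead(l1Finalized eth.L1BlockRef) {
    d.l1FinalizedHead = l1Finalized
    // Prune the state to the finalized head. Prune now returns the inclusion
    // block of the last commitment it removed (zero value if nothing was pruned).
    lastPrunedCommIncBlock := d.state.Prune(l1Finalized.ID())
    // Only move the finalized head when a commitment was actually pruned.
    // After a failover to ethda there are no altda commitments left to prune,
    // so updateFinalizedFromL1 (which runs when d.state.NoCommitments() is true)
    // becomes the main driver of the finalized head instead of it stalling here.
    if lastPrunedCommIncBlock != (eth.L1BlockRef{}) {
        d.finalizedHead = lastPrunedCommIncBlock
    }
}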

File tree

10 files changed: +228, -63 lines


.github/workflows/test-golang.yml (+1)
@@ -50,6 +50,7 @@ jobs:
       packages:
         - op-batcher
         - op-node
+        - op-alt-da
         - op-e2e/system/altda
         - op-e2e/actions/altda
     steps:

kurtosis-devnet/tests/eigenda/failover_test.go (+45, -18)
@@ -4,6 +4,7 @@ import (
     "context"
     "encoding/json"
     "fmt"
+    "log/slog"
     "math/big"
     "net/http"
     "reflect"
@@ -14,9 +15,12 @@ import (

     "github.com/Layr-Labs/eigenda-proxy/clients/memconfig_client"
     "github.com/ethereum-optimism/optimism/op-e2e/e2eutils/geth"
+    "github.com/ethereum-optimism/optimism/op-service/dial"
+    "github.com/ethereum-optimism/optimism/op-service/sources"
+    "github.com/ethereum-optimism/optimism/op-service/testlog"
     "github.com/ethereum/go-ethereum/common"
     "github.com/ethereum/go-ethereum/ethclient"
-    "github.com/ethereum/go-ethereum/rpc"
+    "github.com/ethereum/go-ethereum/log"
     "github.com/kurtosis-tech/kurtosis/api/golang/core/lib/enclaves"
     "github.com/kurtosis-tech/kurtosis/api/golang/engine/lib/kurtosis_context"
     "github.com/stretchr/testify/require"
@@ -35,20 +39,18 @@ const enclaveName = "eigenda-memstore-devnet"
 //
 // Note: because this test relies on modifying the proxy's memstore config, it should be run in isolation.
 // That is, if we ever implement more kurtosis tests, they would currently need to be run sequentially.
-//
-// TODO: We will also need to test the failover behavior of the node, which currently doesn't finalize after failover (fixed in https://github.com/Layr-Labs/optimism/pull/23)
 func TestFailoverToEthDACalldata(t *testing.T) {
     deadline, ok := t.Deadline()
     if !ok {
-        deadline = time.Now().Add(1 * time.Minute)
+        deadline = time.Now().Add(10 * time.Minute)
     }
     ctxWithDeadline, cancel := context.WithDeadline(context.Background(), deadline)
     defer cancel()

     harness := newHarness(t)
     t.Cleanup(func() {
         // switch proxy back to normal mode, in case test gets cancelled
-        ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+        ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
         defer cancel()
         err := harness.clients.proxyMemconfigClient.Failback(ctx)
         if err != nil {
@@ -75,7 +77,7 @@ func TestFailoverToEthDACalldata(t *testing.T) {
     harness.requireBatcherTxsToBeFromLayer(t, fromBlock, fromBlock+l1BlocksQueriedForBatcherTxs, DALayerEigenDA)

     // 2. Failover and check that the commitments are now EthDACalldata
-    t.Logf("Failover over... changing proxy's config to return 503 errors")
+    t.Logf("Failing over... changing proxy's config to return 503 errors")
     err := harness.clients.proxyMemconfigClient.Failover(ctxWithDeadline)
     require.NoError(t, err)

@@ -87,6 +89,16 @@ func TestFailoverToEthDACalldata(t *testing.T) {

     harness.requireBatcherTxsToBeFromLayer(t, afterFailoverFromBlockNum, afterFailoverToBlockNum, DALayerEthCalldata)

+    // We also check that the op-node is still finalizing blocks after the failover
+    syncStatus, err := harness.clients.opNodeClient.SyncStatus(ctxWithDeadline)
+    require.NoError(t, err)
+    afterFailoverFinalizedL2 := syncStatus.FinalizedL2
+    t.Logf("Current finalized L2 block: %d. Waiting for next block to finalize to make sure finalization is still happening.", afterFailoverFinalizedL2.Number)
+    // On average would expect this to take half an epoch, aka 16 L1 blocks, which at 6 sec/block means 1.5 minutes.
+    // This generally takes longer (3-6 minutes), but I'm not quite sure why.
+    _, err = geth.WaitForBlockToBeFinalized(new(big.Int).SetUint64(afterFailoverFinalizedL2.Number+1), harness.clients.opGethClient, 6*time.Minute)
+    require.NoError(t, err, "op-node should still be finalizing blocks after failover")
+
     // 3. Failback and check that the commitments are EigenDA again
     t.Logf("Failing back... changing proxy's config to start processing PUT requests normally again")
     err = harness.clients.proxyMemconfigClient.Failback(ctxWithDeadline)
@@ -105,13 +117,16 @@ func TestFailoverToEthDACalldata(t *testing.T) {
 // Test Harness, which contains all the state needed to run the tests.
 // harness also defines some higher-level "require" methods that are used in the tests.
 type harness struct {
+    logger              log.Logger
     endpoints           *EnclaveServicePublicEndpoints
     clients             *EnclaveServiceClients
     batchInboxAddr      common.Address
     testStartL1BlockNum uint64
 }

 func newHarness(t *testing.T) *harness {
+    logger := testlog.Logger(t, slog.LevelInfo)
+
     // We leave 20 seconds to build the entire testHarness.
     ctxWithTimeout, cancel := context.WithTimeout(context.Background(), 20*time.Second)
     defer cancel()
@@ -128,24 +143,22 @@ func newHarness(t *testing.T) *harness {
     require.NoError(t, err)
     t.Logf("Endpoints: %+v", endpoints)

-    clients, err := getClientsFromEndpoints(endpoints)
+    clients, err := getClientsFromEndpoints(ctxWithTimeout, logger, endpoints)
     require.NoError(t, err)

-    // Get the batch inbox address
-    var rollupConfigMap struct {
-        BatchInboxAddress string `json:"batch_inbox_address"`
-    }
-    err = clients.opNodeClient.CallContext(ctxWithTimeout, &rollupConfigMap, "optimism_rollupConfig")
+    // Get the batch inbox address from the rollup config
+    rollupConfig, err := clients.opNodeClient.RollupConfig(ctxWithTimeout)
     require.NoError(t, err)

     // Get the current L1 block number
     testStartL1BlockNum, err := clients.gethL1Client.BlockNumber(ctxWithTimeout)
     require.NoError(t, err)

     return &harness{
+        logger:              logger,
         endpoints:           endpoints,
         clients:             clients,
-        batchInboxAddr:      common.HexToAddress(rollupConfigMap.BatchInboxAddress),
+        batchInboxAddr:      rollupConfig.BatchInboxAddress,
         testStartL1BlockNum: testStartL1BlockNum,
     }
 }
@@ -315,6 +328,7 @@ func fetchBatcherTxs(gethL1Endpoint string, batchInbox string, fromBlockNum, toB
 // The public endpoints are the ones that are exposed to the host machine.
 type EnclaveServicePublicEndpoints struct {
     OpNodeEndpoint       string `kurtosis:"op-cl-1-op-node-op-geth-op-kurtosis,http"`
+    OpGethEndpoint       string `kurtosis:"op-el-1-op-geth-op-node-op-kurtosis,rpc"`
     GethL1Endpoint       string `kurtosis:"el-1-geth-teku,rpc"`
     EigendaProxyEndpoint string `kurtosis:"da-server-op-kurtosis,http"`
     // Adding new endpoints is as simple as adding a new field with a kurtosis tag
@@ -374,17 +388,29 @@ func getPublicEndpointsFromKurtosis(enclaveCtx *enclaves.EnclaveContext) (*Encla
 }

 type EnclaveServiceClients struct {
-    opNodeClient         *rpc.Client
-    gethL1Client         *ethclient.Client
+    // opNode and opGeth are the L2 clients for the rollup.
+    opNodeClient *sources.RollupClient
+    // opGeth is the client for the L2 execution layer client.
+    opGethClient *ethclient.Client
+    // gethL1 is the client for the L1 chain execution layer client.
+    gethL1Client *ethclient.Client
+    // proxyMemconfigClient is the client for the eigenda-proxy's memstore config API.
+    // It allows us to toggle the proxy's failover behavior.
     proxyMemconfigClient *ProxyMemconfigClient
 }

-func getClientsFromEndpoints(endpoints *EnclaveServicePublicEndpoints) (*EnclaveServiceClients, error) {
-    opNodeClient, err := rpc.Dial(endpoints.OpNodeEndpoint)
+func getClientsFromEndpoints(ctx context.Context, logger log.Logger, endpoints *EnclaveServicePublicEndpoints) (*EnclaveServiceClients, error) {
+    opNodeClient, err := dial.DialRollupClientWithTimeout(ctx, 10*time.Second, logger, endpoints.OpNodeEndpoint)
+    if err != nil {
+        return nil, fmt.Errorf("dial.DialRollupClientWithTimeout: %w", err)
+    }
+
+    opGethClient, err := dial.DialEthClientWithTimeout(ctx, 10*time.Second, logger, endpoints.OpGethEndpoint)
     if err != nil {
-        return nil, fmt.Errorf("rpc.Dial: %w", err)
+        return nil, fmt.Errorf("dial.DialEthClientWithTimeout: %w", err)
     }

+    // TODO: prob also change to use dial.DialEthClient?
     gethL1Client, err := ethclient.Dial(endpoints.GethL1Endpoint)
     if err != nil {
         return nil, fmt.Errorf("ethclient.Dial: %w", err)
@@ -396,6 +422,7 @@ func getClientsFromEndpoints(endpoints *EnclaveServicePublicEndpoints) (*Enclave

     return &EnclaveServiceClients{
         opNodeClient:         opNodeClient,
+        opGethClient:         opGethClient,
         gethL1Client:         gethL1Client,
         proxyMemconfigClient: proxyMemconfigClient,
     }, nil
op-alt-da/damgr.go (+14, -2)
@@ -117,8 +117,18 @@ func (d *DA) OnFinalizedHeadSignal(f HeadSignalFn) {
 func (d *DA) updateFinalizedHead(l1Finalized eth.L1BlockRef) {
     d.l1FinalizedHead = l1Finalized
     // Prune the state to the finalized head
-    d.state.Prune(l1Finalized.ID())
-    d.finalizedHead = d.state.lastPrunedCommitment
+    lastPrunedCommIncBlock := d.state.Prune(l1Finalized.ID())
+    d.log.Debug("updateFinalizedHead",
+        "currFinalizedHead", d.finalizedHead.Number,
+        "lastPrunedCommIncBlock", lastPrunedCommIncBlock.Number,
+        "l1Finalized", l1Finalized.Number)
+    // If a commitment was pruned, set the finalized head to that commitment's inclusion block
+    // When no commitments are left to be pruned (one example is if we have failed over to ethda)
+    // then updateFinalizedFromL1 becomes the main driver of the finalized head.
+    // Note that updateFinalizedFromL1 is only called when d.state.NoCommitments() is true.
+    if lastPrunedCommIncBlock != (eth.L1BlockRef{}) {
+        d.finalizedHead = lastPrunedCommIncBlock
+    }
 }

 // updateFinalizedFromL1 updates the finalized head based on the challenge window.
@@ -133,6 +143,7 @@ func (d *DA) updateFinalizedFromL1(ctx context.Context, l1 L1Fetcher) error {
     if err != nil {
         return err
     }
+    d.log.Debug("updateFinalizedFromL1", "currFinalizedHead", d.finalizedHead.Number, "newFinalizedHead", ref.Number, "l1FinalizedHead", d.l1FinalizedHead.Number, "challengeWindow", d.cfg.ChallengeWindow)
     d.finalizedHead = ref
     return nil
 }
@@ -413,6 +424,7 @@ func (d *DA) fetchChallengeLogs(ctx context.Context, l1 L1Fetcher, block eth.Blo
     }
     for _, log := range rec.Logs {
         if log.Address == d.cfg.DAChallengeContractAddress && len(log.Topics) > 0 && log.Topics[0] == ChallengeStatusEventABIHash {
+            d.log.Info("found challenge event", "block", block.Number, "log", log.Index)
             logs = append(logs, log)
         }
     }

op-alt-da/damgr_test.go (+14, -14)
@@ -53,12 +53,12 @@ func TestFinalization(t *testing.T) {
     require.NoError(t, state.ExpireCommitments(bID(8)))
     require.Empty(t, state.commitments)

-    state.Prune(bID(bn1))
-    require.Equal(t, eth.L1BlockRef{}, state.lastPrunedCommitment)
-    state.Prune(bID(7))
-    require.Equal(t, eth.L1BlockRef{}, state.lastPrunedCommitment)
-    state.Prune(bID(8))
-    require.Equal(t, l1Ref(bn1), state.lastPrunedCommitment)
+    lastPrunedCommitment := state.Prune(bID(bn1))
+    require.Equal(t, eth.L1BlockRef{}, lastPrunedCommitment)
+    lastPrunedCommitment = state.Prune(bID(7))
+    require.Equal(t, eth.L1BlockRef{}, lastPrunedCommitment)
+    lastPrunedCommitment = state.Prune(bID(8))
+    require.Equal(t, l1Ref(bn1), lastPrunedCommitment)

     // Track a commitment, challenge it, & then resolve it
     c2 := RandomCommitment(rng)
@@ -83,12 +83,12 @@ func TestFinalization(t *testing.T) {
     require.Empty(t, state.challenges)

     // Now finalize everything
-    state.Prune(bID(20))
-    require.Equal(t, l1Ref(bn1), state.lastPrunedCommitment)
-    state.Prune(bID(28))
-    require.Equal(t, l1Ref(bn1), state.lastPrunedCommitment)
-    state.Prune(bID(32))
-    require.Equal(t, l1Ref(bn2), state.lastPrunedCommitment)
+    lastPrunedCommitment = state.Prune(bID(20))
+    require.Equal(t, eth.L1BlockRef{}, lastPrunedCommitment)
+    lastPrunedCommitment = state.Prune(bID(28))
+    require.Equal(t, eth.L1BlockRef{}, lastPrunedCommitment)
+    lastPrunedCommitment = state.Prune(bID(32))
+    require.Equal(t, l1Ref(bn2), lastPrunedCommitment)
 }

 // TestExpireChallenges expires challenges and prunes the state for longer windows
@@ -175,8 +175,8 @@ func TestDAChallengeDetached(t *testing.T) {
     require.ErrorIs(t, err, ErrReorgRequired)

     // pruning finalized block is safe. It should not prune any commitments yet.
-    state.Prune(bID(1))
-    require.Equal(t, eth.L1BlockRef{}, state.lastPrunedCommitment)
+    lastPrunedCommitment := state.Prune(bID(1))
+    require.Equal(t, eth.L1BlockRef{}, lastPrunedCommitment)

     // Perform reorg back to bn2
     state.ClearCommitments()

op-alt-da/damock.go (+6)
@@ -48,6 +48,8 @@ func (c *MockDAClient) DeleteData(key []byte) error {
     return c.store.Delete(key)
 }

+// DAErrFaker is a DA client that can be configured to return errors on GetInput
+// and SetInput calls.
 type DAErrFaker struct {
     Client *MockDAClient

@@ -109,6 +111,10 @@ func (d *AltDADisabled) AdvanceL1Origin(ctx context.Context, l1 L1Fetcher, block
 // - request latencies, to mimic a DA service with slow responses
 //   (eg. eigenDA with 10 min batching interval).
 // - response status codes, to mimic a DA service that is down.
+//
+// We use this FakeDaServer as opposed to the DAErrFaker client in the op-e2e altda system tests
+// because the batcher service only has a constructor to build from CLI flags (no dependency injection),
+// meaning the da client is built from an rpc url config instead of being injected.
 type FakeDAServer struct {
     *DAServer
     putRequestLatency time.Duration
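
The comment added above explains why failover has to be simulated at the server level in the op-e2e altda system tests: the batcher builds its DA client from an RPC URL, so there is nothing to inject. As a generic, hypothetical illustration of that technique (not the actual FakeDAServer code; the route and names below are made up), an HTTP DA server can simply be toggled to answer PUT requests with 503s:

package fakeda_sketch

import (
    "net/http"
    "net/http/httptest"
    "sync/atomic"
)

// newToggleableDAServer returns a test server and a flag; setting the flag makes
// the PUT route return 503, which is what pushes an altda batcher to fail over.
func newToggleableDAServer() (*httptest.Server, *atomic.Bool) {
    failPuts := new(atomic.Bool)
    mux := http.NewServeMux()
    mux.HandleFunc("/put/", func(w http.ResponseWriter, r *http.Request) {
        if failPuts.Load() {
            // Mimic a DA service that is down.
            http.Error(w, "service unavailable", http.StatusServiceUnavailable)
            return
        }
        // A real server would store the payload and return a commitment here.
        w.WriteHeader(http.StatusOK)
    })
    return httptest.NewServer(mux), failPuts
}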

op-alt-da/dastate.go (+16, -13)
@@ -52,15 +52,14 @@ func challengeKey(comm CommitmentData, inclusionBlockNumber uint64) string {
 // In the special case of a L2 reorg, challenges are still tracked but commitments are removed.
 // This will allow the altDA fetcher to find the expired challenge.
 type State struct {
-    commitments          []Commitment          // commitments where the challenge/resolve period has not expired yet
-    expiredCommitments   []Commitment          // commitments where the challenge/resolve period has expired but not finalized
-    challenges           []*Challenge          // challenges ordered by L1 inclusion
-    expiredChallenges    []*Challenge          // challenges ordered by L1 inclusion
-    challengesMap        map[string]*Challenge // challenges by serialized comm + block number for easy lookup
-    lastPrunedCommitment eth.L1BlockRef        // the last commitment to be pruned
-    cfg                  Config
-    log                  log.Logger
-    metrics              Metricer
+    commitments        []Commitment          // commitments where the challenge/resolve period has not expired yet
+    expiredCommitments []Commitment          // commitments where the challenge/resolve period has expired but not finalized
+    challenges         []*Challenge          // challenges ordered by L1 inclusion
+    expiredChallenges  []*Challenge          // challenges ordered by L1 inclusion
+    challengesMap      map[string]*Challenge // challenges by serialized comm + block number for easy lookup
+    cfg                Config
+    log                log.Logger
+    metrics            Metricer
 }

 func NewState(log log.Logger, m Metricer, cfg Config) *State {
@@ -207,15 +206,18 @@ func (s *State) ExpireChallenges(origin eth.BlockID) {
 }

 // Prune removes challenges & commitments which have an expiry block number beyond the given block number.
-func (s *State) Prune(origin eth.BlockID) {
+// It returns the last pruned commitment's inclusion block number, or eth.L1BlockRef{} if no commitments were pruned.
+func (s *State) Prune(origin eth.BlockID) eth.L1BlockRef {
     // Commitments rely on challenges, so we prune commitments first.
-    s.pruneCommitments(origin)
+    lastPrunedCommIncBlock := s.pruneCommitments(origin)
     s.pruneChallenges(origin)
+    return lastPrunedCommIncBlock
 }

 // pruneCommitments removes commitments which have are beyond a given block number.
 // It will remove commitments in order of inclusion until it finds a commitment which is not beyond the given block number.
-func (s *State) pruneCommitments(origin eth.BlockID) {
+func (s *State) pruneCommitments(origin eth.BlockID) eth.L1BlockRef {
+    var lastPrunedCommIncBlock eth.L1BlockRef
     for len(s.expiredCommitments) > 0 {
         c := s.expiredCommitments[0]
         challenge, ok := s.GetChallenge(c.data, c.inclusionBlock.Number)
@@ -236,8 +238,9 @@ func (s *State) pruneCommitments(origin eth.BlockID) {
         s.expiredCommitments = s.expiredCommitments[1:]

         // Record the latest inclusion block to be returned
-        s.lastPrunedCommitment = c.inclusionBlock
+        lastPrunedCommIncBlock = c.inclusionBlock
     }
+    return lastPrunedCommIncBlock
 }

 // pruneChallenges removes challenges which have are beyond a given block number.
