Skip to content

Commit 37e4d80

Browse files
LexLuthrwillscottjacobheundirkmcnonsense
authored
feat: data segment indexing (#1739)
feat: support for podsi indexing --------- Co-authored-by: Will <[email protected]> Co-authored-by: Jacob Heun <[email protected]> Co-authored-by: dirkmc <[email protected]> Co-authored-by: Anton Evangelatov <[email protected]> Co-authored-by: Jacob Heun <[email protected]> Co-authored-by: Rod Vagg <[email protected]> Co-authored-by: Łukasz Magiera <[email protected]> Co-authored-by: Łukasz Magiera <[email protected]> Co-authored-by: Hannah Howard <[email protected]> Co-authored-by: gammazero <[email protected]> Co-authored-by: Adin Schmahmann <[email protected]> Co-authored-by: Masih H. Derkani <[email protected]> Co-authored-by: Ivan Schasny <[email protected]> Co-authored-by: Ivan Schasny <[email protected]>
1 parent e9d18ac commit 37e4d80

31 files changed

+1378
-166
lines changed

.circleci/config.yml

+5
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,11 @@ workflows:
353353
suite: all
354354
target: "`go list ./... | grep -v boost/itests | grep -v cmd/booster-http | grep -v cmd/booster-bitswap`"
355355

356+
- test:
357+
name: test-itest-data-segment-index
358+
suite: itest-data-segment-index
359+
target: "./itests/data_segment_index_retrieval_test.go"
360+
356361
- test:
357362
name: test-itest-ipni
358363
suite: itest-ipni

car/multi_reader_at.go

+81
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
package car
2+
3+
import (
4+
"io"
5+
"sort"
6+
)
7+
8+
func NewMultiReaderAt(parts ...ReaderAtSize) io.ReaderAt {
9+
m := &multiReaderAt{
10+
parts: make([]readerAtOffset, 0, len(parts)),
11+
}
12+
var off int64
13+
for _, p := range parts {
14+
rao := readerAtOffset{off, p}
15+
m.parts = append(m.parts, rao)
16+
off += rao.Size()
17+
}
18+
m.size = off
19+
return m
20+
}
21+
22+
type ReaderAtSize interface {
23+
io.ReaderAt
24+
Size() int64
25+
}
26+
27+
type readerAtOffset struct {
28+
off int64
29+
ReaderAtSize
30+
}
31+
32+
type multiReaderAt struct {
33+
parts []readerAtOffset
34+
size int64
35+
}
36+
37+
func (m *multiReaderAt) Size() int64 {
38+
return m.size
39+
}
40+
41+
func (m *multiReaderAt) ReadAt(p []byte, off int64) (n int, err error) {
42+
wantN := len(p)
43+
44+
// Skip past the requested offset.
45+
skipParts := sort.Search(len(m.parts), func(i int) bool {
46+
// This function returns whether parts[i] will
47+
// contribute any bytes to our output.
48+
part := m.parts[i]
49+
return part.off+part.Size() > off
50+
})
51+
parts := m.parts[skipParts:]
52+
53+
// How far to skip in the first part.
54+
needSkip := off
55+
if len(parts) > 0 {
56+
needSkip -= parts[0].off
57+
}
58+
59+
for len(parts) > 0 && len(p) > 0 {
60+
readP := p
61+
partSize := parts[0].Size()
62+
if int64(len(readP)) > partSize-needSkip {
63+
readP = readP[:partSize-needSkip]
64+
}
65+
pn, err0 := parts[0].ReadAt(readP, needSkip)
66+
if err0 != nil {
67+
return n, err0
68+
}
69+
n += pn
70+
p = p[pn:]
71+
if int64(pn)+needSkip == partSize {
72+
parts = parts[1:]
73+
}
74+
needSkip = 0
75+
}
76+
77+
if n != wantN {
78+
err = io.ErrUnexpectedEOF
79+
}
80+
return
81+
}

car/multi_reader_at_test.go

+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
package car
2+
3+
import (
4+
"bytes"
5+
"crypto/rand"
6+
"fmt"
7+
"io"
8+
mrand "math/rand"
9+
"strings"
10+
"testing"
11+
12+
"github.com/stretchr/testify/require"
13+
)
14+
15+
func TestMultiReaderAt(t *testing.T) {
16+
req := require.New(t)
17+
18+
sourceData := make([]byte, 4<<20)
19+
_, err := rand.Read(sourceData)
20+
req.NoError(err)
21+
22+
testRead := func(t *testing.T, mra io.ReaderAt, readLen int, pos int) {
23+
// t.Logf("testRead() readLen=%d pos=%d", readLen, pos)
24+
req := require.New(t)
25+
readData := make([]byte, readLen)
26+
n, err := mra.ReadAt(readData, int64(pos))
27+
req.NoError(err)
28+
req.Equal(readLen, n)
29+
req.True(bytes.Equal(sourceData[pos:pos+readLen], readData))
30+
}
31+
32+
for _, testCase := range [][]int{
33+
{1},
34+
{8},
35+
{10},
36+
{1000},
37+
{1024},
38+
{2000},
39+
{1 << 20},
40+
{10, 10},
41+
{1 << 20, 1 << 20},
42+
{10, 10, 10},
43+
{1 << 20, 1 << 20, 1 << 20},
44+
{1, 1, 1, 1, 1},
45+
{8, 1, 8, 1, 8},
46+
{1000, 8, 10, 1000},
47+
{1000, 2000, 2000, 1000},
48+
{1000, 2000, 2000, 8, 1000},
49+
{8, 2000, 1024, 1 << 20, 1000},
50+
} {
51+
var sb strings.Builder
52+
for ii, sz := range testCase {
53+
if ii > 0 {
54+
sb.WriteString("_")
55+
}
56+
sb.WriteString(fmt.Sprintf("%d", sz))
57+
}
58+
59+
t.Run(sb.String(), func(t *testing.T) {
60+
testLen := 0
61+
ra := make([]ReaderAtSize, len(testCase))
62+
for ii, sz := range testCase {
63+
ra[ii] = bytes.NewReader(sourceData[testLen : testLen+sz])
64+
testLen += sz
65+
}
66+
mra := NewMultiReaderAt(ra...)
67+
// read all
68+
testRead(t, mra, testLen, 0)
69+
// read at random positions
70+
for ii := 0; ii < 100; ii++ {
71+
pos := mrand.Intn(testLen)
72+
readLen := mrand.Intn(testLen - pos)
73+
testRead(t, mra, readLen, pos)
74+
}
75+
// read blocks
76+
off := 0
77+
for _, sz := range testCase {
78+
testRead(t, mra, sz, off)
79+
off += sz
80+
}
81+
// read just outsize of blocks
82+
off = 0
83+
for ii, sz := range testCase {
84+
pos := off
85+
rd := sz
86+
if ii > 0 {
87+
rd++
88+
off--
89+
}
90+
if off < testLen {
91+
rd++
92+
}
93+
if rd > testLen-pos {
94+
rd = testLen - pos
95+
}
96+
testRead(t, mra, rd, pos)
97+
off += sz
98+
}
99+
})
100+
}
101+
}

cmd/boostd/recover.go

+6-29
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"github.com/filecoin-project/boost/extern/boostd-data/model"
2121
"github.com/filecoin-project/boost/node/config"
2222
"github.com/filecoin-project/boost/piecedirectory"
23+
"github.com/filecoin-project/dagstore/mount"
2324
"github.com/filecoin-project/go-address"
2425
"github.com/filecoin-project/go-commp-utils/writer"
2526
"github.com/filecoin-project/go-jsonrpc"
@@ -34,7 +35,6 @@ import (
3435
"github.com/google/uuid"
3536
"github.com/ipfs/go-cid"
3637
"github.com/ipfs/go-cidutil/cidenc"
37-
carv2 "github.com/ipld/go-car/v2"
3838
"github.com/mitchellh/go-homedir"
3939
"github.com/multiformats/go-multibase"
4040
"github.com/urfave/cli/v2"
@@ -467,7 +467,7 @@ func (dr *DisasterRecovery) CompleteSector(s abi.SectorNumber) error {
467467
}
468468

469469
// safeUnsealSector tries to return a reader to an unsealed sector or times out
470-
func safeUnsealSector(ctx context.Context, sectorid abi.SectorNumber, offset abi.UnpaddedPieceSize, piecesize abi.PaddedPieceSize) (io.ReadCloser, bool, error) {
470+
func safeUnsealSector(ctx context.Context, sectorid abi.SectorNumber, offset abi.UnpaddedPieceSize, piecesize abi.PaddedPieceSize) (mount.Reader, bool, error) {
471471
mid, _ := address.IDFromAddress(maddr)
472472

473473
sid := abi.SectorID{
@@ -480,7 +480,7 @@ func safeUnsealSector(ctx context.Context, sectorid abi.SectorNumber, offset abi
480480
logger.Errorw("storage find sector", "err", err)
481481
}
482482

483-
var reader io.ReadCloser
483+
var reader mount.Reader
484484
var isUnsealed bool
485485

486486
done := make(chan struct{})
@@ -513,7 +513,7 @@ func safeUnsealSector(ctx context.Context, sectorid abi.SectorNumber, offset abi
513513
logger.Debugw("sa.IsUnsealed return true", "sector", sectorid)
514514

515515
go func() {
516-
reader, err = sa.UnsealSector(ctx, sectorid, offset, piecesize.Unpadded())
516+
reader, err = sa.UnsealSectorAt(ctx, sectorid, offset, piecesize.Unpadded())
517517
if err != nil {
518518
logger.Errorw("sa.UnsealSector return error", "sector", sectorid, "err", err)
519519
return
@@ -580,25 +580,12 @@ func processPiece(ctx context.Context, sectorid abi.SectorNumber, chainDealID ab
580580
if err != nil {
581581
return err
582582
}
583+
defer reader.Close()
583584
if !isUnsealed {
584585
return fmt.Errorf("sector %d is not unsealed", sid)
585586
}
586587

587588
dr.Sectors[sid].Deals[cdi].IsUnsealed = true
588-
589-
readerAt := reader.(Reader)
590-
591-
opts := []carv2.Option{carv2.ZeroLengthSectionAsEOF(true)}
592-
rr, err := carv2.NewReader(readerAt, opts...)
593-
if err != nil {
594-
return err
595-
}
596-
597-
drr, err := rr.DataReader()
598-
if err != nil {
599-
return err
600-
}
601-
602589
dr.Sectors[sid].Deals[cdi].GotDataReader = true
603590

604591
if !ignoreLID { // populate LID
@@ -663,7 +650,7 @@ func processPiece(ctx context.Context, sectorid abi.SectorNumber, chainDealID ab
663650

664651
if !ignoreCommp { // commp over data reader
665652
w := &writer.Writer{}
666-
_, err = io.CopyBuffer(w, drr, make([]byte, writer.CommPBuf))
653+
_, err = io.CopyBuffer(w, reader, make([]byte, writer.CommPBuf))
667654
if err != nil {
668655
return fmt.Errorf("copy into commp writer: %w", err)
669656
}
@@ -676,9 +663,6 @@ func processPiece(ctx context.Context, sectorid abi.SectorNumber, chainDealID ab
676663
encoder := cidenc.Encoder{Base: multibase.MustNewEncoder(multibase.Base32)}
677664
_ = encoder
678665

679-
//fmt.Println("CommP CID: ", encoder.Encode(commp.PieceCID))
680-
//fmt.Println("Piece size: ", types.NewInt(uint64(commp.PieceSize.Unpadded().Padded())))
681-
682666
if !commp.PieceCID.Equals(piececid) {
683667
return fmt.Errorf("calculated commp doesnt match on-chain data, expected %s, got %s", piececid, commp.PieceCID)
684668
}
@@ -782,13 +766,6 @@ func getActorAddress(ctx context.Context, cctx *cli.Context) (maddr address.Addr
782766
return maddr, nil
783767
}
784768

785-
type Reader interface {
786-
io.Closer
787-
io.Reader
788-
io.ReaderAt
789-
io.Seeker
790-
}
791-
792769
func createLogger(logPath string) (*zap.SugaredLogger, error) {
793770
logCfg := zap.NewDevelopmentConfig()
794771
logCfg.OutputPaths = []string{"stdout", logPath}

cmd/booster-http/e2e_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ func TestE2E(t *testing.T) {
4040
framework.SetLogLevel()
4141

4242
t.Log("Starting boost and miner")
43-
boostAndMiner := framework.NewTestFramework(ctx, t, framework.EnableLegacyDeals(true))
44-
req.NoError(boostAndMiner.Start(framework.WithMaxStagingDealsBytes(40000000)))
43+
boostAndMiner := framework.NewTestFramework(ctx, t, framework.EnableLegacyDeals(true), framework.SetMaxStagingBytes(10485760))
44+
req.NoError(boostAndMiner.Start())
4545
defer boostAndMiner.Stop()
4646

4747
req.NoError(boostAndMiner.AddClientProviderBalance(abi.NewTokenAmount(1e15)))

cmd/booster-http/trustless_gateway_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ func TestTrustlessGateway(t *testing.T) {
3131
kit.QuietMiningLogs()
3232
framework.SetLogLevel()
3333

34-
boostAndMiner := framework.NewTestFramework(ctx, t, framework.EnableLegacyDeals(true))
35-
req.NoError(boostAndMiner.Start(framework.WithMaxStagingDealsBytes(40000000)))
34+
boostAndMiner := framework.NewTestFramework(ctx, t, framework.EnableLegacyDeals(true), framework.SetMaxStagingBytes(10485760))
35+
req.NoError(boostAndMiner.Start())
3636
defer boostAndMiner.Stop()
3737

3838
req.NoError(boostAndMiner.AddClientProviderBalance(abi.NewTokenAmount(1e15)))

go.mod

+7-8
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ require (
2121
github.com/filecoin-project/go-commp-utils v0.1.4
2222
github.com/filecoin-project/go-data-transfer v1.15.4-boost
2323
github.com/filecoin-project/go-fil-commcid v0.1.0
24-
github.com/filecoin-project/go-fil-commp-hashhash v0.1.0
24+
github.com/filecoin-project/go-fil-commp-hashhash v0.2.0
25+
github.com/filecoin-project/go-fil-markets v1.28.3
2526
github.com/filecoin-project/go-jsonrpc v0.3.1
2627
github.com/filecoin-project/go-padreader v0.0.1
2728
github.com/filecoin-project/go-paramfetch v0.0.4
@@ -50,7 +51,7 @@ require (
5051
github.com/ipfs/go-ipfs-files v0.3.0 // indirect
5152
github.com/ipfs/go-ipld-format v0.6.0
5253
github.com/ipfs/go-ipld-legacy v0.2.1
53-
github.com/ipfs/go-libipfs v0.7.0 // indirect
54+
github.com/ipfs/go-libipfs v0.7.0
5455
github.com/ipfs/go-log/v2 v2.5.1
5556
github.com/ipfs/go-merkledag v0.11.0 // indirect
5657
github.com/ipfs/go-metrics-interface v0.0.1
@@ -201,7 +202,6 @@ require (
201202
github.com/ipfs/bbloom v0.0.4 // indirect
202203
github.com/ipfs/go-bitfield v1.1.0 // indirect
203204
github.com/ipfs/go-ds-badger2 v0.1.3 // indirect
204-
github.com/ipfs/go-ds-leveldb v0.5.0
205205
github.com/ipfs/go-ds-measure v0.2.0 // indirect
206206
github.com/ipfs/go-fs-lock v0.0.7 // indirect
207207
github.com/ipfs/go-ipfs-cmds v0.10.0 // indirect
@@ -321,10 +321,11 @@ require (
321321
github.com/filecoin-project/boost-gfm v1.26.7
322322
github.com/filecoin-project/boost-graphsync v0.13.9
323323
github.com/filecoin-project/boost/extern/boostd-data v0.0.0-20231124125934-3233c510357f
324+
github.com/filecoin-project/go-data-segment v0.0.1
324325
github.com/filecoin-project/go-data-transfer/v2 v2.0.0-rc7
325-
github.com/filecoin-project/go-fil-markets v1.28.3
326326
github.com/filecoin-project/lotus v1.25.0
327327
github.com/ipfs/boxo v0.12.0
328+
github.com/ipfs/go-ds-leveldb v0.5.0
328329
github.com/ipfs/kubo v0.22.0
329330
github.com/ipld/frisbii v0.4.1
330331
github.com/ipld/go-fixtureplate v0.0.2
@@ -336,13 +337,11 @@ require (
336337
github.com/schollz/progressbar/v3 v3.13.1
337338
)
338339

339-
require (
340-
github.com/alexbrainman/goissue34681 v0.0.0-20191006012335-3fc7a47baff5 // indirect
341-
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
342-
)
340+
require github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
343341

344342
require (
345343
github.com/Jorropo/jsync v1.0.1 // indirect
344+
github.com/alexbrainman/goissue34681 v0.0.0-20191006012335-3fc7a47baff5 // indirect
346345
github.com/filecoin-project/kubo-api-client v0.0.2-0.20230829103503-14448166d14d // indirect
347346
github.com/gammazero/channelqueue v0.2.1 // indirect
348347
github.com/gammazero/deque v0.2.1 // indirect

go.sum

+4-2
Original file line numberDiff line numberDiff line change
@@ -347,6 +347,8 @@ github.com/filecoin-project/go-commp-utils/nonffi v0.0.0-20220905160352-62059082
347347
github.com/filecoin-project/go-crypto v0.0.0-20191218222705-effae4ea9f03/go.mod h1:+viYnvGtUTgJRdy6oaeF4MTFKAfatX071MPDPBL11EQ=
348348
github.com/filecoin-project/go-crypto v0.0.1 h1:AcvpSGGCgjaY8y1az6AMfKQWreF/pWO2JJGLl6gCq6o=
349349
github.com/filecoin-project/go-crypto v0.0.1/go.mod h1:+viYnvGtUTgJRdy6oaeF4MTFKAfatX071MPDPBL11EQ=
350+
github.com/filecoin-project/go-data-segment v0.0.1 h1:1wmDxOG4ubWQm3ZC1XI5nCon5qgSq7Ra3Rb6Dbu10Gs=
351+
github.com/filecoin-project/go-data-segment v0.0.1/go.mod h1:H0/NKbsRxmRFBcLibmABv+yFNHdmtl5AyplYLnb0Zv4=
350352
github.com/filecoin-project/go-data-transfer v1.15.4-boost h1:rGsPDeDk0nbzLOPn/9iCIrhLNy69Vkr9tRBcetM4kd0=
351353
github.com/filecoin-project/go-data-transfer v1.15.4-boost/go.mod h1:S5Es9uoD+3TveYyGjxZInAF6mSQtRjNzezV7Y7Sh8X0=
352354
github.com/filecoin-project/go-data-transfer/v2 v2.0.0-rc7 h1:v+zJS5B6pA3ptWZS4t8tbt1Hz9qENnN4nVr1w99aSWc=
@@ -357,8 +359,8 @@ github.com/filecoin-project/go-fil-commcid v0.0.0-20200716160307-8f644712406f/go
357359
github.com/filecoin-project/go-fil-commcid v0.0.0-20201016201715-d41df56b4f6a/go.mod h1:Eaox7Hvus1JgPrL5+M3+h7aSPHc0cVqpSxA+TxIEpZQ=
358360
github.com/filecoin-project/go-fil-commcid v0.1.0 h1:3R4ds1A9r6cr8mvZBfMYxTS88OqLYEo6roi+GiIeOh8=
359361
github.com/filecoin-project/go-fil-commcid v0.1.0/go.mod h1:Eaox7Hvus1JgPrL5+M3+h7aSPHc0cVqpSxA+TxIEpZQ=
360-
github.com/filecoin-project/go-fil-commp-hashhash v0.1.0 h1:imrrpZWEHRnNqqv0tN7LXep5bFEVOVmQWHJvl2mgsGo=
361-
github.com/filecoin-project/go-fil-commp-hashhash v0.1.0/go.mod h1:73S8WSEWh9vr0fDJVnKADhfIv/d6dCbAGaAGWbdJEI8=
362+
github.com/filecoin-project/go-fil-commp-hashhash v0.2.0 h1:HYIUugzjq78YvV3vC6rL95+SfC/aSTVSnZSZiDV5pCk=
363+
github.com/filecoin-project/go-fil-commp-hashhash v0.2.0/go.mod h1:VH3fAFOru4yyWar4626IoS5+VGE8SfZiBODJLUigEo4=
362364
github.com/filecoin-project/go-fil-markets v1.28.3 h1:2cFu7tLZYrfNz4LnxjgERaVD7k5+Wwp0H76mnnTGPBk=
363365
github.com/filecoin-project/go-fil-markets v1.28.3/go.mod h1:eryxo/oVgIxaR5g5CNr9PlvZOi+u/bak0IsPL/PT1hk=
364366
github.com/filecoin-project/go-hamt-ipld v0.1.5 h1:uoXrKbCQZ49OHpsTCkrThPNelC4W3LPEk0OrS/ytIBM=

0 commit comments

Comments
 (0)