Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[no-release-notes] archive clone/fetch #8857

Merged
merged 28 commits into from
Feb 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
16caa44
Introduce ToChunker interface
macneale4 Jan 6, 2025
a73bfed
Simplest possible update to gRPC interface for archive spans
macneale4 Jan 7, 2025
38f80df
Generated Proto code
macneale4 Jan 7, 2025
d45e042
First successful clone. No tests yet.
macneale4 Jan 9, 2025
0e78d7b
Use darc suffix const
macneale4 Feb 10, 2025
6c0a039
Add test for cloning archived repository.
macneale4 Jan 23, 2025
0ae4d51
Revert tableSpec field changes
macneale4 Feb 11, 2025
5fbe0d1
Munge the location path in GetChunkLocationsWithPaths
macneale4 Feb 11, 2025
564c1f1
Checkpoint: url generation fixed, and no longer blocking incorrectly
macneale4 Feb 11, 2025
8f933ce
Clone tests passing again
macneale4 Feb 12, 2025
66f1b9f
[ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/upda…
macneale4 Feb 12, 2025
6c270b1
First steel thread for archive fetch.
macneale4 Feb 4, 2025
1f498f8
Use better dictionary cache
macneale4 Feb 5, 2025
f8330ff
Make metadata code aware of newgen archives
macneale4 Feb 10, 2025
57c6a21
Small refactor and clean up
macneale4 Feb 12, 2025
c3440f0
[ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/upda…
macneale4 Feb 12, 2025
72e7029
More ToChunker refactor
macneale4 Feb 12, 2025
dbef286
[ga-format-pr] Run go/utils/repofmt/format_repo.sh and go/Godeps/upda…
macneale4 Feb 12, 2025
4a3d7f8
go: remotestorage: Rework how dictionary fetching and dictionary cach…
reltuk Feb 13, 2025
52fab61
go: remotestorage: Some cleanups to ChunkFetcher.
reltuk Feb 13, 2025
78d451b
go: remotestorage: chunk_fetcher: Ranges carry the Dict{Offset,Len}, …
reltuk Feb 13, 2025
c76ab7e
more cleanup
macneale4 Feb 13, 2025
ecac26b
Remove the FullCompressedChunkLen function from ToChunker interface
macneale4 Feb 13, 2025
63e29a7
rollback the change to the getMany interface
macneale4 Feb 13, 2025
2434b63
PR Feedback
macneale4 Feb 13, 2025
77a4e59
More PR Feedback
macneale4 Feb 13, 2025
b086492
Account for empty chunks in archive.getManyCompressed
macneale4 Feb 14, 2025
2cab73a
Archive test updates to account for default dictionaries
macneale4 Feb 14, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
784 changes: 403 additions & 381 deletions go/gen/proto/dolt/services/remotesapi/v1alpha1/chunkstore.pb.go

Large diffs are not rendered by default.

9 changes: 7 additions & 2 deletions go/libraries/doltcore/remotesrv/grpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,12 @@ func (rs *RemoteChunkStore) StreamDownloadLocations(stream remotesapi.ChunkStore
var ranges []*remotesapi.RangeChunk
for h, r := range hashToRange {
hCpy := h
ranges = append(ranges, &remotesapi.RangeChunk{Hash: hCpy[:], Offset: r.Offset, Length: r.Length})
ranges = append(ranges, &remotesapi.RangeChunk{
Hash: hCpy[:],
Offset: r.Offset,
Length: r.Length,
DictionaryOffset: r.DictOffset,
DictionaryLength: r.DictLength})
}

url := rs.getDownloadUrl(md, prefix+"/"+loc)
Expand Down Expand Up @@ -606,7 +611,7 @@ func getTableFileInfo(
}
appendixTableFileInfo := make([]*remotesapi.TableFileInfo, 0)
for _, t := range tableList {
url := rs.getDownloadUrl(md, prefix+"/"+t.LocationPrefix()+t.FileID())
url := rs.getDownloadUrl(md, prefix+"/"+t.LocationPrefix()+t.FileID()+t.LocationSuffix())
url, err = rs.sealer.Seal(url)
if err != nil {
return nil, status.Error(codes.Internal, "failed to get seal download url for "+t.FileID())
Expand Down
11 changes: 9 additions & 2 deletions go/libraries/doltcore/remotesrv/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (

"github.com/dolthub/dolt/go/libraries/utils/filesys"
"github.com/dolthub/dolt/go/store/hash"
"github.com/dolthub/dolt/go/store/nbs"
"github.com/dolthub/dolt/go/store/types"
)

Expand Down Expand Up @@ -94,12 +95,18 @@ func (fh filehandler) ServeHTTP(respWr http.ResponseWriter, req *http.Request) {
respWr.WriteHeader(http.StatusBadRequest)
return
}
_, ok := hash.MaybeParse(path[i+1:])

fileName := path[i+1:]
if strings.HasSuffix(fileName, nbs.ArchiveFileSuffix) {
fileName = fileName[:len(fileName)-len(nbs.ArchiveFileSuffix)]
}
_, ok := hash.MaybeParse(fileName)
if !ok {
logger.WithField("last_path_component", path[i+1:]).Warn("bad request with unparseable last path component")
logger.WithField("last_path_component", fileName).Warn("bad request with unparseable last path component")
respWr.WriteHeader(http.StatusBadRequest)
return
}

abs, err := fh.fs.Abs(path)
if err != nil {
logger.WithError(err).Error("could not get absolute path")
Expand Down
10 changes: 0 additions & 10 deletions go/libraries/doltcore/remotesrv/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ package remotesrv
import (
"context"
"crypto/tls"
"errors"
"net"
"net/http"
"strings"
Expand All @@ -29,7 +28,6 @@ import (
"google.golang.org/grpc"

remotesapi "github.com/dolthub/dolt/go/gen/proto/dolt/services/remotesapi/v1alpha1"
"github.com/dolthub/dolt/go/libraries/doltcore/env"
"github.com/dolthub/dolt/go/libraries/utils/filesys"
)

Expand Down Expand Up @@ -80,14 +78,6 @@ func NewServer(args ServerArgs) (*Server, error) {
args.Logger = logrus.NewEntry(logrus.StandardLogger())
}

storageMetadata, err := env.GetMultiEnvStorageMetadata(args.FS)
if err != nil {
return nil, err
}
if storageMetadata.ArchiveFilesPresent() {
return nil, errors.New("archive files present. Please run `dolt archive --revert` before running the server.")
}

s := new(Server)
s.stopChan = make(chan struct{})

Expand Down
13 changes: 6 additions & 7 deletions go/libraries/doltcore/remotestorage/chunk_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,20 @@ import (

// ChunkCache is an interface used for caching chunks
type ChunkCache interface {
// Put puts a slice of chunks into the cache.
Put(c []nbs.CompressedChunk) bool
// Put puts a slice of chunks into the cache. An error is returned if the cache capacity has been exceeded.
Put(c []nbs.ToChunker) error

// Get gets a map of hash to chunk for a set of hashes. In the event that a chunk is not in the cache, chunks.Empty
// is put in its place.
Get(h hash.HashSet) map[hash.Hash]nbs.CompressedChunk
Get(h hash.HashSet) map[hash.Hash]nbs.ToChunker

// Has takes a set of hashes and returns the set of hashes that the cache currently does not have in it.
Has(h hash.HashSet) (absent hash.HashSet)

// PutChunk puts a single chunk in the cache. true is returned in the event that the chunk was cached successfully
// and false is returned if that chunk is already in the cache.
PutChunk(chunk nbs.CompressedChunk) bool
// PutChunk puts a single chunk in the cache. Returns an error if the cache capacity has been exceeded.
PutChunk(chunk nbs.ToChunker) error

// GetAndClearChunksToFlush gets a map of hash to chunk which includes all the chunks that were put in the cache
// between the last time GetAndClearChunksToFlush was called and now.
GetAndClearChunksToFlush() map[hash.Hash]nbs.CompressedChunk
GetAndClearChunksToFlush() map[hash.Hash]nbs.ToChunker
}
Loading
Loading