Skip to content

Commit d9a219d

Browse files
authored
SupraSeal (#69)
* Import supraseal extern * supraseal test bin * supraseal build setup * begin implementing batch seal task * Minimum viable Batch seal task * Supraseal config * supraseal persist batch meta in cache * batch seal: Mostly done C1 * binary C1 decode * better decode * Fixes to raw proof decode * Working SN C1 reader, C2 poc test * supraseal slot manager * add created_at to batch_sector_refs * update supraseal extern ref * call schedule in supraseal task * supraseal debugging * gen, fix task wiring * supraseal link fixes * more supraffi ld flags * try a thing * linkers are easy * debug init quit * better slot size error message * pipelines and pages are different units * allow batch scheduling * pass all deps to NewSupraSeal * set treed task in batch seal * batch storage fixes * set correct path type in batch seal task * hugepage check * use correct parents file path * log pc2 inputs * pc2 debugging * fixed supra path decode * more supraseal updates * setup for real batch run * batch: fix pipeline machine id getter * fix invalid commr error return * supraseal with fixed c1 * fix machine host/port in finalize * log c1 debug * oh no databases * run finalize on batchseal nodes * fix batch slot allocation * allow batch.json * fix batch slot startup * gen fixes * allow c1 out in cache * fix api version check * post-rebase fixes * register the supraseal task * fix lint * make gen * batch build target * update supraseal to fix sppark conflict * fix batch task registration * make: Always set LIBRARY_PATH * supraseal with gcc11 default * build supraseal dep from make * minimum sdr tasks setting * schedule batch seal on sdr tasks which weren't claimed * allow multiple supraseal pipelines * prometheus metrics endpoint * supraseal phase metrics * Don't turn supraseal tasks into supraseal tasks * fix LIBRARY_PATH handling * Fix library_path more correctly * Prefer user LIBRARY_PATH * Set LIBRARY_PATH only if not already set * Fix sptool build * faster ema * batch cpu calc * hasher 
count is in threads * fix calc assigning one core too many * supraseal config generator * fix calc comment * Move batch seal properties to a Seal section * make gen * batch config gen * fix config gen * fix config nvme list * Add --duration-days to curio seal start --cc * Set after_synth in supraseal task * fix: WinPoSt: Prioritize recent tasks, don't care about old mining bases * Address review * make gen * supraseal docs * docs on hw * supraseal: Allocate cores on first processor * count cpu packages correctly * webui: Fix redirect on sector remove * improve supraseal config output, more diag info * add some docs on troubleshooting batch seal perf * fix lint * improve docs * no linter, you are wrong
1 parent 0ce2342 commit d9a219d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+4500
-72
lines changed

.gitmodules

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
11
[submodule "extern/filecoin-ffi"]
22
path = extern/filecoin-ffi
33
url = https://github.com/filecoin-project/filecoin-ffi.git
4+
[submodule "extern/supra_seal"]
5+
path = extern/supra_seal
6+
url = https://github.com/magik6k/supra_seal.git
7+
branch = feat/multi-out-paths

Makefile

+51-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ SHELL=/usr/bin/env bash
22

33
GOCC?=go
44

5+
## FILECOIN-FFI
6+
57
FFI_PATH:=extern/filecoin-ffi/
68
FFI_DEPS:=.install-filcrypto
79
FFI_DEPS:=$(addprefix $(FFI_PATH),$(FFI_DEPS))
@@ -22,6 +24,23 @@ BUILD_DEPS+=ffi-version-check
2224

2325
.PHONY: ffi-version-check
2426

27+
## SUPRA-FFI
28+
29+
ifeq ($(shell uname),Linux)
30+
SUPRA_FFI_PATH:=extern/supra_seal/
31+
SUPRA_FFI_DEPS:=.install-supraseal
32+
SUPRA_FFI_DEPS:=$(addprefix $(SUPRA_FFI_PATH),$(SUPRA_FFI_DEPS))
33+
34+
$(SUPRA_FFI_DEPS): build/.supraseal-install ;
35+
36+
build/.supraseal-install: $(SUPRA_FFI_PATH)
37+
cd $(SUPRA_FFI_PATH) && ./build.sh
38+
@touch $@
39+
40+
MODULES+=$(SUPRA_FFI_PATH)
41+
CLEAN+=build/.supraseal-install
42+
endif
43+
2544
$(MODULES): build/.update-modules ;
2645
# dummy file that marks the last time modules were updated
2746
build/.update-modules:
@@ -30,6 +49,12 @@ build/.update-modules:
3049

3150
# end git modules
3251

52+
## CUDA Library Path
53+
CUDA_PATH := $(shell dirname $$(dirname $$(which nvcc)))
54+
CUDA_LIB_PATH := $(CUDA_PATH)/lib64
55+
LIBRARY_PATH ?= $(CUDA_LIB_PATH)
56+
export LIBRARY_PATH
57+
3358
## MAIN BINARIES
3459

3560
CLEAN+=build/.update-modules
@@ -41,7 +66,7 @@ deps: $(BUILD_DEPS)
4166

4267
curio: $(BUILD_DEPS)
4368
rm -f curio
44-
GOAMD64=v3 $(GOCC) build $(GOFLAGS) -o curio -ldflags " -s -w \
69+
GOAMD64=v3 CGO_LDFLAGS_ALLOW=$(CGO_LDFLAGS_ALLOW) $(GOCC) build $(GOFLAGS) -o curio -ldflags " -s -w \
4570
-X github.com/filecoin-project/curio/build.IsOpencl=$(FFI_USE_OPENCL) \
4671
-X github.com/filecoin-project/curio/build.CurrentCommit=+git_`git log -1 --format=%h_%cI`" \
4772
./cmd/curio
@@ -54,6 +79,31 @@ sptool: $(BUILD_DEPS)
5479
.PHONY: sptool
5580
BINS+=sptool
5681

82+
ifeq ($(shell uname),Linux)
83+
84+
batchdep: build/.supraseal-install
85+
batchdep: $(BUILD_DEPS)
86+
# batchdep produces no file of its own, so it must be declared phony.
.PHONY: batchdep
87+
88+
batch: GOFLAGS+=-tags=supraseal
89+
batch: CGO_LDFLAGS_ALLOW='.*'
90+
batch: batchdep build
91+
.PHONY: batch
92+
93+
batch-calibnet: GOFLAGS+=-tags=calibnet,supraseal
94+
batch-calibnet: CGO_LDFLAGS_ALLOW='.*'
95+
batch-calibnet: batchdep build
96+
.PHONY: batch-calibnet
97+
98+
else
99+
batch:
100+
@echo "Batch target is only available on Linux systems"
101+
@exit 1
102+
103+
batch-calibnet:
104+
@echo "Batch-calibnet target is only available on Linux systems"
105+
@exit 1
106+
endif
57107

58108
calibnet: GOFLAGS+=-tags=calibnet
59109
calibnet: build

cmd/curio/calc.go

+169
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
6+
"github.com/fatih/color"
7+
"github.com/urfave/cli/v2"
8+
9+
"github.com/filecoin-project/curio/tasks/sealsupra"
10+
)
11+
12+
var calcCmd = &cli.Command{
13+
Name: "calc",
14+
Usage: "Math Utils",
15+
Flags: []cli.Flag{
16+
&cli.StringFlag{
17+
Name: "actor",
18+
},
19+
},
20+
Subcommands: []*cli.Command{
21+
calcBatchCpuCmd,
22+
calcSuprasealConfigCmd,
23+
},
24+
}
25+
26+
var calcBatchCpuCmd = &cli.Command{
27+
Name: "batch-cpu",
28+
Usage: "Analyze and display the layout of batch sealer threads",
29+
Description: `Analyze and display the layout of batch sealer threads on your CPU.
30+
31+
It provides detailed information about CPU utilization for batch sealing operations, including core allocation, thread
32+
distribution for different batch sizes.`,
33+
Flags: []cli.Flag{
34+
&cli.BoolFlag{Name: "dual-hashers", Value: true},
35+
},
36+
Action: func(cctx *cli.Context) error {
37+
info, err := sealsupra.GetSystemInfo()
38+
if err != nil {
39+
return err
40+
}
41+
42+
fmt.Println("Basic CPU Information")
43+
fmt.Println("")
44+
fmt.Printf("Processor count: %d\n", info.ProcessorCount)
45+
fmt.Printf("Core count: %d\n", info.CoreCount)
46+
fmt.Printf("Thread count: %d\n", info.CoreCount*info.ThreadsPerCore)
47+
fmt.Printf("Threads per core: %d\n", info.ThreadsPerCore)
48+
fmt.Printf("Cores per L3 cache (CCX): %d\n", info.CoresPerL3)
49+
fmt.Printf("L3 cache count (CCX count): %d\n", info.CoreCount/info.CoresPerL3)
50+
51+
ccxFreeCores := info.CoresPerL3 - 1 // one core per ccx goes to the coordinator
52+
ccxFreeThreads := ccxFreeCores * info.ThreadsPerCore
53+
fmt.Printf("Hasher Threads per CCX: %d\n", ccxFreeThreads)
54+
55+
sectorsPerThread := 1
56+
if cctx.Bool("dual-hashers") {
57+
sectorsPerThread = 2
58+
}
59+
60+
sectorsPerCCX := ccxFreeThreads * sectorsPerThread
61+
fmt.Printf("Sectors per CCX: %d\n", sectorsPerCCX)
62+
63+
fmt.Println("---------")
64+
65+
printForBatchSize := func(batchSize int) {
66+
fmt.Printf("Batch Size: %s sectors\n", color.CyanString("%d", batchSize))
67+
fmt.Println()
68+
69+
config, err := sealsupra.GenerateSupraSealConfig(*info, cctx.Bool("dual-hashers"), batchSize, nil)
70+
if err != nil {
71+
fmt.Printf("Error generating config: %s\n", err)
72+
return
73+
}
74+
75+
fmt.Printf("Required Threads: %d\n", config.RequiredThreads)
76+
fmt.Printf("Required CCX: %d\n", config.RequiredCCX)
77+
fmt.Printf("Required Cores: %d hasher (+4 minimum for non-hashers)\n", config.RequiredCores)
78+
79+
enoughCores := config.RequiredCores <= info.CoreCount
80+
if enoughCores {
81+
fmt.Printf("Enough cores available for hashers %s\n", color.GreenString("✔"))
82+
} else {
83+
fmt.Printf("Not enough cores available for hashers %s\n", color.RedString("✘"))
84+
return
85+
}
86+
87+
fmt.Printf("Non-hasher cores: %d\n", info.CoreCount-config.RequiredCores)
88+
89+
if config.P2WrRdOverlap {
90+
color.Yellow("! P2 writer will share a core with P2 reader, performance may be impacted")
91+
}
92+
if config.P2HsP1WrOverlap {
93+
color.Yellow("! P2 hasher will share a core with P1 writer, performance may be impacted")
94+
}
95+
if config.P2HcP2RdOverlap {
96+
color.Yellow("! P2 hasher_cpu will share a core with P2 reader, performance may be impacted")
97+
}
98+
99+
fmt.Println()
100+
fmt.Printf("pc1 writer: %d\n", config.Topology.PC1Writer)
101+
fmt.Printf("pc1 reader: %d\n", config.Topology.PC1Reader)
102+
fmt.Printf("pc1 orchestrator: %d\n", config.Topology.PC1Orchestrator)
103+
fmt.Println()
104+
fmt.Printf("pc2 reader: %d\n", config.Topology.PC2Reader)
105+
fmt.Printf("pc2 hasher: %d\n", config.Topology.PC2Hasher)
106+
fmt.Printf("pc2 hasher_cpu: %d\n", config.Topology.PC2HasherCPU)
107+
fmt.Printf("pc2 writer: %d\n", config.Topology.PC2Writer)
108+
fmt.Printf("pc2 writer_cores: %d\n", config.Topology.PC2WriterCores)
109+
fmt.Println()
110+
fmt.Printf("c1 reader: %d\n", config.Topology.C1Reader)
111+
fmt.Println()
112+
113+
fmt.Printf("Unoccupied Cores: %d\n\n", config.UnoccupiedCores)
114+
115+
fmt.Println("{")
116+
fmt.Printf(" sectors = %d;\n", batchSize)
117+
fmt.Println(" coordinators = (")
118+
for i, coord := range config.Topology.SectorConfigs[0].Coordinators {
119+
fmt.Printf(" { core = %d;\n hashers = %d; }", coord.Core, coord.Hashers)
120+
if i < len(config.Topology.SectorConfigs[0].Coordinators)-1 {
121+
fmt.Println(",")
122+
} else {
123+
fmt.Println()
124+
}
125+
}
126+
fmt.Println(" )")
127+
fmt.Println("}")
128+
129+
fmt.Println("---------")
130+
}
131+
132+
printForBatchSize(16)
133+
printForBatchSize(32)
134+
printForBatchSize(64)
135+
printForBatchSize(128)
136+
137+
return nil
138+
},
139+
}
140+
141+
var calcSuprasealConfigCmd = &cli.Command{
142+
Name: "supraseal-config",
143+
Usage: "Generate a supra_seal configuration",
144+
Description: `Generate a supra_seal configuration for a given batch size.
145+
146+
This command outputs a configuration expected by SupraSeal. Main purpose of this command is for debugging and testing.
147+
The config can be used directly with SupraSeal binaries to test it without involving Curio.`,
148+
Flags: []cli.Flag{
149+
&cli.BoolFlag{
150+
Name: "dual-hashers",
151+
Value: true,
152+
Usage: "Zen3 and later supports two sectors per thread, set to false for older CPUs",
153+
},
154+
&cli.IntFlag{
155+
Name: "batch-size",
156+
Aliases: []string{"b"},
157+
Required: true,
158+
},
159+
},
160+
Action: func(cctx *cli.Context) error {
161+
cstr, err := sealsupra.GenerateSupraSealConfigString(cctx.Bool("dual-hashers"), cctx.Int("batch-size"), nil)
162+
if err != nil {
163+
return err
164+
}
165+
166+
fmt.Println(cstr)
167+
return nil
168+
},
169+
}

cmd/curio/main.go

+1
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ func main() {
6767
marketCmd,
6868
fetchParamCmd,
6969
ffiCmd,
70+
calcCmd,
7071
}
7172

7273
jaeger := tracing.SetupJaegerTracing("curio")

cmd/curio/pipeline.go

+23-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ import (
99

1010
"github.com/filecoin-project/go-address"
1111
"github.com/filecoin-project/go-state-types/abi"
12+
"github.com/filecoin-project/go-state-types/builtin"
13+
miner12 "github.com/filecoin-project/go-state-types/builtin/v12/miner"
1214

1315
"github.com/filecoin-project/curio/cmd/curio/guidedsetup"
1416
"github.com/filecoin-project/curio/deps"
@@ -61,6 +63,12 @@ var sealStartCmd = &cli.Command{
6163
Name: "layers",
6264
Usage: "list of layers to be interpreted (atop defaults). Default: base",
6365
},
66+
&cli.IntFlag{
67+
Name: "duration-days",
68+
Aliases: []string{"d"},
69+
Usage: "How long to commit sectors for",
70+
DefaultText: "1278 (3.5 years)",
71+
},
6472
},
6573
Action: func(cctx *cli.Context) error {
6674
if !cctx.Bool("now") {
@@ -118,9 +126,23 @@ var sealStartCmd = &cli.Command{
118126
return xerrors.Errorf("getting seal proof type: %w", err)
119127
}
120128

129+
var userDuration *int64
130+
if cctx.IsSet("duration-days") {
131+
days := cctx.Int("duration-days")
132+
userDuration = new(int64)
133+
*userDuration = int64(days) * builtin.EpochsInDay
134+
135+
if miner12.MaxSectorExpirationExtension < *userDuration {
136+
return xerrors.Errorf("duration exceeds max allowed: %d > %d", *userDuration, miner12.MaxSectorExpirationExtension)
137+
}
138+
if miner12.MinSectorExpiration > *userDuration {
139+
return xerrors.Errorf("duration is too short: %d < %d", *userDuration, miner12.MinSectorExpiration)
140+
}
141+
}
142+
121143
num, err := seal.AllocateSectorNumbers(ctx, dep.Chain, dep.DB, act, cctx.Int("count"), func(tx *harmonydb.Tx, numbers []abi.SectorNumber) (bool, error) {
122144
for _, n := range numbers {
123-
_, err := tx.Exec("insert into sectors_sdr_pipeline (sp_id, sector_number, reg_seal_proof) values ($1, $2, $3)", mid, n, spt)
145+
_, err := tx.Exec("insert into sectors_sdr_pipeline (sp_id, sector_number, reg_seal_proof, user_sector_duration_epochs) values ($1, $2, $3, $4)", mid, n, spt, userDuration)
124146
if err != nil {
125147
return false, xerrors.Errorf("inserting into sectors_sdr_pipeline: %w", err)
126148
}

cmd/curio/rpc/rpc.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,15 @@ import (
3030
"github.com/filecoin-project/curio/api/client"
3131
"github.com/filecoin-project/curio/build"
3232
"github.com/filecoin-project/curio/deps"
33+
"github.com/filecoin-project/curio/lib/metrics"
3334
"github.com/filecoin-project/curio/lib/paths"
3435
"github.com/filecoin-project/curio/lib/repo"
3536
"github.com/filecoin-project/curio/web"
3637

3738
lapi "github.com/filecoin-project/lotus/api"
3839
cliutil "github.com/filecoin-project/lotus/cli/util"
3940
"github.com/filecoin-project/lotus/lib/rpcenc"
40-
"github.com/filecoin-project/lotus/metrics"
41+
lotusmetrics "github.com/filecoin-project/lotus/metrics"
4142
"github.com/filecoin-project/lotus/metrics/proxy"
4243
"github.com/filecoin-project/lotus/storage/pipeline/piece"
4344
"github.com/filecoin-project/lotus/storage/sealer/fsutil"
@@ -71,6 +72,7 @@ func CurioHandler(
7172
mux.Handle("/rpc/v0", rpcServer)
7273
mux.Handle("/rpc/streams/v0/push/{uuid}", readerHandler)
7374
mux.PathPrefix("/remote").HandlerFunc(remote)
75+
mux.Handle("/debug/metrics", metrics.Exporter())
7476
mux.PathPrefix("/").Handler(http.DefaultServeMux) // pprof
7577

7678
if !permissioned {
@@ -283,7 +285,7 @@ func ListenAndServe(ctx context.Context, dependencies *deps.Deps, shutdownChan c
283285
permissioned),
284286
ReadHeaderTimeout: time.Minute * 3,
285287
BaseContext: func(listener net.Listener) context.Context {
286-
ctx, _ := tag.New(context.Background(), tag.Upsert(metrics.APIInterface, "lotus-worker"))
288+
ctx, _ := tag.New(context.Background(), tag.Upsert(lotusmetrics.APIInterface, "curio"))
287289
return ctx
288290
},
289291
Addr: dependencies.ListenAddr,

cmd/curio/run.go

+1-8
Original file line numberDiff line numberDiff line change
@@ -106,14 +106,7 @@ var runCmd = &cli.Command{
106106
ctxclose()
107107
}()
108108
}
109-
// Register all metric views
110-
/*
111-
if err := view.Register(
112-
metrics.MinerNodeViews...,
113-
); err != nil {
114-
log.Fatalf("Cannot register the view: %v", err)
115-
}
116-
*/
109+
117110
// Set the metric to one so it is published to the exporter
118111
stats.Record(ctx, metrics.LotusInfo.M(1))
119112

0 commit comments

Comments
 (0)