This repository was archived by the owner on Aug 2, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 110
file, testutil: Add reference file hasher #2099
Merged
Merged
Changes from 3 commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
42b1887
file, testutil: Add reference file hasher
nolash 457b569
file: Remove premature code
nolash 65a444e
file: Remove unused zeroHex and unused logs
nolash 93bdad9
file: Add comments
nolash d603c6d
file: Elaborate comments, remove redundant loglines, var rename
nolash 028aa1e
file: Split up digest function, add explanations
nolash fe7ddee
file: Purify digest method
nolash File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,66 @@ | ||
package hasher

import (
	"github.com/ethersphere/swarm/testutil"
)

// Dimensions of the swarm hash tree used throughout the reference tests:
// 32-byte sections, 128 branches per chunk, 4096-byte (32*128) chunks.
const (
	sectionSize = 32
	branches    = 128
	chunkSize   = 4096
)

var (
	// dataLengths enumerates input sizes around every interesting boundary:
	// below/at/above a section (31/32/33), two sections (63/64/65), one
	// chunk, one full level (chunkSize*128), and two full levels
	// (chunkSize*128*128), each with off-by-one-section variants.
	dataLengths = []int{31, // 0
		32,                     // 1
		33,                     // 2
		63,                     // 3
		64,                     // 4
		65,                     // 5
		chunkSize,              // 6
		chunkSize + 31,         // 7
		chunkSize + 32,         // 8
		chunkSize + 63,         // 9
		chunkSize + 64,         // 10
		chunkSize * 2,          // 11
		chunkSize*2 + 32,       // 12
		chunkSize * 128,        // 13
		chunkSize*128 + 31,     // 14
		chunkSize*128 + 32,     // 15
		chunkSize*128 + 64,     // 16
		chunkSize * 129,        // 17
		chunkSize * 130,        // 18
		chunkSize * 128 * 128,  // 19
		chunkSize*128*128 + 32, // 20
	}
	// expected[i] is the hex-encoded swarm root hash for an input of
	// dataLengths[i] bytes of (presumably deterministic) test data —
	// NOTE(review): the data generator is not visible in this file; verify
	// against the test that consumes these vectors.
	expected = []string{
		"ece86edb20669cc60d142789d464d57bdf5e33cb789d443f608cbd81cfa5697d", // 0
		"0be77f0bb7abc9cd0abed640ee29849a3072ccfd1020019fe03658c38f087e02", // 1
		"3463b46d4f9d5bfcbf9a23224d635e51896c1daef7d225b86679db17c5fd868e", // 2
		"95510c2ff18276ed94be2160aed4e69c9116573b6f69faaeed1b426fea6a3db8", // 3
		"490072cc55b8ad381335ff882ac51303cc069cbcb8d8d3f7aa152d9c617829fe", // 4
		"541552bae05e9a63a6cb561f69edf36ffe073e441667dbf7a0e9a3864bb744ea", // 5
		"c10090961e7682a10890c334d759a28426647141213abda93b096b892824d2ef", // 6
		"91699c83ed93a1f87e326a29ccd8cc775323f9e7260035a5f014c975c5f3cd28", // 7
		"73759673a52c1f1707cbb61337645f4fcbd209cdc53d7e2cedaaa9f44df61285", // 8
		"db1313a727ffc184ae52a70012fbbf7235f551b9f2d2da04bf476abe42a3cb42", // 9
		"ade7af36ac0c7297dc1c11fd7b46981b629c6077bce75300f85b02a6153f161b", // 10
		"29a5fb121ce96194ba8b7b823a1f9c6af87e1791f824940a53b5a7efe3f790d9", // 11
		"61416726988f77b874435bdd89a419edc3861111884fd60e8adf54e2f299efd6", // 12
		"3047d841077898c26bbe6be652a2ec590a5d9bd7cd45d290ea42511b48753c09", // 13
		"e5c76afa931e33ac94bce2e754b1bb6407d07f738f67856783d93934ca8fc576", // 14
		"485a526fc74c8a344c43a4545a5987d17af9ab401c0ef1ef63aefcc5c2c086df", // 15
		"624b2abb7aefc0978f891b2a56b665513480e5dc195b4a66cd8def074a6d2e94", // 16
		"b8e1804e37a064d28d161ab5f256cc482b1423d5cd0a6b30fde7b0f51ece9199", // 17
		"59de730bf6c67a941f3b2ffa2f920acfaa1713695ad5deea12b4a121e5f23fa1", // 18
		"522194562123473dcfd7a457b18ee7dee8b7db70ed3cfa2b73f348a992fdfd3b", // 19
		"ed0cc44c93b14fef2d91ab3a3674eeb6352a42ac2f0bbe524711824aae1e7bcc", // 20
	}

	// start and end select the subrange of dataLengths cases to run;
	// defaults cover all of them.
	start = 0
	end   = len(dataLengths)
)

// init wires up the shared test utilities (logging/flags) before any test runs.
func init() {
	testutil.Init()
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,56 @@ | ||
package hasher

import (
	"context"
	"sync"

	"github.com/ethersphere/swarm/file"
)

// treeParams defines the boundaries of the hashing job and also contains the
// hash factory function of the job.
// Setting Debug means omitting any automatic behavior (for now it means job
// processing won't auto-start).
type treeParams struct {
	SectionSize int                    // byte size of one hash/section in the tree
	Branches    int                    // number of sections per chunk
	ChunkSize   int                    // SectionSize * Branches
	Spans       []int                  // span (in chunks) covered by one reference, indexed per level
	Debug       bool                   // disables automatic job processing
	hashFunc    file.SectionWriterFunc // factory producing the underlying section writers
	writerPool  sync.Pool              // recycles section writers created by hashFunc
	ctx         context.Context        // context handed to hashFunc by the pool's New
}
|
|
||
| func newTreeParams(hashFunc file.SectionWriterFunc) *treeParams { | ||
|
|
||
| h := hashFunc(context.Background()) | ||
| p := &treeParams{ | ||
| SectionSize: h.SectionSize(), | ||
| Branches: h.Branches(), | ||
| ChunkSize: h.SectionSize() * h.Branches(), | ||
| hashFunc: hashFunc, | ||
| } | ||
| h.Reset() | ||
| p.writerPool.New = func() interface{} { | ||
| hf := p.hashFunc(p.ctx) | ||
| return hf | ||
| } | ||
| p.Spans = generateSpanSizes(p.Branches, 9) | ||
| return p | ||
| } | ||
|
|
||
| func (p *treeParams) SetContext(ctx context.Context) { | ||
| p.ctx = ctx | ||
acud marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| func (p *treeParams) GetContext() context.Context { | ||
| return p.ctx | ||
| } | ||
|
|
||
| func (p *treeParams) PutWriter(w file.SectionWriter) { | ||
| w.Reset() | ||
| p.writerPool.Put(w) | ||
| } | ||
|
|
||
| func (p *treeParams) GetWriter() file.SectionWriter { | ||
| return p.writerPool.Get().(file.SectionWriter) | ||
| } | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,117 @@ | ||
package hasher

import (
	"github.com/ethersphere/swarm/file"
	"github.com/ethersphere/swarm/log"
)

// ReferenceHasher is the source-of-truth implementation of the swarm file
// hashing algorithm. It trades performance for clarity: one flat buffer holds
// the data and the intermediate hashes of every tree level, addressed through
// per-level cursors, and a single underlying hasher performs all sums.
type ReferenceHasher struct {
	params  *treeParams
	cursors []int              // section write position, indexed per level
	length  int                // number of bytes written to the data level of the hasher
	buffer  []byte             // keeps data and hashes, indexed by cursors
	counts  []int              // number of sums performed, indexed per level
	hasher  file.SectionWriter // underlying hasher
}
|
|
||
// NewReferenceHasher constructs and returns a new ReferenceHasher.
// Cursors, counts and buffer are sized for 9 tree levels, which covers
// inputs up to chunkSize * branches^8 bytes.
func NewReferenceHasher(params *treeParams) *ReferenceHasher {
	// TODO: remove when bmt interface is amended
	h := params.GetWriter()
	return &ReferenceHasher{
		params:  params,
		cursors: make([]int, 9),
		counts:  make([]int, 9),
		buffer:  make([]byte, params.ChunkSize*9),
		hasher:  h,
	}
}
|
|
||
| // Hash computes and returns the root hash of arbitrary data | ||
| func (r *ReferenceHasher) Hash(data []byte) []byte { | ||
| l := r.params.ChunkSize | ||
| for i := 0; i < len(data); i += r.params.ChunkSize { | ||
| if len(data)-i < r.params.ChunkSize { | ||
| l = len(data) - i | ||
| } | ||
| r.update(0, data[i:i+l]) | ||
| } | ||
| for i := 0; i < 9; i++ { | ||
| log.Trace("cursor", "lvl", i, "pos", r.cursors[i]) | ||
| } | ||
| return r.digest() | ||
| } | ||
|
|
||
// update writes to the data buffer on the specified level (level 0 is the
// data level; higher levels hold intermediate bmt hashes).
// It calls sum if a chunk boundary is reached on the level and recursively
// calls this function for the next level with the acquired bmt hash,
// adjusting cursors accordingly.
func (r *ReferenceHasher) update(lvl int, data []byte) {
	if lvl == 0 {
		// only data-level writes count toward the total input length
		r.length += len(data)
	}
	copy(r.buffer[r.cursors[lvl]:r.cursors[lvl]+len(data)], data)
	r.cursors[lvl] += len(data)
	// cursors[lvl+1] marks where this level's not-yet-summed data begins,
	// so the difference is the amount of pending data on this level
	if r.cursors[lvl]-r.cursors[lvl+1] == r.params.ChunkSize {
		ref := r.sum(lvl)
		r.update(lvl+1, ref)
		// rewind this level's cursor: the chunk just summed is consumed
		r.cursors[lvl] = r.cursors[lvl+1]
	}
}
|
|
||
// sum calculates and returns the bmt sum of the last written data on the level.
func (r *ReferenceHasher) sum(lvl int) []byte {
	r.counts[lvl]++
	// data span (in bytes) covered by one full chunk on this level
	spanSize := r.params.Spans[lvl] * r.params.ChunkSize
	// bytes covered by this particular sum; wraps to spanSize on exact
	// multiples (assumes r.length > 0 when called — TODO confirm for
	// zero-length input)
	span := (r.length-1)%spanSize + 1

	// the pending data on this level sits between the level-above cursor
	// and this level's cursor
	toSumSize := r.cursors[lvl] - r.cursors[lvl+1]

	r.hasher.Reset()
	r.hasher.SetSpan(span)
	r.hasher.Write(r.buffer[r.cursors[lvl+1] : r.cursors[lvl+1]+toSumSize])
	ref := r.hasher.Sum(nil)
	return ref
}
|
|
||
// digest is called after all data has been written.
// It sums the final (possibly partial) chunk of each level, skipping
// intermediate levels that end on a span boundary, and returns the root hash.
func (r *ReferenceHasher) digest() []byte {

	// if we did not end on a chunk boundary, the last chunk hasn't been hashed
	// we need to do this first
	if r.length%r.params.ChunkSize != 0 {
		ref := r.sum(0)
		copy(r.buffer[r.cursors[1]:], ref)
		r.cursors[1] += len(ref)
		r.cursors[0] = r.cursors[1]
	}

	// calculate the total number of levels needed to represent the data
	// (including the data level)
	targetLevel := getLevelsFromLength(r.length, r.params.SectionSize, r.params.Branches)

	// sum every intermediate level and write to the level above it
	for i := 1; i < targetLevel; i++ {

		// if the tree is balanced or if there is a single reference outside a
		// balanced tree on this level, don't hash it again but pass it on to
		// the next level
		if r.counts[i] > 0 {
			// TODO: simplify if possible
			if r.counts[i-1]-r.params.Spans[targetLevel-1-i] <= 1 {
				log.Trace("skip")
				// shift cursors so the single reference carries through
				// to the level above unchanged
				r.cursors[i+1] = r.cursors[i]
				r.cursors[i] = r.cursors[i-1]
				continue
			}
		}

		ref := r.sum(i)
		copy(r.buffer[r.cursors[i+1]:], ref)
		r.cursors[i+1] += len(ref)
		r.cursors[i] = r.cursors[i+1]
	}

	// the first section of the buffer will hold the root hash
	return r.buffer[:r.params.SectionSize]
}
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.