From 3502a9df28c3c75b9a1202e549cee475df616298 Mon Sep 17 00:00:00 2001 From: Tony Arcieri Date: Sun, 18 Oct 2020 09:40:23 -0700 Subject: [PATCH] scrypt: use `salsa20` crate to implement Salsa20/8 Closes #29. Using the `salsa20` crate allows us to focus on a single place for things like SIMD optimizations (#16). --- Cargo.lock | 10 ++++ scrypt/Cargo.toml | 6 +- scrypt/src/romix.rs | 135 +++++++++++++------------------------------- 3 files changed, 51 insertions(+), 100 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 16e233f0..82345bd5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -310,6 +310,15 @@ dependencies = [ "num_cpus", ] +[[package]] +name = "salsa20" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "803bc218af7bc955c5ae0e0cf3a7af4331fefd1fc78cde0371853c634a358125" +dependencies = [ + "cipher", +] + [[package]] name = "scopeguard" version = "1.1.0" @@ -325,6 +334,7 @@ dependencies = [ "pbkdf2", "rand", "rand_core", + "salsa20", "sha2", "subtle", ] diff --git a/scrypt/Cargo.toml b/scrypt/Cargo.toml index 7d21adba..3f707078 100644 --- a/scrypt/Cargo.toml +++ b/scrypt/Cargo.toml @@ -12,13 +12,13 @@ edition = "2018" readme = "README.md" [dependencies] +base64 = { version = "0.13", default-features = false, features = ["alloc"], optional = true } hmac = "0.10" pbkdf2 = { version = "0.6.0-pre", default-features = false, path = "../pbkdf2" } -sha2 = { version = "0.9", default-features = false } - -base64 = { version = "0.13", default-features = false, features = ["alloc"], optional = true } rand_core = { version = "0.5", default-features = false, features = ["getrandom"], optional = true } rand = { version = "0.7", default-features = false, optional = true } +salsa20 = { version = "0.7.1", default-features = false, features = ["expose-core"] } +sha2 = { version = "0.9", default-features = false } subtle = { version = "2", default-features = false, optional = true } [features] diff --git a/scrypt/src/romix.rs b/scrypt/src/romix.rs index 2b0589b9..13343a4f 100644 --- a/scrypt/src/romix.rs +++ b/scrypt/src/romix.rs @@ -1,102 +1,7 @@ use core::convert::TryInto; -/// The salsa20/8 core function. -#[inline(never)] -fn salsa20_8(input: &[u8], output: &mut [u8]) { - let mut x = [0u32; 16]; - assert_eq!(input.len(), 4 * x.len()); - for (c, b) in input.chunks_exact(4).zip(x.iter_mut()) { - *b = u32::from_le_bytes(c.try_into().unwrap()); - } - - macro_rules! run_round ( - ($($set_idx:expr, $idx_a:expr, $idx_b:expr, $rot:expr);*) => { { - $( x[$set_idx] ^= x[$idx_a].wrapping_add(x[$idx_b]).rotate_left($rot); )* - } } - ); - - macro_rules! repeat4 ( - ($block:expr) => { - $block; - $block; - $block; - $block; - } - ); - - repeat4!({ - run_round!( - 0x4, 0x0, 0xc, 7; - 0x8, 0x4, 0x0, 9; - 0xc, 0x8, 0x4, 13; - 0x0, 0xc, 0x8, 18; - 0x9, 0x5, 0x1, 7; - 0xd, 0x9, 0x5, 9; - 0x1, 0xd, 0x9, 13; - 0x5, 0x1, 0xd, 18; - 0xe, 0xa, 0x6, 7; - 0x2, 0xe, 0xa, 9; - 0x6, 0x2, 0xe, 13; - 0xa, 0x6, 0x2, 18; - 0x3, 0xf, 0xb, 7; - 0x7, 0x3, 0xf, 9; - 0xb, 0x7, 0x3, 13; - 0xf, 0xb, 0x7, 18; - 0x1, 0x0, 0x3, 7; - 0x2, 0x1, 0x0, 9; - 0x3, 0x2, 0x1, 13; - 0x0, 0x3, 0x2, 18; - 0x6, 0x5, 0x4, 7; - 0x7, 0x6, 0x5, 9; - 0x4, 0x7, 0x6, 13; - 0x5, 0x4, 0x7, 18; - 0xb, 0xa, 0x9, 7; - 0x8, 0xb, 0xa, 9; - 0x9, 0x8, 0xb, 13; - 0xa, 0x9, 0x8, 18; - 0xc, 0xf, 0xe, 7; - 0xd, 0xc, 0xf, 9; - 0xe, 0xd, 0xc, 13; - 0xf, 0xe, 0xd, 18 - ) - }); - - for (o, (i, b)) in output - .chunks_exact_mut(4) - .zip(input.chunks_exact(4).zip(x.iter())) - { - let a = u32::from_le_bytes((&*i).try_into().unwrap()); - let t = b.wrapping_add(a); - o.copy_from_slice(&t.to_le_bytes()); - } -} - -fn xor(x: &[u8], y: &[u8], output: &mut [u8]) { - for ((out, &x_i), &y_i) in output.iter_mut().zip(x.iter()).zip(y.iter()) { - *out = x_i ^ y_i; - } -} - -/// Execute the BlockMix operation -/// input - the input vector. The length must be a multiple of 128. -/// output - the output vector. Must be the same length as input. -fn scrypt_block_mix(input: &[u8], output: &mut [u8]) { - let mut x = [0u8; 64]; - x.copy_from_slice(&input[input.len() - 64..]); - - let mut t = [0u8; 64]; - - for (i, chunk) in input.chunks(64).enumerate() { - xor(&x, chunk, &mut t); - salsa20_8(&t, &mut x); - let pos = if i % 2 == 0 { - (i / 2) * 64 - } else { - (i / 2) * 64 + input.len() / 2 - }; - output[pos..pos + 64].copy_from_slice(&x); - } -} +/// The Salsa20/8 core function +type Salsa20_8 = salsa20::Block; /// Execute the ROMix operation in-place. /// b - the data to operate on @@ -129,3 +34,39 @@ pub(crate) fn scrypt_ro_mix(b: &mut [u8], v: &mut [u8], t: &mut [u8], n: usize) scrypt_block_mix(t, b); } } + +/// Execute the BlockMix operation +/// input - the input vector. The length must be a multiple of 128. +/// output - the output vector. Must be the same length as input. +fn scrypt_block_mix(input: &[u8], output: &mut [u8]) { + let mut x = [0u8; 64]; + x.copy_from_slice(&input[input.len() - 64..]); + + let mut t = [0u8; 64]; + + for (i, chunk) in input.chunks(64).enumerate() { + xor(&x, chunk, &mut t); + + let mut t2 = [0u32; 16]; + + for (c, b) in t.chunks_exact(4).zip(t2.iter_mut()) { + *b = u32::from_le_bytes(c.try_into().unwrap()); + } + + Salsa20_8::from(t2).generate(&mut x); + + let pos = if i % 2 == 0 { + (i / 2) * 64 + } else { + (i / 2) * 64 + input.len() / 2 + }; + + output[pos..pos + 64].copy_from_slice(&x); + } +} + +fn xor(x: &[u8], y: &[u8], output: &mut [u8]) { + for ((out, &x_i), &y_i) in output.iter_mut().zip(x.iter()).zip(y.iter()) { + *out = x_i ^ y_i; + } +}