From 65ec527a0a0d0a65b873ee17e07a12de470a9690 Mon Sep 17 00:00:00 2001 From: xunilrj Date: Tue, 22 Apr 2025 11:04:21 -0300 Subject: [PATCH 1/4] eytzinger binary search for span lines --- sway-types/src/eytzinger.rs | 81 +++++++++++++++++++++++++++++++++++++ sway-types/src/lib.rs | 2 + sway-types/src/span.rs | 17 ++++---- 3 files changed, 92 insertions(+), 8 deletions(-) create mode 100644 sway-types/src/eytzinger.rs diff --git a/sway-types/src/eytzinger.rs b/sway-types/src/eytzinger.rs new file mode 100644 index 00000000000..00752f61872 --- /dev/null +++ b/sway-types/src/eytzinger.rs @@ -0,0 +1,81 @@ +#[derive(Default)] +pub struct Eytzinger(Vec, Vec); + +#[derive(Debug)] +pub struct Index { + pub index: usize, + pub original: usize, +} + +fn eytzinger(a: &[T], b: &mut [T], mut i: usize, k: usize) -> usize { + if k <= a.len() { + i = eytzinger(a, b, i, 2 * k); + b[k] = a[i]; + i += 1; + i = eytzinger(a, b, i, 2 * k + 1); + } + i +} + +impl From<&[T]> for Eytzinger { + fn from(input: &[T]) -> Self { + let mut result = vec![T::default(); input.len() + 1]; + eytzinger(&input[..], &mut result[..], 0, 1); + let original = (0..input.len()).into_iter().collect::>(); + let mut order = vec![input.len(); input.len() + 1]; + eytzinger(&original[..], &mut order[..], 0, 1); + Self(result, order) + } +} + +impl Eytzinger { + pub fn get(&self, idx: usize) -> Option<&T> { + self.0.get(idx) + } + + #[inline] + pub fn binary_search(&self, target: T) -> Result { + let mut idx = 1; + + while idx < self.0.len() { + #[cfg(target_arch = "x86_64")] + unsafe { + use std::arch::x86_64::*; + let prefetch = self.0.as_ptr().wrapping_offset(2 * idx as isize); + _mm_prefetch::<_MM_HINT_T0>(std::ptr::addr_of!(prefetch) as *const i8); + } + let current = self.0[idx]; + idx = 2 * idx + usize::from(current < target); + } + + idx >>= idx.trailing_ones() + 1; + + let r = Index { + index: idx, + original: self.1[idx], + }; + + if self.0[idx] == target { + Ok(r) + } else { + Err(r) + } + } +} + +#[test] +fn ok_binary_search() { + let v = Eytzinger::from(vec![1, 5, 10].as_slice()); + assert_eq!(v.binary_search(1).unwrap().original, 0); + assert_eq!(v.binary_search(5).unwrap().original, 1); + assert_eq!(v.binary_search(10).unwrap().original, 2); + + assert_eq!(v.binary_search(0).unwrap_err().original, 0); + assert_eq!(v.binary_search(2).unwrap_err().original, 1); + + assert_eq!(v.binary_search(4).unwrap_err().original, 1); + assert_eq!(v.binary_search(6).unwrap_err().original, 2); + + assert_eq!(v.binary_search(9).unwrap_err().original, 2); + assert_eq!(v.binary_search(11).unwrap_err().original, 3); +} diff --git a/sway-types/src/lib.rs b/sway-types/src/lib.rs index b45471dcba2..4137e4df6fc 100644 --- a/sway-types/src/lib.rs +++ b/sway-types/src/lib.rs @@ -24,6 +24,8 @@ pub mod style; pub mod ast; +mod eytzinger; + pub type Id = [u8; Bytes32::LEN]; pub type Contract = [u8; ContractId::LEN]; diff --git a/sway-types/src/span.rs b/sway-types/src/span.rs index 9c5b526bcc3..0758b8046ca 100644 --- a/sway-types/src/span.rs +++ b/sway-types/src/span.rs @@ -1,3 +1,4 @@ +use crate::eytzinger::{Eytzinger, Index}; use crate::SourceId; use lazy_static::lazy_static; use serde::{Deserialize, Serialize}; @@ -12,7 +13,7 @@ lazy_static! { static ref DUMMY_SPAN: Span = Span::new( Source { text: Arc::from(""), - line_starts: Arc::new(vec![]) + line_starts: Arc::new(<_>::default()) }, 0, 0, @@ -28,7 +29,7 @@ lazy_static! { pub struct Source { pub text: Arc, #[serde(skip)] - pub line_starts: Arc>, + pub line_starts: Arc>, } impl serde::Serialize for Source { @@ -49,7 +50,7 @@ impl<'de> serde::Deserialize<'de> for Source { } impl Source { - fn calc_line_starts(text: &str) -> Arc> { + fn calc_line_starts(text: &str) -> Arc> { let mut lines_starts = Vec::with_capacity(text.len() / 80); lines_starts.push(0); for (idx, c) in text.char_indices() { @@ -57,7 +58,7 @@ impl Source { lines_starts.push(idx + c.len_utf8()) } } - Arc::new(lines_starts) + Arc::new(lines_starts.as_slice().into()) } pub fn new(text: &str) -> Self { @@ -72,10 +73,10 @@ impl Source { if position > self.text.len() || self.text.is_empty() { LineCol { line: 0, col: 0 } } else { - let (line, line_start) = match self.line_starts.binary_search(&position) { - Ok(line) => (line, self.line_starts.get(line)), - Err(0) => (0, None), - Err(line) => (line - 1, self.line_starts.get(line - 1)), + let (line, line_start) = match self.line_starts.binary_search(position) { + Ok(Index { original: line, .. }) => (line, self.line_starts.get(line)), + Err(Index { original: 0, .. }) => (0, None), + Err(Index { original: line, .. }) => (line - 1, self.line_starts.get(line - 1)), }; line_start.map_or(LineCol { line: 0, col: 0 }, |line_start| LineCol { line, From 82f49d1ed21c8aac2278d5640e35e84e32f4ee45 Mon Sep 17 00:00:00 2001 From: xunilrj Date: Tue, 22 Apr 2025 13:30:46 -0300 Subject: [PATCH 2/4] clippy and fmt issues --- sway-types/src/eytzinger.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sway-types/src/eytzinger.rs b/sway-types/src/eytzinger.rs index 00752f61872..dd179ddae17 100644 --- a/sway-types/src/eytzinger.rs +++ b/sway-types/src/eytzinger.rs @@ -20,8 +20,8 @@ fn eytzinger(a: &[T], b: &mut [T], mut i: usize, k: usize) -> usize { impl From<&[T]> for Eytzinger { fn from(input: &[T]) -> Self { let mut result = vec![T::default(); input.len() + 1]; - eytzinger(&input[..], &mut result[..], 0, 1); - let original = (0..input.len()).into_iter().collect::>(); + eytzinger(input, &mut result[..], 0, 1); + let original = (0..input.len()).collect::>(); let mut order = vec![input.len(); input.len() + 1]; eytzinger(&original[..], &mut order[..], 0, 1); Self(result, order) From 3a9a8e9b46c36a665d7311cbb95bc0a9412e1670 Mon Sep 17 00:00:00 2001 From: xunilrj Date: Tue, 22 Apr 2025 14:30:25 -0300 Subject: [PATCH 3/4] deal with empty arrays --- sway-types/src/eytzinger.rs | 37 +++++++++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/sway-types/src/eytzinger.rs b/sway-types/src/eytzinger.rs index dd179ddae17..94887272229 100644 --- a/sway-types/src/eytzinger.rs +++ b/sway-types/src/eytzinger.rs @@ -35,6 +35,13 @@ impl Eytzinger { #[inline] pub fn binary_search(&self, target: T) -> Result { + if self.1.len() == 1 { + return Err(Index { + index: 0, + original: 0, + }); + } + let mut idx = 1; while idx < self.0.len() { @@ -65,17 +72,43 @@ impl Eytzinger { #[test] fn ok_binary_search() { + let v = Eytzinger::from(vec![].as_slice()); + assert_eq!(v.binary_search(0).unwrap_err().original, 0); + + let v = Eytzinger::from(vec![1].as_slice()); + assert_eq!(v.binary_search(0).unwrap_err().original, 0); + assert_eq!(v.binary_search(1).unwrap().original, 0); + assert_eq!(v.binary_search(2).unwrap_err().original, 1); + let v = Eytzinger::from(vec![1, 5, 10].as_slice()); + assert_eq!(v.binary_search(0).unwrap_err().original, 0); assert_eq!(v.binary_search(1).unwrap().original, 0); + assert_eq!(v.binary_search(2).unwrap_err().original, 1); + assert_eq!(v.binary_search(3).unwrap_err().original, 1); + assert_eq!(v.binary_search(4).unwrap_err().original, 1); assert_eq!(v.binary_search(5).unwrap().original, 1); + assert_eq!(v.binary_search(6).unwrap_err().original, 2); + assert_eq!(v.binary_search(7).unwrap_err().original, 2); + assert_eq!(v.binary_search(8).unwrap_err().original, 2); + assert_eq!(v.binary_search(9).unwrap_err().original, 2); assert_eq!(v.binary_search(10).unwrap().original, 2); + assert_eq!(v.binary_search(11).unwrap_err().original, 3); + assert_eq!(v.binary_search(12).unwrap_err().original, 3); + let v = Eytzinger::from(vec![1, 5, 10, 13].as_slice()); assert_eq!(v.binary_search(0).unwrap_err().original, 0); + assert_eq!(v.binary_search(1).unwrap().original, 0); assert_eq!(v.binary_search(2).unwrap_err().original, 1); - + assert_eq!(v.binary_search(3).unwrap_err().original, 1); assert_eq!(v.binary_search(4).unwrap_err().original, 1); + assert_eq!(v.binary_search(5).unwrap().original, 1); assert_eq!(v.binary_search(6).unwrap_err().original, 2); - + assert_eq!(v.binary_search(7).unwrap_err().original, 2); + assert_eq!(v.binary_search(8).unwrap_err().original, 2); assert_eq!(v.binary_search(9).unwrap_err().original, 2); + assert_eq!(v.binary_search(10).unwrap().original, 2); assert_eq!(v.binary_search(11).unwrap_err().original, 3); + assert_eq!(v.binary_search(12).unwrap_err().original, 3); + assert_eq!(v.binary_search(13).unwrap().original, 3); + assert_eq!(v.binary_search(14).unwrap_err().original, 4); } From dc83d15ee8e575704279d7142fbe1e1aa8f72a93 Mon Sep 17 00:00:00 2001 From: xunilrj Date: Tue, 22 Apr 2025 17:54:58 -0300 Subject: [PATCH 4/4] easy way to search eytzinger previous item --- sway-types/src/eytzinger.rs | 137 +++++++++++++++++++----------------- sway-types/src/span.rs | 17 +++-- 2 files changed, 84 insertions(+), 70 deletions(-) diff --git a/sway-types/src/eytzinger.rs b/sway-types/src/eytzinger.rs index 94887272229..98052cc1343 100644 --- a/sway-types/src/eytzinger.rs +++ b/sway-types/src/eytzinger.rs @@ -1,13 +1,8 @@ +// (Value, Original Index, Previous Value) #[derive(Default)] -pub struct Eytzinger(Vec, Vec); +pub struct Eytzinger(pub Vec<(T, usize, Option)>); -#[derive(Debug)] -pub struct Index { - pub index: usize, - pub original: usize, -} - -fn eytzinger(a: &[T], b: &mut [T], mut i: usize, k: usize) -> usize { +fn eytzinger(a: &[(T, usize, Option)], b: &mut [(T, usize, Option)], mut i: usize, k: usize) -> usize { if k <= a.len() { i = eytzinger(a, b, i, 2 * k); b[k] = a[i]; @@ -19,27 +14,42 @@ fn eytzinger(a: &[T], b: &mut [T], mut i: usize, k: usize) -> usize { impl From<&[T]> for Eytzinger { fn from(input: &[T]) -> Self { - let mut result = vec![T::default(); input.len() + 1]; - eytzinger(input, &mut result[..], 0, 1); - let original = (0..input.len()).collect::>(); - let mut order = vec![input.len(); input.len() + 1]; - eytzinger(&original[..], &mut order[..], 0, 1); - Self(result, order) + let new_input = input.iter().copied() + .enumerate() + .zip(input.iter().enumerate().map(|x| { + if x.0 > 0 { + input.get(x.0 - 1).cloned() + } else { + None + } + })) + .map(|((idx, v), previous)| (v, idx, previous)) + .collect::>(); + + let mut result = vec![(T::default(), input.len(), input.last().copied()); input.len() + 1]; + eytzinger(&new_input[..], &mut result[..], 0, 1); + + Self(result) } } impl Eytzinger { pub fn get(&self, idx: usize) -> Option<&T> { - self.0.get(idx) + self.0.get(idx).map(|x| &x.0) + } + + pub fn get_original_index(&self, idx: usize) -> Option { + self.0.get(idx).map(|x| x.1) + } + + pub fn get_previous_value(&self, idx: usize) -> Option<&T> { + self.0.get(idx).and_then(|x| x.2.as_ref()) } #[inline] - pub fn binary_search(&self, target: T) -> Result { - if self.1.len() == 1 { - return Err(Index { - index: 0, - original: 0, - }); + pub fn binary_search(&self, target: T) -> Result { + if self.0.len() == 1 { + return Err(0); } let mut idx = 1; @@ -51,21 +61,16 @@ impl Eytzinger { let prefetch = self.0.as_ptr().wrapping_offset(2 * idx as isize); _mm_prefetch::<_MM_HINT_T0>(std::ptr::addr_of!(prefetch) as *const i8); } - let current = self.0[idx]; - idx = 2 * idx + usize::from(current < target); + let current = &self.0[idx]; + idx = 2 * idx + usize::from(current.0 < target); } idx >>= idx.trailing_ones() + 1; - let r = Index { - index: idx, - original: self.1[idx], - }; - - if self.0[idx] == target { - Ok(r) + if self.0[idx].0 == target { + Ok(idx) } else { - Err(r) + Err(idx) } } } @@ -73,42 +78,48 @@ impl Eytzinger { #[test] fn ok_binary_search() { let v = Eytzinger::from(vec![].as_slice()); - assert_eq!(v.binary_search(0).unwrap_err().original, 0); + assert_eq!(v.binary_search(0).unwrap_err(), 0); let v = Eytzinger::from(vec![1].as_slice()); - assert_eq!(v.binary_search(0).unwrap_err().original, 0); - assert_eq!(v.binary_search(1).unwrap().original, 0); - assert_eq!(v.binary_search(2).unwrap_err().original, 1); + assert_eq!(v.get_original_index(v.binary_search(0).unwrap_err()).unwrap(), 0); + assert_eq!(v.get_original_index(v.binary_search(1).unwrap()).unwrap(), 0); + assert_eq!(v.get_original_index(v.binary_search(2).unwrap_err()).unwrap(), 1); let v = Eytzinger::from(vec![1, 5, 10].as_slice()); - assert_eq!(v.binary_search(0).unwrap_err().original, 0); - assert_eq!(v.binary_search(1).unwrap().original, 0); - assert_eq!(v.binary_search(2).unwrap_err().original, 1); - assert_eq!(v.binary_search(3).unwrap_err().original, 1); - assert_eq!(v.binary_search(4).unwrap_err().original, 1); - assert_eq!(v.binary_search(5).unwrap().original, 1); - assert_eq!(v.binary_search(6).unwrap_err().original, 2); - assert_eq!(v.binary_search(7).unwrap_err().original, 2); - assert_eq!(v.binary_search(8).unwrap_err().original, 2); - assert_eq!(v.binary_search(9).unwrap_err().original, 2); - assert_eq!(v.binary_search(10).unwrap().original, 2); - assert_eq!(v.binary_search(11).unwrap_err().original, 3); - assert_eq!(v.binary_search(12).unwrap_err().original, 3); + assert_eq!(v.get_original_index(v.binary_search(0).unwrap_err()).unwrap(), 0); + assert_eq!(v.get_original_index(v.binary_search(1).unwrap()).unwrap(), 0); + + assert_eq!(v.get_original_index(v.binary_search(2).unwrap_err()).unwrap(), 1); + assert_eq!(*v.get_previous_value(v.binary_search(2).unwrap_err()).unwrap(), 1); + + assert_eq!(v.get_original_index(v.binary_search(3).unwrap_err()).unwrap(), 1); + assert_eq!(v.get_original_index(v.binary_search(4).unwrap_err()).unwrap(), 1); + assert_eq!(v.get_original_index(v.binary_search(5).unwrap()).unwrap(), 1); + assert_eq!(v.get_original_index(v.binary_search(6).unwrap_err()).unwrap(), 2); + + assert_eq!(v.get_original_index(v.binary_search(7).unwrap_err()).unwrap(), 2); + assert_eq!(*v.get_previous_value(v.binary_search(7).unwrap_err()).unwrap(), 5); + + assert_eq!(v.get_original_index(v.binary_search(8).unwrap_err()).unwrap(), 2); + assert_eq!(v.get_original_index(v.binary_search(9).unwrap_err()).unwrap(), 2); + assert_eq!(v.get_original_index(v.binary_search(10).unwrap()).unwrap(), 2); + assert_eq!(v.get_original_index(v.binary_search(11).unwrap_err()).unwrap(), 3); + assert_eq!(v.get_original_index(v.binary_search(12).unwrap_err()).unwrap(), 3); let v = Eytzinger::from(vec![1, 5, 10, 13].as_slice()); - assert_eq!(v.binary_search(0).unwrap_err().original, 0); - assert_eq!(v.binary_search(1).unwrap().original, 0); - assert_eq!(v.binary_search(2).unwrap_err().original, 1); - assert_eq!(v.binary_search(3).unwrap_err().original, 1); - assert_eq!(v.binary_search(4).unwrap_err().original, 1); - assert_eq!(v.binary_search(5).unwrap().original, 1); - assert_eq!(v.binary_search(6).unwrap_err().original, 2); - assert_eq!(v.binary_search(7).unwrap_err().original, 2); - assert_eq!(v.binary_search(8).unwrap_err().original, 2); - assert_eq!(v.binary_search(9).unwrap_err().original, 2); - assert_eq!(v.binary_search(10).unwrap().original, 2); - assert_eq!(v.binary_search(11).unwrap_err().original, 3); - assert_eq!(v.binary_search(12).unwrap_err().original, 3); - assert_eq!(v.binary_search(13).unwrap().original, 3); - assert_eq!(v.binary_search(14).unwrap_err().original, 4); + assert_eq!(v.get_original_index(v.binary_search(0).unwrap_err()).unwrap(), 0); + assert_eq!(v.get_original_index(v.binary_search(1).unwrap()).unwrap(), 0); + assert_eq!(v.get_original_index(v.binary_search(2).unwrap_err()).unwrap(), 1); + assert_eq!(v.get_original_index(v.binary_search(3).unwrap_err()).unwrap(), 1); + assert_eq!(v.get_original_index(v.binary_search(4).unwrap_err()).unwrap(), 1); + assert_eq!(v.get_original_index(v.binary_search(5).unwrap()).unwrap(), 1); + assert_eq!(v.get_original_index(v.binary_search(6).unwrap_err()).unwrap(), 2); + assert_eq!(v.get_original_index(v.binary_search(7).unwrap_err()).unwrap(), 2); + assert_eq!(v.get_original_index(v.binary_search(8).unwrap_err()).unwrap(), 2); + assert_eq!(v.get_original_index(v.binary_search(9).unwrap_err()).unwrap(), 2); + assert_eq!(v.get_original_index(v.binary_search(10).unwrap()).unwrap(), 2); + assert_eq!(v.get_original_index(v.binary_search(11).unwrap_err()).unwrap(), 3); + assert_eq!(v.get_original_index(v.binary_search(12).unwrap_err()).unwrap(), 3); + assert_eq!(v.get_original_index(v.binary_search(13).unwrap()).unwrap(), 3); + assert_eq!(v.get_original_index(v.binary_search(14).unwrap_err()).unwrap(), 4); } diff --git a/sway-types/src/span.rs b/sway-types/src/span.rs index 0758b8046ca..05faaf3be3b 100644 --- a/sway-types/src/span.rs +++ b/sway-types/src/span.rs @@ -1,4 +1,4 @@ -use crate::eytzinger::{Eytzinger, Index}; +use crate::eytzinger::Eytzinger; use crate::SourceId; use lazy_static::lazy_static; use serde::{Deserialize, Serialize}; @@ -74,13 +74,16 @@ impl Source { LineCol { line: 0, col: 0 } } else { let (line, line_start) = match self.line_starts.binary_search(position) { - Ok(Index { original: line, .. }) => (line, self.line_starts.get(line)), - Err(Index { original: 0, .. }) => (0, None), - Err(Index { original: line, .. }) => (line - 1, self.line_starts.get(line - 1)), + Ok(idx) => (self.line_starts.get_original_index(idx).unwrap(), self.line_starts.get(idx)), + Err(0) => (0, None), + Err(idx) => (self.line_starts.get_original_index(idx).unwrap() - 1, self.line_starts.get_previous_value(idx)), }; - line_start.map_or(LineCol { line: 0, col: 0 }, |line_start| LineCol { - line, - col: position - line_start, + + line_start.map_or(LineCol { line: 0, col: 0 }, |line_start| { + LineCol { + line, + col: position - line_start, + } }) } }