Skip to content

eytzinger binary search for span lines #7097

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 125 additions & 0 deletions sway-types/src/eytzinger.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/// A sequence of values stored in Eytzinger order: a 1-based implicit binary
/// tree in which the children of slot `k` live in slots `2 * k` and `2 * k + 1`.
///
/// Every entry is a `(value, original index, previous value)` tuple:
///
/// * `value` - the element itself;
/// * `original index` - the position the element had in the input slice;
/// * `previous value` - the element that preceded it in the input slice, or
///   `None` for the first element.
///
/// When built through the `From<&[T]>` conversion, slot `0` is not part of the
/// tree. It holds a sentinel `(T::default(), input.len(), input.last())` that
/// stands for "one past the last element".
///
/// The `Default` value wraps an empty vector and therefore has no sentinel.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Eytzinger<T>(pub Vec<(T, usize, Option<T>)>);

/// Copies the entries of `a` into `b` in Eytzinger order by walking the
/// implicit tree rooted at slot `k` in order.
///
/// * `a` - source entries, consumed front to back;
/// * `b` - destination; slots `1..=a.len()` are written, so it must hold at
///   least `a.len() + 1` entries (slot `0` is left untouched);
/// * `i` - index of the next entry of `a` to place;
/// * `k` - 1-based slot of the subtree root being filled.
///
/// Returns the index of the next entry of `a` that has not been placed yet.
fn eytzinger<T: Copy>(
    a: &[(T, usize, Option<T>)],
    b: &mut [(T, usize, Option<T>)],
    i: usize,
    k: usize,
) -> usize {
    // Slots past `a.len()` do not exist in the tree: nothing to place here.
    if k > a.len() {
        return i;
    }

    // Left subtree first, then this node, then the right subtree.
    let next = eytzinger(a, b, i, 2 * k);
    b[k] = a[next];
    eytzinger(a, b, next + 1, 2 * k + 1)
}

impl<T: Copy + Default> From<&[T]> for Eytzinger<T> {
fn from(input: &[T]) -> Self {
let new_input = input.iter().copied()
.enumerate()
.zip(input.iter().enumerate().map(|x| {
if x.0 > 0 {
input.get(x.0 - 1).cloned()
} else {
None
}
}))
.map(|((idx, v), previous)| (v, idx, previous))
.collect::<Vec<_>>();

let mut result = vec![(T::default(), input.len(), input.last().copied()); input.len() + 1];
eytzinger(&new_input[..], &mut result[..], 0, 1);

Self(result)
}
}

impl<T: Copy + Ord> Eytzinger<T> {
    /// Returns the value stored in layout slot `idx`, or `None` if `idx` is
    /// out of range.
    ///
    /// `idx` is a slot of the Eytzinger layout (as returned by
    /// [`binary_search`](Self::binary_search)), not a position in the
    /// original input.
    pub fn get(&self, idx: usize) -> Option<&T> {
        self.0.get(idx).map(|x| &x.0)
    }

    /// Returns the position that the entry in layout slot `idx` had in the
    /// original input, or `None` if `idx` is out of range.
    pub fn get_original_index(&self, idx: usize) -> Option<usize> {
        self.0.get(idx).map(|x| x.1)
    }

    /// Returns the value that preceded the entry in layout slot `idx` in the
    /// original input.
    ///
    /// Yields `None` if `idx` is out of range or the entry has no predecessor.
    pub fn get_previous_value(&self, idx: usize) -> Option<&T> {
        self.0.get(idx).and_then(|x| x.2.as_ref())
    }

    /// Searches the layout for `target`.
    ///
    /// The returned number is always a slot of the Eytzinger layout; pass it
    /// to [`get`](Self::get), [`get_original_index`](Self::get_original_index)
    /// or [`get_previous_value`](Self::get_previous_value).
    ///
    /// * `Ok(slot)` - the entry in `slot` equals `target`.
    /// * `Err(slot)` with `slot != 0` - `target` is absent and `slot` holds
    ///   the smallest entry greater than `target`.
    /// * `Err(0)` - `target` is greater than every entry, or there are no
    ///   entries at all. Slot `0` is the sentinel and is never reported as a
    ///   match.
    #[inline]
    pub fn binary_search(&self, target: T) -> Result<usize, usize> {
        let len = self.0.len();

        // `len == 0` is the `Default` value (no sentinel), `len == 1` is a
        // sentinel with no real entries: either way there is nothing to find.
        if len <= 1 {
            return Err(0);
        }

        let mut idx = 1;

        while idx < len {
            #[cfg(target_arch = "x86_64")]
            {
                use std::arch::x86_64::{_mm_prefetch, _MM_HINT_T0};

                // Both children of `idx` are adjacent, starting at slot `2 * idx`.
                let children = self.0.as_ptr().wrapping_add(2 * idx);
                // SAFETY: a prefetch is only a cache hint; it never
                // dereferences the pointer architecturally and does not fault
                // on addresses outside the allocation. `wrapping_add` keeps
                // the pointer arithmetic itself well defined.
                unsafe { _mm_prefetch::<_MM_HINT_T0>(children.cast::<i8>()) };
            }

            // Descend right when the current entry is too small, left otherwise.
            idx = 2 * idx + usize::from(self.0[idx].0 < target);
        }

        // Undo the trailing right turns plus the last left turn: this lands on
        // the last node where the search went left (the lower bound), or on 0
        // when the search only ever went right.
        idx >>= idx.trailing_ones() + 1;

        // Slot 0 is the sentinel: its filler value must not be compared with
        // `target`, otherwise `target == T::default()` would look like a hit.
        if idx != 0 && self.0[idx].0 == target {
            Ok(idx)
        } else {
            Err(idx)
        }
    }
}

#[test]
fn ok_binary_search() {
    // One lookup expectation:
    // (target, whether it is an exact hit, original index of the reported slot).
    type Lookup = (i32, bool, usize);

    // Builds the layout for `input` and verifies every expectation in `lookups`.
    fn check(input: &[i32], lookups: &[Lookup]) {
        let tree: Eytzinger<i32> = Eytzinger::from(input);
        for &(target, hit, original) in lookups {
            let outcome = tree.binary_search(target);
            let slot = if hit {
                outcome.unwrap()
            } else {
                outcome.unwrap_err()
            };
            assert_eq!(tree.get_original_index(slot).unwrap(), original);
        }
    }

    // No values at all: every lookup misses and reports slot 0.
    let no_values: &[i32] = &[];
    let tree: Eytzinger<i32> = Eytzinger::from(no_values);
    assert_eq!(tree.binary_search(0).unwrap_err(), 0);

    check(&[1], &[(0, false, 0), (1, true, 0), (2, false, 1)]);

    check(
        &[1, 5, 10],
        &[
            (0, false, 0),
            (1, true, 0),
            (2, false, 1),
            (3, false, 1),
            (4, false, 1),
            (5, true, 1),
            (6, false, 2),
            (7, false, 2),
            (8, false, 2),
            (9, false, 2),
            (10, true, 2),
            (11, false, 3),
            (12, false, 3),
        ],
    );

    // A miss also exposes the value just before the insertion point.
    let three: &[i32] = &[1, 5, 10];
    let tree: Eytzinger<i32> = Eytzinger::from(three);
    assert_eq!(*tree.get_previous_value(tree.binary_search(2).unwrap_err()).unwrap(), 1);
    assert_eq!(*tree.get_previous_value(tree.binary_search(7).unwrap_err()).unwrap(), 5);

    check(
        &[1, 5, 10, 13],
        &[
            (0, false, 0),
            (1, true, 0),
            (2, false, 1),
            (3, false, 1),
            (4, false, 1),
            (5, true, 1),
            (6, false, 2),
            (7, false, 2),
            (8, false, 2),
            (9, false, 2),
            (10, true, 2),
            (11, false, 3),
            (12, false, 3),
            (13, true, 3),
            (14, false, 4),
        ],
    );
}
2 changes: 2 additions & 0 deletions sway-types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ pub mod style;

pub mod ast;

mod eytzinger;

pub type Id = [u8; Bytes32::LEN];
pub type Contract = [u8; ContractId::LEN];

Expand Down
24 changes: 14 additions & 10 deletions sway-types/src/span.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::eytzinger::Eytzinger;
use crate::SourceId;
use lazy_static::lazy_static;
use serde::{Deserialize, Serialize};
Expand All @@ -12,7 +13,7 @@ lazy_static! {
static ref DUMMY_SPAN: Span = Span::new(
Source {
text: Arc::from(""),
line_starts: Arc::new(vec![])
line_starts: Arc::new(<_>::default())
},
0,
0,
Expand All @@ -28,7 +29,7 @@ lazy_static! {
pub struct Source {
pub text: Arc<str>,
#[serde(skip)]
pub line_starts: Arc<Vec<usize>>,
pub line_starts: Arc<Eytzinger<usize>>,
}

impl serde::Serialize for Source {
Expand All @@ -49,15 +50,15 @@ impl<'de> serde::Deserialize<'de> for Source {
}

impl Source {
fn calc_line_starts(text: &str) -> Arc<Vec<usize>> {
fn calc_line_starts(text: &str) -> Arc<Eytzinger<usize>> {
let mut lines_starts = Vec::with_capacity(text.len() / 80);
lines_starts.push(0);
for (idx, c) in text.char_indices() {
if c == '\n' {
lines_starts.push(idx + c.len_utf8())
}
}
Arc::new(lines_starts)
Arc::new(lines_starts.as_slice().into())
}

pub fn new(text: &str) -> Self {
Expand All @@ -72,14 +73,17 @@ impl Source {
if position > self.text.len() || self.text.is_empty() {
LineCol { line: 0, col: 0 }
} else {
let (line, line_start) = match self.line_starts.binary_search(&position) {
Ok(line) => (line, self.line_starts.get(line)),
let (line, line_start) = match self.line_starts.binary_search(position) {
Ok(idx) => (self.line_starts.get_original_index(idx).unwrap(), self.line_starts.get(idx)),
Err(0) => (0, None),
Err(line) => (line - 1, self.line_starts.get(line - 1)),
Err(idx) => (self.line_starts.get_original_index(idx).unwrap() - 1, self.line_starts.get_previous_value(idx)),
};
line_start.map_or(LineCol { line: 0, col: 0 }, |line_start| LineCol {
line,
col: position - line_start,

line_start.map_or(LineCol { line: 0, col: 0 }, |line_start| {
LineCol {
line,
col: position - line_start,
}
})
}
}
Expand Down
Loading