1
1
//! Wavelet Matrix representation of symbol sequence.
2
2
3
- use std:: { iter:: FusedIterator , io } ;
3
+ use std:: { io , iter:: FusedIterator , ops :: { Deref , DerefMut } } ;
4
4
5
5
use binout:: { AsIs , Serializer } ;
6
6
use bitm:: { BitAccess , BitVec , RankSelect101111 , CombinedSampling , Rank , Select , Select0 , SelectForRank101111 , Select0ForRank101111 , bits_to_store, ceiling_div} ;
@@ -16,24 +16,24 @@ use dyn_size_of::GetSize;
16
16
/// - `upper_index` is index of `upper_bit`,
17
17
/// - `lower_zero_index` is index of `lower_bits` to insert next item with 0 msb,
18
18
/// - `lower_one_index` is index of `lower_bits` to insert next item with 1 msb,
19
- struct LevelBuilder {
20
- upper_bit : Box < [ u64 ] > ,
19
+ struct LevelBuilder < BV > {
20
+ upper_bit : BV ,
21
21
upper_index : usize ,
22
- lower_bits : Box < [ u64 ] > ,
22
+ lower_bits : BV ,
23
23
lower_zero_index : usize ,
24
24
lower_one_index : usize ,
25
25
upper_bit_mask : u64 ,
26
26
bits_per_item : u8
27
27
}
28
28
29
- impl LevelBuilder {
29
+ impl < BV : DerefMut < Target = [ u64 ] > + BitVec > LevelBuilder < BV > {
30
30
/// Construct level builder for given level `total_len` in bits, `number_of_zeros` among the most significant bits
31
31
/// and index of most significant bit (`index_of_bit_to_extract`).
32
32
fn new ( number_of_zeros : usize , total_len : usize , index_of_bit_to_extract : u8 ) -> Self {
33
33
Self {
34
- upper_bit : Box :: with_zeroed_bits ( total_len + 1 ) , // we add one bit to ensure that rank(len) will work
34
+ upper_bit : BV :: with_zeroed_bits ( total_len + 1 ) , // we add one bit to ensure that rank(len) will work
35
35
upper_index : 0 ,
36
- lower_bits : Box :: with_zeroed_bits ( total_len * index_of_bit_to_extract as usize + 1 ) , // we add one bit to ensure that rank(len) will work
36
+ lower_bits : BV :: with_zeroed_bits ( total_len * index_of_bit_to_extract as usize + 1 ) , // we add one bit to ensure that rank(len) will work
37
37
lower_zero_index : 0 ,
38
38
lower_one_index : number_of_zeros * index_of_bit_to_extract as usize ,
39
39
upper_bit_mask : 1 <<index_of_bit_to_extract,
@@ -52,36 +52,36 @@ impl LevelBuilder {
52
52
}
53
53
54
54
/// Level of the wavelet matrix.
55
- struct Level < S = CombinedSampling > {
55
+ struct Level < S = CombinedSampling , BV = Box < [ u64 ] > > {
56
56
/// Level content as bit vector with support for rank and select queries.
57
- content : RankSelect101111 :: < S , S > ,
57
+ content : RankSelect101111 :: < S , S , BV > ,
58
58
59
59
/// Number of zero bits in content.
60
60
number_of_zeros : usize
61
61
}
62
62
63
- impl < S > GetSize for Level < S > where RankSelect101111 < S , S > : GetSize {
63
+ impl < S , BV > GetSize for Level < S , BV > where RankSelect101111 < S , S , BV > : GetSize {
64
64
fn size_bytes_dyn ( & self ) -> usize { self . content . size_bytes_dyn ( ) }
65
65
const USES_DYN_MEM : bool = true ;
66
66
}
67
67
68
- impl < S > Level < S > where RankSelect101111 < S , S > : From < Box < [ u64 ] > > {
68
+ impl < S , BV > Level < S , BV > where RankSelect101111 < S , S , BV > : From < BV > {
69
69
/// Constructs a level with the given `content`, which contains the given number of zero bits.
70
- fn new ( content : Box :: < [ u64 ] > , number_of_zeros : usize ) -> Self {
70
+ # [ inline ] fn new ( content : BV , number_of_zeros : usize ) -> Self {
71
71
//let (bits, number_of_ones) = ArrayWithRank::build(level);
72
72
//Self { bits, zeros: level_len - number_of_ones }
73
73
Self { content : content. into ( ) , number_of_zeros }
74
74
}
75
75
}
76
76
77
- impl < S > Level < S > where S : SelectForRank101111 {
78
- fn try_select ( & self , rank : usize , len : usize ) -> Option < usize > {
77
+ impl < S , BV > Level < S , BV > where S : SelectForRank101111 , BV : Deref < Target = [ u64 ] > {
78
+ # [ inline ] fn try_select ( & self , rank : usize , len : usize ) -> Option < usize > {
79
79
self . content . try_select ( rank) . filter ( |i| * i < len)
80
80
}
81
81
}
82
82
83
- impl < S > Level < S > where S : Select0ForRank101111 {
84
- fn try_select0 ( & self , rank : usize , len : usize ) -> Option < usize > {
83
+ impl < S , BV > Level < S , BV > where S : Select0ForRank101111 , BV : Deref < Target = [ u64 ] > {
84
+ # [ inline ] fn try_select0 ( & self , rank : usize , len : usize ) -> Option < usize > {
85
85
self . content . try_select0 ( rank) . filter ( |i| * i < len)
86
86
}
87
87
}
@@ -93,7 +93,7 @@ impl<S> Level<S> where S: Select0ForRank101111 {
93
93
/// - *select* - see [`Self::select`],
94
94
/// - *rank* - see [`Self::rank`].
95
95
///
96
- /// By default [`bitm::CombinedSampling`] is used as a select strategy for internal bit vectors
96
+ /// By default [`bitm::CombinedSampling`] is used as a select strategy `S` for internal bit vectors
97
97
/// (see [`bitm::RankSelect101111`]), but this can be changed to [`bitm::BinaryRankSearch`]
98
98
/// to save a bit of space (about 0.78%) at the cost of slower *select* queries.
99
99
///
@@ -107,12 +107,12 @@ impl<S> Level<S> where S: Select0ForRank101111 {
107
107
/// Additionally, our implementation draws some ideas (like elimination of recursion)
108
108
/// from the Go implementation by Daisuke Okanohara,
109
109
/// available at <https://github.com/hillbig/waveletTree/>.
110
- pub struct Sequence < S = CombinedSampling > {
111
- levels : Box < [ Level < S > ] > ,
110
+ pub struct Sequence < S = CombinedSampling , BV = Box < [ u64 ] > > {
111
+ levels : Box < [ Level < S , BV > ] > ,
112
112
len : usize
113
113
}
114
114
115
- impl < S > Sequence < S > {
115
+ impl < S , BV > Sequence < S , BV > {
116
116
/// Returns number of stored items.
117
117
#[ inline] pub fn len ( & self ) -> usize { self . len }
118
118
@@ -151,7 +151,7 @@ impl Sequence<CombinedSampling> {
151
151
}
152
152
}
153
153
154
- impl < S > Sequence < S > where S : SelectForRank101111 +Select0ForRank101111 {
154
+ impl < S , BV > Sequence < S , BV > where S : SelectForRank101111 +Select0ForRank101111 , BV : BitVec + DerefMut < Target = [ u64 ] > {
155
155
156
156
/// Constructs [`Sequence`] with `content_len` `bits_per_item`-bit
157
157
/// items exposed by iterator returned by `content` function,
@@ -162,7 +162,7 @@ impl<S> Sequence<S> where S: SelectForRank101111+Select0ForRank101111 {
162
162
assert ! ( bits_per_item > 0 && bits_per_item <= 63 ) ;
163
163
let mut levels = Vec :: with_capacity ( bits_per_item as usize ) ;
164
164
if bits_per_item == 1 {
165
- let mut level = Box :: with_zeroed_bits ( content_len+1 ) ;
165
+ let mut level = BV :: with_zeroed_bits ( content_len+1 ) ;
166
166
for ( i, e) in content ( ) . into_iter ( ) . enumerate ( ) {
167
167
level. init_bit ( i, e != 0 ) ;
168
168
}
@@ -223,6 +223,9 @@ impl<S> Sequence<S> where S: SelectForRank101111+Select0ForRank101111 {
223
223
|| { ( 0 ..content_len) . map ( |index| content. get_fragment ( index, bits_per_item) ) } ,
224
224
content_len, bits_per_item)
225
225
}
226
+ }
227
+
228
+ impl < S , BV > Sequence < S , BV > where S : SelectForRank101111 +Select0ForRank101111 , BV : BitVec +Deref < Target = [ u64 ] > {
226
229
227
230
/// Returns an item with given `index`. The result is undefined if `index` is out of bounds.
228
231
pub unsafe fn get_unchecked ( & self , mut index : usize ) -> u64 {
@@ -319,6 +322,9 @@ impl<S> Sequence<S> where S: SelectForRank101111+Select0ForRank101111 {
319
322
pub fn iter ( & self ) -> impl Iterator < Item = u64 > + DoubleEndedIterator + FusedIterator + ' _ {
320
323
( 0 ..self . len ( ) ) . map ( |i| unsafe { self . get_unchecked ( i) } )
321
324
}
325
+ }
326
+
327
+ impl < S , BV > Sequence < S , BV > where S : SelectForRank101111 +Select0ForRank101111 , BV : BitVec +Deref < Target = [ u64 ] > +FromIterator < u64 > {
322
328
323
329
/// Reads `self` from the `input`.
324
330
///
@@ -329,19 +335,20 @@ impl<S> Sequence<S> where S: SelectForRank101111+Select0ForRank101111 {
329
335
let mut levels = Vec :: with_capacity ( bits_per_item as usize ) ;
330
336
for _ in 0 ..bits_per_item {
331
337
let number_of_zeros = AsIs :: read ( input) ?;
332
- let content = AsIs :: read_n ( input, ceiling_div ( len+1 , 64 ) ) ?;
333
- levels. push ( Level :: < S > :: new ( content, number_of_zeros) )
338
+ //let content = AsIs::read_n(input, ceiling_div(len+1, 64))?;
339
+ let content = <AsIs as Serializer < u64 > >:: read_n_iter ( input, ceiling_div ( len+1 , 64 ) ) . collect :: <io:: Result :: < BV > >( ) ?;
340
+ levels. push ( Level :: < S , BV > :: new ( content, number_of_zeros) )
334
341
}
335
342
Ok ( Self { levels : levels. into_boxed_slice ( ) , len } )
336
343
}
337
344
}
338
345
339
- impl < S > GetSize for Sequence < S > where RankSelect101111 < S , S > : GetSize {
346
+ impl < S , BV > GetSize for Sequence < S , BV > where RankSelect101111 < S , S , BV > : GetSize {
340
347
fn size_bytes_dyn ( & self ) -> usize { self . levels . size_bytes_dyn ( ) }
341
348
const USES_DYN_MEM : bool = true ;
342
349
}
343
350
344
- impl < S > Sequence < S > {
351
+ impl < S , BV > Sequence < S , BV > where BV : Deref < Target = [ u64 ] > {
345
352
/// Returns number of bytes which `write` will write.
346
353
pub fn write_bytes ( & self ) -> usize {
347
354
AsIs :: size ( self . len ) +
0 commit comments