1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
use bit_set::BitSet;
use directory::WritePtr;
use std::io::Write;
use std::io;
use directory::ReadOnlySource;
use DocId;
use common::HasLen;
pub fn write_delete_bitset(delete_bitset: &BitSet, writer: &mut WritePtr) -> io::Result<()> {
let max_doc = delete_bitset.capacity();
let mut byte = 0u8;
let mut shift = 0u8;
for doc in 0..max_doc {
if delete_bitset.contains(doc) {
byte |= 1 << shift;
}
if shift == 7 {
writer.write_all(&[byte])?;
shift = 0;
byte = 0;
} else {
shift += 1;
}
}
if max_doc % 8 > 0 {
writer.write_all(&[byte])?;
}
writer.flush()
}
#[derive(Clone)]
pub struct DeleteBitSet {
data: ReadOnlySource,
len: usize,
}
impl DeleteBitSet {
pub fn open(data: ReadOnlySource) -> DeleteBitSet {
let num_deleted: usize = data.as_slice()
.iter()
.map(|b| b.count_ones() as usize)
.sum();
DeleteBitSet {
data,
len: num_deleted,
}
}
pub fn empty() -> DeleteBitSet {
DeleteBitSet {
data: ReadOnlySource::empty(),
len: 0,
}
}
pub fn has_deletes(&self) -> bool {
self.len() > 0
}
#[inline]
pub fn is_deleted(&self, doc: DocId) -> bool {
if self.len == 0 {
false
} else {
let byte_offset = doc / 8u32;
let b: u8 = (*self.data)[byte_offset as usize];
let shift = (doc & 7u32) as u8;
b & (1u8 << shift) != 0
}
}
}
impl HasLen for DeleteBitSet {
fn len(&self) -> usize {
self.len
}
}
#[cfg(test)]
mod tests {
use std::path::PathBuf;
use bit_set::BitSet;
use directory::*;
use super::*;
fn test_delete_bitset_helper(bitset: &BitSet) {
let test_path = PathBuf::from("test");
let mut directory = RAMDirectory::create();
{
let mut writer = directory.open_write(&*test_path).unwrap();
write_delete_bitset(bitset, &mut writer).unwrap();
}
{
let source = directory.open_read(&test_path).unwrap();
let delete_bitset = DeleteBitSet::open(source);
let n = bitset.capacity();
for doc in 0..n {
assert_eq!(bitset.contains(doc), delete_bitset.is_deleted(doc as DocId));
}
assert_eq!(delete_bitset.len(), bitset.len());
}
}
#[test]
fn test_delete_bitset() {
{
let mut bitset = BitSet::with_capacity(10);
bitset.insert(1);
bitset.insert(9);
test_delete_bitset_helper(&bitset);
}
{
let mut bitset = BitSet::with_capacity(8);
bitset.insert(1);
bitset.insert(2);
bitset.insert(3);
bitset.insert(5);
bitset.insert(7);
test_delete_bitset_helper(&bitset);
}
}
}