1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
use directory::{ReadOnlySource, SourceRead};
use termdict::{TermDictionary, TermDictionaryImpl};
use postings::{BlockSegmentPostings, SegmentPostings};
use postings::TermInfo;
use schema::IndexRecordOption;
use schema::Term;
use std::cmp;
use fastfield::DeleteBitSet;
use schema::Schema;
use compression::CompressedIntStream;
pub struct InvertedIndexReader {
termdict: TermDictionaryImpl,
postings_source: ReadOnlySource,
positions_source: ReadOnlySource,
delete_bitset: DeleteBitSet,
schema: Schema,
}
impl InvertedIndexReader {
pub(crate) fn new(
termdict_source: ReadOnlySource,
postings_source: ReadOnlySource,
positions_source: ReadOnlySource,
delete_bitset: DeleteBitSet,
schema: Schema,
) -> InvertedIndexReader {
InvertedIndexReader {
termdict: TermDictionaryImpl::from_source(termdict_source),
postings_source,
positions_source,
delete_bitset,
schema,
}
}
pub fn get_term_info(&self, term: &Term) -> Option<TermInfo> {
self.termdict.get(term.value_bytes())
}
pub fn terms(&self) -> &TermDictionaryImpl {
&self.termdict
}
pub fn reset_block_postings_from_terminfo(
&self,
term_info: &TermInfo,
block_postings: &mut BlockSegmentPostings,
) {
let offset = term_info.postings_offset as usize;
let end_source = self.postings_source.len();
let postings_slice = self.postings_source.slice(offset, end_source);
let postings_reader = SourceRead::from(postings_slice);
block_postings.reset(term_info.doc_freq as usize, postings_reader);
}
pub fn read_block_postings_from_terminfo(
&self,
term_info: &TermInfo,
option: IndexRecordOption,
) -> BlockSegmentPostings {
let offset = term_info.postings_offset as usize;
let postings_data = self.postings_source.slice_from(offset);
let has_freq = option.has_freq();
BlockSegmentPostings::from_data(
term_info.doc_freq as usize,
SourceRead::from(postings_data),
has_freq,
)
}
pub fn read_postings_from_terminfo(
&self,
term_info: &TermInfo,
option: IndexRecordOption,
) -> SegmentPostings {
let block_postings = self.read_block_postings_from_terminfo(term_info, option);
let delete_bitset = self.delete_bitset.clone();
let position_stream = {
if option.has_positions() {
let position_offset = term_info.positions_offset;
let positions_source = self.positions_source.slice_from(position_offset as usize);
let mut stream = CompressedIntStream::wrap(positions_source);
stream.skip(term_info.positions_inner_offset as usize);
Some(stream)
} else {
None
}
};
SegmentPostings::from_block_postings(block_postings, delete_bitset, position_stream)
}
pub fn read_postings(&self, term: &Term, option: IndexRecordOption) -> Option<SegmentPostings> {
let field = term.field();
let field_entry = self.schema.get_field_entry(field);
let term_info = get!(self.get_term_info(term));
let maximum_option = get!(field_entry.field_type().get_index_record_option());
let best_effort_option = cmp::min(maximum_option, option);
Some(self.read_postings_from_terminfo(&term_info, best_effort_option))
}
pub fn doc_freq(&self, term: &Term) -> u32 {
self.get_term_info(term)
.map(|term_info| term_info.doc_freq)
.unwrap_or(0u32)
}
}