mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 18:45:06 +08:00
Take stop words into account
This commit is contained in:
parent
823da19745
commit
2d1727697d
@ -21,6 +21,7 @@ pub fn extract_docid_word_positions<R: io::Read>(
|
|||||||
mut obkv_documents: grenad::Reader<R>,
|
mut obkv_documents: grenad::Reader<R>,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
searchable_fields: &Option<HashSet<FieldId>>,
|
searchable_fields: &Option<HashSet<FieldId>>,
|
||||||
|
stop_words: Option<&fst::Set<&[u8]>>,
|
||||||
) -> Result<(RoaringBitmap, grenad::Reader<File>)> {
|
) -> Result<(RoaringBitmap, grenad::Reader<File>)> {
|
||||||
let max_memory = indexer.max_memory_by_thread();
|
let max_memory = indexer.max_memory_by_thread();
|
||||||
|
|
||||||
@ -35,6 +36,10 @@ pub fn extract_docid_word_positions<R: io::Read>(
|
|||||||
|
|
||||||
let mut key_buffer = Vec::new();
|
let mut key_buffer = Vec::new();
|
||||||
let mut field_buffer = String::new();
|
let mut field_buffer = String::new();
|
||||||
|
let mut config = AnalyzerConfig::default();
|
||||||
|
if let Some(stop_words) = stop_words {
|
||||||
|
config.stop_words(stop_words);
|
||||||
|
}
|
||||||
let analyzer = Analyzer::<Vec<u8>>::new(AnalyzerConfig::default());
|
let analyzer = Analyzer::<Vec<u8>>::new(AnalyzerConfig::default());
|
||||||
|
|
||||||
while let Some((key, value)) = obkv_documents.next()? {
|
while let Some((key, value)) = obkv_documents.next()? {
|
||||||
|
@ -37,6 +37,7 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
lmdb_writer_sx: Sender<TypedChunk>,
|
lmdb_writer_sx: Sender<TypedChunk>,
|
||||||
searchable_fields: Option<HashSet<FieldId>>,
|
searchable_fields: Option<HashSet<FieldId>>,
|
||||||
faceted_fields: HashSet<FieldId>,
|
faceted_fields: HashSet<FieldId>,
|
||||||
|
stop_words: Option<fst::Set<&[u8]>>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let result: Result<(Vec<_>, (Vec<_>, Vec<_>))> = obkv_chunks
|
let result: Result<(Vec<_>, (Vec<_>, Vec<_>))> = obkv_chunks
|
||||||
.par_bridge()
|
.par_bridge()
|
||||||
@ -54,6 +55,7 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
documents_chunk.clone(),
|
documents_chunk.clone(),
|
||||||
indexer.clone(),
|
indexer.clone(),
|
||||||
&searchable_fields,
|
&searchable_fields,
|
||||||
|
stop_words.as_ref(),
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
// send documents_ids to DB writer
|
// send documents_ids to DB writer
|
||||||
|
@ -231,6 +231,9 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
// get filterable fields for facet databases
|
// get filterable fields for facet databases
|
||||||
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
|
let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;
|
||||||
|
|
||||||
|
let stop_words = self.index.stop_words(self.wtxn)?;
|
||||||
|
// let stop_words = stop_words.as_ref();
|
||||||
|
|
||||||
// Run extraction pipeline in parallel.
|
// Run extraction pipeline in parallel.
|
||||||
pool.install(|| {
|
pool.install(|| {
|
||||||
let params = GrenadParameters {
|
let params = GrenadParameters {
|
||||||
@ -255,6 +258,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
lmdb_writer_sx,
|
lmdb_writer_sx,
|
||||||
searchable_fields,
|
searchable_fields,
|
||||||
faceted_fields,
|
faceted_fields,
|
||||||
|
stop_words,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
});
|
});
|
||||||
|
Loading…
Reference in New Issue
Block a user