mirror of https://github.com/meilisearch/meilisearch.git (synced 2024-11-23 02:27:40 +08:00)
fix suggestions
This commit is contained in:
parent 8b149c9aa3
commit 8e64a24d19
@@ -395,7 +395,6 @@ mod tests {
             let mut writer = db.main_write_txn().unwrap();

             let word = normalize_str(word);
-            println!("synonym: {}", word);

             let alternatives = self
                 .index
@@ -1261,7 +1260,6 @@ mod tests {

         let builder = store.query_builder();
         let SortResult { documents, .. } = builder.query(&reader, Some("telephone"), 0..20).unwrap();
-        println!("documents: {:#?}", documents);
         let mut iter = documents.into_iter();

         assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
@@ -1297,7 +1295,6 @@ mod tests {
         let builder = store.query_builder();
         let SortResult { documents, .. } = builder.query(&reader, Some("télephone"), 0..20).unwrap();
         let mut iter = documents.into_iter();
-        // this test was in the opposite order, I am not sure why...
         assert_matches!(iter.next(), Some(Document { id: DocumentId(0), matches, .. }) => {
             let mut iter = matches.into_iter();
             assert_matches!(iter.next(), Some(SimpleMatch { query_index: 0, .. }));
@@ -14,10 +14,7 @@ const WORD_LENGTH_LIMIT: usize = 80;

 type Word = Vec<u8>; // TODO make it be a SmallVec

-pub struct RawIndexer<'a, A>
-where
-    A: AsRef<[u8]>
-{
+pub struct RawIndexer<'a, A> {
     word_limit: usize, // the maximum number of indexed words
     words_doc_indexes: BTreeMap<Word, Vec<DocIndex>>,
     docs_words: HashMap<DocumentId, Vec<Word>>,
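Note on the hunk above: the `A: AsRef<[u8]>` bound disappears from the struct definition itself; judging from the `where` context line of the following hunk, the bound presumably stays on the impl blocks that actually need it. A minimal sketch of that pattern, with illustrative names rather than the real `RawIndexer` fields:

use std::collections::BTreeMap;

// hypothetical stand-in for RawIndexer: no trait bound on the struct itself
pub struct Indexer<A> {
    word_limit: usize,
    words_doc_indexes: BTreeMap<Vec<u8>, Vec<usize>>,
    stop_words: A,
}

// the bound is stated only where it is used
impl<A> Indexer<A>
where
    A: AsRef<[u8]>,
{
    pub fn new(stop_words: A, word_limit: usize) -> Self {
        Indexer {
            word_limit,
            words_doc_indexes: BTreeMap::new(),
            stop_words,
        }
    }
}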
@@ -73,25 +70,24 @@ where
         number_of_words
     }

-    pub fn index_text_seq<'s, I>(&mut self, id: DocumentId, indexed_pos: IndexedPos, iter: I)
+    pub fn index_text_seq<'s, I>(&mut self, id: DocumentId, indexed_pos: IndexedPos, text_iter: I)
     where
         I: IntoIterator<Item = &'s str>,
     {
         let mut byte_offset = 0;
         let mut word_offset = 0;

-        for s in iter.into_iter() {
+        for text in text_iter.into_iter() {
             let current_byte_offset = byte_offset;
             let current_word_offset = word_offset;

-            let analyzed_text = self.analyzer.analyze(s);
+            let analyzed_text = self.analyzer.analyze(text);
             let tokens = process_tokens(analyzed_text.tokens())
                 .map(|(i, mut t)| {
                     t.byte_start = t.byte_start + current_byte_offset;
                     t.byte_end = t.byte_end + current_byte_offset;
-                    (i, t)
+                    (i + current_word_offset, t)
                 })
-                .map(|(i, t)| (i + current_word_offset, t))
                 .enumerate();

             for (token_pos, (word_pos, token)) in tokens {
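Aside from renaming `iter`/`s` to `text_iter`/`text`, the change in `index_text_seq` folds the word-offset shift into the closure that already adjusts byte offsets, removing the extra chained `.map`. A self-contained sketch of that refactor, using plain tuples in place of the analyzer's `Token` type (names here are illustrative):

// (local_index, (byte_start, byte_end)) stands in for an analyzed token
fn shift_tokens(
    tokens: Vec<(usize, (usize, usize))>,
    current_word_offset: usize,
    current_byte_offset: usize,
) -> Vec<(usize, (usize, usize))> {
    tokens
        .into_iter()
        // one closure applies both the byte shift and the word shift,
        // mirroring `(i + current_word_offset, t)` in the patched code
        .map(|(i, (start, end))| {
            (
                i + current_word_offset,
                (start + current_byte_offset, end + current_byte_offset),
            )
        })
        .collect()
}

fn main() {
    let shifted = shift_tokens(vec![(0, (0, 3)), (1, (4, 7))], 5, 100);
    assert_eq!(shifted, vec![(5, (100, 103)), (6, (104, 107))]);
}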
@@ -143,21 +139,22 @@ where

 fn process_tokens<'a>(tokens: impl Iterator<Item = Token<'a>>) -> impl Iterator<Item = (usize, Token<'a>)> {
     tokens
-        .scan((0, None), |(offset, sepkind), token| {
+        .scan((0, None), |(offset, prev_kind), token| {
             match token.kind {
                 TokenKind::Word | TokenKind::StopWord | TokenKind::Any => {
-                    *offset += match *sepkind {
+                    *offset += match *prev_kind {
                         Some(TokenKind::Separator(SeparatorKind::Hard)) => 8,
                         Some(_) => 1,
                         None => 0,
                     };
-                    *sepkind = Some(token.kind)
+                    *prev_kind = Some(token.kind)
                 }
                 TokenKind::Separator(SeparatorKind::Hard) => {
-                    *sepkind = Some(token.kind);
+                    *prev_kind = Some(token.kind);
                 }
-                TokenKind::Separator(SeparatorKind::Soft) if sepkind.is_none() => {
-                    *sepkind = Some(token.kind);
+                TokenKind::Separator(SeparatorKind::Soft)
+                    if *prev_kind != Some(TokenKind::Separator(SeparatorKind::Hard)) => {
+                    *prev_kind = Some(token.kind);
                 }
                 _ => (),
             }
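In `process_tokens`, `sepkind` becomes `prev_kind` and the soft-separator guard changes from `sepkind.is_none()` to `*prev_kind != Some(TokenKind::Separator(SeparatorKind::Hard))`: a soft separator is now recorded after words too, while still never overwriting a pending hard separator, which is the one that opens the larger 8-position gap. A standalone sketch of that bookkeeping with a toy enum instead of the real `TokenKind`:

#[derive(Clone, Copy, PartialEq)]
enum Kind { Word, Soft, Hard }

// returns the word position assigned to each Word token, as in the scan above:
// +8 after a hard separator, +1 after any other preceding token, 0 at the start
fn positions(kinds: &[Kind]) -> Vec<usize> {
    let mut prev: Option<Kind> = None;
    let mut offset = 0;
    let mut out = Vec::new();
    for &kind in kinds {
        match kind {
            Kind::Word => {
                offset += match prev {
                    Some(Kind::Hard) => 8,
                    Some(_) => 1,
                    None => 0,
                };
                prev = Some(Kind::Word);
                out.push(offset);
            }
            Kind::Hard => prev = Some(Kind::Hard),
            // like the patched guard: a soft separator never overwrites a pending hard one
            Kind::Soft if prev != Some(Kind::Hard) => prev = Some(Kind::Soft),
            Kind::Soft => {}
        }
    }
    out
}

fn main() {
    use Kind::*;
    // "foo bar. baz": the hard separator opens an 8-position gap before the third word
    assert_eq!(positions(&[Word, Soft, Word, Hard, Soft, Word]), vec![0, 1, 9]);
}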
@@ -226,12 +223,12 @@ mod tests {

     #[test]
     fn test_process_token() {
-        let text = " Zut, l’aspirateur, j’ai oublié de l’éteindre !";
+        let text = " 為一包含一千多萬目詞的帶標記平衡語料庫";
         let stopwords = Set::default();
         let analyzer = Analyzer::new(AnalyzerConfig::default_with_stopwords(&stopwords));
         let analyzer = analyzer.analyze(text);
-        let tokens: Vec<_> = process_tokens(analyzer.tokens()).collect();
-        println!("tokens: {:?}", tokens);
+        let tokens: Vec<_> = process_tokens(analyzer.tokens()).map(|(_, t)| t.text().to_string()).collect();
+        assert_eq!(tokens, ["为", "一", "包含", "一千多万", "目", "词", "的", "带", "标记", "平衡", "语料库"]);
     }

     #[test]
@@ -102,8 +102,6 @@ async fn placeholder_search_witch_crop() {
         "cropLength": 20
     });

-    println!("here");
-
     test_post_get_search!(server, query, |response, status_code| {
        assert_eq!(status_code, 200);
