mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-12-02 10:05:05 +08:00
4ebae7784c
Forcing it to be an internal type helps avoid accidental comparisons with other types.
105 lines
3.0 KiB
Rust
105 lines
3.0 KiB
Rust
pub mod automaton;
|
|
pub mod database;
|
|
pub mod data;
|
|
pub mod rank;
|
|
pub mod tokenizer;
|
|
pub mod vec_read_only;
|
|
mod common_words;
|
|
|
|
pub use rocksdb;
|
|
|
|
pub use self::tokenizer::Tokenizer;
|
|
pub use self::common_words::CommonWords;
|
|
|
|
/// Represents an internally generated, unique document identifier.
///
/// It is used to tell the database which document you want to deserialize.
/// Helpful for custom ranking.
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
pub struct DocumentId(pub u64);
|
|
|
|
/// This structure represent the position of a word
|
|
/// in a document and its attributes.
|
|
///
|
|
/// This is stored in the map, generated at index time,
|
|
/// extracted and interpreted at search time.
|
|
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
|
|
#[repr(C)]
|
|
pub struct DocIndex {
|
|
/// The document identifier where the word was found.
|
|
pub document_id: DocumentId,
|
|
|
|
/// The attribute identifier in the document
|
|
/// where the word was found.
|
|
///
|
|
/// This is an `u8` therefore a document
|
|
/// can not have more than `2^8` attributes.
|
|
pub attribute: u8,
|
|
|
|
/// The index where the word was found in the attribute.
|
|
///
|
|
/// Only the first 1000 words are indexed.
|
|
pub attribute_index: u32,
|
|
}
|
|
|
|
/// This structure represents a matching word, with information
/// on the location of the word in the document.
///
/// The order of the fields is important because it defines
/// the way these structures are ordered between themselves
/// (the derived comparison traits compare fields in declaration order).
///
/// The word in itself is not important.
// TODO do data oriented programming ? very arrays ?
#[derive(Debug, Copy, Clone, Eq, PartialEq, PartialOrd, Ord, Hash)]
pub struct Match {
    /// The word index in the query sentence.
    /// Same as the `attribute_index` but for the query words.
    ///
    /// Used to retrieve the automaton that matches this word.
    pub query_index: u32,

    /// The distance the word has with the query word
    /// (i.e. the Levenshtein distance).
    pub distance: u8,

    /// The attribute in which the word is located
    /// (i.e. Title is 0, Description is 1).
    ///
    /// This is a `u8`, therefore a document
    /// can not have more than `2^8` attributes.
    pub attribute: u8,

    /// Where the word is located in the attribute string
    /// (i.e. at the start or the end of the attribute).
    ///
    /// The index in the attribute is limited to a maximum of `2^32`;
    /// this is because we index only the first 1000 words
    /// in an attribute.
    pub attribute_index: u32,

    /// Whether the word that matches is an exact match or a prefix.
    pub is_exact: bool,
}
|
|
|
|
impl Match {
|
|
pub fn zero() -> Self {
|
|
Match {
|
|
query_index: 0,
|
|
distance: 0,
|
|
attribute: 0,
|
|
attribute_index: 0,
|
|
is_exact: false,
|
|
}
|
|
}
|
|
|
|
pub fn max() -> Self {
|
|
Match {
|
|
query_index: u32::max_value(),
|
|
distance: u8::max_value(),
|
|
attribute: u8::max_value(),
|
|
attribute_index: u32::max_value(),
|
|
is_exact: true,
|
|
}
|
|
}
|
|
}
|