2019-10-31 22:00:36 +08:00
use std ::cmp ::Ordering ;
use std ::collections ::{ HashMap , HashSet } ;
use std ::convert ::From ;
use std ::error ;
use std ::fmt ;
2020-03-02 21:34:29 +08:00
use std ::hash ::{ Hash , Hasher } ;
2019-10-31 22:00:36 +08:00
use std ::time ::{ Duration , Instant } ;
2020-01-23 18:30:18 +08:00
use indexmap ::IndexMap ;
use log ::error ;
2020-04-07 02:05:02 +08:00
use meilisearch_core ::Filter ;
2020-01-23 18:30:18 +08:00
use meilisearch_core ::criterion ::* ;
use meilisearch_core ::settings ::RankingRule ;
2020-01-24 01:33:23 +08:00
use meilisearch_core ::{ Highlight , Index , MainT , RankedMap } ;
2020-03-26 18:34:50 +08:00
use meilisearch_tokenizer ::is_cjk ;
2020-01-23 18:30:18 +08:00
use meilisearch_schema ::{ FieldId , Schema } ;
use serde ::{ Deserialize , Serialize } ;
use serde_json ::Value ;
2020-03-02 21:34:29 +08:00
use siphasher ::sip ::SipHasher ;
2020-01-23 18:30:18 +08:00
2019-10-31 22:00:36 +08:00
/// Failures that can surface while preparing or executing a search.
#[derive(Debug)]
pub enum Error {
    /// The search itself could not be performed; holds the underlying message.
    SearchDocuments(String),
    /// A document could not be retrieved; holds its id and the underlying message.
    RetrieveDocument(u64, String),
    /// No document exists for the given id.
    DocumentNotFound(u64),
    /// The named field cannot be cropped because it is not a string.
    CropFieldWrongType(String),
    /// The filter expression could not be parsed; holds the parser message.
    FilterParsing(String),
    /// The named field is not present on the document.
    AttributeNotFoundOnDocument(String),
    /// The named field is not present on the schema.
    AttributeNotFoundOnSchema(String),
    /// A filter has no value to compare against.
    MissingFilterValue,
    /// A filter refers to an attribute the schema does not know.
    UnknownFilteredAttribute,
    /// Any other failure; holds the underlying message.
    Internal(String),
}
impl error ::Error for Error { }
impl fmt ::Display for Error {
fn fmt ( & self , f : & mut fmt ::Formatter ) -> fmt ::Result {
use Error ::* ;
match self {
SearchDocuments ( err ) = > write! ( f , " impossible to search documents; {} " , err ) ,
RetrieveDocument ( id , err ) = > write! (
f ,
" impossible to retrieve the document with id: {}; {} " ,
id , err
) ,
DocumentNotFound ( id ) = > write! ( f , " document {} not found " , id ) ,
CropFieldWrongType ( field ) = > {
write! ( f , " the field {} cannot be cropped it's not a string " , field )
}
AttributeNotFoundOnDocument ( field ) = > {
write! ( f , " field {} is not found on document " , field )
}
AttributeNotFoundOnSchema ( field ) = > write! ( f , " field {} is not found on schema " , field ) ,
MissingFilterValue = > f . write_str ( " a filter doesn't have a value to compare it with " ) ,
UnknownFilteredAttribute = > {
f . write_str ( " a filter is specifying an unknown schema attribute " )
}
Internal ( err ) = > write! ( f , " internal error; {} " , err ) ,
2020-04-07 02:05:02 +08:00
FilterParsing ( err ) = > write! ( f , " filter parsing error: {} " , err ) ,
2019-10-31 22:00:36 +08:00
}
}
}
2019-11-26 18:06:55 +08:00
impl From < meilisearch_core ::Error > for Error {
fn from ( error : meilisearch_core ::Error ) -> Self {
2020-04-07 02:05:02 +08:00
use meilisearch_core ::pest_error ::LineColLocation ::* ;
match error {
meilisearch_core ::Error ::FilterParseError ( e ) = > {
let ( line , column ) = match e . line_col {
Span ( ( line , _ ) , ( column , _ ) ) = > ( line , column ) ,
Pos ( ( line , column ) ) = > ( line , column ) ,
} ;
let message = format! ( " parsing error on line {} at column {} : {} " , line , column , e . variant . message ( ) ) ;
Error ::FilterParsing ( message )
} ,
_ = > Error ::Internal ( error . to_string ( ) ) ,
}
2019-10-31 22:00:36 +08:00
}
}
2020-01-30 01:30:21 +08:00
impl From < heed ::Error > for Error {
fn from ( error : heed ::Error ) -> Self {
Error ::Internal ( error . to_string ( ) )
}
}
2019-10-31 22:00:36 +08:00
/// Extension trait giving an index an entry point for building searches.
pub trait IndexSearchExt {
    /// Starts a [`SearchBuilder`] for the given query string.
    fn new_search(&self, query: String) -> SearchBuilder;
}
impl IndexSearchExt for Index {
    /// Creates a builder bound to this index with the default settings:
    /// no offset, 20 results, a 30 ms fetch timeout, no filter, and no
    /// crop / retrieve / highlight restrictions.
    fn new_search(&self, query: String) -> SearchBuilder {
        let timeout = Duration::from_millis(30);

        SearchBuilder {
            index: self,
            query,
            offset: 0,
            limit: 20,
            attributes_to_crop: None,
            attributes_to_retrieve: None,
            attributes_to_highlight: None,
            filters: None,
            timeout,
            matches: false,
        }
    }
}
pub struct SearchBuilder < ' a > {
index : & ' a Index ,
query : String ,
offset : usize ,
limit : usize ,
attributes_to_crop : Option < HashMap < String , usize > > ,
attributes_to_retrieve : Option < HashSet < String > > ,
attributes_to_highlight : Option < HashSet < String > > ,
filters : Option < String > ,
timeout : Duration ,
matches : bool ,
}
impl < ' a > SearchBuilder < ' a > {
pub fn offset ( & mut self , value : usize ) -> & SearchBuilder {
self . offset = value ;
self
}
pub fn limit ( & mut self , value : usize ) -> & SearchBuilder {
self . limit = value ;
self
}
pub fn attributes_to_crop ( & mut self , value : HashMap < String , usize > ) -> & SearchBuilder {
self . attributes_to_crop = Some ( value ) ;
self
}
pub fn attributes_to_retrieve ( & mut self , value : HashSet < String > ) -> & SearchBuilder {
self . attributes_to_retrieve = Some ( value ) ;
self
}
pub fn add_retrievable_field ( & mut self , value : String ) -> & SearchBuilder {
let attributes_to_retrieve = self . attributes_to_retrieve . get_or_insert ( HashSet ::new ( ) ) ;
attributes_to_retrieve . insert ( value ) ;
self
}
pub fn attributes_to_highlight ( & mut self , value : HashSet < String > ) -> & SearchBuilder {
self . attributes_to_highlight = Some ( value ) ;
self
}
pub fn filters ( & mut self , value : String ) -> & SearchBuilder {
self . filters = Some ( value ) ;
self
}
pub fn timeout ( & mut self , value : Duration ) -> & SearchBuilder {
self . timeout = value ;
self
}
pub fn get_matches ( & mut self ) -> & SearchBuilder {
self . matches = true ;
self
}
2019-11-26 23:12:06 +08:00
pub fn search ( & self , reader : & heed ::RoTxn < MainT > ) -> Result < SearchResult , Error > {
2019-10-31 22:00:36 +08:00
let schema = self . index . main . schema ( reader ) ;
let schema = schema . map_err ( | e | Error ::Internal ( e . to_string ( ) ) ) ? ;
let schema = match schema {
Some ( schema ) = > schema ,
None = > return Err ( Error ::Internal ( String ::from ( " missing schema " ) ) ) ,
} ;
let ranked_map = self . index . main . ranked_map ( reader ) ;
let ranked_map = ranked_map . map_err ( | e | Error ::Internal ( e . to_string ( ) ) ) ? ;
let ranked_map = ranked_map . unwrap_or_default ( ) ;
// Change criteria
let mut query_builder = match self . get_criteria ( reader , & ranked_map , & schema ) ? {
Some ( criteria ) = > self . index . query_builder_with_criteria ( criteria ) ,
None = > self . index . query_builder ( ) ,
} ;
2020-04-07 02:05:02 +08:00
if let Some ( filter_expression ) = & self . filters {
let filter = Filter ::parse ( filter_expression , & schema ) ? ;
query_builder . with_filter ( move | id | {
let index = & self . index ;
let reader = & reader ;
let filter = & filter ;
match filter . test ( reader , index , id ) {
Ok ( res ) = > res ,
Err ( e ) = > {
log ::warn! ( " unexpected error during filtering: {} " , e ) ;
false
}
2019-10-31 22:00:36 +08:00
}
2020-04-07 02:05:02 +08:00
} ) ;
2019-10-31 22:00:36 +08:00
}
query_builder . with_fetch_timeout ( self . timeout ) ;
2020-03-02 21:34:29 +08:00
if let Some ( field ) = self . index . main . distinct_attribute ( reader ) ? {
if let Some ( field_id ) = schema . id ( & field ) {
query_builder . with_distinct ( 1 , move | id | {
match self . index . document_attribute_bytes ( reader , id , field_id ) {
Ok ( Some ( bytes ) ) = > {
let mut s = SipHasher ::new ( ) ;
bytes . hash ( & mut s ) ;
Some ( s . finish ( ) )
}
_ = > None ,
}
} ) ;
}
}
2019-12-23 03:55:11 +08:00
let start = Instant ::now ( ) ;
2020-03-25 19:44:38 +08:00
let result = query_builder . query ( reader , & self . query , self . offset .. ( self . offset + self . limit ) ) ;
let ( docs , nb_hits ) = result . map_err ( | e | Error ::SearchDocuments ( e . to_string ( ) ) ) ? ;
2020-03-25 21:00:29 +08:00
let time_ms = start . elapsed ( ) . as_millis ( ) as usize ;
2019-10-31 22:00:36 +08:00
let mut hits = Vec ::with_capacity ( self . limit ) ;
2020-03-25 19:44:38 +08:00
for doc in docs {
2019-10-31 22:00:36 +08:00
// retrieve the content of document in kv store
let mut fields : Option < HashSet < & str > > = None ;
if let Some ( attributes_to_retrieve ) = & self . attributes_to_retrieve {
let mut set = HashSet ::new ( ) ;
for field in attributes_to_retrieve {
set . insert ( field . as_str ( ) ) ;
}
fields = Some ( set ) ;
}
2019-11-15 19:04:46 +08:00
let document : IndexMap < String , Value > = self
2019-10-31 22:00:36 +08:00
. index
. document ( reader , fields . as_ref ( ) , doc . id )
. map_err ( | e | Error ::RetrieveDocument ( doc . id . 0 , e . to_string ( ) ) ) ?
. ok_or ( Error ::DocumentNotFound ( doc . id . 0 ) ) ? ;
2019-12-12 23:36:42 +08:00
let has_attributes_to_highlight = self . attributes_to_highlight . is_some ( ) ;
let has_attributes_to_crop = self . attributes_to_crop . is_some ( ) ;
let mut formatted = if has_attributes_to_highlight | | has_attributes_to_crop {
document . clone ( )
} else {
IndexMap ::new ( )
} ;
2019-10-31 22:00:36 +08:00
let mut matches = doc . highlights . clone ( ) ;
// Crops fields if needed
2019-11-15 19:04:46 +08:00
if let Some ( fields ) = & self . attributes_to_crop {
crop_document ( & mut formatted , & mut matches , & schema , fields ) ;
2019-10-31 22:00:36 +08:00
}
// Transform to readable matches
let matches = calculate_matches ( matches , self . attributes_to_retrieve . clone ( ) , & schema ) ;
2020-01-24 18:29:08 +08:00
if let Some ( attributes_to_highlight ) = & self . attributes_to_highlight {
formatted = calculate_highlights ( & formatted , & matches , attributes_to_highlight ) ;
2019-10-31 22:00:36 +08:00
}
let matches_info = if self . matches { Some ( matches ) } else { None } ;
let hit = SearchHit {
2019-11-15 19:04:46 +08:00
document ,
formatted ,
2019-10-31 22:00:36 +08:00
matches_info ,
} ;
hits . push ( hit ) ;
}
let results = SearchResult {
hits ,
offset : self . offset ,
limit : self . limit ,
2020-03-25 19:44:38 +08:00
nb_hits ,
2020-03-25 19:11:37 +08:00
exhaustive_nb_hits : false ,
2019-10-31 22:00:36 +08:00
processing_time_ms : time_ms ,
query : self . query . to_string ( ) ,
} ;
Ok ( results )
}
pub fn get_criteria (
& self ,
2019-11-26 23:12:06 +08:00
reader : & heed ::RoTxn < MainT > ,
2019-10-31 22:00:36 +08:00
ranked_map : & ' a RankedMap ,
schema : & Schema ,
) -> Result < Option < Criteria < ' a > > , Error > {
2020-01-30 01:30:21 +08:00
let ranking_rules = self . index . main . ranking_rules ( reader ) ? ;
2019-10-31 22:00:36 +08:00
if let Some ( ranking_rules ) = ranking_rules {
let mut builder = CriteriaBuilder ::with_capacity ( 7 + ranking_rules . len ( ) ) ;
2020-01-15 00:26:27 +08:00
for rule in ranking_rules {
match rule {
RankingRule ::Typo = > builder . push ( Typo ) ,
RankingRule ::Words = > builder . push ( Words ) ,
RankingRule ::Proximity = > builder . push ( Proximity ) ,
RankingRule ::Attribute = > builder . push ( Attribute ) ,
RankingRule ::WordsPosition = > builder . push ( WordsPosition ) ,
2020-01-31 18:45:57 +08:00
RankingRule ::Exactness = > builder . push ( Exactness ) ,
2020-01-30 01:30:21 +08:00
RankingRule ::Asc ( field ) = > {
match SortByAttr ::lower_is_better ( & ranked_map , & schema , & field ) {
Ok ( rule ) = > builder . push ( rule ) ,
Err ( err ) = > error! ( " Error during criteria builder; {:?} " , err ) ,
}
}
2020-03-03 00:13:23 +08:00
RankingRule ::Desc ( field ) = > {
2020-01-30 01:30:21 +08:00
match SortByAttr ::higher_is_better ( & ranked_map , & schema , & field ) {
Ok ( rule ) = > builder . push ( rule ) ,
Err ( err ) = > error! ( " Error during criteria builder; {:?} " , err ) ,
}
}
2020-02-13 17:25:37 +08:00
}
2019-10-31 22:00:36 +08:00
}
2020-01-15 00:26:27 +08:00
builder . push ( DocumentId ) ;
return Ok ( Some ( builder . build ( ) ) ) ;
2019-10-31 22:00:36 +08:00
}
Ok ( None )
}
}
/// Position of one match inside a field, as a start index and a length.
///
/// Ordering is derived and therefore lexicographic on the fields in
/// declaration order: by `start` first, then by `length`. Deriving both
/// `PartialOrd` and `Ord` keeps the two implementations in agreement.
#[derive(Debug, Clone, Eq, PartialEq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct MatchPosition {
    /// Index at which the match starts.
    pub start: usize,
    /// Length of the match.
    pub length: usize,
}
/// Map from a string key to a JSON value; presumably highlighted field values
/// keyed by attribute name (not used in the visible part of this file).
pub type HighlightInfos = HashMap < String , Value > ;
/// Match positions grouped by attribute name.
pub type MatchesInfos = HashMap < String , Vec < MatchPosition > > ;
// pub type RankingInfos = HashMap<String, u64>;
#[ derive(Debug, Clone, Serialize, Deserialize) ]
pub struct SearchHit {
#[ serde(flatten) ]
2019-11-15 19:04:46 +08:00
pub document : IndexMap < String , Value > ,
#[ serde(rename = " _formatted " , skip_serializing_if = " IndexMap::is_empty " ) ]
pub formatted : IndexMap < String , Value > ,
2019-10-31 22:00:36 +08:00
#[ serde(rename = " _matchesInfo " , skip_serializing_if = " Option::is_none " ) ]
pub matches_info : Option < MatchesInfos > ,
}
2020-03-25 20:59:15 +08:00
#[ derive(Debug, Clone, Serialize) ]
2019-10-31 22:00:36 +08:00
#[ serde(rename_all = " camelCase " ) ]
pub struct SearchResult {
pub hits : Vec < SearchHit > ,
pub offset : usize ,
pub limit : usize ,
2020-03-25 19:11:37 +08:00
pub nb_hits : usize ,
pub exhaustive_nb_hits : bool ,
2019-10-31 22:00:36 +08:00
pub processing_time_ms : usize ,
pub query : String ,
}
2020-03-26 18:34:50 +08:00
/// returns the start index and the length on the crop.
2020-03-26 02:51:22 +08:00
fn aligned_crop ( text : & str , match_index : usize , context : usize ) -> ( usize , usize ) {
2020-03-26 18:34:50 +08:00
let is_word_component = | c : & char | c . is_alphanumeric ( ) & & ! is_cjk ( * c ) ;
2020-03-26 02:51:22 +08:00
let word_end_index = | mut index | {
2020-03-26 18:34:50 +08:00
if text . chars ( ) . nth ( index - 1 ) . map_or ( false , | c | is_word_component ( & c ) ) {
index + = text . chars ( ) . skip ( index ) . take_while ( is_word_component ) . count ( ) ;
2020-03-26 02:51:22 +08:00
}
index
} ;
2020-03-26 18:34:50 +08:00
if context = = 0 {
// count need to be at least 1 for cjk queries to return something
return ( match_index , 1 + text . chars ( ) . skip ( match_index ) . take_while ( is_word_component ) . count ( ) ) ;
}
2020-03-26 02:51:22 +08:00
let start = match match_index . saturating_sub ( context ) {
n if n = = 0 = > n ,
n = > word_end_index ( n )
} ;
let end = word_end_index ( start + 2 * context ) ;
( start , end - start )
}
2019-10-31 22:00:36 +08:00
fn crop_text (
text : & str ,
matches : impl IntoIterator < Item = Highlight > ,
context : usize ,
) -> ( String , Vec < Highlight > ) {
let mut matches = matches . into_iter ( ) . peekable ( ) ;
let char_index = matches . peek ( ) . map ( | m | m . char_index as usize ) . unwrap_or ( 0 ) ;
2020-03-26 02:51:22 +08:00
let ( start , count ) = aligned_crop ( text , char_index , context ) ;
2020-03-26 18:34:50 +08:00
//TODO do something about the double allocation
let text = text . chars ( ) . skip ( start ) . take ( count ) . collect ::< String > ( ) . trim ( ) . to_string ( ) ;
2019-10-31 22:00:36 +08:00
2020-03-26 18:34:50 +08:00
// update matches index to match the new cropped text
2019-10-31 22:00:36 +08:00
let matches = matches
. take_while ( | m | ( m . char_index as usize ) + ( m . char_length as usize ) < = start + ( context * 2 ) )
. map ( | match_ | Highlight {
char_index : match_ . char_index - start as u16 ,
.. match_
} )
. collect ( ) ;
( text , matches )
}
fn crop_document (
document : & mut IndexMap < String , Value > ,
matches : & mut Vec < Highlight > ,
schema : & Schema ,
2019-11-15 19:04:46 +08:00
fields : & HashMap < String , usize > ,
) {
2019-10-31 22:00:36 +08:00
matches . sort_unstable_by_key ( | m | ( m . char_index , m . char_length ) ) ;
2019-11-15 19:04:46 +08:00
for ( field , length ) in fields {
2020-01-30 01:30:21 +08:00
let attribute = match schema . id ( field ) {
2019-11-15 19:04:46 +08:00
Some ( attribute ) = > attribute ,
None = > continue ,
} ;
let selected_matches = matches
. iter ( )
2020-01-15 00:26:27 +08:00
. filter ( | m | FieldId ::new ( m . attribute ) = = attribute )
2019-11-15 19:04:46 +08:00
. cloned ( ) ;
if let Some ( Value ::String ( ref mut original_text ) ) = document . get_mut ( field ) {
let ( cropped_text , cropped_matches ) =
crop_text ( original_text , selected_matches , * length ) ;
* original_text = cropped_text ;
2020-01-15 00:26:27 +08:00
matches . retain ( | m | FieldId ::new ( m . attribute ) ! = attribute ) ;
2019-11-15 19:04:46 +08:00
matches . extend_from_slice ( & cropped_matches ) ;
}
}
2019-10-31 22:00:36 +08:00
}
fn calculate_matches (
matches : Vec < Highlight > ,
attributes_to_retrieve : Option < HashSet < String > > ,
schema : & Schema ,
) -> MatchesInfos {
let mut matches_result : HashMap < String , Vec < MatchPosition > > = HashMap ::new ( ) ;
for m in matches . iter ( ) {
2020-01-30 01:30:21 +08:00
if let Some ( attribute ) = schema . name ( FieldId ::new ( m . attribute ) ) {
2020-01-15 00:26:27 +08:00
if let Some ( attributes_to_retrieve ) = attributes_to_retrieve . clone ( ) {
2020-01-30 01:30:21 +08:00
if ! attributes_to_retrieve . contains ( attribute ) {
2020-01-15 00:26:27 +08:00
continue ;
}
2020-02-13 17:25:37 +08:00
}
2020-01-30 01:30:21 +08:00
if ! schema . displayed_name ( ) . contains ( attribute ) {
2020-01-28 01:25:42 +08:00
continue ;
}
2020-01-30 01:30:21 +08:00
if let Some ( pos ) = matches_result . get_mut ( attribute ) {
2020-01-15 00:26:27 +08:00
pos . push ( MatchPosition {
start : m . char_index as usize ,
length : m . char_length as usize ,
} ) ;
} else {
let mut positions = Vec ::new ( ) ;
positions . push ( MatchPosition {
start : m . char_index as usize ,
length : m . char_length as usize ,
} ) ;
2020-01-30 01:30:21 +08:00
matches_result . insert ( attribute . to_string ( ) , positions ) ;
2019-10-31 22:00:36 +08:00
}
}
}
for ( _ , val ) in matches_result . iter_mut ( ) {
val . sort_unstable ( ) ;
val . dedup ( ) ;
}
matches_result
}
fn calculate_highlights (
2019-11-15 19:04:46 +08:00
document : & IndexMap < String , Value > ,
matches : & MatchesInfos ,
attributes_to_highlight : & HashSet < String > ,
) -> IndexMap < String , Value > {
2019-12-12 23:36:42 +08:00
let mut highlight_result = document . clone ( ) ;
2019-11-15 19:04:46 +08:00
2019-10-31 22:00:36 +08:00
for ( attribute , matches ) in matches . iter ( ) {
2019-11-15 19:04:46 +08:00
if attributes_to_highlight . contains ( attribute ) {
2019-10-31 22:00:36 +08:00
if let Some ( Value ::String ( value ) ) = document . get ( attribute ) {
let value : Vec < _ > = value . chars ( ) . collect ( ) ;
let mut highlighted_value = String ::new ( ) ;
let mut index = 0 ;
for m in matches {
if m . start > = index {
let before = value . get ( index .. m . start ) ;
let highlighted = value . get ( m . start .. ( m . start + m . length ) ) ;
if let ( Some ( before ) , Some ( highlighted ) ) = ( before , highlighted ) {
highlighted_value . extend ( before ) ;
highlighted_value . push_str ( " <em> " ) ;
highlighted_value . extend ( highlighted ) ;
highlighted_value . push_str ( " </em> " ) ;
index = m . start + m . length ;
} else {
error! ( " value: {:?}; index: {:?}, match: {:?} " , value , index , m ) ;
}
}
}
highlighted_value . extend ( value [ index .. ] . iter ( ) ) ;
highlight_result . insert ( attribute . to_string ( ) , Value ::String ( highlighted_value ) ) ;
} ;
}
}
2019-11-15 19:04:46 +08:00
2019-10-31 22:00:36 +08:00
highlight_result
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Crops `text` with `aligned_crop` and returns the trimmed result.
    fn cropped(text: &str, match_index: usize, context: usize) -> String {
        let (start, length) = aligned_crop(text, match_index, context);
        text.chars()
            .skip(start)
            .take(length)
            .collect::<String>()
            .trim()
            .to_string()
    }

    #[test]
    fn aligned_crops() {
        let text = r#"En ce début de trentième millénaire, l'Empire n'a jamais été aussi puissant, aussi étendu à travers toute la galaxie. C'est dans sa capitale, Trantor, que l'éminent savant Hari Seldon invente la psychohistoire, une science toute nouvelle, à base de psychologie et de mathématiques, qui lui permet de prédire l'avenir... C'est-à-dire l'effondrement de l'Empire d'ici cinq siècles et au-delà, trente mille années de chaos et de ténèbres. Pour empêcher cette catastrophe et sauver la civilisation, Seldon crée la Fondation."#;

        // simple test
        assert_eq!(" début ", cropped(text, 6, 2));

        // first word test
        assert_eq!(" En ", cropped(text, 0, 1));

        // last word test
        assert_eq!(" Fondation ", cropped(text, 510, 2));

        // CJK tests
        let text = " this isのス foo myタイリ test ";

        // mixed charset
        assert_eq!(" isのス ", cropped(text, 5, 3));

        // split regular word / CJK word, no space
        assert_eq!(" のス ", cropped(text, 7, 1));
    }

    #[test]
    fn calculate_highlights() {
        let data = r#" {
" title " : " Fondation (Isaac ASIMOV) " ,
" description " : " En ce début de trentième millénaire, l'Empire n'a jamais été aussi puissant, aussi étendu à travers toute la galaxie. C'est dans sa capitale, Trantor, que l'éminent savant Hari Seldon invente la psychohistoire, une science toute nouvelle, à base de psychologie et de mathématiques, qui lui permet de prédire l'avenir... C'est-à-dire l'effondrement de l'Empire d'ici cinq siècles et au-delà, trente mille années de chaos et de ténèbres. Pour empêcher cette catastrophe et sauver la civilisation, Seldon crée la Fondation. "
} "#;

        let document: IndexMap<String, Value> = serde_json::from_str(data).unwrap();

        let mut attributes_to_highlight = HashSet::new();
        attributes_to_highlight.insert(" title ".to_string());
        attributes_to_highlight.insert(" description ".to_string());

        let mut matches = HashMap::new();
        matches.insert(
            " title ".to_string(),
            vec![MatchPosition {
                start: 0,
                length: 9,
            }],
        );
        matches.insert(
            " description ".to_string(),
            vec![MatchPosition {
                start: 510,
                length: 9,
            }],
        );

        // The test function shadows the glob-imported name, hence `super::`.
        let result = super::calculate_highlights(&document, &matches, &attributes_to_highlight);

        let mut result_expected = IndexMap::new();
        result_expected.insert(
            " title ".to_string(),
            Value::String(" <em>Fondation</em> (Isaac ASIMOV) ".to_string()),
        );
        result_expected.insert(" description ".to_string(), Value::String(" En ce début de trentième millénaire, l'Empire n'a jamais été aussi puissant, aussi étendu à travers toute la galaxie. C'est dans sa capitale, Trantor, que l'éminent savant Hari Seldon invente la psychohistoire, une science toute nouvelle, à base de psychologie et de mathématiques, qui lui permet de prédire l'avenir... C'est-à-dire l'effondrement de l'Empire d'ici cinq siècles et au-delà, trente mille années de chaos et de ténèbres. Pour empêcher cette catastrophe et sauver la civilisation, Seldon crée la <em>Fondation</em>. ".to_string()));

        assert_eq!(result, result_expected);
    }
}