Add missing exactness.matchingWords, exactness.maxMatchingWords

This commit is contained in:
Louis Dureuil 2023-07-04 15:53:38 +02:00
parent 82650eaae1
commit 48409c9183
No known key found for this signature in database
2 changed files with 47 additions and 11 deletions

View File

@ -10,7 +10,7 @@ pub enum ScoreDetails {
Fid(Rank), Fid(Rank),
Position(Rank), Position(Rank),
ExactAttribute(ExactAttribute), ExactAttribute(ExactAttribute),
Exactness(Rank), ExactWords(ExactWords),
Sort(Sort), Sort(Sort),
GeoSort(GeoSort), GeoSort(GeoSort),
} }
@ -28,7 +28,7 @@ impl ScoreDetails {
ScoreDetails::Fid(details) => Some(*details), ScoreDetails::Fid(details) => Some(*details),
ScoreDetails::Position(details) => Some(*details), ScoreDetails::Position(details) => Some(*details),
ScoreDetails::ExactAttribute(details) => Some(details.rank()), ScoreDetails::ExactAttribute(details) => Some(details.rank()),
ScoreDetails::Exactness(details) => Some(*details), ScoreDetails::ExactWords(details) => Some(details.rank()),
ScoreDetails::Sort(_) => None, ScoreDetails::Sort(_) => None,
ScoreDetails::GeoSort(_) => None, ScoreDetails::GeoSort(_) => None,
} }
@ -117,7 +117,7 @@ impl ScoreDetails {
details_map.insert("exactness".into(), exactness_details); details_map.insert("exactness".into(), exactness_details);
order += 1; order += 1;
} }
ScoreDetails::Exactness(details) => { ScoreDetails::ExactWords(details) => {
// For now, exactness is a virtual rule always preceded by the "ExactAttribute" rule // For now, exactness is a virtual rule always preceded by the "ExactAttribute" rule
let exactness_details = details_map let exactness_details = details_map
.get_mut("exactness") .get_mut("exactness")
@ -129,9 +129,16 @@ impl ScoreDetails {
== &serde_json::json!(ExactAttribute::NoExactMatch) == &serde_json::json!(ExactAttribute::NoExactMatch)
{ {
let score = Rank::global_score( let score = Rank::global_score(
[ExactAttribute::NoExactMatch.rank(), *details].iter().copied(), [ExactAttribute::NoExactMatch.rank(), details.rank()].iter().copied(),
); );
*exactness_details.get_mut("score").expect("missing score") = score.into(); // tiny detail, but we want the score to be the last displayed field,
// so we're removing it here, adding the other fields, then adding the new score
exactness_details.remove("score");
exactness_details
.insert("matchingWords".into(), details.matching_words.into());
exactness_details
.insert("maxMatchingWords".into(), details.max_matching_words.into());
exactness_details.insert("score".into(), score.into());
} }
// do not update the order since this was already done by exactAttribute // do not update the order since this was already done by exactAttribute
} }
@ -209,8 +216,34 @@ impl Words {
Rank { rank: self.matching_words, max_rank: self.max_matching_words } Rank { rank: self.matching_words, max_rank: self.max_matching_words }
} }
pub(crate) fn from_rank(rank: Rank) -> Words { pub(crate) fn from_rank(rank: Rank) -> Self {
Words { matching_words: rank.rank, max_matching_words: rank.max_rank } Self { matching_words: rank.rank, max_matching_words: rank.max_rank }
}
}
/// Exactness counterpart of [`Words`]: it carries the same two counters, but with
/// slightly different semantics.
///
/// In exactness, the number of matching words can actually be 0 with a non-zero score,
/// if no words from the query appear exactly in the document.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ExactWords {
/// Number of words from the query that appear exactly in the document (may be 0).
pub matching_words: u32,
/// Value of `matching_words` that corresponds to the first (best) rank.
pub max_matching_words: u32,
}
impl ExactWords {
/// Converts these counts into a [`Rank`].
///
/// Both counts are shifted up by one, so a document with no exactly matching word
/// still gets rank 1 (the last rank) instead of rank 0.
pub fn rank(&self) -> Rank {
// 0 matching words means last rank (1)
Rank { rank: self.matching_words + 1, max_rank: self.max_matching_words + 1 }
}
pub(crate) fn from_rank(rank: Rank) -> Self {
// last rank (1) means that 0 words from the query appear exactly in the document.
// first rank (max_rank) means that (max_rank - 1) words from the query appear exactly in the document.
Self {
matching_words: rank.rank.saturating_sub(1),
max_matching_words: rank.max_rank.saturating_sub(1),
}
} }
} }
@ -223,7 +256,7 @@ pub struct Typo {
impl Typo { impl Typo {
pub fn rank(&self) -> Rank { pub fn rank(&self) -> Rank {
Rank { Rank {
rank: self.max_typo_count - self.typo_count + 1, rank: (self.max_typo_count + 1).saturating_sub(self.typo_count),
max_rank: (self.max_typo_count + 1), max_rank: (self.max_typo_count + 1),
} }
} }
@ -236,7 +269,10 @@ impl Typo {
// rank + typo = max_rank // rank + typo = max_rank
// typo = max_rank - rank // typo = max_rank - rank
pub fn from_rank(rank: Rank) -> Typo { pub fn from_rank(rank: Rank) -> Typo {
Typo { typo_count: rank.max_rank - rank.rank, max_typo_count: rank.max_rank - 1 } Typo {
typo_count: rank.max_rank.saturating_sub(rank.rank),
max_typo_count: rank.max_rank.saturating_sub(1),
}
} }
} }

View File

@ -1,7 +1,7 @@
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait}; use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::score_details::{Rank, ScoreDetails}; use crate::score_details::{self, Rank, ScoreDetails};
use crate::search::new::interner::{DedupInterner, Interned}; use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset}; use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset};
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids; use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
@ -87,6 +87,6 @@ impl RankingRuleGraphTrait for ExactnessGraph {
} }
fn rank_to_score(rank: Rank) -> ScoreDetails { fn rank_to_score(rank: Rank) -> ScoreDetails {
ScoreDetails::Exactness(rank) ScoreDetails::ExactWords(score_details::ExactWords::from_rank(rank))
} }
} }