mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 18:45:06 +08:00
Introduce an infos subcommand to display the facet values
This commit is contained in:
parent
a18d9a1f87
commit
8e6efe4d87
@ -78,6 +78,16 @@ enum Command {
|
|||||||
words: Vec<String>,
|
words: Vec<String>,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// Outputs a CSV with the documents ids along with the facet values where it appears.
|
||||||
|
FacetValuesDocids {
|
||||||
|
/// Display the whole documents ids in details.
|
||||||
|
#[structopt(long)]
|
||||||
|
full_display: bool,
|
||||||
|
|
||||||
|
/// The field name in the document.
|
||||||
|
field_name: String,
|
||||||
|
},
|
||||||
|
|
||||||
/// Outputs the total size of all the docid-word-positions keys and values.
|
/// Outputs the total size of all the docid-word-positions keys and values.
|
||||||
TotalDocidWordPositionsSize,
|
TotalDocidWordPositionsSize,
|
||||||
|
|
||||||
@ -147,6 +157,9 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
|
|||||||
MostCommonWords { limit } => most_common_words(&index, &rtxn, limit),
|
MostCommonWords { limit } => most_common_words(&index, &rtxn, limit),
|
||||||
BiggestValues { limit } => biggest_value_sizes(&index, &rtxn, limit),
|
BiggestValues { limit } => biggest_value_sizes(&index, &rtxn, limit),
|
||||||
WordsDocids { full_display, words } => words_docids(&index, &rtxn, !full_display, words),
|
WordsDocids { full_display, words } => words_docids(&index, &rtxn, !full_display, words),
|
||||||
|
FacetValuesDocids { full_display, field_name } => {
|
||||||
|
facet_values_docids(&index, &rtxn, !full_display, field_name)
|
||||||
|
},
|
||||||
TotalDocidWordPositionsSize => total_docid_word_positions_size(&index, &rtxn),
|
TotalDocidWordPositionsSize => total_docid_word_positions_size(&index, &rtxn),
|
||||||
AverageNumberOfWordsByDoc => average_number_of_words_by_doc(&index, &rtxn),
|
AverageNumberOfWordsByDoc => average_number_of_words_by_doc(&index, &rtxn),
|
||||||
AverageNumberOfPositionsByWord => {
|
AverageNumberOfPositionsByWord => {
|
||||||
@ -256,6 +269,64 @@ fn words_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, words: Vec<Strin
|
|||||||
Ok(wtr.flush()?)
|
Ok(wtr.flush()?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_name: String) -> anyhow::Result<()> {
|
||||||
|
use crate::facet::FacetType;
|
||||||
|
use crate::heed_codec::facet::{FacetValueStringCodec, FacetValueF64Codec, FacetValueI64Codec};
|
||||||
|
use heed::{BytesDecode, Error::Decoding};
|
||||||
|
|
||||||
|
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||||
|
let faceted_fields = index.faceted_fields(&rtxn)?;
|
||||||
|
|
||||||
|
let field_id = fields_ids_map.id(&field_name)
|
||||||
|
.with_context(|| format!("field {} not found", field_name))?;
|
||||||
|
let field_type = faceted_fields.get(&field_id)
|
||||||
|
.with_context(|| format!("field {} is not faceted", field_name))?;
|
||||||
|
|
||||||
|
let iter = index.facet_field_id_value_docids.prefix_iter(&rtxn, &[field_id])?;
|
||||||
|
let iter = match field_type {
|
||||||
|
FacetType::String => {
|
||||||
|
let iter = iter
|
||||||
|
.map(|result| result.and_then(|(key, value)| {
|
||||||
|
let (_, key) = FacetValueStringCodec::bytes_decode(key).ok_or(Decoding)?;
|
||||||
|
Ok((key.to_string(), value))
|
||||||
|
}));
|
||||||
|
Box::new(iter) as Box<dyn Iterator<Item=_>>
|
||||||
|
},
|
||||||
|
FacetType::Float => {
|
||||||
|
let iter = iter
|
||||||
|
.map(|result| result.and_then(|(key, value)| {
|
||||||
|
let (_, key) = FacetValueF64Codec::bytes_decode(key).ok_or(Decoding)?;
|
||||||
|
Ok((key.to_string(), value))
|
||||||
|
}));
|
||||||
|
Box::new(iter)
|
||||||
|
},
|
||||||
|
FacetType::Integer => {
|
||||||
|
let iter = iter
|
||||||
|
.map(|result| result.and_then(|(key, value)| {
|
||||||
|
let (_, key) = FacetValueI64Codec::bytes_decode(key).ok_or(Decoding)?;
|
||||||
|
Ok((key.to_string(), value))
|
||||||
|
}));
|
||||||
|
Box::new(iter)
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
|
let stdout = io::stdout();
|
||||||
|
let mut wtr = csv::Writer::from_writer(stdout.lock());
|
||||||
|
wtr.write_record(&["facet_value", "documents_ids"])?;
|
||||||
|
|
||||||
|
for result in iter {
|
||||||
|
let (value, docids) = result?;
|
||||||
|
let docids = if debug {
|
||||||
|
format!("{:?}", docids)
|
||||||
|
} else {
|
||||||
|
format!("{:?}", docids.iter().collect::<Vec<_>>())
|
||||||
|
};
|
||||||
|
wtr.write_record(&[value, docids])?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(wtr.flush()?)
|
||||||
|
}
|
||||||
|
|
||||||
fn export_words_fst(index: &Index, rtxn: &heed::RoTxn, output: PathBuf) -> anyhow::Result<()> {
|
fn export_words_fst(index: &Index, rtxn: &heed::RoTxn, output: PathBuf) -> anyhow::Result<()> {
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::Write as _;
|
use std::io::Write as _;
|
||||||
|
Loading…
Reference in New Issue
Block a user