Mirror of https://github.com/meilisearch/meilisearch.git
get rids of log in milli and add logs for the bucket sort
This commit is contained in:
parent 35d8546fc3
commit 3331995976
Cargo.lock (generated)
@@ -3813,7 +3813,6 @@ dependencies = [
 "json-depth-checker",
 "levenshtein_automata",
 "liquid",
-"log",
 "logging_timer",
 "maplit",
 "md5",
@@ -71,7 +71,6 @@ itertools = "0.11.0"
 puffin = "0.16.0"

 # logging
-log = "0.4.20"
 logging_timer = "1.1.0"
 csv = "1.3.0"
 candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
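The Cargo.toml hunk above drops the `log` facade entirely; that only works because every remaining `log::…!` call site in milli is switched to the equivalent `tracing::…!` macro in the hunks that follow. Below is a minimal, hedged sketch of that swap, assuming the `tracing` and `tracing-subscriber` crates; the dependency versions in the comment are illustrative, not taken from this commit.

// Illustrative Cargo.toml additions (versions are assumptions):
// [dependencies]
// tracing = "0.1"
// tracing-subscriber = "0.3"

fn main() {
    // A subscriber must be installed for `tracing` events to be printed,
    // just as a logger implementation had to be installed for `log`.
    tracing_subscriber::fmt::init();

    let name = "id";
    // Before: log::info!("Primary key was not specified in index. Inferred to '{name}'");
    // After: the same format-string call site compiles unchanged under `tracing`.
    tracing::info!("Primary key was not specified in index. Inferred to '{name}'");
}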
@@ -6,9 +6,9 @@ use charabia::Normalize;
 use fst::automaton::{Automaton, Str};
 use fst::{IntoStreamer, Streamer};
 use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
-use log::error;
 use once_cell::sync::Lazy;
 use roaring::bitmap::RoaringBitmap;
+use tracing::error;

 pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
 pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
@@ -166,6 +166,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
             continue;
         }

+        let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = ranking_rules[cur_ranking_rule_index].id());
+        let entered = span.enter();
+
         let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(
             ctx,
             logger,
@@ -175,6 +178,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
             back!();
             continue;
         };
+        drop(entered);

         ranking_rule_scores.push(next_bucket.score);

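The two bucket-sort hunks above wrap each `next_bucket` call in a trace span so time spent per ranking rule shows up in tracing output. Here is a hedged sketch of the same span-guard pattern, assuming `tracing` and `tracing-subscriber`; `next_bucket_work` and the sample rule id are illustrative stand-ins, not the real milli API.

fn next_bucket_work(rule_id: &str) -> usize {
    // Placeholder for the real ranking-rule work.
    rule_id.len()
}

fn main() {
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::TRACE)
        .init();

    let rule_id = "words";
    // Open a span scoped to a single next_bucket call, recording the rule id as a field.
    let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = rule_id);
    let entered = span.enter();
    let _bucket = next_bucket_work(rule_id);
    // Drop the guard explicitly so the span closes before the rest of the loop body runs,
    // mirroring the drop(entered) added once the bucket has been obtained.
    drop(entered);
}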
@@ -85,8 +85,8 @@ use charabia::normalizer::{Normalize, NormalizerOption};
 use grenad::{CompressionType, SortAlgorithm};
 use heed::types::{Bytes, DecodeIgnore, SerdeJson};
 use heed::BytesEncode;
-use log::debug;
 use time::OffsetDateTime;
+use tracing::debug;

 use self::incremental::FacetsUpdateIncremental;
 use super::FacetsUpdateBulk;
@@ -78,7 +78,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
         },
         [] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)),
         [(field_id, name)] => {
-            log::info!("Primary key was not specified in index. Inferred to '{name}'");
+            tracing::info!("Primary key was not specified in index. Inferred to '{name}'");
             PrimaryKey::Flat { name, field_id: *field_id }
         }
         multiple => {
@@ -431,7 +431,7 @@ fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
                 if let Ok(float) = original.parse() {
                     output_numbers.push(float);
                 } else {
-                    log::warn!(
+                    tracing::warn!(
                         "Internal error, could not parse a geofield that has been validated. Please open an issue."
                     )
                 }
@@ -186,12 +186,12 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
                     prompt.render(obkv, DelAdd::Deletion, field_id_map).unwrap_or_default();
                 let new_prompt = prompt.render(obkv, DelAdd::Addition, field_id_map)?;
                 if old_prompt != new_prompt {
-                    log::trace!(
+                    tracing::trace!(
                         "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}"
                     );
                     VectorStateDelta::NowGenerated(new_prompt)
                 } else {
-                    log::trace!("⏭️ Prompt unmodified, skipping");
+                    tracing::trace!("⏭️ Prompt unmodified, skipping");
                     VectorStateDelta::NoChange
                 }
             } else {
@@ -14,8 +14,8 @@ use std::fs::File;
 use std::io::BufReader;

 use crossbeam_channel::Sender;
-use log::debug;
 use rayon::prelude::*;
+use tracing::debug;

 use self::extract_docid_word_positions::extract_docid_word_positions;
 use self::extract_facet_number_docids::extract_facet_number_docids;
@@ -13,11 +13,11 @@ use std::result::Result as StdResult;
 use crossbeam_channel::{Receiver, Sender};
 use heed::types::Str;
 use heed::Database;
-use log::debug;
 use rand::SeedableRng;
 use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize};
 use slice_group_by::GroupBy;
+use tracing::debug;
 use typed_chunk::{write_typed_chunk_into_index, TypedChunk};

 use self::enrich::enrich_documents_batch;
@@ -517,7 +517,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 }
             }

-            log::debug!("Finished vector chunk for {}", embedder_name);
+            tracing::debug!("Finished vector chunk for {}", embedder_name);
         }
         TypedChunk::ScriptLanguageDocids(sl_map) => {
             let span = tracing::trace_span!(target: "indexing::write_db", "script_language_docids");
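The span targets visible in these hunks (`indexing::write_db`, `search::bucket_sort`) can be used to filter output when the subscriber is set up. A hedged sketch follows, assuming `tracing-subscriber` built with its `env-filter` feature; the directive string is illustrative, not taken from the commit.

use tracing_subscriber::EnvFilter;

fn main() {
    // Enable trace-level output only for the two targets of interest,
    // warn-level everywhere else.
    tracing_subscriber::fmt()
        .with_env_filter(
            EnvFilter::try_new("indexing::write_db=trace,search::bucket_sort=trace,warn")
                .expect("valid filter directive"),
        )
        .init();

    tracing::debug!(target: "indexing::write_db", "Finished vector chunk for {}", "default");
}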
@@ -4,7 +4,7 @@ use std::str;
 use grenad::CompressionType;
 use heed::types::Bytes;
 use heed::{BytesDecode, BytesEncode, Database};
-use log::debug;
+use tracing::debug;

 use crate::error::SerializationError;
 use crate::heed_codec::StrBEU16Codec;
@@ -73,7 +73,7 @@ impl Embedder {
         let device = match candle_core::Device::cuda_if_available(0) {
             Ok(device) => device,
             Err(error) => {
-                log::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error);
+                tracing::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error);
                 candle_core::Device::Cpu
             }
         };
@@ -135,12 +135,16 @@ impl Embedder {
             let retry_duration = match result {
                 Ok(embeddings) => return Ok(embeddings),
                 Err(retry) => {
-                    log::warn!("Failed: {}", retry.error);
+                    tracing::warn!("Failed: {}", retry.error);
                     tokenized |= retry.must_tokenize();
                     retry.into_duration(attempt)
                 }
             }?;
-            log::warn!("Attempt #{}, retrying after {}ms.", attempt, retry_duration.as_millis());
+            tracing::warn!(
+                "Attempt #{}, retrying after {}ms.",
+                attempt,
+                retry_duration.as_millis()
+            );
             tokio::time::sleep(retry_duration).await;
         }

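The OpenAI hunk above keeps the retry-with-backoff loop intact and only reroutes its warnings through `tracing`. Below is a hedged sketch of that loop shape; `embed_attempt`, the `Retry` type, and the backoff math are illustrative stand-ins for the real embedder internals (which are async and use tokio), and only the `tracing::warn!` calls mirror the diff.

use std::time::Duration;

struct Retry {
    error: String,
    base_ms: u64,
}

impl Retry {
    fn into_duration(self, attempt: u32) -> Duration {
        // Simple exponential backoff stand-in.
        Duration::from_millis(self.base_ms << attempt)
    }
}

fn embed_attempt(attempt: u32) -> Result<Vec<f32>, Retry> {
    // Pretend the first two attempts are rate limited.
    if attempt < 2 {
        Err(Retry { error: "rate limited".into(), base_ms: 100 })
    } else {
        Ok(vec![0.0; 8])
    }
}

fn main() {
    tracing_subscriber::fmt::init();

    for attempt in 0..5u32 {
        let retry_duration = match embed_attempt(attempt) {
            Ok(_embeddings) => break,
            Err(retry) => {
                tracing::warn!("Failed: {}", retry.error);
                retry.into_duration(attempt)
            }
        };
        tracing::warn!(
            "Attempt #{}, retrying after {}ms.",
            attempt,
            retry_duration.as_millis()
        );
        std::thread::sleep(retry_duration);
    }
}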
@@ -206,7 +210,7 @@ impl Embedder {
                 .map_err(EmbedError::openai_unexpected)
                 .map_err(Retry::retry_later)?;

-            log::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
+            tracing::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt.");

             return Err(Retry::retry_tokenized(EmbedError::openai_too_many_tokens(
                 error_response.error,
@@ -227,7 +231,7 @@ impl Embedder {
         texts: &[S],
     ) -> Result<Vec<Embeddings<f32>>, Retry> {
         for text in texts {
-            log::trace!("Received prompt: {}", text.as_ref())
+            tracing::trace!("Received prompt: {}", text.as_ref())
         }
         let request = OpenAiRequest { model: self.options.embedding_model.name(), input: texts };
         let response = self
@@ -247,7 +251,7 @@ impl Embedder {
                 .map_err(EmbedError::openai_unexpected)
                 .map_err(Retry::retry_later)?;

-            log::trace!("response: {:?}", response.data);
+            tracing::trace!("response: {:?}", response.data);

             Ok(response
                 .data