get rid of log in milli and add logs for the bucket sort

Tamo 2024-02-06 10:49:23 +01:00
parent 35d8546fc3
commit 3331995976
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
14 changed files with 24 additions and 18 deletions
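In short: every `log::…!` call in milli becomes the equivalent `tracing::…!` call (the macros take the same format arguments), and `bucket_sort` gains a `trace_span!` around each ranking rule's `next_bucket` call so the time spent per rule shows up in traces. Below is a minimal standalone sketch of that span pattern, assuming the `tracing` and `tracing-subscriber` crates as dependencies; the subscriber setup, the target name, and the `slow_work` function are illustrative and are not milli's actual wiring:

use tracing::trace_span;

fn slow_work() -> u64 {
    // Stand-in for a ranking rule's next_bucket call.
    (0..1_000_000u64).sum()
}

fn main() {
    // Illustrative subscriber setup: emit a close event (with timing) for every span.
    tracing_subscriber::fmt()
        .with_max_level(tracing::Level::TRACE)
        .with_span_events(tracing_subscriber::fmt::format::FmtSpan::CLOSE)
        .init();

    // Same shape as the bucket_sort change: open a span, enter it,
    // do the work, then drop the guard to close the timed section.
    let span = trace_span!(target: "example::bucket_sort", "next_bucket", id = "words");
    let entered = span.enter();
    let total = slow_work();
    drop(entered);

    tracing::debug!(total, "bucket computed");
}

With `FmtSpan::CLOSE`, the subscriber prints one line per closed span including its busy/idle time, which is roughly the information the new bucket-sort spans are meant to expose.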

Cargo.lock (generated)

@@ -3813,7 +3813,6 @@ dependencies = [
  "json-depth-checker",
  "levenshtein_automata",
  "liquid",
- "log",
  "logging_timer",
  "maplit",
  "md5",


@@ -71,7 +71,6 @@ itertools = "0.11.0"
 puffin = "0.16.0"

 # logging
-log = "0.4.20"
 logging_timer = "1.1.0"
 csv = "1.3.0"
 candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }


@@ -6,9 +6,9 @@ use charabia::Normalize;
 use fst::automaton::{Automaton, Str};
 use fst::{IntoStreamer, Streamer};
 use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
-use log::error;
 use once_cell::sync::Lazy;
 use roaring::bitmap::RoaringBitmap;
+use tracing::error;

 pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
 pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};


@@ -166,6 +166,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
             continue;
         }

+        let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = ranking_rules[cur_ranking_rule_index].id());
+        let entered = span.enter();
+
         let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(
             ctx,
             logger,
@@ -175,6 +178,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
             back!();
             continue;
         };
+        drop(entered);

         ranking_rule_scores.push(next_bucket.score);
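Worth noting on the hunk above: `entered` is dropped explicitly right after the `let Some(next_bucket) = … else { … };` statement, so the span covers only the `next_bucket` call itself; if the guard were left to fall out of scope, the span would also absorb the score bookkeeping and bucket handling further down the loop body.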


@@ -85,8 +85,8 @@ use charabia::normalizer::{Normalize, NormalizerOption};
 use grenad::{CompressionType, SortAlgorithm};
 use heed::types::{Bytes, DecodeIgnore, SerdeJson};
 use heed::BytesEncode;
-use log::debug;
 use time::OffsetDateTime;
+use tracing::debug;

 use self::incremental::FacetsUpdateIncremental;
 use super::FacetsUpdateBulk;


@@ -78,7 +78,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
         },
         [] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)),
         [(field_id, name)] => {
-            log::info!("Primary key was not specified in index. Inferred to '{name}'");
+            tracing::info!("Primary key was not specified in index. Inferred to '{name}'");
             PrimaryKey::Flat { name, field_id: *field_id }
         }
         multiple => {


@@ -431,7 +431,7 @@ fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
                     if let Ok(float) = original.parse() {
                         output_numbers.push(float);
                     } else {
-                        log::warn!(
+                        tracing::warn!(
                             "Internal error, could not parse a geofield that has been validated. Please open an issue."
                         )
                     }


@@ -186,12 +186,12 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
                     prompt.render(obkv, DelAdd::Deletion, field_id_map).unwrap_or_default();
                 let new_prompt = prompt.render(obkv, DelAdd::Addition, field_id_map)?;
                 if old_prompt != new_prompt {
-                    log::trace!(
+                    tracing::trace!(
                         "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}"
                     );
                     VectorStateDelta::NowGenerated(new_prompt)
                 } else {
-                    log::trace!("⏭️ Prompt unmodified, skipping");
+                    tracing::trace!("⏭️ Prompt unmodified, skipping");
                     VectorStateDelta::NoChange
                 }
             } else {


@@ -14,8 +14,8 @@ use std::fs::File;
 use std::io::BufReader;

 use crossbeam_channel::Sender;
-use log::debug;
 use rayon::prelude::*;
+use tracing::debug;

 use self::extract_docid_word_positions::extract_docid_word_positions;
 use self::extract_facet_number_docids::extract_facet_number_docids;


@@ -13,11 +13,11 @@ use std::result::Result as StdResult;
 use crossbeam_channel::{Receiver, Sender};
 use heed::types::Str;
 use heed::Database;
-use log::debug;
 use rand::SeedableRng;
 use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize};
 use slice_group_by::GroupBy;
+use tracing::debug;
 use typed_chunk::{write_typed_chunk_into_index, TypedChunk};

 use self::enrich::enrich_documents_batch;


@@ -517,7 +517,7 @@ pub(crate) fn write_typed_chunk_into_index(
                    }
                }

-                log::debug!("Finished vector chunk for {}", embedder_name);
+                tracing::debug!("Finished vector chunk for {}", embedder_name);
            }
            TypedChunk::ScriptLanguageDocids(sl_map) => {
                let span = tracing::trace_span!(target: "indexing::write_db", "script_language_docids");


@@ -4,7 +4,7 @@ use std::str;
 use grenad::CompressionType;
 use heed::types::Bytes;
 use heed::{BytesDecode, BytesEncode, Database};
-use log::debug;
+use tracing::debug;

 use crate::error::SerializationError;
 use crate::heed_codec::StrBEU16Codec;


@@ -73,7 +73,7 @@ impl Embedder {
         let device = match candle_core::Device::cuda_if_available(0) {
             Ok(device) => device,
             Err(error) => {
-                log::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error);
+                tracing::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error);
                 candle_core::Device::Cpu
             }
         };


@@ -135,12 +135,16 @@ impl Embedder {
             let retry_duration = match result {
                 Ok(embeddings) => return Ok(embeddings),
                 Err(retry) => {
-                    log::warn!("Failed: {}", retry.error);
+                    tracing::warn!("Failed: {}", retry.error);
                     tokenized |= retry.must_tokenize();
                     retry.into_duration(attempt)
                 }
             }?;

-            log::warn!("Attempt #{}, retrying after {}ms.", attempt, retry_duration.as_millis());
+            tracing::warn!(
+                "Attempt #{}, retrying after {}ms.",
+                attempt,
+                retry_duration.as_millis()
+            );
             tokio::time::sleep(retry_duration).await;
         }
@@ -206,7 +210,7 @@ impl Embedder {
                 .map_err(EmbedError::openai_unexpected)
                 .map_err(Retry::retry_later)?;

-            log::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
+            tracing::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt.");

             return Err(Retry::retry_tokenized(EmbedError::openai_too_many_tokens(
                 error_response.error,
@@ -227,7 +231,7 @@ impl Embedder {
         texts: &[S],
     ) -> Result<Vec<Embeddings<f32>>, Retry> {
         for text in texts {
-            log::trace!("Received prompt: {}", text.as_ref())
+            tracing::trace!("Received prompt: {}", text.as_ref())
         }
         let request = OpenAiRequest { model: self.options.embedding_model.name(), input: texts };
         let response = self
@@ -247,7 +251,7 @@ impl Embedder {
             .map_err(EmbedError::openai_unexpected)
             .map_err(Retry::retry_later)?;

-        log::trace!("response: {:?}", response.data);
+        tracing::trace!("response: {:?}", response.data);

         Ok(response
             .data