From 3331995976484b237e051f4c89bb18208cd8f738 Mon Sep 17 00:00:00 2001
From: Tamo
Date: Tue, 6 Feb 2024 10:49:23 +0100
Subject: [PATCH] get rid of log in milli and add logs for the bucket sort

---
 Cargo.lock                                        |  1 -
 milli/Cargo.toml                                  |  1 -
 milli/src/search/mod.rs                           |  2 +-
 milli/src/search/new/bucket_sort.rs               |  4 ++++
 milli/src/update/facet/mod.rs                     |  2 +-
 milli/src/update/index_documents/enrich.rs        |  2 +-
 .../extract/extract_fid_docid_facet_values.rs     |  2 +-
 .../extract/extract_vector_points.rs              |  4 ++--
 milli/src/update/index_documents/extract/mod.rs   |  2 +-
 milli/src/update/index_documents/mod.rs           |  2 +-
 milli/src/update/index_documents/typed_chunk.rs   |  2 +-
 milli/src/update/words_prefix_integer_docids.rs   |  2 +-
 milli/src/vector/hf.rs                            |  2 +-
 milli/src/vector/openai.rs                        | 14 +++++++++-----
 14 files changed, 24 insertions(+), 18 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 86318ebd3..8dfd5e55e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3813,7 +3813,6 @@ dependencies = [
  "json-depth-checker",
  "levenshtein_automata",
  "liquid",
- "log",
  "logging_timer",
  "maplit",
  "md5",
diff --git a/milli/Cargo.toml b/milli/Cargo.toml
index 10ad2fb8f..4bc05d2cc 100644
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -71,7 +71,6 @@ itertools = "0.11.0"
 puffin = "0.16.0"
 
 # logging
-log = "0.4.20"
 logging_timer = "1.1.0"
 csv = "1.3.0"
 candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs
index 7bac5ea0c..e411bd032 100644
--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@@ -6,9 +6,9 @@ use charabia::Normalize;
 use fst::automaton::{Automaton, Str};
 use fst::{IntoStreamer, Streamer};
 use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
-use log::error;
 use once_cell::sync::Lazy;
 use roaring::bitmap::RoaringBitmap;
+use tracing::error;
 
 pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
 pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs
index b439b87ec..e7bafaf70 100644
--- a/milli/src/search/new/bucket_sort.rs
+++ b/milli/src/search/new/bucket_sort.rs
@@ -166,6 +166,9 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
             continue;
         }
 
+        let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = ranking_rules[cur_ranking_rule_index].id());
+        let entered = span.enter();
+
         let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(
             ctx,
             logger,
@@ -175,6 +178,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
             back!();
             continue;
         };
+        drop(entered);
 
         ranking_rule_scores.push(next_bucket.score);
 
diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs
index ad8a838c8..400507c97 100644
--- a/milli/src/update/facet/mod.rs
+++ b/milli/src/update/facet/mod.rs
@@ -85,8 +85,8 @@ use charabia::normalizer::{Normalize, NormalizerOption};
 use grenad::{CompressionType, SortAlgorithm};
 use heed::types::{Bytes, DecodeIgnore, SerdeJson};
 use heed::BytesEncode;
-use log::debug;
 use time::OffsetDateTime;
+use tracing::debug;
 
 use self::incremental::FacetsUpdateIncremental;
 use super::FacetsUpdateBulk;
diff --git a/milli/src/update/index_documents/enrich.rs b/milli/src/update/index_documents/enrich.rs
index 82ea335ae..162136912 100644
--- a/milli/src/update/index_documents/enrich.rs
+++ b/milli/src/update/index_documents/enrich.rs
@@ -78,7 +78,7 @@ pub fn enrich_documents_batch(
         },
         [] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)),
         [(field_id, name)] => {
-            log::info!("Primary key was not specified in index. Inferred to '{name}'");
+            tracing::info!("Primary key was not specified in index. Inferred to '{name}'");
             PrimaryKey::Flat { name, field_id: *field_id }
         }
         multiple => {
diff --git a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
index 2449e01cd..1f8af372d 100644
--- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
@@ -431,7 +431,7 @@ fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
                     if let Ok(float) = original.parse() {
                         output_numbers.push(float);
                     } else {
-                        log::warn!(
+                        tracing::warn!(
                             "Internal error, could not parse a geofield that has been validated. Please open an issue."
                         )
                     }
diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs
index 06fb94d8c..f59029e50 100644
--- a/milli/src/update/index_documents/extract/extract_vector_points.rs
+++ b/milli/src/update/index_documents/extract/extract_vector_points.rs
@@ -186,12 +186,12 @@ pub fn extract_vector_points(
                     prompt.render(obkv, DelAdd::Deletion, field_id_map).unwrap_or_default();
                 let new_prompt = prompt.render(obkv, DelAdd::Addition, field_id_map)?;
                 if old_prompt != new_prompt {
-                    log::trace!(
+                    tracing::trace!(
                         "🚀 Changing prompt from\n{old_prompt}\n===to===\n{new_prompt}"
                     );
                     VectorStateDelta::NowGenerated(new_prompt)
                 } else {
-                    log::trace!("⏭️ Prompt unmodified, skipping");
+                    tracing::trace!("⏭️ Prompt unmodified, skipping");
                     VectorStateDelta::NoChange
                 }
             } else {
diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs
index f0fd97965..357cdf8d7 100644
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -14,8 +14,8 @@ use std::fs::File;
 use std::io::BufReader;
 
 use crossbeam_channel::Sender;
-use log::debug;
 use rayon::prelude::*;
+use tracing::debug;
 
 use self::extract_docid_word_positions::extract_docid_word_positions;
 use self::extract_facet_number_docids::extract_facet_number_docids;
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index cbcde19fc..ca3d6bdd1 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -13,11 +13,11 @@ use std::result::Result as StdResult;
 use crossbeam_channel::{Receiver, Sender};
 use heed::types::Str;
 use heed::Database;
-use log::debug;
 use rand::SeedableRng;
 use roaring::RoaringBitmap;
 use serde::{Deserialize, Serialize};
 use slice_group_by::GroupBy;
+use tracing::debug;
 use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
 
 use self::enrich::enrich_documents_batch;
diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs
index 7db0279ba..37399dcd5 100644
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@@ -517,7 +517,7 @@ pub(crate) fn write_typed_chunk_into_index(
                 }
             }
 
-            log::debug!("Finished vector chunk for {}", embedder_name);
+            tracing::debug!("Finished vector chunk for {}", embedder_name);
         }
         TypedChunk::ScriptLanguageDocids(sl_map) => {
             let span = tracing::trace_span!(target: "indexing::write_db", "script_language_docids");
diff --git a/milli/src/update/words_prefix_integer_docids.rs b/milli/src/update/words_prefix_integer_docids.rs
index 23a676bc8..a05eb8721 100644
--- a/milli/src/update/words_prefix_integer_docids.rs
+++ b/milli/src/update/words_prefix_integer_docids.rs
@@ -4,7 +4,7 @@ use std::str;
 use grenad::CompressionType;
 use heed::types::Bytes;
 use heed::{BytesDecode, BytesEncode, Database};
-use log::debug;
+use tracing::debug;
 
 use crate::error::SerializationError;
 use crate::heed_codec::StrBEU16Codec;
diff --git a/milli/src/vector/hf.rs b/milli/src/vector/hf.rs
index 7acb09aa8..fcb4d12d8 100644
--- a/milli/src/vector/hf.rs
+++ b/milli/src/vector/hf.rs
@@ -73,7 +73,7 @@ impl Embedder {
         let device = match candle_core::Device::cuda_if_available(0) {
             Ok(device) => device,
             Err(error) => {
-                log::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error);
+                tracing::warn!("could not initialize CUDA device for Hugging Face embedder, defaulting to CPU: {}", error);
                 candle_core::Device::Cpu
             }
         };
diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs
index 53e8a041b..ff449ac73 100644
--- a/milli/src/vector/openai.rs
+++ b/milli/src/vector/openai.rs
@@ -135,12 +135,16 @@ impl Embedder {
             let retry_duration = match result {
                 Ok(embeddings) => return Ok(embeddings),
                 Err(retry) => {
-                    log::warn!("Failed: {}", retry.error);
+                    tracing::warn!("Failed: {}", retry.error);
                     tokenized |= retry.must_tokenize();
                     retry.into_duration(attempt)
                 }
             }?;
-            log::warn!("Attempt #{}, retrying after {}ms.", attempt, retry_duration.as_millis());
+            tracing::warn!(
+                "Attempt #{}, retrying after {}ms.",
+                attempt,
+                retry_duration.as_millis()
+            );
 
             tokio::time::sleep(retry_duration).await;
         }
@@ -206,7 +210,7 @@ impl Embedder {
             .map_err(EmbedError::openai_unexpected)
             .map_err(Retry::retry_later)?;
 
-        log::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
+        tracing::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
 
         return Err(Retry::retry_tokenized(EmbedError::openai_too_many_tokens(
             error_response.error,
@@ -227,7 +231,7 @@ impl Embedder {
         texts: &[S],
     ) -> Result<Vec<Embeddings<f32>>, Retry> {
         for text in texts {
-            log::trace!("Received prompt: {}", text.as_ref())
+            tracing::trace!("Received prompt: {}", text.as_ref())
         }
         let request = OpenAiRequest { model: self.options.embedding_model.name(), input: texts };
        let response = self
@@ -247,7 +251,7 @@ impl Embedder {
             .map_err(EmbedError::openai_unexpected)
             .map_err(Retry::retry_later)?;
 
-        log::trace!("response: {:?}", response.data);
+        tracing::trace!("response: {:?}", response.data);
 
         Ok(response
             .data
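
For context on what the bucket-sort change buys: the patch opens a `next_bucket` span at TRACE level under the `search::bucket_sort` target and drops the guard as soon as the ranking rule returns, so the span covers exactly one bucket computation. Below is a minimal sketch of how a downstream binary might surface those timings; it assumes the `tracing-subscriber` crate with its `env-filter` feature enabled, and the `main` scaffold plus the literal `id = "words"` value are illustrative stand-ins, not part of this patch:

```rust
use tracing_subscriber::fmt::format::FmtSpan;
use tracing_subscriber::EnvFilter;

fn main() {
    // Keep general output at INFO, but enable the bucket-sort spans,
    // which the patch emits at TRACE under the `search::bucket_sort` target.
    tracing_subscriber::fmt()
        .with_env_filter(EnvFilter::new("info,search::bucket_sort=trace"))
        // Print an event when each span closes; the close event carries
        // `time.busy`/`time.idle`, i.e. the duration of one bucket.
        .with_span_events(FmtSpan::CLOSE)
        .init();

    // The same guard pattern the patch uses inside `bucket_sort`: the
    // span stays open only for the duration of one `next_bucket` call.
    // `id = "words"` stands in for `ranking_rules[cur_ranking_rule_index].id()`.
    let span = tracing::trace_span!(target: "search::bucket_sort", "next_bucket", id = "words");
    let entered = span.enter();
    // ... the ranking rule would compute its next bucket here ...
    drop(entered);
}
```

With `FmtSpan::CLOSE`, every `next_bucket` call then shows up as one log line on exit, tagged with the ranking rule's `id`, which makes per-rule bucket timings visible without attaching a profiler.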