mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 02:27:40 +08:00
Introduce the primary key to the Settings builder structure
This commit is contained in:
parent
a7d6930905
commit
713acc408b
@ -55,15 +55,15 @@ pub fn base_setup(conf: &Conf) -> Index {
|
||||
options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
|
||||
options.max_readers(10);
|
||||
let index = Index::new(options, conf.database_name).unwrap();
|
||||
if let Some(primary_key) = conf.primary_key {
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
index.put_primary_key(&mut wtxn, primary_key).unwrap();
|
||||
}
|
||||
|
||||
let update_builder = UpdateBuilder::new(0);
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = update_builder.settings(&mut wtxn, &index);
|
||||
|
||||
if let Some(primary_key) = conf.primary_key {
|
||||
builder.set_primary_key(primary_key.to_string());
|
||||
}
|
||||
|
||||
if let Some(criterion) = conf.criterion {
|
||||
builder.reset_filterable_fields();
|
||||
builder.reset_criteria();
|
||||
|
@ -60,6 +60,8 @@ pub enum UserError {
|
||||
MissingDocumentId { document: Object },
|
||||
MissingPrimaryKey,
|
||||
NoSpaceLeftOnDevice,
|
||||
PrimaryKeyCannotBeChanged,
|
||||
PrimaryKeyCannotBeReset,
|
||||
SerdeJson(serde_json::Error),
|
||||
UnknownInternalDocumentId { document_id: DocumentId },
|
||||
}
|
||||
@ -211,6 +213,12 @@ impl fmt::Display for UserError {
|
||||
// TODO where can we find it instead of writing the text ourselves?
|
||||
Self::NoSpaceLeftOnDevice => f.write_str("no space left on device"),
|
||||
Self::InvalidStoreFile => f.write_str("store file is not a valid database file"),
|
||||
Self::PrimaryKeyCannotBeChanged => {
|
||||
f.write_str("primary key cannot be changed if the database contains documents")
|
||||
},
|
||||
Self::PrimaryKeyCannotBeReset => {
|
||||
f.write_str("primary key cannot be reset if the database contains documents")
|
||||
},
|
||||
Self::SerdeJson(error) => error.fmt(f),
|
||||
Self::UnknownInternalDocumentId { document_id } => {
|
||||
write!(f, "an unknown internal document id have been used ({})", document_id)
|
||||
|
@ -184,7 +184,7 @@ impl Index {
|
||||
/* documents ids */
|
||||
|
||||
/// Writes the documents ids that corresponds to the user-ids-documents-ids FST.
|
||||
pub fn put_documents_ids(&self, wtxn: &mut RwTxn, docids: &RoaringBitmap) -> heed::Result<()> {
|
||||
pub(crate) fn put_documents_ids(&self, wtxn: &mut RwTxn, docids: &RoaringBitmap) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, RoaringBitmapCodec>(wtxn, main_key::DOCUMENTS_IDS_KEY, docids)
|
||||
}
|
||||
|
||||
@ -202,7 +202,7 @@ impl Index {
|
||||
/* primary key */
|
||||
|
||||
/// Writes the documents primary key, this is the field name that is used to store the id.
|
||||
pub fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> {
|
||||
pub(crate) fn put_primary_key(&self, wtxn: &mut RwTxn, primary_key: &str) -> heed::Result<()> {
|
||||
self.set_updated_at(wtxn, &Utc::now())?;
|
||||
self.main.put::<_, Str, Str>(wtxn, main_key::PRIMARY_KEY_KEY, &primary_key)
|
||||
}
|
||||
@ -220,7 +220,7 @@ impl Index {
|
||||
/* external documents ids */
|
||||
|
||||
/// Writes the external documents ids and internal ids (i.e. `u32`).
|
||||
pub fn put_external_documents_ids<'a>(
|
||||
pub(crate) fn put_external_documents_ids<'a>(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
external_documents_ids: &ExternalDocumentsIds<'a>,
|
||||
@ -254,7 +254,7 @@ impl Index {
|
||||
|
||||
/// Writes the fields ids map which associate the documents keys with an internal field id
|
||||
/// (i.e. `u8`), this field id is used to identify fields in the obkv documents.
|
||||
pub fn put_fields_ids_map(&self, wtxn: &mut RwTxn, map: &FieldsIdsMap) -> heed::Result<()> {
|
||||
pub(crate) fn put_fields_ids_map(&self, wtxn: &mut RwTxn, map: &FieldsIdsMap) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<FieldsIdsMap>>(wtxn, main_key::FIELDS_IDS_MAP_KEY, map)
|
||||
}
|
||||
|
||||
@ -271,7 +271,7 @@ impl Index {
|
||||
|
||||
/// Writes the fields distribution which associates every field name with
|
||||
/// the number of times it occurs in the documents.
|
||||
pub fn put_fields_distribution(&self, wtxn: &mut RwTxn, distribution: &FieldsDistribution) -> heed::Result<()> {
|
||||
pub(crate) fn put_fields_distribution(&self, wtxn: &mut RwTxn, distribution: &FieldsDistribution) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<FieldsDistribution>>(wtxn, main_key::FIELDS_DISTRIBUTION_KEY, distribution)
|
||||
}
|
||||
|
||||
@ -288,7 +288,7 @@ impl Index {
|
||||
|
||||
/// Writes the fields that must be displayed in the defined order.
|
||||
/// There must be not be any duplicate field id.
|
||||
pub fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
|
||||
pub(crate) fn put_displayed_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, main_key::DISPLAYED_FIELDS_KEY, &fields)
|
||||
}
|
||||
|
||||
@ -328,7 +328,7 @@ impl Index {
|
||||
/* searchable fields */
|
||||
|
||||
/// Writes the searchable fields, when this list is specified, only these are indexed.
|
||||
pub fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
|
||||
pub(crate) fn put_searchable_fields(&self, wtxn: &mut RwTxn, fields: &[&str]) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeBincode<&[&str]>>(wtxn, main_key::SEARCHABLE_FIELDS_KEY, &fields)
|
||||
}
|
||||
|
||||
@ -367,7 +367,7 @@ impl Index {
|
||||
/* filterable fields */
|
||||
|
||||
/// Writes the filterable fields names in the database.
|
||||
pub fn put_filterable_fields(&self, wtxn: &mut RwTxn, fields: &HashSet<String>) -> heed::Result<()> {
|
||||
pub(crate) fn put_filterable_fields(&self, wtxn: &mut RwTxn, fields: &HashSet<String>) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<_>>(wtxn, main_key::FILTERABLE_FIELDS_KEY, fields)
|
||||
}
|
||||
|
||||
@ -453,7 +453,7 @@ impl Index {
|
||||
/* faceted documents ids */
|
||||
|
||||
/// Writes the documents ids that are faceted with numbers under this field id.
|
||||
pub fn put_number_faceted_documents_ids(
|
||||
pub(crate) fn put_number_faceted_documents_ids(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
field_id: FieldId,
|
||||
@ -485,7 +485,7 @@ impl Index {
|
||||
}
|
||||
|
||||
/// Writes the documents ids that are faceted with strings under this field id.
|
||||
pub fn put_string_faceted_documents_ids(
|
||||
pub(crate) fn put_string_faceted_documents_ids(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
field_id: FieldId,
|
||||
@ -532,7 +532,7 @@ impl Index {
|
||||
|
||||
/* criteria */
|
||||
|
||||
pub fn put_criteria(&self, wtxn: &mut RwTxn, criteria: &[Criterion]) -> heed::Result<()> {
|
||||
pub(crate) fn put_criteria(&self, wtxn: &mut RwTxn, criteria: &[Criterion]) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<&[Criterion]>>(wtxn, main_key::CRITERIA_KEY, &criteria)
|
||||
}
|
||||
|
||||
@ -550,7 +550,7 @@ impl Index {
|
||||
/* words fst */
|
||||
|
||||
/// Writes the FST which is the words dictionary of the engine.
|
||||
pub fn put_words_fst<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
|
||||
pub(crate) fn put_words_fst<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::WORDS_FST_KEY, fst.as_fst().as_bytes())
|
||||
}
|
||||
|
||||
@ -564,7 +564,7 @@ impl Index {
|
||||
|
||||
/* stop words */
|
||||
|
||||
pub fn put_stop_words<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
|
||||
pub(crate) fn put_stop_words<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::STOP_WORDS_KEY, fst.as_fst().as_bytes())
|
||||
}
|
||||
|
||||
@ -581,7 +581,7 @@ impl Index {
|
||||
|
||||
/* synonyms */
|
||||
|
||||
pub fn put_synonyms(
|
||||
pub(crate) fn put_synonyms(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>,
|
||||
@ -611,7 +611,7 @@ impl Index {
|
||||
/* words prefixes fst */
|
||||
|
||||
/// Writes the FST which is the words prefixes dictionnary of the engine.
|
||||
pub fn put_words_prefixes_fst<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
|
||||
pub(crate) fn put_words_prefixes_fst<A: AsRef<[u8]>>(&self, wtxn: &mut RwTxn, fst: &fst::Set<A>) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, ByteSlice>(wtxn, main_key::WORDS_PREFIXES_FST_KEY, fst.as_fst().as_bytes())
|
||||
}
|
||||
|
||||
|
@ -375,7 +375,7 @@ impl Transform<'_, '_> {
|
||||
|
||||
// Once we have sort and deduplicated the documents we write them into a final file.
|
||||
let mut final_sorter = create_sorter(
|
||||
|_id, _obkvs| Err(InternalError::IndexingMergingKeys { process: "merging documents" }),
|
||||
|_id, _obkvs| Err(InternalError::IndexingMergingKeys { process: "documents" }),
|
||||
self.chunk_compression_type,
|
||||
self.chunk_compression_level,
|
||||
self.chunk_fusing_shrink_size,
|
||||
|
@ -72,6 +72,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
||||
stop_words: Setting<BTreeSet<String>>,
|
||||
distinct_field: Setting<String>,
|
||||
synonyms: Setting<HashMap<String, Vec<String>>>,
|
||||
primary_key: Setting<String>,
|
||||
}
|
||||
|
||||
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
@ -98,6 +99,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
stop_words: Setting::NotSet,
|
||||
distinct_field: Setting::NotSet,
|
||||
synonyms: Setting::NotSet,
|
||||
primary_key: Setting::NotSet,
|
||||
update_id,
|
||||
}
|
||||
}
|
||||
@ -166,6 +168,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn reset_primary_key(&mut self) {
|
||||
self.primary_key = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_primary_key(&mut self, primary_key: String) {
|
||||
self.primary_key = Setting::Set(primary_key);
|
||||
}
|
||||
|
||||
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
|
||||
where
|
||||
F: Fn(UpdateIndexingStep, u64) + Sync
|
||||
@ -423,6 +433,31 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn update_primary_key(&mut self) -> Result<()> {
|
||||
match self.primary_key {
|
||||
Setting::Set(ref primary_key) => {
|
||||
if self.index.number_of_documents(&self.wtxn)? == 0 {
|
||||
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
fields_ids_map.insert(primary_key).ok_or(UserError::AttributeLimitReached)?;
|
||||
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||
self.index.put_primary_key(self.wtxn, primary_key)?;
|
||||
Ok(())
|
||||
} else {
|
||||
Err(UserError::PrimaryKeyCannotBeChanged.into())
|
||||
}
|
||||
},
|
||||
Setting::Reset => {
|
||||
if self.index.number_of_documents(&self.wtxn)? == 0 {
|
||||
self.index.delete_primary_key(self.wtxn)?;
|
||||
Ok(())
|
||||
} else {
|
||||
Err(UserError::PrimaryKeyCannotBeReset.into())
|
||||
}
|
||||
},
|
||||
Setting::NotSet => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn execute<F>(mut self, progress_callback: F) -> Result<()>
|
||||
where
|
||||
F: Fn(UpdateIndexingStep, u64) + Sync
|
||||
@ -436,6 +471,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||
self.update_filterable()?;
|
||||
self.update_distinct_field()?;
|
||||
self.update_criteria()?;
|
||||
self.update_primary_key()?;
|
||||
|
||||
// If there is new faceted fields we indicate that we must reindex as we must
|
||||
// index new fields as facets. It means that the distinct attribute,
|
||||
@ -462,8 +498,9 @@ mod tests {
|
||||
use maplit::{btreeset, hashmap, hashset};
|
||||
use big_s::S;
|
||||
|
||||
use crate::{Criterion, FilterCondition, SearchResult};
|
||||
use crate::error::Error;
|
||||
use crate::update::{IndexDocuments, UpdateFormat};
|
||||
use crate::{Criterion, FilterCondition, SearchResult};
|
||||
|
||||
use super::*;
|
||||
|
||||
@ -977,4 +1014,54 @@ mod tests {
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
FilterCondition::from_str(&rtxn, &index, "toto = 32").unwrap_err();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn setting_primary_key() {
|
||||
let path = tempfile::tempdir().unwrap();
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||
let index = Index::new(options, &path).unwrap();
|
||||
|
||||
// Set the primary key settings
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_primary_key(S("mykey"));
|
||||
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
assert_eq!(index.primary_key(&wtxn).unwrap(), Some("mykey"));
|
||||
|
||||
// Then index some documents with the "mykey" primary key.
|
||||
let content = &br#"[
|
||||
{ "mykey": 1, "name": "kevin", "age": 23 },
|
||||
{ "mykey": 2, "name": "kevina", "age": 21 },
|
||||
{ "mykey": 3, "name": "benoit", "age": 34 },
|
||||
{ "mykey": 4, "name": "bernard", "age": 34 },
|
||||
{ "mykey": 5, "name": "bertrand", "age": 34 },
|
||||
{ "mykey": 6, "name": "bernie", "age": 34 },
|
||||
{ "mykey": 7, "name": "ben", "age": 34 }
|
||||
]"#[..];
|
||||
let mut builder = IndexDocuments::new(&mut wtxn, &index, 1);
|
||||
builder.update_format(UpdateFormat::Json);
|
||||
builder.disable_autogenerate_docids();
|
||||
builder.execute(content, |_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
// We now try to reset the primary key
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.reset_primary_key();
|
||||
|
||||
let err = builder.execute(|_, _| ()).unwrap_err();
|
||||
assert!(matches!(err, Error::UserError(UserError::PrimaryKeyCannotBeReset)));
|
||||
|
||||
// But if we clear the database...
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let builder = ClearDocuments::new(&mut wtxn, &index, 0);
|
||||
builder.execute().unwrap();
|
||||
|
||||
// ...we can change the primary key
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_primary_key(S("myid"));
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
use milli::{Search, SearchResult, Criterion};
|
||||
use big_s::S;
|
||||
use milli::update::Settings;
|
||||
use milli::{Search, SearchResult, Criterion};
|
||||
|
||||
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
||||
use Criterion::*;
|
||||
@ -189,7 +190,9 @@ fn criteria_mixup() {
|
||||
eprintln!("Testing with criteria order: {:?}", &criteria);
|
||||
//update criteria
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
index.put_criteria(&mut wtxn, &criteria).unwrap();
|
||||
let mut builder = Settings::new(&mut wtxn, &index, 0);
|
||||
builder.set_criteria(criteria.iter().map(ToString::to_string).collect());
|
||||
builder.execute(|_, _| ()).unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
let mut rtxn = index.read_txn().unwrap();
|
||||
|
Loading…
Reference in New Issue
Block a user