integrate the new Settings in the dumps

This commit is contained in:
tamo 2021-05-10 20:48:06 +02:00
parent d767990424
commit 7d748fa384
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
5 changed files with 16 additions and 13 deletions

View File

@ -8,7 +8,7 @@ use serde_json::{Map, Value};
use crate::helpers::EnvSizer;
pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
pub use updates::{Facets, Settings, Checked, Unchecked, UpdateResult};
pub use updates::{Facets, Settings, Checked, Unchecked};
use serde::{de::Deserializer, Deserialize};
mod search;

View File

@ -8,9 +8,10 @@ use log::info;
use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
use serde::{Deserialize, Serialize};
use super::{deserialize_some, Index};
use crate::index_controller::UpdateResult;
use super::{deserialize_some, Index};
/// Field-less marker type; it carries no data of its own.
///
/// NOTE(review): appears to be intended as the `T` parameter of `Settings<T>`
/// (as in `Settings<Checked>`), presumably tagging settings that have gone
/// through `check()` — confirm against the `Settings` implementation.
#[derive(Clone, Default, Debug)]
pub struct Checked;
@ -35,7 +36,11 @@ pub struct Settings<T> {
)]
pub searchable_attributes: Option<Option<Vec<String>>>,
#[serde(default)]
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none"
)]
pub attributes_for_faceting: Option<Option<HashMap<String, String>>>,
#[serde(

View File

@ -244,9 +244,6 @@ pub fn load_dump(
// finally we can move all the unprocessed update files into our new DB
let update_path = tmp_dir_path.join("update_files");
let files: Vec<_> = std::fs::read_dir(&db_path.join("updates"))?
.map(|file| file.unwrap().path())
.collect();
let db_update_path = db_path.join("updates/update_files");
eprintln!("path {:?} exists: {:?}", update_path, update_path.exists());
eprintln!(

View File

@ -1,8 +1,8 @@
use std::collections::{BTreeMap, BTreeSet};
use std::{collections::{BTreeMap, BTreeSet}, marker::PhantomData};
use log::warn;
use serde::{Deserialize, Serialize};
use crate::index_controller;
use crate::{index::Unchecked, index_controller};
use crate::index::deserialize_some;
use super::*;
@ -27,7 +27,7 @@ struct Settings {
}
/// we need to **always** be able to convert the old settings to the settings currently being used
impl From<Settings> for index_controller::Settings {
impl From<Settings> for index_controller::Settings<Unchecked> {
fn from(settings: Settings) -> Self {
if settings.synonyms.flatten().is_some() {
error!("`synonyms` are not yet implemented and thus will be ignored");
@ -63,6 +63,7 @@ impl From<Settings> for index_controller::Settings {
}).collect())),
// we need to convert the old `Vec<String>` into a `BTreeSet<String>`
stop_words: settings.stop_words.map(|o| o.map(|vec| vec.into_iter().collect())),
_kind: PhantomData,
}
}
}
@ -89,9 +90,9 @@ pub fn import_index(size: usize, dump_path: &Path, index_path: &Path, primary_ke
// extract `settings.json` file and import content
let settings = import_settings(&dump_path)?;
let settings: index_controller::Settings = settings.into();
let settings: index_controller::Settings<Unchecked> = settings.into();
let update_builder = UpdateBuilder::new(0);
index.update_settings(&settings, update_builder)?;
index.update_settings(&settings.check(), update_builder)?;
let update_builder = UpdateBuilder::new(1);
let file = File::open(&dump_path.join("documents.jsonl"))?;

View File

@ -1,11 +1,11 @@
use heed::EnvOpenOptions;
use milli::{update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}};
use crate::index::Index;
use crate::index::{Checked, Index};
use crate::index_controller::Settings;
use std::{fs::File, path::Path, sync::Arc};
/// Extract Settings from the `settings.json` file present at the provided `dir_path`
fn import_settings(dir_path: &Path) -> anyhow::Result<Settings> {
fn import_settings(dir_path: &Path) -> anyhow::Result<Settings<Checked>> {
let path = dir_path.join("settings.json");
let file = File::open(path)?;
let reader = std::io::BufReader::new(file);