636: Remove unused `infos`, `http-ui`, and `milli/fuzz` crates r=ManyTheFish a=loiclec

We haven't used the `infos/`, `http-ui/` and `milli/fuzz/` crates in a long time. They are not properly maintained and probably do not work correctly anymore.

This PR removes these crates entirely from the workspace to reduce the amount of code we need to maintain.

Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
This commit is contained in:
bors[bot] 2022-09-14 12:39:57 +00:00 committed by GitHub
commit d94339a858
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 2 additions and 3637 deletions

View File

@ -45,19 +45,6 @@ We recommend using the `--release` flag to test the full performance.
cargo test cargo test
``` ```
### Querying the engine via the web interface
To help you develop your feature you might need to use a web interface! You can query the engine by going to [the HTML page itself](http://127.0.0.1:9700).
### Compile and run the HTTP debug server
You can specify the number of threads to use to index documents and many other settings too.
```bash
cd http-ui
cargo run --release -- --db my-database.mdb -vvv --indexing-jobs 8
```
### Index your documents ### Index your documents
It can index a massive amount of documents in not much time, I already achieved to index: It can index a massive amount of documents in not much time, I already achieved to index:

View File

@ -1,6 +1,6 @@
[workspace] [workspace]
resolver = "2" resolver = "2"
members = ["milli", "filter-parser", "flatten-serde-json", "json-depth-checker", "http-ui", "benchmarks", "infos", "helpers", "cli"] members = ["milli", "filter-parser", "flatten-serde-json", "json-depth-checker", "benchmarks", "helpers", "cli"]
default-members = ["milli"] default-members = ["milli"]
[profile.dev] [profile.dev]

View File

@ -1,5 +1,5 @@
<p align="center"> <p align="center">
<img alt="the milli logo" src="http-ui/public/logo-black.svg"> <img alt="the milli logo" src="logo-black.svg">
</p> </p>
<p align="center">a concurrent indexer combined with fast and relevant search algorithms</p> <p align="center">a concurrent indexer combined with fast and relevant search algorithms</p>
@ -19,8 +19,6 @@ This repository contains crates to quickly debug the engine:
- The `filter-parser` crate contains the parser for the Meilisearch filter syntax. - The `filter-parser` crate contains the parser for the Meilisearch filter syntax.
- The `flatten-serde-json` crate contains the library that flattens serde-json `Value` objects like Elasticsearch does. - The `flatten-serde-json` crate contains the library that flattens serde-json `Value` objects like Elasticsearch does.
- The `helpers` crate is only used to do operations on the database. - The `helpers` crate is only used to do operations on the database.
- The `http-ui` crate is a simple HTTP dashboard to test the features like for real!
- The `infos` crate is used to dump the internal data-structure and ensure correctness.
- The `json-depth-checker` crate is used to indicate if a JSON must be flattened. - The `json-depth-checker` crate is used to indicate if a JSON must be flattened.
## How to use it? ## How to use it?

View File

@ -1,47 +0,0 @@
[package]
name = "http-ui"
description = "The HTTP user interface of the milli search engine"
version = "0.33.4"
authors = ["Clément Renault <clement@meilisearch.com>"]
edition = "2018"
publish = false
[dependencies]
anyhow = "1.0.56"
byte-unit = { version = "4.0.14", default-features = false, features = ["std"] }
crossbeam-channel = "0.5.2"
memmap2 = "0.5.3"
milli = { path = "../milli" }
mimalloc = { version = "0.1.29", default-features = false }
once_cell = "1.10.0"
rayon = "1.5.1"
structopt = { version = "0.3.26", default-features = false, features = ["wrap_help"] }
tempfile = "3.3.0"
# http server
askama = "0.11.1"
askama_warp = "0.12.0"
bytes = "1.1.0"
either = "1.6.1"
flate2 = "1.0.22"
futures = "0.3.21"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.79", features = ["preserve_order"] }
tokio = { version = "1.17.0", features = ["full"] }
tokio-stream = { version = "0.1.8", default-features = false, features = ["sync"] }
warp = "0.3.2"
# logging
fst = "0.4.7"
log = "0.4.17"
stderrlog = "0.5.1"
# Temporary fix for bitvec, remove once fixed. (https://github.com/bitvecto-rs/bitvec/issues/105)
bimap = "0.6.2"
csv = "1.1.6"
funty = "2.0.0"
[dev-dependencies]
maplit = "1.0.2"
serde_test = "1.0.136"

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -1,5 +0,0 @@
/*
2020 Jason Mulligan <jason.mulligan@avoidwork.com>
@version 6.1.0
*/
"use strict";!function(e){var x=/^(b|B)$/,M={iec:{bits:["b","Kib","Mib","Gib","Tib","Pib","Eib","Zib","Yib"],bytes:["B","KiB","MiB","GiB","TiB","PiB","EiB","ZiB","YiB"]},jedec:{bits:["b","Kb","Mb","Gb","Tb","Pb","Eb","Zb","Yb"],bytes:["B","KB","MB","GB","TB","PB","EB","ZB","YB"]}},w={iec:["","kibi","mebi","gibi","tebi","pebi","exbi","zebi","yobi"],jedec:["","kilo","mega","giga","tera","peta","exa","zetta","yotta"]};function t(e){var i,t,o,n,b,r,a,l,s,d,u,c,f,p,B,y=1<arguments.length&&void 0!==arguments[1]?arguments[1]:{},g=[],v=0,m=void 0,h=void 0;if(isNaN(e))throw new TypeError("Invalid number");return t=!0===y.bits,u=!0===y.unix,i=y.base||2,d=void 0!==y.round?y.round:u?1:2,r=void 0!==y.locale?y.locale:"",a=y.localeOptions||{},c=void 0!==y.separator?y.separator:"",f=void 0!==y.spacer?y.spacer:u?"":" ",B=y.symbols||{},p=2===i&&y.standard||"jedec",s=y.output||"string",n=!0===y.fullform,b=y.fullforms instanceof Array?y.fullforms:[],m=void 0!==y.exponent?y.exponent:-1,o=2<i?1e3:1024,(l=(h=Number(e))<0)&&(h=-h),(-1===m||isNaN(m))&&(m=Math.floor(Math.log(h)/Math.log(o)))<0&&(m=0),8<m&&(m=8),"exponent"===s?m:(0===h?(g[0]=0,g[1]=u?"":M[p][t?"bits":"bytes"][m]):(v=h/(2===i?Math.pow(2,10*m):Math.pow(1e3,m)),t&&o<=(v*=8)&&m<8&&(v/=o,m++),g[0]=Number(v.toFixed(0<m?d:0)),g[0]===o&&m<8&&void 0===y.exponent&&(g[0]=1,m++),g[1]=10===i&&1===m?t?"kb":"kB":M[p][t?"bits":"bytes"][m],u&&(g[1]="jedec"===p?g[1].charAt(0):0<m?g[1].replace(/B$/,""):g[1],x.test(g[1])&&(g[0]=Math.floor(g[0]),g[1]=""))),l&&(g[0]=-g[0]),g[1]=B[g[1]]||g[1],!0===r?g[0]=g[0].toLocaleString():0<r.length?g[0]=g[0].toLocaleString(r,a):0<c.length&&(g[0]=g[0].toString().replace(".",c)),"array"===s?g:(n&&(g[1]=b[m]?b[m]:w[p][m]+(t?"bit":"byte")+(1===g[0]?"":"s")),"object"===s?{value:g[0],symbol:g[1],exponent:m}:g.join(f)))}t.partial=function(i){return function(e){return t(e,i)}},"undefined"!=typeof exports?module.exports=t:"function"==typeof define&&void 0!==define.amd?define(function(){return 
t}):e.filesize=t}("undefined"!=typeof window?window:global);

File diff suppressed because one or more lines are too long

View File

@ -1,6 +0,0 @@
<svg width="277" height="236" viewBox="0 0 277 236" fill="none" xmlns="http://www.w3.org/2000/svg">
<path fill-rule="evenodd" clip-rule="evenodd" d="M213.085 190L242.907 86H276.196L246.375 190H213.085Z" fill="#494949"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M0 190L29.8215 86H63.1111L33.2896 190H0Z" fill="#494949"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M124.986 0L57.5772 235.083L60.7752 236H90.6038L158.276 0H124.986Z" fill="#494949"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M195.273 0L127.601 236H160.891L228.563 0H195.273Z" fill="#494949"/>
</svg>

Before

Width:  |  Height:  |  Size: 585 B

View File

@ -1,6 +0,0 @@
<svg width="277" height="236" viewBox="0 0 277 236" fill="none" xmlns="http://www.w3.org/2000/svg">
<path fill-rule="evenodd" clip-rule="evenodd" d="M213.508 190L243.33 86H276.619L246.798 190H213.508Z" fill="#B5B5B5"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M0.422791 190L30.2443 86H63.5339L33.7124 190H0.422791Z" fill="#B5B5B5"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M125.409 0L58 235.083L61.198 236H91.0266L158.699 0H125.409Z" fill="#B5B5B5"/>
<path fill-rule="evenodd" clip-rule="evenodd" d="M195.696 0L128.024 236H161.313L228.985 0H195.696Z" fill="#B5B5B5"/>
</svg>

Before

Width:  |  Height:  |  Size: 592 B

View File

@ -1,154 +0,0 @@
// Handle of the last search request, kept so that a newer search can abort it.
var request = null;
// Id of the pending debounce timer, `null` when no search is scheduled.
var timeoutID = null;
// Whether the facet distribution is requested from the server and displayed.
var display_facets = false;

// Runs a debounced search every time the query or the filters input changes.
//
// `results`, `facets`, `count` and `time` are not declared in this script:
// they are the elements carrying those ids in the page template, reached
// through the browser's named access on `window`.
$('#query, #filters').on('input', function () {
  var query = $('#query').val();
  var filters = $('#filters').val();
  var timeoutMs = 100;

  // Restart the debounce window on every new input event.
  if (timeoutID !== null) {
    window.clearTimeout(timeoutID);
  }

  timeoutID = window.setTimeout(function () {
    request = $.ajax({
      type: "POST",
      url: "query",
      contentType: 'application/json',
      data: JSON.stringify({
        'query': query,
        'filters': filters,
        "facetDistribution": display_facets,
      }),
      success: function (data, textStatus, jqXHR) {
        results.innerHTML = '';
        facets.innerHTML = '';

        // The server reports the time spent on the search in a response header.
        let timeSpent = jqXHR.getResponseHeader('Time-Ms');
        count.innerHTML = data.numberOfCandidates.toLocaleString();
        time.innerHTML = `${timeSpent}ms`;
        time.classList.remove('fade-in-out');

        // One clickable tag per facet value.
        for (const facetName in data.facets) {
          for (const facetValue in data.facets[facetName]) {
            const tag = document.createElement('span');
            const facetCount = data.facets[facetName][facetValue];
            tag.classList.add("tag");
            tag.setAttribute('data-name', facetName);
            tag.setAttribute('data-value', facetValue);
            // Plain text label: must not be interpreted as markup.
            tag.textContent = `${facetName}:${facetValue} (${facetCount})`;
            facets.appendChild(tag);
          }
        }

        // One list item per document, one row per document field.
        for (const doc of data.documents) {
          const item = document.createElement('li');
          item.classList.add("document");

          const fields = document.createElement('ol');

          for (const prop in doc) {
            const field = document.createElement('li');
            field.classList.add("field");

            const attribute = document.createElement('div');
            attribute.classList.add("attribute");
            // Field names are plain text.
            attribute.textContent = prop;

            const content = document.createElement('div');
            content.classList.add("content");

            // NOTE(review): field values are still injected as HTML. The
            // stylesheet has a `.content mark` rule, so the server presumably
            // returns highlighted values containing `<mark>` tags; confirm
            // that every other character is escaped server-side.
            // Stringify Objects and Arrays to avoid [Object object]
            if (typeof doc[prop] === 'object' && doc[prop] !== null) {
              content.innerHTML = JSON.stringify(doc[prop]);
            } else {
              content.innerHTML = doc[prop];
            }

            field.appendChild(attribute);
            field.appendChild(content);

            fields.appendChild(field);
          }

          item.appendChild(fields);
          results.appendChild(item);
        }

        // When we click on a tag we append the facet value
        // at the end of the facet query.
        $('#facets .tag').on('click', function () {
          let name = $(this).attr("data-name");
          let value = $(this).attr("data-value");

          let facet_query = $('#filters').val().trim();
          if (facet_query === "") {
            $('#filters').val(`${name} = "${value}"`).trigger('input');
          } else {
            $('#filters').val(`${facet_query} AND ${name} = "${value}"`).trigger('input');
          }
        });
      },
      beforeSend: function () {
        // `request` still refers to the previous call here: abort it so that
        // its response cannot overwrite the results of this newer search.
        if (request !== null) {
          request.abort();
          time.classList.add('fade-in-out');
        }
      },
    });
  }, timeoutMs);
});
/**
 * Returns the symmetric difference of two arrays: the values of `arr1` and
 * `arr2` that are not present in both of them, in `arr1`-then-`arr2` order.
 * Duplicates inside an input array are preserved.
 *
 * @param {Array} arr1 first array
 * @param {Array} arr2 second array
 * @returns {Array} values found in exactly one of the two arrays
 */
function diffArray(arr1, arr2) {
  return arr1.concat(arr2).filter(function (val) {
    // Return a real boolean: returning `val` itself would silently drop
    // falsy members such as `0`, `""` or `false` from the result.
    return !(arr1.includes(val) && arr2.includes(val));
  });
}
/**
 * Converts an object mapping facet names to iterables of values into an array
 * of arrays of `name:value` strings, one inner array per facet name.
 *
 * @param {Object} facets_obj facet names mapped to their selected values
 * @returns {string[][]} the `name:value` pairs grouped by facet name
 */
function selectedFacetsToArray(facets_obj) {
  const groups = [];
  for (const name in facets_obj) {
    const pairs = [...facets_obj[name]].map((value) => `${name}:${value}`);
    groups.push(pairs);
  }
  return groups;
}
// Toggles the facet panel and flips the label and color of the toggle button.
$('#display_facets').click(function () {
  const button = $('#display_facets');
  const panel = $('#facets');

  if (!display_facets) {
    // Facets were hidden: show them and turn the button into a "hide" button.
    display_facets = true;
    button.html("Hide facets");
    button.addClass("is-danger");
    button.removeClass("is-success");
    panel.show();
    return;
  }

  // Facets were visible: hide them and restore the "display" button.
  display_facets = false;
  button.html("Display facets");
  button.removeClass("is-danger");
  button.addClass("is-success");
  panel.hide();
});
// Show the number of documents using the locale's number formatting.
$('#docs-count').text((_index, text) => parseInt(text).toLocaleString());

// Show the database size through the `filesize` helper to make it easier to read.
$('#db-size').text((_index, text) => filesize(parseInt(text)));

// Once the page is loaded, fire an `input` event so that a placeholder
// search is executed even though the query input is still empty.
$(window).on('load', () => {
  $('#query').trigger('input');
});

View File

@ -1,144 +0,0 @@
#results {
max-width: 900px;
margin: 20px auto 0 auto;
padding: 0;
}
#facets .tag {
margin-right: 1em;
margin-bottom: 1em;
}
#facets {
display: none;
max-width: 900px;
margin: 20px auto 0 auto;
padding: 0;
max-height: 16em;
overflow: scroll;
}
#display_facets {
margin: 20px auto 0 auto;
padding: 5px;
max-height: 16em;
overflow: scroll;
}
#facets .tag:hover {
cursor: pointer;
}
#logo-white {
display: none;
}
#logo-black {
display: inherit;
}
.notification {
display: flex;
justify-content: center;
}
.document {
padding: 20px 20px;
background-color: #f5f5f5;
border-radius: 4px;
margin-bottom: 20px;
display: flex;
}
.document ol {
flex: 0 0 75%;
max-width: 75%;
padding: 0;
margin: 0;
}
.document .image {
max-width: 25%;
flex: 0 0 25%;
padding-left: 30px;
box-sizing: border-box;
}
.document .image img {
width: 100%;
}
.field {
list-style-type: none;
display: flex;
flex-wrap: wrap;
}
.field:not(:last-child) {
margin-bottom: 7px;
}
.attribute {
flex: 0 0 35%;
max-width: 35%;
text-align: right;
padding-right: 10px;
box-sizing: border-box;
text-transform: uppercase;
opacity: 0.7;
}
.content {
max-width: 65%;
flex: 0 0 65%;
box-sizing: border-box;
padding-left: 10px;
color: rgba(0,0,0,.9);
}
.content mark {
background-color: hsl(204, 86%, 88%);
color: hsl(204, 86%, 25%);
}
@keyframes fadeInOut {
0% { opacity: 1; }
30% { opacity: 0.3; }
100% { opacity: 1; }
}
.fade-in-out {
animation: fadeInOut ease 1s infinite;
}
@media (prefers-color-scheme:dark) {
#logo-white {
display: inherit;
}
#logo-black {
display: none;
}
.hero.is-light {
background-color: #242424;
color: inherit;
}
.hero.is-light .title {
color: inherit;
}
.document {
background-color: #242424;
}
.content {
color: #dbdbdb;
}
.content mark {
background-color: hsl(0, 0%, 35%);
color: hsl(0,0%,90.2%);
}
}

View File

@ -1,102 +0,0 @@
// Once the page is loaded, subscribes to the `/updates/ws` WebSocket and
// reflects every update status message in the `results` list (the element
// carrying that id in the page template).
$(window).on('load', function () {
  // `location.protocol` includes the trailing colon (e.g. "https:"), so the
  // comparison must include it or the secure scheme is never selected.
  const wsProtocol = window.location.protocol === 'https:' ? 'wss' : 'ws';
  const url = wsProtocol + '://' + window.location.hostname + ':' + window.location.port + '/updates/ws';
  const socket = new WebSocket(url);

  // Builds an `attribute` label cell followed by its `content` value cell.
  function appendLabelledCell(field, label, contentClass, value) {
    const attribute = document.createElement('div');
    attribute.classList.add("attribute");
    attribute.textContent = label;

    const content = document.createElement('div');
    content.classList.add(contentClass);
    content.classList.add("content");
    content.textContent = value;

    field.appendChild(attribute);
    field.appendChild(content);
  }

  // Returns the status cell of the list item created for the given update.
  function statusCell(updateId) {
    return $(`#update-${updateId} .updateStatus.content`);
  }

  socket.onmessage = function (event) {
    const status = JSON.parse(event.data);

    switch (status.type) {
      case 'Pending': {
        // A new update was registered: add its entry on top of the list.
        const elem = document.createElement('li');
        elem.classList.add("document");
        elem.setAttribute("id", 'update-' + status.update_id);

        const ol = document.createElement('ol');
        const field = document.createElement('li');
        field.classList.add("field");

        appendLabelledCell(field, "update id", "updateId", status.update_id);
        appendLabelledCell(field, "update status", "updateStatus", 'pending');

        ol.appendChild(field);
        elem.appendChild(ol);

        prependChild(results, elem);
        break;
      }
      case 'Progressing': {
        let html;

        let { type, step, total_steps, current, total } = status.meta;

        if (type === 'DocumentsAddition') {
          // If the total is null or undefined then the progress results is infinity.
          let progress = Math.round(current / total * 100);
          // We must divide the progress by the total number of indexing steps.
          progress = progress / total_steps;
          // And mark the previous steps as processed.
          progress = progress + (step * 100 / total_steps);
          // Generate the appropriate html bulma progress bar.
          html = `<progress class="progress" title="${progress}%" value="${progress}" max="100"></progress>`;
        } else {
          // No measurable progress: show an indeterminate progress bar.
          html = `<progress class="progress" max="100"></progress>`;
        }

        statusCell(status.update_id).html(html);
        break;
      }
      case 'Processed':
        // The serialized meta is data, not markup: insert it as text.
        statusCell(status.update_id).text('processed ' + JSON.stringify(status.meta));
        break;
      case 'Aborted':
        statusCell(status.update_id).text('aborted ' + JSON.stringify(status.meta));
        break;
    }
  }
});
/**
 * Inserts `newFirstChild` in `parent` before its current first child, making
 * it the new first child.
 *
 * @param {Node} parent node receiving the child
 * @param {Node} newFirstChild node to insert in first position
 */
function prependChild(parent, newFirstChild) {
  const currentFirst = parent.firstChild;
  parent.insertBefore(newFirstChild, currentFirst);
}
// Show the number of documents using the locale's number formatting.
$('#docs-count').text((_index, text) => parseInt(text).toLocaleString());

// Show the database size through the `filesize` helper to make it easier to read.
$('#db-size').text((_index, text) => filesize(parseInt(text)));

File diff suppressed because it is too large Load Diff

View File

@ -1,362 +0,0 @@
#![allow(unused)]
use std::path::Path;
use std::sync::Arc;
use crossbeam_channel::Sender;
use heed::types::{ByteSlice, DecodeIgnore, OwnedType, SerdeJson};
use heed::{Database, Env, EnvOpenOptions};
use milli::heed;
use serde::{Deserialize, Serialize};
pub type BEU64 = heed::zerocopy::U64<heed::byteorder::BE>;
/// A store of updates backed by a heed environment.
///
/// `M` is the type of the metadata attached to an update when it is
/// registered, `N` the type of the metadata produced once it is processed.
/// Every database is keyed by the big-endian encoded update id.
#[derive(Clone)]
pub struct UpdateStore<M, N> {
/// The environment in which the four databases below are created.
env: Env,
/// JSON-encoded metadata of the updates that are waiting to be processed.
pending_meta: Database<OwnedType<BEU64>, SerdeJson<M>>,
/// Raw content bytes of the updates that are waiting to be processed.
pending: Database<OwnedType<BEU64>, ByteSlice>,
/// JSON-encoded metadata returned by the handler for the processed updates.
processed_meta: Database<OwnedType<BEU64>, SerdeJson<N>>,
/// JSON-encoded metadata of the updates that were aborted.
aborted_meta: Database<OwnedType<BEU64>, SerdeJson<M>>,
/// Channel used to wake up the thread that processes the pending updates.
notification_sender: Sender<()>,
}
/// The user-provided logic that processes one update.
pub trait UpdateHandler<M, N> {
/// Processes the update identified by `update_id`, given the metadata it was
/// registered with and its raw content, and returns the metadata to store
/// for the processed update.
fn handle_update(&mut self, update_id: u64, meta: M, content: &[u8]) -> heed::Result<N>;
}
/// Lets any `FnMut(u64, M, &[u8]) -> heed::Result<N>` closure or function
/// that is `Send + 'static` be used directly as an [`UpdateHandler`].
impl<M, N, F: FnMut(u64, M, &[u8]) -> heed::Result<N> + Send + 'static> UpdateHandler<M, N>
    for F
{
    fn handle_update(&mut self, update_id: u64, meta: M, content: &[u8]) -> heed::Result<N> {
        // Handling an update is simply calling the wrapped callable.
        (*self)(update_id, meta, content)
    }
}
impl<M: 'static, N: 'static> UpdateStore<M, N> {
    /// Opens the update store located at `path` and starts its processing thread.
    ///
    /// Four named databases are created in the environment (`pending-meta`,
    /// `pending`, `processed-meta` and `aborted-meta`), which is why `max_dbs`
    /// is set to 4 on the given `options`.
    ///
    /// A detached thread is spawned: every time it is notified, it calls
    /// `update_handler` on the pending updates, lowest id first, until none
    /// is left. A first notification is sent right away.
    ///
    /// # Errors
    ///
    /// Returns an error if the environment cannot be opened or if one of the
    /// databases cannot be created.
    pub fn open<P, U>(
        mut options: EnvOpenOptions,
        path: P,
        mut update_handler: U,
    ) -> heed::Result<Arc<UpdateStore<M, N>>>
    where
        P: AsRef<Path>,
        U: UpdateHandler<M, N> + Send + 'static,
        M: for<'a> Deserialize<'a>,
        N: Serialize,
    {
        options.max_dbs(4);
        let env = options.open(path)?;
        let pending_meta = env.create_database(Some("pending-meta"))?;
        let pending = env.create_database(Some("pending"))?;
        let processed_meta = env.create_database(Some("processed-meta"))?;
        let aborted_meta = env.create_database(Some("aborted-meta"))?;

        // A capacity of one is enough: a single queued notification already
        // makes the thread drain every pending update.
        let (notification_sender, notification_receiver) = crossbeam_channel::bounded(1);
        // Send a first notification to trigger the process.
        let _ = notification_sender.send(());

        let update_store = Arc::new(UpdateStore {
            env,
            pending,
            pending_meta,
            processed_meta,
            aborted_meta,
            notification_sender,
        });

        // NOTE(review): the thread owns a clone of the `Arc` while the store
        // owns the sender, so the channel is never disconnected from here and
        // this thread (and the store) live until the process exits.
        let update_store_cloned = Arc::clone(&update_store);
        std::thread::spawn(move || {
            // Block and wait for something to process.
            for () in notification_receiver {
                loop {
                    match update_store_cloned.process_pending_update(&mut update_handler) {
                        Ok(Some(_)) => (),
                        Ok(None) => break,
                        // NOTE(review): an update whose processing failed is
                        // still the first pending one, so it is retried
                        // immediately; a persistent failure loops forever.
                        Err(e) => eprintln!("error while processing update: {}", e),
                    }
                }
            }
        });

        Ok(update_store)
    }

    /// Returns the new biggest id to use to store the new update.
    ///
    /// This is one more than the greatest id found in the pending, processed
    /// and aborted metadata databases, or `0` when they are all empty.
    fn new_update_id(&self, txn: &heed::RoTxn) -> heed::Result<u64> {
        // Only the keys are needed, `DecodeIgnore` skips decoding the metadata.
        let last_pending =
            self.pending_meta.remap_data_type::<DecodeIgnore>().last(txn)?.map(|(k, _)| k.get());
        let last_processed =
            self.processed_meta.remap_data_type::<DecodeIgnore>().last(txn)?.map(|(k, _)| k.get());
        let last_aborted =
            self.aborted_meta.remap_data_type::<DecodeIgnore>().last(txn)?.map(|(k, _)| k.get());

        let last_update_id =
            [last_pending, last_processed, last_aborted].iter().copied().flatten().max();

        Ok(last_update_id.map_or(0, |last_id| last_id + 1))
    }

    /// Registers the update content in the pending store and the meta
    /// into the pending-meta store. Returns the new unique update id.
    ///
    /// The processing thread is notified once the update is committed.
    ///
    /// # Panics
    ///
    /// Panics if the notification channel is disconnected.
    pub fn register_update(&self, meta: &M, content: &[u8]) -> heed::Result<u64>
    where
        M: Serialize,
    {
        let mut wtxn = self.env.write_txn()?;

        // We ask the update store to give us a new update id, this is safe,
        // no other update can have the same id because we use a write txn before
        // asking for the id and registering it so other update registering
        // will be forced to wait for a new write txn.
        let update_id = self.new_update_id(&wtxn)?;
        let update_key = BEU64::new(update_id);

        self.pending_meta.put(&mut wtxn, &update_key, meta)?;
        self.pending.put(&mut wtxn, &update_key, content)?;

        wtxn.commit()?;

        // A full channel is fine: a notification is already waiting and the
        // thread will see this update when it handles it.
        if let Err(e) = self.notification_sender.try_send(()) {
            assert!(!e.is_disconnected(), "update notification channel is disconnected");
        }

        Ok(update_id)
    }

    /// Executes the user provided function on the next pending update (the one with the lowest id).
    ///
    /// The handler runs while only a read transaction is open; the write
    /// transaction that moves the update to the processed-meta store is only
    /// opened *after* the handler has returned successfully.
    ///
    /// Returns the id and the new metadata of the processed update, or `None`
    /// if there was no pending update.
    fn process_pending_update<U>(&self, handler: &mut U) -> heed::Result<Option<(u64, N)>>
    where
        U: UpdateHandler<M, N>,
        M: for<'a> Deserialize<'a>,
        N: Serialize,
    {
        // Create a read transaction to be able to retrieve the pending update in order.
        let rtxn = self.env.read_txn()?;

        let (first_id, first_meta) = match self.pending_meta.first(&rtxn)? {
            Some(entry) => entry,
            None => return Ok(None),
        };

        // If there is a pending update we process and only keep
        // a reader while processing it, not a writer.
        let first_content = self.pending.get(&rtxn, &first_id)?.expect("associated update content");

        // Process the pending update using the provided user function.
        let new_meta = handler.handle_update(first_id.get(), first_meta, first_content)?;
        drop(rtxn);

        // Once the pending update has been successfully processed
        // we must remove the content from the pending stores and
        // write the *new* meta to the processed-meta store and commit.
        let mut wtxn = self.env.write_txn()?;
        self.pending_meta.delete(&mut wtxn, &first_id)?;
        self.pending.delete(&mut wtxn, &first_id)?;
        self.processed_meta.put(&mut wtxn, &first_id, &new_meta)?;
        wtxn.commit()?;

        Ok(Some((first_id.get(), new_meta)))
    }

    /// The id and metadata of the update that is currently being processed,
    /// `None` if no update is being processed.
    ///
    /// This is the first entry of the pending-meta store.
    pub fn processing_update(&self) -> heed::Result<Option<(u64, M)>>
    where
        M: for<'a> Deserialize<'a>,
    {
        let rtxn = self.env.read_txn()?;
        let first_pending = self.pending_meta.first(&rtxn)?;
        Ok(first_pending.map(|(key, meta)| (key.get(), meta)))
    }

    /// Execute the user defined function with the meta-store iterators, the first
    /// iterator is the *processed* meta one, the second the *aborted* meta one
    /// and, the last is the *pending* meta one.
    ///
    /// The three iterators read from the same read transaction.
    pub fn iter_metas<F, T>(&self, mut f: F) -> heed::Result<T>
    where
        M: for<'a> Deserialize<'a>,
        N: for<'a> Deserialize<'a>,
        F: for<'a> FnMut(
            heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<N>>,
            heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<M>>,
            heed::RoIter<'a, OwnedType<BEU64>, SerdeJson<M>>,
        ) -> heed::Result<T>,
    {
        let rtxn = self.env.read_txn()?;

        // We get the processed, aborted and pending meta iterators.
        let processed_iter = self.processed_meta.iter(&rtxn)?;
        let aborted_iter = self.aborted_meta.iter(&rtxn)?;
        let pending_iter = self.pending_meta.iter(&rtxn)?;

        // We execute the user defined function with the three iterators.
        (f)(processed_iter, aborted_iter, pending_iter)
    }

    /// Returns the update associated meta or `None` if the update doesn't exist.
    ///
    /// The pending, processed and aborted stores are looked up in that order.
    pub fn meta(&self, update_id: u64) -> heed::Result<Option<UpdateStatusMeta<M, N>>>
    where
        M: for<'a> Deserialize<'a>,
        N: for<'a> Deserialize<'a>,
    {
        let rtxn = self.env.read_txn()?;
        let key = BEU64::new(update_id);

        if let Some(meta) = self.pending_meta.get(&rtxn, &key)? {
            return Ok(Some(UpdateStatusMeta::Pending(meta)));
        }

        if let Some(meta) = self.processed_meta.get(&rtxn, &key)? {
            return Ok(Some(UpdateStatusMeta::Processed(meta)));
        }

        if let Some(meta) = self.aborted_meta.get(&rtxn, &key)? {
            return Ok(Some(UpdateStatusMeta::Aborted(meta)));
        }

        Ok(None)
    }

    /// Aborts an update, an aborted update content is deleted and
    /// the meta of it is moved into the aborted updates database.
    ///
    /// Trying to abort an update that is currently being processed, an update
    /// that has already been processed or which doesn't actually exist, will
    /// return `None`.
    pub fn abort_update(&self, update_id: u64) -> heed::Result<Option<M>>
    where
        M: Serialize + for<'a> Deserialize<'a>,
    {
        let mut wtxn = self.env.write_txn()?;
        let key = BEU64::new(update_id);

        // We cannot abort an update that is currently being processed, which
        // is the first pending one. Only its key is needed here, so the
        // metadata is not decoded.
        let first_pending_id = self
            .pending_meta
            .remap_data_type::<DecodeIgnore>()
            .first(&wtxn)?
            .map(|(first_key, _)| first_key.get());
        if first_pending_id == Some(update_id) {
            return Ok(None);
        }

        let meta = match self.pending_meta.get(&wtxn, &key)? {
            Some(meta) => meta,
            None => return Ok(None),
        };

        self.aborted_meta.put(&mut wtxn, &key, &meta)?;
        self.pending_meta.delete(&mut wtxn, &key)?;
        self.pending.delete(&mut wtxn, &key)?;

        wtxn.commit()?;

        Ok(Some(meta))
    }

    /// Aborts all the pending updates, and not the one being currently processed.
    /// Returns the update metas and ids that were successfully aborted.
    pub fn abort_pendings(&self) -> heed::Result<Vec<(u64, M)>>
    where
        M: Serialize + for<'a> Deserialize<'a>,
    {
        let mut wtxn = self.env.write_txn()?;

        // We skip the first pending update as it is currently being processed.
        // The entries are collected first because the stores cannot be
        // modified while they are being iterated on.
        let aborted_updates = self
            .pending_meta
            .iter(&wtxn)?
            .skip(1)
            .map(|entry| entry.map(|(key, meta)| (key.get(), meta)))
            .collect::<heed::Result<Vec<(u64, M)>>>()?;

        for (id, meta) in &aborted_updates {
            let key = BEU64::new(*id);
            self.aborted_meta.put(&mut wtxn, &key, meta)?;
            self.pending_meta.delete(&mut wtxn, &key)?;
            self.pending.delete(&mut wtxn, &key)?;
        }

        wtxn.commit()?;

        Ok(aborted_updates)
    }
}
/// The metadata of an update together with the store it was found in.
#[derive(Debug, PartialEq, Eq, Hash)]
pub enum UpdateStatusMeta<M, N> {
/// The update is waiting to be processed; holds the metadata it was registered with.
Pending(M),
/// The update has been processed; holds the metadata returned by the handler.
Processed(N),
/// The update has been aborted; holds the metadata it was registered with.
Aborted(M),
}
#[cfg(test)]
mod tests {
    use std::thread;
    use std::time::{Duration, Instant};

    use super::*;

    /// A registered update is processed in the background and its new
    /// metadata ends up in the processed store.
    #[test]
    fn simple() {
        let dir = tempfile::tempdir().unwrap();
        let options = EnvOpenOptions::new();
        // Only lend the path: moving `dir` into `open` would drop it, and so
        // delete the directory, as soon as the environment is opened.
        let update_store =
            UpdateStore::open(options, dir.path(), |_id, meta: String, _content: &_| {
                Ok(meta + " processed")
            })
            .unwrap();

        let meta = String::from("kiki");
        let update_id = update_store.register_update(&meta, &[]).unwrap();

        // Give the background thread some time to process the update.
        thread::sleep(Duration::from_millis(100));

        let meta = update_store.meta(update_id).unwrap().unwrap();
        assert_eq!(meta, UpdateStatusMeta::Processed(String::from("kiki processed")));
    }

    /// Registering updates must not wait for the slow handler, and every
    /// registered update must eventually be processed.
    #[test]
    #[ignore]
    fn long_running_update() {
        let dir = tempfile::tempdir().unwrap();
        let options = EnvOpenOptions::new();
        // Only lend the path so that the directory outlives the store.
        let update_store =
            UpdateStore::open(options, dir.path(), |_id, meta: String, _content: &_| {
                thread::sleep(Duration::from_millis(400));
                Ok(meta + " processed")
            })
            .unwrap();

        let before_register = Instant::now();

        let meta = String::from("kiki");
        let update_id_kiki = update_store.register_update(&meta, &[]).unwrap();
        assert!(before_register.elapsed() < Duration::from_millis(200));

        let meta = String::from("coco");
        let update_id_coco = update_store.register_update(&meta, &[]).unwrap();
        assert!(before_register.elapsed() < Duration::from_millis(200));

        let meta = String::from("cucu");
        let update_id_cucu = update_store.register_update(&meta, &[]).unwrap();
        assert!(before_register.elapsed() < Duration::from_millis(200));

        // Three updates of 400ms each, plus some margin.
        thread::sleep(Duration::from_millis(400 * 3 + 100));

        let meta = update_store.meta(update_id_kiki).unwrap().unwrap();
        assert_eq!(meta, UpdateStatusMeta::Processed(String::from("kiki processed")));

        let meta = update_store.meta(update_id_coco).unwrap().unwrap();
        assert_eq!(meta, UpdateStatusMeta::Processed(String::from("coco processed")));

        let meta = update_store.meta(update_id_cucu).unwrap().unwrap();
        assert_eq!(meta, UpdateStatusMeta::Processed(String::from("cucu processed")));
    }
}

View File

@ -1,102 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="/bulma.min.css">
<link rel="stylesheet" href="/bulma-prefers-dark.min.css">
<link rel="stylesheet" href="/style.css">
<script type="text/javascript" src="/jquery-3.4.1.min.js"></script>
<script type="text/javascript" src="/filesize.min.js"></script>
<title>{{ db_name }} | The milli engine</title>
</head>
<body>
<section class="hero is-light">
<div class="hero-body">
<div class="container">
<div class="columns is-flex is-centered mb-6">
<figure class="image is-128x128">
<img id="logo-white" src="logo-white.svg" alt="milli logo in white">
<img id="logo-black" src="logo-black.svg" alt="milli logo in black">
</figure>
</div>
<nav class="level">
<div class="level-item has-text-centered">
<div>
<p class="heading">Database Name</p>
<p class="title">{{ db_name }}</p>
</div>
</div>
<div class="level-item has-text-centered">
<div>
<p class="heading">Database Size</p>
<p class="title" id="db-size">{{ db_size }}</p>
</div>
</div>
<div class="level-item has-text-centered">
<div>
<p class="heading">Number of Documents</p>
<p class="title" id="docs-count">{{ docs_count }}</p>
</div>
<button id="display_facets" class="button is-success">display facets</button>
</div>
</nav>
</div>
</div>
</section>
<section class="hero container">
<div class="notification" style="border-radius: 0 0 4px 4px;">
<nav class="level">
<!-- Left side -->
<div class="level-left">
<div class="level-item">
<div class="field has-addons has-addons-right">
<input id="query" class="input" type="text" autofocus placeholder="e.g. George Clooney">
<input id="filters" class="input" type="text" placeholder="filters like released >= 1577836800">
</div>
</div>
<div class="level-item"></div>
</div>
<!-- Right side -->
<nav class="level-right">
<div class="level-item has-text-centered">
<div>
<p class="heading">Candidates</p>
<p id="count" class="title">0</p>
</div>
</div>
<div class="level-item has-text-centered">
</div>
<div class="level-item has-text-centered">
<div>
<p class="heading">Time Spent</p>
<p id="time" class="title">0ms</p>
</div>
</div>
</nav>
</nav>
</div>
</section>
<section id="facets">
<!-- facet values -->
</section>
<section>
<ol id="results" class="content">
<!-- documents matching requests -->
</ol>
</section>
</body>
<script type="text/javascript" src="/script.js"></script>
</html>

View File

@ -1,95 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="/bulma.min.css">
<link rel="stylesheet" href="/bulma-prefers-dark.min.css">
<link rel="stylesheet" href="/style.css">
<script type="text/javascript" src="/jquery-3.4.1.min.js"></script>
<script type="text/javascript" src="/filesize.min.js"></script>
<title>{{ db_name }} | Updates</title>
</head>
<body>
<section class="hero is-light">
<div class="hero-body">
<div class="container">
<a href="/">
<div class="columns is-flex is-centered mb-6">
<figure class="image is-128x128">
<img id="logo-white" src="logo-white.svg" alt="milli logo in white">
<img id="logo-black" src="logo-black.svg" alt="milli logo in black">
</figure>
</div>
</a>
<nav class="level">
<div class="level-item has-text-centered">
<div>
<p class="heading">Database Name</p>
<p class="title">{{ db_name }}</p>
</div>
</div>
<div class="level-item has-text-centered">
<div>
<p class="heading">Database Size</p>
<p class="title" id="db-size">{{ db_size }}</p>
</div>
</div>
<div class="level-item has-text-centered">
<div>
<p class="heading">Number of Documents</p>
<p class="title" id="docs-count">{{ docs_count }}</p>
</div>
</div>
</nav>
</div>
</div>
</section>
<section>
<ol id="results" class="content">
{% for update in updates %}
{% match update %}
{% when UpdateStatus::Pending with { update_id, meta } %}
<li id="update-{{ update_id }}" class="document">
<ol>
<li class="field">
<div class="attribute">update id</div><div class="updateId content">{{ update_id }}</div>
<div class="attribute">update status</div><div class="updateStatus content">pending</div>
</li>
</ol>
</li>
{% when UpdateStatus::Processed with { update_id, meta } %}
<li id="update-{{ update_id }}" class="document">
<ol>
<li class="field">
<div class="attribute">update id</div><div class="updateId content">{{ update_id }}</div>
<div class="attribute">update status</div><div class="updateStatus content">{{ meta }}</div>
</li>
</ol>
</li>
{% when UpdateStatus::Aborted with { update_id, meta } %}
<li id="update-{{ update_id }}" class="document">
<ol>
<li class="field">
<div class="attribute">update id</div><div class="updateId content">{{ update_id }}</div>
<div class="attribute">update status</div><div class="updateStatus content">aborted</div>
</li>
</ol>
</li>
{% else %}
{% endmatch %}
{% endfor %}
</ol>
</section>
</body>
<script type="text/javascript" src="/updates-script.js"></script>
</html>

View File

@ -1,17 +0,0 @@
[package]
name = "infos"
version = "0.33.4"
authors = ["Clément Renault <clement@meilisearch.com>"]
edition = "2018"
publish = false
[dependencies]
anyhow = "1.0.56"
byte-unit = { version = "4.0.14", default-features = false, features = ["std"] }
csv = "1.1.6"
milli = { path = "../milli" }
mimalloc = { version = "0.1.29", default-features = false }
roaring = "0.9.0"
serde_json = "1.0.79"
stderrlog = "0.5.1"
structopt = { version = "0.3.26", default-features = false }

File diff suppressed because it is too large Load Diff

View File

@ -1,26 +0,0 @@
# Milli
## Fuzzing milli
Currently, you can only fuzz the indexing process.
To execute the fuzzer run:
```
cargo +nightly fuzz run indexing
```
To execute the fuzzer on multiple threads, you can also run:
```
cargo +nightly fuzz run -j4 indexing
```
Since the fuzzer is going to create a lot of temporary files to let milli index its documents,
I would also recommend executing it on a ramdisk.
Here is how to set up a ramdisk on Linux:
```
sudo mount -t tmpfs none path/to/your/ramdisk
```
Then set the [TMPDIR](https://doc.rust-lang.org/std/env/fn.temp_dir.html) environment variable
to make the fuzzer create its files in it:
```
export TMPDIR=path/to/your/ramdisk
```

View File

@ -1,5 +0,0 @@
Cargo.lock
target/
/corpus/
/artifacts/

View File

@ -1,34 +0,0 @@
[package]
name = "milli-fuzz"
version = "0.0.0"
authors = ["Automatically generated"]
publish = false
edition = "2018"
[package.metadata]
cargo-fuzz = true
[dependencies]
arbitrary = "1.0"
libfuzzer-sys = "0.4"
serde_json = { version = "1.0.62", features = ["preserve_order"] }
anyhow = "1.0"
tempfile = "3.3"
arbitrary-json = "0.1.0"
mimalloc = { version = "0.1.29", default-features = false }
[dependencies.milli]
path = ".."
# Prevent this from interfering with workspaces
[workspace]
members = ["."]
[profile.release]
debug = true
[[bin]]
name = "indexing"
path = "fuzz_targets/indexing.rs"
test = false
doc = false

View File

@ -1,114 +0,0 @@
#![no_main]
use std::collections::HashSet;
use std::io::{BufWriter, Cursor, Read, Seek, Write};
use anyhow::{bail, Result};
use arbitrary_json::ArbitraryValue;
use libfuzzer_sys::fuzz_target;
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::heed::EnvOpenOptions;
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{Index, Object};
use serde_json::{Map, Value};
#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
/// Reads a JSON array of objects from `input` and writes it as an obkv batch to `writer`.
///
/// Returns the number of documents appended to the batch.
///
/// # Errors
///
/// Fails when `input` cannot be deserialized into a list of objects, when that
/// list is empty ("Empty payload"), or when appending a document or finalizing
/// the batch fails.
pub fn read_json(input: impl Read, writer: impl Write + Seek) -> Result<usize> {
    let writer = BufWriter::new(writer);
    let mut builder = DocumentsBatchBuilder::new(writer);

    let values: Vec<Object> = serde_json::from_reader(input)?;
    // Check the parsed payload itself: nothing has been appended to the builder
    // yet, so asking the builder for its document count here would not tell us
    // anything about the input.
    if values.is_empty() {
        bail!("Empty payload");
    }

    for object in values {
        builder.append_json_object(&object)?;
    }

    let count = builder.documents_count();
    // Finalize the batch; the returned writer is not needed by the caller,
    // which keeps its own handle on the underlying buffer.
    builder.into_inner()?;

    Ok(count as usize)
}
/// Indexes one batch of `documents` into `index` within a single write transaction.
///
/// # Errors
///
/// Returns the first error raised while opening the transaction, registering
/// the documents, running the indexing, or committing. The transaction is not
/// committed when an earlier step fails.
fn index_documents(
    index: &mut milli::Index,
    documents: DocumentsBatchReader<Cursor<Vec<u8>>>,
) -> Result<()> {
    let config = IndexerConfig::default();
    let mut wtxn = index.write_txn()?;

    let indexing_config = IndexDocumentsConfig::default();
    let mut builder = IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| ())?;
    builder.add_documents(documents)?;
    // Indexing errors are reported to the caller instead of panicking: the fuzz
    // target deliberately ignores returned errors and only looks for real
    // panics, so an `unwrap` here would turn ordinary failures into crashes.
    builder.execute()?;

    wtxn.commit()?;
    Ok(())
}
/// Creates a fresh index in a temporary directory and applies the settings used by the fuzzer.
///
/// The settings configure the displayed, searchable, filterable and sortable
/// fields as well as the distinct field.
///
/// # Errors
///
/// Returns an error when the temporary directory cannot be created, when the
/// index cannot be opened, or when applying or committing the settings fails.
/// Failures are propagated rather than unwrapped so that an environment problem
/// is not reported by the fuzzer as a crash; the caller skips the run on `Err`.
fn create_index() -> Result<milli::Index> {
    // NOTE(review): `dir` is dropped when this function returns, which presumably
    // removes the directory while the index is still open — confirm this is intended.
    let dir = tempfile::tempdir()?;
    let mut options = EnvOpenOptions::new();
    options.map_size(10 * 1024 * 1024 * 1024); // 10 GiB
    options.max_readers(1);
    let index = Index::new(options, dir.path())?;

    let config = IndexerConfig::default();
    let mut wtxn = index.write_txn()?;

    let mut builder = Settings::new(&mut wtxn, &index, &config);

    let displayed_fields =
        ["id", "title", "album", "artist", "genre", "country", "released", "duration"]
            .iter()
            .map(|s| s.to_string())
            .collect();
    builder.set_displayed_fields(displayed_fields);

    let searchable_fields = ["title", "album", "artist"].iter().map(|s| s.to_string()).collect();
    builder.set_searchable_fields(searchable_fields);

    // The same set of fields is used for both filtering and sorting.
    let faceted_fields: HashSet<String> =
        ["released-timestamp", "duration-float", "genre", "country", "artist"]
            .iter()
            .map(|s| s.to_string())
            .collect();
    builder.set_filterable_fields(faceted_fields.clone());
    builder.set_sortable_fields(faceted_fields);

    builder.set_distinct_field("same".to_string());

    builder.execute(|_| ())?;
    wtxn.commit()?;

    Ok(index)
}
// Fuzz entry point: every generated batch is serialized to JSON, converted into a
// documents batch and indexed into a freshly created index.
fuzz_target!(|batches: Vec<Vec<ArbitraryValue>>| {
    if let Ok(mut index) = create_index() {
        for batch in batches {
            // Turn the arbitrary values into a single JSON array payload.
            let values: Vec<Value> = batch.into_iter().map(Value::from).collect();
            let payload = serde_json::to_string(&Value::Array(values)).unwrap();

            let mut buffer = Cursor::new(Vec::new());

            // We ignore all malformed documents
            if read_json(payload.as_bytes(), &mut buffer).is_ok() {
                buffer.rewind().unwrap();
                let reader = DocumentsBatchReader::from_reader(buffer).unwrap();
                // Many errors can come out of milli and we cannot tell which ones are
                // expected, so errors are discarded and only unexpected panics matter.
                let _ = index_documents(&mut index, reader);
            }
        }

        index.prepare_for_closing().wait();
    }
});