diff --git a/.gitignore b/.gitignore
index c38aa51d3..c0747b6e0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,7 @@
/target
-/Cargo.lock
-meilidb/Cargo.lock
-meilidb-core/Cargo.lock
-**/*.rs.bk
+Cargo.lock
**/*.csv
**/*.json_lines
-**/*.rdb
+**/*.rs.bk
+/*.mdb
+/query-history.txt
diff --git a/Cargo.toml b/Cargo.toml
index 84a45aa9f..0903eab10 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,8 +1,6 @@
[workspace]
members = [
- "meilidb",
"meilidb-core",
- "meilidb-data",
"meilidb-schema",
"meilidb-tokenizer",
]
diff --git a/LICENSE b/LICENSE
index 4589babfa..25a8574b1 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,21 +1,13 @@
-MIT License
+“Commons Clause” License Condition v1.0
-Copyright (c) 2018 Clément Renault
+The Software is provided to you by the Licensor under the License, as defined below, subject to the following condition.
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
+Without limiting other conditions in the License, the grant of rights under the License will not include, and the License does not grant to you, the right to Sell the Software.
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
+For purposes of the foregoing, “Sell” means practicing any or all of the rights granted to you under the License to provide to third parties, for a fee or other consideration (including without limitation fees for hosting or consulting/ support services related to the Software), a product or service whose value derives, entirely or substantially, from the functionality of the Software. Any license notice or attribution required by the License must also include this Commons Clause License Condition notice.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
+Software: MeiliDB
+
+License: MIT
+
+Licensor: MEILI SAS
diff --git a/README.md b/README.md
index d92653895..372389242 100644
--- a/README.md
+++ b/README.md
@@ -6,19 +6,19 @@
[![Rust 1.31+](https://img.shields.io/badge/rust-1.31+-lightgray.svg)](
https://www.rust-lang.org)
-A _full-text search database_ using a key-value store internally.
+A _full-text search database_ based on the fast [LMDB key-value store](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database).
## Features
-- Provides [6 default ranking criteria](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/criterion/mod.rs#L95-L101) used to [bucket sort](https://en.wikipedia.org/wiki/Bucket_sort) documents
-- Accepts [custom criteria](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/criterion/mod.rs#L22-L29) and can apply them in any custom order
-- Support [ranged queries](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L146), useful for paginating results
-- Can [distinct](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L68) and [filter](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L57) returned documents based on context defined rules
-- Can store complete documents or only [user schema specified fields](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/examples/movies/schema-movies.toml)
-- The [default tokenizer](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-tokenizer/src/lib.rs#L99) can index latin and kanji based languages
-- Returns [the matching text areas](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/lib.rs#L117-L120), useful to highlight matched words in results
-- Accepts query time search config like the [searchable fields](https://github.com/meilisearch/MeiliDB/blob/3d85cbf0cfa3a3103cf1e151a75a443719cdd5d7/meilidb-core/src/query_builder.rs#L79)
-- Supports run time indexing (incremental indexing)
+- Provides [6 default ranking criteria](https://github.com/Kerollmops/new-meilidb/blob/dea7e28a45dde897f97742bdd33fcf75d5673502/meilidb-core/src/criterion/mod.rs#L14-L19) used to [bucket sort](https://en.wikipedia.org/wiki/Bucket_sort) documents
+- Accepts [custom criteria](https://github.com/Kerollmops/new-meilidb/blob/dea7e28a45dde897f97742bdd33fcf75d5673502/meilidb-core/src/criterion/mod.rs#L24-L33) and can apply them in any custom order
+- Support [ranged queries](https://github.com/Kerollmops/new-meilidb/blob/dea7e28a45dde897f97742bdd33fcf75d5673502/meilidb-core/src/query_builder.rs#L255-L260), useful for paginating results
+- Can [distinct](https://github.com/Kerollmops/new-meilidb/blob/dea7e28a45dde897f97742bdd33fcf75d5673502/meilidb-core/src/query_builder.rs#L241-L246) and [filter](https://github.com/Kerollmops/new-meilidb/blob/dea7e28a45dde897f97742bdd33fcf75d5673502/meilidb-core/src/query_builder.rs#L223-L235) returned documents based on context defined rules
+- Can store complete documents or only [user schema specified fields](https://github.com/Kerollmops/new-meilidb/blob/dea7e28a45dde897f97742bdd33fcf75d5673502/meilidb-schema/src/lib.rs#L265-L279)
+- The [default tokenizer](https://github.com/Kerollmops/new-meilidb/blob/dea7e28a45dde897f97742bdd33fcf75d5673502/meilidb-tokenizer/src/lib.rs) can index latin and kanji based languages
+- Returns [the matching text areas](https://github.com/Kerollmops/new-meilidb/blob/dea7e28a45dde897f97742bdd33fcf75d5673502/meilidb-core/src/lib.rs#L66-L88), useful to highlight matched words in results
+- Accepts query time search config like the [searchable attributes](https://github.com/Kerollmops/new-meilidb/blob/dea7e28a45dde897f97742bdd33fcf75d5673502/meilidb-core/src/query_builder.rs#L248-L252)
+- Supports run time indexing (incremental indexing)
@@ -64,19 +64,18 @@ Currently MeiliDB do not provide an http server but you can run these two exampl
It creates an index named _movies_ and insert _19 700_ (in batches of _1000_) movies into it.
```bash
-cargo run --release --example create-database -- \
- --schema examples/movies/schema-movies.toml \
- --update-group-size 1000 \
- movies.mdb \
- examples/movies/movies.csv
+cargo run --release --example from_file -- \
+ index example.mdb datasets/movies/data.csv \
+ --schema datasets/movies/schema.toml \
+ --update-group-size 1000
```
Once this is done, you can query this database using the second binary example.
```bash
-cargo run --release --example query-database -- \
- movies.mdb \
- --fetch-timeout-ms 50 \
- -n 4 \
- id title overview release_date poster
+cargo run --release --example from_file -- \
+ search example.mdb
+ --number 4 \
+ --filter '!adult' \
+ id popularity adult original_title
```
diff --git a/ci/meilidb.sh b/ci/meilidb.sh
deleted file mode 100755
index 35bf88fef..000000000
--- a/ci/meilidb.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/bash
-
-cd "$(dirname "$0")"/..
-set -ex
-
-export RUSTFLAGS="-D warnings"
-
-cargo check --no-default-features
-cargo check --bins --examples --tests
-cargo test
-
-if [[ "$TRAVIS_RUST_VERSION" == "nightly" ]]; then
- cargo check --no-default-features --features nightly
- cargo test --features nightly
-fi
diff --git a/examples/movies/README.md b/datasets/movies/README.md
similarity index 100%
rename from examples/movies/README.md
rename to datasets/movies/README.md
diff --git a/examples/movies/movies.csv b/datasets/movies/movies.csv
similarity index 100%
rename from examples/movies/movies.csv
rename to datasets/movies/movies.csv
diff --git a/examples/movies/schema-movies.toml b/datasets/movies/schema-movies.toml
similarity index 100%
rename from examples/movies/schema-movies.toml
rename to datasets/movies/schema-movies.toml
diff --git a/examples/kaggle/kaggle.csv b/examples/kaggle/kaggle.csv
deleted file mode 100644
index 6bf7a70d3..000000000
--- a/examples/kaggle/kaggle.csv
+++ /dev/null
@@ -1,122 +0,0 @@
-id,title,description,image
-711158459,Sony PlayStation 4 (PS4) (Latest Model)- 500 GB Jet Black Console,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform. 
Read more about the PS4 on ebay guides.",http://thumbs2.ebaystatic.com/d/l225/m/mzvzEUIknaQclZ801YCY1ew.jpg
-711158460,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform. 
Read more about the PS4 on ebay guides.",http://thumbs3.ebaystatic.com/d/l225/m/mJNDmSyIS3vUasKIJEBy4Cw.jpg
-711158461,Sony PlayStation 4 PS4 500 GB Jet Black Console,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform. 
Read more about the PS4 on ebay guides.",http://thumbs4.ebaystatic.com/d/l225/m/m10NZXArmiIkpkTDDkAUVvA.jpg
-711158462,Sony - PlayStation 4 500GB The Last of Us Remastered Bundle - Black,,http://thumbs2.ebaystatic.com/d/l225/m/mZZXTmAE8WZDH1l_E_PPAkg.jpg
-711158463,Sony PlayStation 4 (PS4) (Latest Model)- 500 GB Jet Black Console,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform. 
Read more about the PS4 on ebay guides.",http://thumbs3.ebaystatic.com/d/l225/m/mzvzEUIknaQclZ801YCY1ew.jpg
-711158464,Sony PlayStation 4 (PS4) (Latest Model)- 500 GB Jet Black Console,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform. 
Read more about the PS4 on ebay guides.",http://thumbs4.ebaystatic.com/d/l225/m/mzvzEUIknaQclZ801YCY1ew.jpg
-711158465,BRAND NEW Sony PlayStation 4 BUNDLE 500gb,,http://thumbs4.ebaystatic.com/d/l225/m/m9TQTiWcWig7SeQh9algLZg.jpg
-711158466,"Sony PlayStation 4 500GB, Dualshock Wireless Control, HDMI Gaming Console Refurb","The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform. 
Read more about the PS4 on ebay guides.",http://thumbs4.ebaystatic.com/d/l225/m/mTZYG5N6xWfBi4Ok03HmpMw.jpg
-711158467,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console w/ 2 Controllers,,http://thumbs2.ebaystatic.com/d/l225/m/mX5Qphrygqeoi7tAH5eku2A.jpg
-711158468,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console *NEW*,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform. 
Read more about the PS4 on ebay guides.",http://thumbs2.ebaystatic.com/d/l225/m/mGjN4IrJ0O8kKD_TYMWgGgQ.jpg
-711158469,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console..wth Mortal Kombat X,,http://thumbs2.ebaystatic.com/d/l225/m/mrpqSNXwlnUVKnEscE4348w.jpg
-711158470,Genuine SONY PS4 Playstation 4 500GB Gaming Console - Black,,http://thumbs4.ebaystatic.com/d/l225/m/myrPBFCpb4H5rHI8NyiS2zA.jpg
-711158471,[Sony] Playstation 4 PS4 Video Game Console Black - Latest Model,,http://thumbs4.ebaystatic.com/d/l225/m/mce0c7mCuv3xpjllJXx093w.jpg
-711158472,Sony PlayStation 4 (Latest Model) 500 GB Jet Black Console,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform. 
Read more about the PS4 on ebay guides.",http://thumbs2.ebaystatic.com/d/l225/m/miVSA1xPO5fCNdYzEMc8rSQ.jpg
-711158473,Sony PlayStation 4 - 500 GB Jet Black Console - WITH LAST OF US REMASTERED,,http://thumbs2.ebaystatic.com/d/l225/m/mLjnOxv2GWkrkCtgsDGhJ6A.jpg
-711158474,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console,,http://thumbs3.ebaystatic.com/d/l225/m/mjMittBaXmm_n4AMpETBXhQ.jpg
-711158475,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console,,http://thumbs2.ebaystatic.com/d/l225/m/m1n1qrJ7-VGbe7xQvGdeD6Q.jpg
-711158476,"Sony PlayStation 4 - 500 GB Jet Black Console (3 controllers,3 games included)",,http://thumbs3.ebaystatic.com/d/l225/m/mIoGIj9FZG7HoEVkPlnyizA.jpg
-711158477,Sony PlayStation 4 500GB Console with 2 Controllers,"The PlayStation 4 system opens the door to an incredible journey through immersive new gaming worlds and a deeply connected gaming community. Step into living, breathing worlds where you are hero of your epic journey. Explore gritty urban environments, vast galactic landscapes, and fantastic historical settings brought to life on an epic scale, without limits. With an astounding launch lineup and over 180 games in development the PS4 system offers more top-tier blockbusters and inventive indie hits than any other next-gen console. The PS4 system is developer inspired, gamer focused. The PS4 system learns how you play and intuitively curates the content you use most often. Fire it up, and your PS4 system points the way to new, amazing experiences you can jump into alone or with friends. Create your own legend using a sophisticated, intuitive network built for gamers. Broadcast your gameplay live and direct to the world, complete with your commentary. Or immortalize your most epic moments and share at the press of a button. Access the best in music, movies, sports and television. PS4 system doesn t require a membership fee to access your digital entertainment subscriptions. You get the full spectrum of entertainment that matters to you on the PS4 system. PlayStation 4: The Best Place to Play The PlayStation 4 system provides dynamic, connected gaming, powerful graphics and speed, intelligent personalization, deeply integrated social capabilities, and innovative second-screen features. Combining unparalleled content, immersive gaming experiences, all of your favorite digital entertainment apps, and PlayStation exclusives, the PS4 system focuses on the gamers.Gamer Focused, Developer InspiredThe PS4 system focuses on the gamer, ensuring that the very best games and the most immersive experiences are possible on the platform. 
Read more about the PS4 on ebay guides.",http://thumbs2.ebaystatic.com/d/l225/m/m4fuJ5Ibrj450-TZ83FAkIQ.jpg
-711158478,Sony - PlayStation 4 500GB The Last of Us Remastered Bundle - Black,,http://thumbs3.ebaystatic.com/d/l225/m/mzXSIw8Hlnff8IjXJQrXJSw.jpg
-711158479,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console,,http://thumbs2.ebaystatic.com/d/l225/m/m-9S63CgFoUijY3ZTyNs3KA.jpg
-711158480,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console,,http://thumbs1.ebaystatic.com/d/l225/m/mdF9Bisg9wXjv_R9Y_13MWw.jpg
-711158481,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console*,,http://thumbs1.ebaystatic.com/d/l225/m/m4_OQHMmIOCa8uEkBepRR5A.jpg
-711158482,Sony PlayStation 4 (Latest Model)- 500 GB Jet Black Console,,http://thumbs2.ebaystatic.com/d/l225/m/mZ0nR8iz-QAfLssJZMp3L5Q.jpg
-711158483,[Sony] Playstation 4 PS4 1105A Video Game Console 500GB White - Latest Model,,http://thumbs4.ebaystatic.com/d/l225/m/m8iTz5cLQLNjD9D3O2jT3IQ.jpg
-711158484,NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream 5ml,,http://thumbs2.ebaystatic.com/d/l225/m/mrraWCpvP5YKk5rYgotVDLg.jpg
-711158485,Obagi Elastiderm Eye Treatment Cream 0.5 oz / 15g Authentic NiB Sealed [5],,http://thumbs1.ebaystatic.com/d/l225/m/mJ4ekz6_bDT5G7wYtjM-qRg.jpg
-711158486,Lancome Renergie Eye Anti-Wrinkle & Firming Eye Cream 0.5oz New,,http://thumbs2.ebaystatic.com/d/l225/m/mxwwyDQraZ-TEtr_Y6qRi7Q.jpg
-711158487,OZ Naturals - The BEST Eye Gel - Eye Cream For Dark Circles Puffiness and,,http://thumbs2.ebaystatic.com/d/l225/m/mk2Z-hX5sT4kUxfG6g_KFpg.jpg
-711158488,Elastiderm Eye Cream (0.5oz/15g),,http://thumbs3.ebaystatic.com/d/l225/m/mHxb5WUc5MtGzCT2UXgY_hg.jpg
-711158489,new CLINIQUE Repairwear Laser Focus Wrinkle Correcting Eye Cream 0.17 oz/ 5 ml,,http://thumbs1.ebaystatic.com/d/l225/m/mQSX2wfrSeGy3uA8Q4SbOKw.jpg
-711158490,NIB Full Size Dermalogica Multivitamin Power Firm Eye Cream,,http://thumbs4.ebaystatic.com/d/l225/m/m2hxo12e5NjXgGiKIaCvTLA.jpg
-711158491,24K Gold Collagen Anti-Dark Circles Anti-Aging Bio Essence Repairing Eye Cream,,http://thumbs4.ebaystatic.com/d/l225/m/mt96efUK5cPAe60B9aGmgMA.jpg
-711158492,Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream Full Size .5oz 15mL,,http://thumbs3.ebaystatic.com/d/l225/m/mZyV3wKejCMx9RrnC8X-eMw.jpg
-711158493,NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream 5ml,,http://thumbs4.ebaystatic.com/d/l225/m/m9hX_z_DFnbNCTh0VFv3KcQ.jpg
-711158494,3 Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream .17 oz/5 ml Each,,http://thumbs1.ebaystatic.com/d/l225/m/mYiHsrGffCg_qgkTbUWZU1A.jpg
-711158495,Lancome High Resolution Eye Cream .95 Oz Refill-3X .25 Oz Plus .20 Oz Lot,,http://thumbs1.ebaystatic.com/d/l225/m/mFuQxKoEKQ6wtk2bGxfKwow.jpg
-711158496,NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream 5ml,,http://thumbs4.ebaystatic.com/d/l225/m/mLBRCDiELUnYos-vFmIcc7A.jpg
-711158497,Neutrogena Rapid Wrinkle Repair Eye Cream -0.5 Oz. -New-,,http://thumbs4.ebaystatic.com/d/l225/m/mE1RWpCOxkCGuuiJBX6HiBQ.jpg
-711158498,20g Snail Repair Eye Cream Natural Anti-Dark Circles Puffiness Aging Wrinkles,,http://thumbs4.ebaystatic.com/d/l225/m/mh4gBNzINDwds_r778sJRjg.jpg
-711158499,Vichy-Neovadiol GF Eye & Lip Contour Cream 0.5 Fl. Oz,,http://thumbs4.ebaystatic.com/d/l225/m/m_6f0ofCm7PTzuithYuZx3w.jpg
-711158500,Obagi Elastiderm Eye Cream 0.5 oz. New In Box. 100% Authentic! New Packaging!,,http://thumbs2.ebaystatic.com/d/l225/m/ma0PK-ASBXUiHERR19MyImA.jpg
-711158501,NEW! Clinique Repairwear Laser Focus Wrinkle Correcting Eye Cream .17oz / 5ml,,http://thumbs3.ebaystatic.com/d/l225/m/m72NaXYlcXcEeqQFKWvsdZA.jpg
-711158502,Kiehl's CREAMY EYE TREATMENT cream with AVOCADO 0.5 oz FULL SIZE,,http://thumbs3.ebaystatic.com/d/l225/m/mOI407HnILb_tf-RgdvfYyA.jpg
-711158503,Clinique repairwear laser focus wrinkle correcting eye cream .5 oz 15ml,,http://thumbs4.ebaystatic.com/d/l225/m/mQwNVst3bYG6QXouubmLaJg.jpg
-711158504,Caudalie Premier Cru The Eye Cream La Creme New Anti Aging Eye Treatment,,http://thumbs1.ebaystatic.com/d/l225/m/mM4hPTAWXeOjovNk9s_Cqag.jpg
-711158505,Jeunesse Instantly Ageless -- New Box Of 50 Sachets -- Eye - Face Wrinkle Cream,,http://thumbs2.ebaystatic.com/d/l225/m/m5EfWbi6ZYs4JpYcsl0Ubaw.jpg
-711158506,VELOUR SKIN EYE CREAM .5 FL OZ 15ML NEW NIP ANTI-AGING WRINKLE CREAM,,http://thumbs1.ebaystatic.com/d/l225/m/m2uEf6q1yASH8FkWqYdOv1w.jpg
-711158507,Shiseido White Lucent Anti-Dark Circles/Puffiness Eye Cream 15ml/.53oz Full Size,,http://thumbs1.ebaystatic.com/d/l225/m/m_CtzoqU2Vgv4GKx8ONS6qw.jpg
-711158508,Murad Resurgence Renewing Eye Cream Anti-Aging .25 oz NEW Dark Circles Wrinkle,,http://thumbs1.ebaystatic.com/d/l225/m/mhWJC10iowgUDGm4KMQKNMg.jpg
-711158509,D-Link DIR-615 300Mbps Wireless-N Router 4-Port w/Firewall,,http://thumbs3.ebaystatic.com/d/l225/m/mdSBH9ROXRn3TBb8OFDT6jA.jpg
-711158510,Triton MOF001 2 1/4hp dual mode precision Router. New!! *3 day auction*,,http://thumbs1.ebaystatic.com/d/l225/m/mozWd2SBskbDBlWAKsMlVew.jpg
-711158511,Porter-Cable 3-1/4 HP Five-Speed Router 7518 - Power Tools Routers,,http://thumbs2.ebaystatic.com/d/l225/m/mpZDTXpiyesDrZh_FLMyqXQ.jpg
-711158512,Linksys EA6900 AC1900 Wi-Fi Wireless Router Dual Band with Gigabit &USB 3.0 Port,,http://thumbs4.ebaystatic.com/d/l225/m/m3OfBSnHBDhhs_Ve-DSBKQw.jpg
-711158513,Linksys EA6500 1300 Mbps 4-Port Gigabit Wireless AC Router,,http://thumbs1.ebaystatic.com/d/l225/m/m7cfymJPc7CLADoTiEYFzwA.jpg
-711158514,Makita RT0700CX3 1-1/4 Horsepower Compact Router Kit / Trimmer NEW,,http://thumbs2.ebaystatic.com/d/l225/m/mr-F3rCxDYsLcj8hnmaRN4A.jpg
-711158515,NETGEAR R6250 AC1600 Smart WiFi Dual Band Gigabit Router 802.11ac 300 1300 Mbps,,http://thumbs4.ebaystatic.com/d/l225/m/mc8Ic8Cq2lPqPnjNGAQBBCQ.jpg
-711158516,NETGEAR Nighthawk AC1900 Dual Band Wi-Fi Gigabit Router (R7000) BRAND NEW SEALED,,http://thumbs3.ebaystatic.com/d/l225/m/mdL34EQi0l-Kg-DlvF6wpqA.jpg
-711158517,Netgear WNDR3400 N600 Wireless Dual Band Router (WNDR3400-100),,http://thumbs4.ebaystatic.com/d/l225/m/mKr4cNk6utJXSdVYXzwrScQ.jpg
-711158518,Netgear N600 300 Mbps 4-Port 10/100 Wireless N Router (WNDR3400),,http://thumbs2.ebaystatic.com/d/l225/m/mUPdyhbW9pzEm1VbqX0YudA.jpg
-711158519,NETGEAR N600 WNDR3400 Wireless Dual Band Router F/S,,http://thumbs1.ebaystatic.com/d/l225/m/my55jF5kHnG9ipzFycnjooA.jpg
-711158520,Netgear NIGHTHAWK AC1900 1300 Mbps 4-Port Gigabit Wireless AC Router (R7000),,http://thumbs3.ebaystatic.com/d/l225/m/mrPLRTnWx_JXLNIp5pCBnzQ.jpg
-711158521,Netgear N900 450 Mbps 4-Port Gigabit Wireless N Router (WNDR4500),,http://thumbs2.ebaystatic.com/d/l225/m/mXBL01faHlHm7Ukh188t3yQ.jpg
-711158522,Netgear R6300V2 AC1750 1300 Mbps 4-Port Gigabit Wireless AC Router,,http://thumbs1.ebaystatic.com/d/l225/m/mTdnFB9Z71efYJ9I5-k186w.jpg
-711158523,Makita RT0701C 1-1/4 HP Compact Router With FACTORY WARRANTY!!!,,http://thumbs2.ebaystatic.com/d/l225/m/m7AA4k3MzYFJcTlBrT3DwhA.jpg
-711158524,"CISCO LINKSYS EA4500 DUAL-BAND N9000 WIRELESS ROUTER, 802.11N, UP TO 450 MBPs",,http://thumbs4.ebaystatic.com/d/l225/m/mwfVIXD3dZYt_qpHyprd7hg.jpg
-711158525,Netgear N300 v.3 300 Mbps 5-Port 10/100 Wireless N Router (WNR2000),,http://thumbs4.ebaystatic.com/d/l225/m/mopRjvnZwbsVH9euqGov5kw.jpg
-711158526,Netgear Nighthawk R7000 2330 Mbps 4-Port Gigabit Wireless N Router...,,http://thumbs4.ebaystatic.com/d/l225/m/mns82UY4FfqYXPgqrpJ9Bzw.jpg
-711158527,Netgear N900 450 Mbps 4-Port Gigabit Wireless N Router R4500 ~ FreE ShiPPinG ~,,http://thumbs1.ebaystatic.com/d/l225/m/m_o0mSRmySgJUuqHYDIQiuA.jpg
-711158528,D-Link Wireless Router Model DIR-625,,http://thumbs2.ebaystatic.com/d/l225/m/mYPXwZMlDUjOQ3Sm3EtU37Q.jpg
-711158529,D-Link DIR-657 300 Mbps 4-Port Gigabit Wireless N Router Hd Media Router 1000,"Stream multiple media content - videos, music and more to multiple devices all at the same time without lag or skipping. The HD Fuel technology in the DIR-657 lets you watch Netflix and Vudu , play your Wii or Xbox 360 online or make Skype calls all without worrying about the skipping or latency you might experience with standard routers. It does so by automatically giving extra bandwidth for video, gaming and VoIP calls using HD Fuel QoS technology. The D-Link HD Media Router 1000(DIR-657) also comes equipped with 4 Gigabit ports to provide speeds up to 10x faster than standard 10/100 ports. What s more, it uses 802.11n technology with multiple intelligent antennas to maximize the speed and range of your wireless signal to significantly outperform 802.11g devices.",http://thumbs1.ebaystatic.com/d/l225/m/m0xyPdWrdVKe7By4QFouVeA.jpg
-711158530,D-Link DIR-860L AC1200 4-Port Cloud Router Gigabit Wireless 802.11 AC,,http://thumbs3.ebaystatic.com/d/l225/m/mk4KNj6oLm7863qCS-TqmbQ.jpg
-711158531,D-Link DIR-862L Wireless AC1600 Dual Band Gigabit Router,,http://thumbs2.ebaystatic.com/d/l225/m/m6Arw8kaZ4EUbyKjHtJZLkA.jpg
-711158532,LINKSYS AC1600 DUAL BAND SMART WI-FI ROUTER EA6400 BRAND NEW,,http://thumbs3.ebaystatic.com/d/l225/m/mdK7igTS7_TDD7ajfVqj-_w.jpg
-711158533,Netgear AC1900 1300 Mbps 4-Port Gigabit Wireless AC Router (R7000),,http://thumbs4.ebaystatic.com/d/l225/m/mdL34EQi0l-Kg-DlvF6wpqA.jpg
-711158534,Panasonic ES-LA63 Cordless Rechargeable Men's Electric Shaver,,http://thumbs3.ebaystatic.com/d/l225/m/mzKKlCxbADObevcgoNjbXRg.jpg
-711158535,Panasonic ARC 5 Best Mens Shaver,,http://thumbs4.ebaystatic.com/d/l225/m/mt34Y-u0okj-SqQm8Ng_rbQ.jpg
-711158536,Panasonic Es8092 Wet Dry Electric Razor Shaver Cordless,,http://thumbs3.ebaystatic.com/d/l225/m/mlIxTz1LsVjXiZz2CzDquJw.jpg
-711158537,Panasonic ARC4 ES-RF31-s Rechargeable Electric Shaver Wet/dry 4 Nanotech Blade,"Made for folks who need a great shave, the Panasonic electric shaver is convenient and consistent. Featuring an ergonomic design, this Panasonic ES-RF31-S is ideal for keeping a stubble-free face, so you can retain wonderfully smooth skin. With the precision blades included on the Panasonic electric shaver, you can get smooth shaves with every use. As this men's electric shaver features a gentle shaving mechanism, you can help avoid burning sensations on tender skin. Make sure you consistently get multiple perfect shaves without depleting the power with the exceptional shave time typical of this Panasonic ES-RF31-S.",http://thumbs1.ebaystatic.com/d/l225/m/mi4QM99Jq4oma5WLAL0K7Wg.jpg
-711158538,"Panasonic ES3831K Single Blade Travel Shaver, Black New","Strong and trustworthy, the Panasonic electric shaver is built for folks who are worried about a wonderful shave every day. This Panasonic ES3833S is just right for taming your beard, with an easy-to-maneuver design, so you can retain wonderfully soft skin. Spend as much time as you need getting a complete shave by making use of the outstanding shave time typical of the Panasonic electric shaver. Moreover, this men's electric shaver includes precision foil blades, so you can get wonderful shaves over a prolonged period. With the gentle shaving mechanism on this Panasonic ES3833S, you can help avoid burning sensations on tender skin.",http://thumbs3.ebaystatic.com/d/l225/m/mfqMoj4xDlBFXp1ZznxCGbQ.jpg
-711158539,Panasonic ES8103S Arc3 Electric Shaver Wet/Dry with Nanotech Blades for Men,,http://thumbs1.ebaystatic.com/d/l225/m/myaZLqzt3I7O-3xXxsJ_4fQ.jpg
-711158540,Panasonic ES8103S Arc3 Electric Shaver Wet/Dry with Nanotech Blades,,http://thumbs1.ebaystatic.com/d/l225/m/mcrO4BkjBkM78XHm-aClRGg.jpg
-711158543,Panasonic ES3831K Single Blade Wet & Dry Travel Shaver - New & Sealed,,http://thumbs4.ebaystatic.com/d/l225/m/mqWDU2mHsFWAuGosMIGcIMg.jpg
-711158544,Panasonic ES8103S Arc 3 E W/O POUCH & MANUAL Men's Wet/Dry Rechargeable Shaver,,http://thumbs2.ebaystatic.com/d/l225/m/mZXgTj-fQfcgAlzOGQYkqFw.jpg
-711158545,PANASONIC ES3831K Pro-Curve Battery Operated Travel Wet/Dry Shaver,,http://thumbs1.ebaystatic.com/d/l225/m/m8McQMCfgdp50trM_YJ88cw.jpg
-711158546,PANASONIC ARC3 ES-LT33-S WET DRY WASHABLE RECHARGEABLE MEN'S ELECTRIC SHAVER NIB,,http://thumbs1.ebaystatic.com/d/l225/m/m9yUif5xyhGfh7Ag-_fcLdA.jpg
-711158547,Panasonic ES-LV81-k Arc 5 Wet & Dry Rechargeable Men's Foil Shaver New,,http://thumbs1.ebaystatic.com/d/l225/m/mEfZHzDoKrH4DBfU8e_K93A.jpg
-711158548,"NEW Panasonic ES-RF31-S 4 Blade Men's Electric Razor Wet/Dry, Factory Sealed",,http://thumbs2.ebaystatic.com/d/l225/m/mfhMhMoDkrGtqWW_IyqVGuQ.jpg
-711158549,Panasonic ES8243A E Arc4 Men's Electric Shaver Wet/Dry,"eBay item number:181670746515
-
-
- Seller assumes all responsibility for this listing.
-
- Last updated on
- Mar 23, 2015 08:55:50 PDT
- View all revisions
-
-
-
-
-
- Item specifics
-
-
-
Condition:
-
Used
- :
-
-
-
",http://thumbs4.ebaystatic.com/d/l225/m/mcxFUwt3FrGEEPzT7cfQn7w.jpg
-711158550,Panasonic ES-3833 Wet/Dry Men Shaver Razor Battery Operate Compact Travel ES3833,,http://thumbs2.ebaystatic.com/d/l225/m/mAqa9pHisKsLSk5nqMg4JJQ.jpg
-711158551,Panasonic Pro-Curve ES3831K Shaver - Dry/Wet Technology - Stainless Steel Foil,,http://thumbs3.ebaystatic.com/d/l225/m/mGqD8eGIwseT5nsM53W3uRQ.jpg
-711158552,Panasonic Wet and Dry Shaver - ES-RW30s ES-RW30-S,"The Panasonic electric shaver is well-suited to shielding particularly sensitive skin and providing a smooth shave. It's both trustworthy and transportable. Because this Panasonic ES-RW30-S has a gentle shaving mechanism, you can avoid irritation and raw feeling skin in particularly tender areas. The Panasonic electric shaver is ideal for ridding yourself of stubble, with its special design, so you can sustain wonderfully supple skin. The exceptional shave time featured on this men's electric shaver helps you to make sure you consistently receive many complete shaves without depleting the power. Plus, this Panasonic ES-RW30-S features precision blades, so you can enjoy smooth shaves for months on end.",http://thumbs1.ebaystatic.com/d/l225/m/mvPElpjXmgo0NhP-P5F8LlQ.jpg
-711158553,Panasonic ES-LF51-A Arc4 Electric Shaver Wet/Dry with Flexible Pivoting Head,,http://thumbs3.ebaystatic.com/d/l225/m/mC_zAQrMQKPLHdENU7N3UjQ.jpg
-711158554,Panasonic ES8103S Arc3 Men's Electric Shaver Wet/Dry with Nanotech Blades,,http://thumbs3.ebaystatic.com/d/l225/m/moBByNwPn93-g-oBBceS2kw.jpg
-711158555,panasonic ARC3 shaver es8103s,,http://thumbs1.ebaystatic.com/d/l225/m/mJlAp6t6OMIOaYgKnyelIMg.jpg
-711158556,Panasonic ES-534 Men's Electric Shaver New ES534 Battery Operated Compact Travel,,http://thumbs3.ebaystatic.com/d/l225/m/mDr2kpZLVSdy1KTPVYK2YUg.jpg
-711158557,Panasonic Portable Shaving Machine Cclippers Washable Single Blade Shaver+Brush,,http://thumbs3.ebaystatic.com/d/l225/m/mJdzJPoOALps0Lv4WtW2b0A.jpg
-711158559,Baratza Solis Maestro Conical Burr Coffee Bean Grinder Works Great Nice Cond,,http://thumbs4.ebaystatic.com/d/l225/m/mdjbD7YFR6JRq-pkeajhK7w.jpg
-711158560,Proctor Silex Fresh Grind Electric Coffee Bean Grinder White,,http://thumbs4.ebaystatic.com/d/l225/m/mtXoRn5Ytmqz0GLHYmBUxpA.jpg
-711158561,Cuisinart 8-oz. Supreme Grind Automatic Burr Coffee Grinder,,http://thumbs4.ebaystatic.com/d/l225/m/my_9cXPvwwRVFqo6MXWfpag.jpg
diff --git a/examples/kaggle/schema-kaggle.toml b/examples/kaggle/schema-kaggle.toml
deleted file mode 100644
index bd729904b..000000000
--- a/examples/kaggle/schema-kaggle.toml
+++ /dev/null
@@ -1,19 +0,0 @@
-# This schema has been generated ...
-# The order in which the attributes are declared is important,
-# it specify the attribute xxx...
-
-identifier = "id"
-
-[attributes.id]
-displayed = true
-
-[attributes.title]
-displayed = true
-indexed = true
-
-[attributes.description]
-displayed = true
-indexed = true
-
-[attributes.image]
-displayed = true
diff --git a/meilidb-core/Cargo.toml b/meilidb-core/Cargo.toml
index fce1ecdc8..f9dcb6d74 100644
--- a/meilidb-core/Cargo.toml
+++ b/meilidb-core/Cargo.toml
@@ -1,34 +1,55 @@
[package]
name = "meilidb-core"
version = "0.1.0"
-authors = ["Kerollmops "]
+authors = ["Kerollmops "]
edition = "2018"
[dependencies]
-byteorder = "1.3.1"
+arc-swap = "0.4.3"
+bincode = "1.1.4"
+byteorder = "1.3.2"
+crossbeam-channel = "0.3.9"
deunicode = "1.0.0"
-hashbrown = "0.6.0"
-lazy_static = "1.2.0"
-log = "0.4.6"
+env_logger = "0.7.0"
+hashbrown = { version = "0.6.0", features = ["serde"] }
+lmdb-rkv = "0.12.3"
+log = "0.4.8"
+meilidb-schema = { path = "../meilidb-schema", version = "0.1.0" }
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
-rayon = "1.2.0"
+once_cell = "1.2.0"
+ordered-float = { version = "1.0.2", features = ["serde"] }
+rkv = "0.10.2"
sdset = "0.3.2"
-serde = { version = "1.0.88", features = ["derive"] }
+serde = { version = "1.0.99", features = ["derive"] }
+serde_json = "1.0.40"
+siphasher = "0.3.0"
slice-group-by = "0.2.6"
zerocopy = "0.2.8"
-[dependencies.fst]
-git = "https://github.com/Kerollmops/fst.git"
-branch = "arc-byte-slice"
+[dependencies.rmp-serde]
+git = "https://github.com/3Hren/msgpack-rust.git"
+rev = "40b3d48"
+
+[dependencies.rmpv]
+git = "https://github.com/3Hren/msgpack-rust.git"
+rev = "40b3d48"
+features = ["with-serde"]
[dependencies.levenshtein_automata]
git = "https://github.com/Kerollmops/levenshtein-automata.git"
branch = "arc-byte-slice"
features = ["fst_automaton"]
+[dependencies.fst]
+git = "https://github.com/Kerollmops/fst.git"
+branch = "arc-byte-slice"
+
[dev-dependencies]
assert_matches = "1.3"
-
-[features]
-i128 = ["byteorder/i128"]
-nightly = ["hashbrown/nightly", "slice-group-by/nightly"]
+csv = "1.0.7"
+indexmap = { version = "1.2.0", features = ["serde-1"] }
+rustyline = { version = "5.0.0", default-features = false }
+structopt = "0.3.2"
+tempfile = "3.1.0"
+termcolor = "1.0.4"
+toml = "0.5.3"
diff --git a/meilidb/examples/query-database.rs b/meilidb-core/examples/from_file.rs
similarity index 50%
rename from meilidb/examples/query-database.rs
rename to meilidb-core/examples/from_file.rs
index 9677eead6..ec40e13b9 100644
--- a/meilidb/examples/query-database.rs
+++ b/meilidb-core/examples/from_file.rs
@@ -1,45 +1,161 @@
-#[global_allocator]
-static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
-
use std::collections::btree_map::{BTreeMap, Entry};
use std::collections::HashSet;
use std::error::Error;
-use std::io::{self, Write};
+use std::io::Write;
use std::iter::FromIterator;
-use std::path::PathBuf;
+use std::path::{Path, PathBuf};
use std::time::{Instant, Duration};
+use std::{fs, io, sync::mpsc};
-use indexmap::IndexMap;
use rustyline::{Editor, Config};
+use serde::{Serialize, Deserialize};
use structopt::StructOpt;
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
-use meilidb_core::Highlight;
-use meilidb_data::Database;
+use meilidb_core::{Highlight, Database, UpdateResult, BoxUpdateFn};
use meilidb_schema::SchemaAttr;
+const INDEX_NAME: &str = "default";
+
#[derive(Debug, StructOpt)]
-pub struct Opt {
- /// The destination where the database must be created
+struct IndexCommand {
+ /// The destination where the database must be created.
#[structopt(parse(from_os_str))]
- pub database_path: PathBuf,
+ database_path: PathBuf,
- #[structopt(long = "fetch-timeout-ms")]
- pub fetch_timeout_ms: Option,
+ /// The csv file to index.
+ #[structopt(parse(from_os_str))]
+ csv_data_path: PathBuf,
- /// Fields that must be displayed.
- pub displayed_fields: Vec,
+ /// The path to the schema.
+ #[structopt(long, parse(from_os_str))]
+ schema: PathBuf,
- /// The number of returned results
- #[structopt(short = "n", long = "number-results", default_value = "10")]
- pub number_results: usize,
-
- /// The number of characters before and after the first match
- #[structopt(short = "C", long = "context", default_value = "35")]
- pub char_context: usize,
+ #[structopt(long)]
+ update_group_size: Option,
}
-type Document = IndexMap;
+#[derive(Debug, StructOpt)]
+struct SearchCommand {
+ /// The destination where the database must be created.
+ #[structopt(parse(from_os_str))]
+ database_path: PathBuf,
+
+ /// Timeout after which the search will return results.
+ #[structopt(long)]
+ fetch_timeout_ms: Option,
+
+ /// The number of returned results
+ #[structopt(short, long, default_value = "10")]
+ number_results: usize,
+
+ /// The number of characters before and after the first match
+ #[structopt(short = "C", long, default_value = "35")]
+ char_context: usize,
+
+    /// A filter string that can be `!adult` or `adult` to
+    /// filter documents on this specified field
+ #[structopt(short, long)]
+ filter: Option,
+
+ /// Fields that must be displayed.
+ displayed_fields: Vec,
+}
+
+#[derive(Debug, StructOpt)]
+enum Command {
+ Index(IndexCommand),
+ Search(SearchCommand),
+}
+
+impl Command {
+ fn path(&self) -> &Path {
+ match self {
+ Command::Index(command) => &command.database_path,
+ Command::Search(command) => &command.database_path,
+ }
+ }
+}
+
+#[derive(Serialize, Deserialize)]
+#[serde(transparent)]
+struct Document(indexmap::IndexMap);
+
+fn index_command(command: IndexCommand, database: Database) -> Result<(), Box> {
+ let start = Instant::now();
+
+ let (sender, receiver) = mpsc::sync_channel(100);
+ let update_fn = move |update: UpdateResult| sender.send(update.update_id).unwrap();
+ let index = database.open_index(INDEX_NAME, Some(Box::new(update_fn)))?;
+ let rkv = database.rkv.read().unwrap();
+
+ let schema = {
+ let string = fs::read_to_string(&command.schema)?;
+ toml::from_str(&string).unwrap()
+ };
+
+ let writer = rkv.write().unwrap();
+ match index.main.schema(&writer)? {
+ Some(current_schema) => {
+ if current_schema != schema {
+ return Err(meilidb_core::Error::SchemaDiffer.into())
+ }
+ writer.abort();
+ },
+ None => index.schema_update(writer, schema)?,
+ }
+
+ let mut rdr = csv::Reader::from_path(command.csv_data_path)?;
+ let mut raw_record = csv::StringRecord::new();
+ let headers = rdr.headers()?.clone();
+
+ let mut max_update_id = 0;
+ let mut i = 0;
+ let mut end_of_file = false;
+
+ while !end_of_file {
+ let mut additions = index.documents_addition();
+
+ loop {
+ end_of_file = !rdr.read_record(&mut raw_record)?;
+ if end_of_file { break }
+
+ let document: Document = match raw_record.deserialize(Some(&headers)) {
+ Ok(document) => document,
+ Err(e) => {
+ eprintln!("{:?}", e);
+ continue;
+ }
+ };
+
+ additions.update_document(document);
+
+ print!("\rindexing document {}", i);
+ i += 1;
+
+ if let Some(group_size) = command.update_group_size {
+ if i % group_size == 0 { break }
+ }
+ }
+
+ println!();
+
+ let writer = rkv.write().unwrap();
+ println!("committing update...");
+ let update_id = additions.finalize(writer)?;
+ max_update_id = max_update_id.max(update_id);
+ println!("committed update {}", update_id);
+ }
+
+ println!("Waiting for update {}", max_update_id);
+ for id in receiver {
+ if id == max_update_id { break }
+ }
+
+ println!("database created in {:.2?} at: {:?}", start.elapsed(), command.database_path);
+
+ Ok(())
+}
fn display_highlights(text: &str, ranges: &[usize]) -> io::Result<()> {
let mut stdout = StandardStream::stdout(ColorChoice::Always);
@@ -138,19 +254,16 @@ fn crop_text(
(text, highlights)
}
-fn main() -> Result<(), Box> {
- let _ = env_logger::init();
- let opt = Opt::from_args();
+fn search_command(command: SearchCommand, database: Database) -> Result<(), Box> {
+ let rkv = database.rkv.read().unwrap();
+ let update_fn = None as Option::;
+ let index = database.open_index(INDEX_NAME, update_fn)?;
+ let reader = rkv.read().unwrap();
- let start = Instant::now();
- let database = Database::open(&opt.database_path)?;
+ let schema = index.main.schema(&reader)?;
+ let schema = schema.ok_or(meilidb_core::Error::SchemaMissing)?;
- let index = database.open_index("test")?.unwrap();
- let schema = index.schema();
-
- println!("database prepared for you in {:.2?}", start.elapsed());
-
- let fields = opt.displayed_fields.iter().map(String::as_str);
+ let fields = command.displayed_fields.iter().map(String::as_str);
let fields = HashSet::from_iter(fields);
let config = Config::builder().auto_add_history(true).build();
@@ -162,14 +275,29 @@ fn main() -> Result<(), Box> {
Ok(query) => {
let start_total = Instant::now();
- let builder = match opt.fetch_timeout_ms {
- Some(timeout_ms) => {
- let timeout = Duration::from_millis(timeout_ms);
- index.query_builder().with_fetch_timeout(timeout)
+ let documents = match command.filter {
+ Some(ref filter) => {
+ let filter = filter.as_str();
+ let (positive, filter) = if filter.chars().next() == Some('!') {
+ (false, &filter[1..])
+ } else {
+ (true, filter)
+ };
+
+ let attr = schema.attribute(&filter).expect("Could not find filtered attribute");
+
+ let builder = index.query_builder();
+ let builder = builder.with_filter(|document_id| {
+ let string: String = index.document_attribute(&reader, document_id, attr).unwrap().unwrap();
+ (string == "true") == positive
+ });
+ builder.query(&reader, &query, 0..command.number_results)?
},
- None => index.query_builder(),
+ None => {
+ let builder = index.query_builder();
+ builder.query(&reader, &query, 0..command.number_results)?
+ }
};
- let documents = builder.query(&query, 0..opt.number_results)?;
let mut retrieve_duration = Duration::default();
@@ -179,19 +307,20 @@ fn main() -> Result<(), Box> {
doc.highlights.sort_unstable_by_key(|m| (m.char_index, m.char_length));
let start_retrieve = Instant::now();
- let result = index.document::(Some(&fields), doc.id);
+ let result = index.document::(&reader, Some(&fields), doc.id);
retrieve_duration += start_retrieve.elapsed();
match result {
Ok(Some(document)) => {
- for (name, text) in document {
+ println!("raw-id: {:?}", doc.id);
+ for (name, text) in document.0 {
print!("{}: ", name);
let attr = schema.attribute(&name).unwrap();
let highlights = doc.highlights.iter()
.filter(|m| SchemaAttr::new(m.attribute) == attr)
.cloned();
- let (text, highlights) = crop_text(&text, highlights, opt.char_context);
+ let (text, highlights) = crop_text(&text, highlights, command.char_context);
let areas = create_highlight_areas(&text, &highlights);
display_highlights(&text, &areas)?;
println!();
@@ -214,7 +343,7 @@ fn main() -> Result<(), Box> {
println!();
}
- eprintln!("document field retrieve took {:.2?}", retrieve_duration);
+ eprintln!("whole documents fields retrieve took {:.2?}", retrieve_duration);
eprintln!("===== Found {} results in {:.2?} =====", number_of_documents, start_total.elapsed());
},
Err(err) => {
@@ -225,5 +354,18 @@ fn main() -> Result<(), Box> {
}
readline.save_history("query-history.txt").unwrap();
+
Ok(())
}
+
+fn main() -> Result<(), Box> {
+ env_logger::init();
+
+ let opt = Command::from_args();
+ let database = Database::open_or_create(opt.path())?;
+
+ match opt {
+ Command::Index(command) => index_command(command, database),
+ Command::Search(command) => search_command(command, database),
+ }
+}
diff --git a/meilidb-core/src/automaton.rs b/meilidb-core/src/automaton.rs
deleted file mode 100644
index 1ab845933..000000000
--- a/meilidb-core/src/automaton.rs
+++ /dev/null
@@ -1,44 +0,0 @@
-use lazy_static::lazy_static;
-use levenshtein_automata::{
- LevenshteinAutomatonBuilder as LevBuilder,
- DFA,
-};
-
-lazy_static! {
- static ref LEVDIST0: LevBuilder = LevBuilder::new(0, false);
- static ref LEVDIST1: LevBuilder = LevBuilder::new(1, false);
- static ref LEVDIST2: LevBuilder = LevBuilder::new(2, false);
-}
-
-#[derive(Copy, Clone)]
-enum PrefixSetting {
- Prefix,
- NoPrefix,
-}
-
-fn build_dfa_with_setting(query: &str, setting: PrefixSetting) -> DFA {
- use self::PrefixSetting::{Prefix, NoPrefix};
-
- match query.len() {
- 0 ..= 4 => match setting {
- Prefix => LEVDIST0.build_prefix_dfa(query),
- NoPrefix => LEVDIST0.build_dfa(query),
- },
- 5 ..= 8 => match setting {
- Prefix => LEVDIST1.build_prefix_dfa(query),
- NoPrefix => LEVDIST1.build_dfa(query),
- },
- _ => match setting {
- Prefix => LEVDIST2.build_prefix_dfa(query),
- NoPrefix => LEVDIST2.build_dfa(query),
- },
- }
-}
-
-pub fn build_prefix_dfa(query: &str) -> DFA {
- build_dfa_with_setting(query, PrefixSetting::Prefix)
-}
-
-pub fn build_dfa(query: &str) -> DFA {
- build_dfa_with_setting(query, PrefixSetting::NoPrefix)
-}
diff --git a/meilidb-core/src/automaton/dfa.rs b/meilidb-core/src/automaton/dfa.rs
new file mode 100644
index 000000000..015fdd877
--- /dev/null
+++ b/meilidb-core/src/automaton/dfa.rs
@@ -0,0 +1,51 @@
+use once_cell::sync::OnceCell;
+use levenshtein_automata::{
+ LevenshteinAutomatonBuilder as LevBuilder,
+ DFA,
+};
+
+static LEVDIST0: OnceCell = OnceCell::new();
+static LEVDIST1: OnceCell = OnceCell::new();
+static LEVDIST2: OnceCell = OnceCell::new();
+
+#[derive(Copy, Clone)]
+enum PrefixSetting {
+ Prefix,
+ NoPrefix,
+}
+
+fn build_dfa_with_setting(query: &str, setting: PrefixSetting) -> DFA {
+ use PrefixSetting::{Prefix, NoPrefix};
+
+ match query.len() {
+ 0 ..= 4 => {
+ let builder = LEVDIST0.get_or_init(|| LevBuilder::new(0, false));
+ match setting {
+ Prefix => builder.build_prefix_dfa(query),
+ NoPrefix => builder.build_dfa(query),
+ }
+ },
+ 5 ..= 8 => {
+ let builder = LEVDIST1.get_or_init(|| LevBuilder::new(1, false));
+ match setting {
+ Prefix => builder.build_prefix_dfa(query),
+ NoPrefix => builder.build_dfa(query),
+ }
+ },
+ _ => {
+ let builder = LEVDIST2.get_or_init(|| LevBuilder::new(2, false));
+ match setting {
+ Prefix => builder.build_prefix_dfa(query),
+ NoPrefix => builder.build_dfa(query),
+ }
+ },
+ }
+}
+
+pub fn build_prefix_dfa(query: &str) -> DFA {
+ build_dfa_with_setting(query, PrefixSetting::Prefix)
+}
+
+pub fn build_dfa(query: &str) -> DFA {
+ build_dfa_with_setting(query, PrefixSetting::NoPrefix)
+}
diff --git a/meilidb-core/src/automaton/mod.rs b/meilidb-core/src/automaton/mod.rs
new file mode 100644
index 000000000..f1d864a9a
--- /dev/null
+++ b/meilidb-core/src/automaton/mod.rs
@@ -0,0 +1,219 @@
+mod dfa;
+mod query_enhancer;
+
+use std::cmp::Reverse;
+use std::vec;
+
+use fst::{IntoStreamer, Streamer};
+use levenshtein_automata::DFA;
+use meilidb_tokenizer::{split_query_string, is_cjk};
+
+use crate::store;
+use crate::error::MResult;
+
+use self::dfa::{build_dfa, build_prefix_dfa};
+use self::query_enhancer::QueryEnhancerBuilder;
+pub use self::query_enhancer::QueryEnhancer;
+
+const NGRAMS: usize = 3;
+
+pub struct AutomatonProducer {
+ automatons: Vec>,
+}
+
+impl AutomatonProducer {
+ pub fn new(
+ reader: &impl rkv::Readable,
+ query: &str,
+ main_store: store::Main,
+ synonyms_store: store::Synonyms,
+ ) -> MResult<(AutomatonProducer, QueryEnhancer)>
+ {
+ let (automatons, query_enhancer) = generate_automatons(
+ reader,
+ query,
+ main_store,
+ synonyms_store,
+ )?;
+
+ Ok((AutomatonProducer { automatons }, query_enhancer))
+ }
+
+ pub fn into_iter(self) -> vec::IntoIter> {
+ self.automatons.into_iter()
+ }
+}
+
+#[derive(Debug)]
+pub struct Automaton {
+ pub index: usize,
+ pub ngram: usize,
+ pub query_len: usize,
+ pub is_exact: bool,
+ pub is_prefix: bool,
+ pub query: String,
+}
+
+impl Automaton {
+ pub fn dfa(&self) -> DFA {
+ if self.is_prefix {
+ build_prefix_dfa(&self.query)
+ } else {
+ build_dfa(&self.query)
+ }
+ }
+
+ fn exact(index: usize, ngram: usize, query: &str) -> Automaton {
+ Automaton {
+ index,
+ ngram,
+ query_len: query.len(),
+ is_exact: true,
+ is_prefix: false,
+ query: query.to_string(),
+ }
+ }
+
+ fn prefix_exact(index: usize, ngram: usize, query: &str) -> Automaton {
+ Automaton {
+ index,
+ ngram,
+ query_len: query.len(),
+ is_exact: true,
+ is_prefix: true,
+ query: query.to_string(),
+ }
+ }
+
+ fn non_exact(index: usize, ngram: usize, query: &str) -> Automaton {
+ Automaton {
+ index,
+ ngram,
+ query_len: query.len(),
+ is_exact: false,
+ is_prefix: false,
+ query: query.to_string(),
+ }
+ }
+}
+
+pub fn normalize_str(string: &str) -> String {
+ let mut string = string.to_lowercase();
+
+ if !string.contains(is_cjk) {
+ string = deunicode::deunicode_with_tofu(&string, "");
+ }
+
+ string
+}
+
+fn generate_automatons(
+ reader: &impl rkv::Readable,
+ query: &str,
+ main_store: store::Main,
+ synonym_store: store::Synonyms,
+) -> MResult<(Vec>, QueryEnhancer)>
+{
+ let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace);
+ let query_words: Vec<_> = split_query_string(query).map(str::to_lowercase).collect();
+ let synonyms = match main_store.synonyms_fst(reader)? {
+ Some(synonym) => synonym,
+ None => fst::Set::default(),
+ };
+
+ let mut automaton_index = 0;
+ let mut automatons = Vec::new();
+ let mut enhancer_builder = QueryEnhancerBuilder::new(&query_words);
+
+ // We must not declare the original words to the query enhancer
+ // *but* we need to push them in the automatons list first
+ let mut original_automatons = Vec::new();
+ let mut original_words = query_words.iter().peekable();
+ while let Some(word) = original_words.next() {
+
+ let has_following_word = original_words.peek().is_some();
+ let not_prefix_dfa = has_following_word || has_end_whitespace || word.chars().all(is_cjk);
+
+ let automaton = if not_prefix_dfa {
+ Automaton::exact(automaton_index, 1, word)
+ } else {
+ Automaton::prefix_exact(automaton_index, 1, word)
+ };
+ automaton_index += 1;
+ original_automatons.push(automaton);
+ }
+
+ automatons.push(original_automatons);
+
+ for n in 1..=NGRAMS {
+ let mut ngrams = query_words.windows(n).enumerate().peekable();
+ while let Some((query_index, ngram_slice)) = ngrams.next() {
+
+ let query_range = query_index..query_index + n;
+ let ngram_nb_words = ngram_slice.len();
+ let ngram = ngram_slice.join(" ");
+
+ let has_following_word = ngrams.peek().is_some();
+ let not_prefix_dfa = has_following_word || has_end_whitespace || ngram.chars().all(is_cjk);
+
+ // automaton of synonyms of the ngrams
+ let normalized = normalize_str(&ngram);
+ let lev = if not_prefix_dfa { build_dfa(&normalized) } else { build_prefix_dfa(&normalized) };
+
+ let mut stream = synonyms.search(&lev).into_stream();
+ while let Some(base) = stream.next() {
+
+                // only trigger alternatives when the last word has been typed
+                // i.e. "new " does not trigger but "new yo" triggers alternatives to "new york"
+ let base = std::str::from_utf8(base).unwrap();
+ let base_nb_words = split_query_string(base).count();
+ if ngram_nb_words != base_nb_words { continue }
+
+ if let Some(synonyms) = synonym_store.synonyms(reader, base.as_bytes())? {
+
+ let mut stream = synonyms.into_stream();
+ while let Some(synonyms) = stream.next() {
+ let synonyms = std::str::from_utf8(synonyms).unwrap();
+ let synonyms_words: Vec<_> = split_query_string(synonyms).collect();
+ let nb_synonym_words = synonyms_words.len();
+
+ let real_query_index = automaton_index;
+ enhancer_builder.declare(query_range.clone(), real_query_index, &synonyms_words);
+
+ for synonym in synonyms_words {
+ let automaton = if nb_synonym_words == 1 {
+ Automaton::exact(automaton_index, n, synonym)
+ } else {
+ Automaton::non_exact(automaton_index, n, synonym)
+ };
+ automaton_index += 1;
+ automatons.push(vec![automaton]);
+ }
+ }
+ }
+ }
+
+ if n != 1 {
+ // automaton of concatenation of query words
+ let concat = ngram_slice.concat();
+ let normalized = normalize_str(&concat);
+
+ let real_query_index = automaton_index;
+ enhancer_builder.declare(query_range.clone(), real_query_index, &[&normalized]);
+
+ let automaton = Automaton::exact(automaton_index, n, &normalized);
+ automaton_index += 1;
+ automatons.push(vec![automaton]);
+ }
+ }
+ }
+
+ // order automatons, the most important first,
+ // we keep the original automatons at the front.
+ automatons[1..].sort_unstable_by_key(|a| {
+ let a = a.first().unwrap();
+ (Reverse(a.is_exact), Reverse(a.ngram))
+ });
+
+ Ok((automatons, enhancer_builder.build()))
+}
diff --git a/meilidb-core/src/automaton/query_enhancer.rs b/meilidb-core/src/automaton/query_enhancer.rs
new file mode 100644
index 000000000..165c1b094
--- /dev/null
+++ b/meilidb-core/src/automaton/query_enhancer.rs
@@ -0,0 +1,398 @@
+use std::ops::Range;
+use std::cmp::Ordering::{Less, Greater, Equal};
+
+/// Returns `true` if the specified range can accept the given replacement words.
+/// Returns `false` if the replacement words are already present in the original query
+/// or if there are fewer replacement words than the range to replace.
+//
+//
+// ## Ignored because already present in original
+//
+// new york city subway
+// -------- ^^^^
+// / \
+// [new york city]
+//
+//
+// ## Ignored because smaller than the original
+//
+// new york city subway
+// -------------
+// \ /
+// [new york]
+//
+//
+// ## Accepted because bigger than the original
+//
+// NYC subway
+// ---
+// / \
+// / \
+// / \
+// / \
+// / \
+// [new york city]
+//
+fn rewrite_range_with(query: &[S], range: Range, words: &[T]) -> bool
+where S: AsRef,
+ T: AsRef,
+{
+ if words.len() <= range.len() {
+        // there are fewer or equally many replacement words
+        // than there are already in the replaced range
+ return false
+ }
+
+ // retrieve the part to rewrite but with the length
+ // of the replacement part
+ let original = query.iter().skip(range.start).take(words.len());
+
+ // check if the original query doesn't already contain
+ // the replacement words
+ !original.map(AsRef::as_ref).eq(words.iter().map(AsRef::as_ref))
+}
+
+type Origin = usize;
+type RealLength = usize;
+
+struct FakeIntervalTree {
+ intervals: Vec<(Range, (Origin, RealLength))>,
+}
+
+impl FakeIntervalTree {
+ fn new(mut intervals: Vec<(Range, (Origin, RealLength))>) -> FakeIntervalTree {
+ intervals.sort_unstable_by_key(|(r, _)| (r.start, r.end));
+ FakeIntervalTree { intervals }
+ }
+
+ fn query(&self, point: usize) -> Option<(Range, (Origin, RealLength))> {
+ let element = self.intervals.binary_search_by(|(r, _)| {
+ if point >= r.start {
+ if point < r.end { Equal } else { Less }
+ } else { Greater }
+ });
+
+ let n = match element { Ok(n) => n, Err(n) => n };
+
+ match self.intervals.get(n) {
+ Some((range, value)) if range.contains(&point) => Some((range.clone(), *value)),
+ _otherwise => None,
+ }
+ }
+}
+
+pub struct QueryEnhancerBuilder<'a, S> {
+ query: &'a [S],
+ origins: Vec,
+ real_to_origin: Vec<(Range, (Origin, RealLength))>,
+}
+
+impl> QueryEnhancerBuilder<'_, S> {
+ pub fn new(query: &[S]) -> QueryEnhancerBuilder {
+ // we initialize origins query indices based on their positions
+ let origins: Vec<_> = (0..query.len() + 1).collect();
+ let real_to_origin = origins.iter().map(|&o| (o..o+1, (o, 1))).collect();
+
+ QueryEnhancerBuilder { query, origins, real_to_origin }
+ }
+
+ /// Update the final real to origin query indices mapping.
+ ///
+    /// `range` is the original words range that these `replacement` words replace
+    /// and `real` is the first real query index of these replacement words.
+ pub fn declare(&mut self, range: Range, real: usize, replacement: &[T])
+ where T: AsRef,
+ {
+ // check if the range of original words
+ // can be rewritten with the replacement words
+ if rewrite_range_with(self.query, range.clone(), replacement) {
+
+ // this range can be replaced so we need to
+ // modify the origins accordingly
+ let offset = replacement.len() - range.len();
+
+ let previous_padding = self.origins[range.end - 1];
+ let current_offset = (self.origins[range.end] - 1) - previous_padding;
+ let diff = offset.saturating_sub(current_offset);
+ self.origins[range.end] += diff;
+
+ for r in &mut self.origins[range.end + 1..] {
+ *r += diff;
+ }
+ }
+
+ // we need to store the real number and origins relations
+ // this way it will be possible to know by how many
+ // we need to pad real query indices
+ let real_range = real..real + replacement.len().max(range.len());
+ let real_length = replacement.len();
+ self.real_to_origin.push((real_range, (range.start, real_length)));
+ }
+
+ pub fn build(self) -> QueryEnhancer {
+ QueryEnhancer {
+ origins: self.origins,
+ real_to_origin: FakeIntervalTree::new(self.real_to_origin),
+ }
+ }
+}
+
+pub struct QueryEnhancer {
+ origins: Vec,
+ real_to_origin: FakeIntervalTree,
+}
+
+impl QueryEnhancer {
+ /// Returns the query indices to use to replace this real query index.
+ pub fn replacement(&self, real: u32) -> Range {
+ let real = real as usize;
+
+ // query the fake interval tree with the real query index
+ let (range, (origin, real_length)) =
+ self.real_to_origin
+ .query(real)
+ .expect("real has never been declared");
+
+ // if `real` is the end bound of the range
+ if (range.start + real_length - 1) == real {
+ let mut count = range.len();
+ let mut new_origin = origin;
+ for (i, slice) in self.origins[new_origin..].windows(2).enumerate() {
+ let len = slice[1] - slice[0];
+ count = count.saturating_sub(len);
+ if count == 0 { new_origin = origin + i; break }
+ }
+
+ let n = real - range.start;
+ let start = self.origins[origin];
+ let end = self.origins[new_origin + 1];
+ let remaining = (end - start) - n;
+
+ Range { start: (start + n) as u32, end: (start + n + remaining) as u32 }
+
+ } else {
+ // just return the origin along with
+ // the real position of the word
+ let n = real as usize - range.start;
+ let origin = self.origins[origin];
+
+ Range { start: (origin + n) as u32, end: (origin + n + 1) as u32 }
+ }
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn original_unmodified() {
+ let query = ["new", "york", "city", "subway"];
+ // 0 1 2 3
+ let mut builder = QueryEnhancerBuilder::new(&query);
+
+ // new york = new york city
+ builder.declare(0..2, 4, &["new", "york", "city"]);
+ // ^ 4 5 6
+
+ let enhancer = builder.build();
+
+ assert_eq!(enhancer.replacement(0), 0..1); // new
+ assert_eq!(enhancer.replacement(1), 1..2); // york
+ assert_eq!(enhancer.replacement(2), 2..3); // city
+ assert_eq!(enhancer.replacement(3), 3..4); // subway
+ assert_eq!(enhancer.replacement(4), 0..1); // new
+ assert_eq!(enhancer.replacement(5), 1..2); // york
+ assert_eq!(enhancer.replacement(6), 2..3); // city
+ }
+
+ #[test]
+ fn simple_growing() {
+ let query = ["new", "york", "subway"];
+ // 0 1 2
+ let mut builder = QueryEnhancerBuilder::new(&query);
+
+ // new york = new york city
+ builder.declare(0..2, 3, &["new", "york", "city"]);
+ // ^ 3 4 5
+
+ let enhancer = builder.build();
+
+ assert_eq!(enhancer.replacement(0), 0..1); // new
+ assert_eq!(enhancer.replacement(1), 1..3); // york
+ assert_eq!(enhancer.replacement(2), 3..4); // subway
+ assert_eq!(enhancer.replacement(3), 0..1); // new
+ assert_eq!(enhancer.replacement(4), 1..2); // york
+ assert_eq!(enhancer.replacement(5), 2..3); // city
+ }
+
+ #[test]
+ fn same_place_growings() {
+ let query = ["NY", "subway"];
+ // 0 1
+ let mut builder = QueryEnhancerBuilder::new(&query);
+
+ // NY = new york
+ builder.declare(0..1, 2, &["new", "york"]);
+ // ^ 2 3
+
+ // NY = new york city
+ builder.declare(0..1, 4, &["new", "york", "city"]);
+ // ^ 4 5 6
+
+ // NY = NYC
+ builder.declare(0..1, 7, &["NYC"]);
+ // ^ 7
+
+ // NY = new york city
+ builder.declare(0..1, 8, &["new", "york", "city"]);
+ // ^ 8 9 10
+
+ // subway = underground train
+ builder.declare(1..2, 11, &["underground", "train"]);
+ // ^ 11 12
+
+ let enhancer = builder.build();
+
+ assert_eq!(enhancer.replacement(0), 0..3); // NY
+ assert_eq!(enhancer.replacement(1), 3..5); // subway
+ assert_eq!(enhancer.replacement(2), 0..1); // new
+ assert_eq!(enhancer.replacement(3), 1..3); // york
+ assert_eq!(enhancer.replacement(4), 0..1); // new
+ assert_eq!(enhancer.replacement(5), 1..2); // york
+ assert_eq!(enhancer.replacement(6), 2..3); // city
+ assert_eq!(enhancer.replacement(7), 0..3); // NYC
+ assert_eq!(enhancer.replacement(8), 0..1); // new
+ assert_eq!(enhancer.replacement(9), 1..2); // york
+ assert_eq!(enhancer.replacement(10), 2..3); // city
+ assert_eq!(enhancer.replacement(11), 3..4); // underground
+ assert_eq!(enhancer.replacement(12), 4..5); // train
+ }
+
+ #[test]
+ fn bigger_growing() {
+ let query = ["NYC", "subway"];
+ // 0 1
+ let mut builder = QueryEnhancerBuilder::new(&query);
+
+ // NYC = new york city
+ builder.declare(0..1, 2, &["new", "york", "city"]);
+ // ^ 2 3 4
+
+ let enhancer = builder.build();
+
+ assert_eq!(enhancer.replacement(0), 0..3); // NYC
+ assert_eq!(enhancer.replacement(1), 3..4); // subway
+ assert_eq!(enhancer.replacement(2), 0..1); // new
+ assert_eq!(enhancer.replacement(3), 1..2); // york
+ assert_eq!(enhancer.replacement(4), 2..3); // city
+ }
+
+ #[test]
+ fn middle_query_growing() {
+ let query = ["great", "awesome", "NYC", "subway"];
+ // 0 1 2 3
+ let mut builder = QueryEnhancerBuilder::new(&query);
+
+ // NYC = new york city
+ builder.declare(2..3, 4, &["new", "york", "city"]);
+ // ^ 4 5 6
+
+ let enhancer = builder.build();
+
+ assert_eq!(enhancer.replacement(0), 0..1); // great
+ assert_eq!(enhancer.replacement(1), 1..2); // awesome
+ assert_eq!(enhancer.replacement(2), 2..5); // NYC
+ assert_eq!(enhancer.replacement(3), 5..6); // subway
+ assert_eq!(enhancer.replacement(4), 2..3); // new
+ assert_eq!(enhancer.replacement(5), 3..4); // york
+ assert_eq!(enhancer.replacement(6), 4..5); // city
+ }
+
+ #[test]
+ fn end_query_growing() {
+ let query = ["NYC", "subway"];
+ // 0 1
+ let mut builder = QueryEnhancerBuilder::new(&query);
+
+ // NYC = new york city
+ builder.declare(1..2, 2, &["underground", "train"]);
+ // ^ 2 3
+
+ let enhancer = builder.build();
+
+ assert_eq!(enhancer.replacement(0), 0..1); // NYC
+ assert_eq!(enhancer.replacement(1), 1..3); // subway
+ assert_eq!(enhancer.replacement(2), 1..2); // underground
+ assert_eq!(enhancer.replacement(3), 2..3); // train
+ }
+
+ #[test]
+ fn multiple_growings() {
+ let query = ["great", "awesome", "NYC", "subway"];
+ // 0 1 2 3
+ let mut builder = QueryEnhancerBuilder::new(&query);
+
+ // NYC = new york city
+ builder.declare(2..3, 4, &["new", "york", "city"]);
+ // ^ 4 5 6
+
+ // subway = underground train
+ builder.declare(3..4, 7, &["underground", "train"]);
+ // ^ 7 8
+
+ let enhancer = builder.build();
+
+ assert_eq!(enhancer.replacement(0), 0..1); // great
+ assert_eq!(enhancer.replacement(1), 1..2); // awesome
+ assert_eq!(enhancer.replacement(2), 2..5); // NYC
+ assert_eq!(enhancer.replacement(3), 5..7); // subway
+ assert_eq!(enhancer.replacement(4), 2..3); // new
+ assert_eq!(enhancer.replacement(5), 3..4); // york
+ assert_eq!(enhancer.replacement(6), 4..5); // city
+ assert_eq!(enhancer.replacement(7), 5..6); // underground
+ assert_eq!(enhancer.replacement(8), 6..7); // train
+ }
+
+ #[test]
+ fn multiple_probable_growings() {
+ let query = ["great", "awesome", "NYC", "subway"];
+ // 0 1 2 3
+ let mut builder = QueryEnhancerBuilder::new(&query);
+
+ // NYC = new york city
+ builder.declare(2..3, 4, &["new", "york", "city"]);
+ // ^ 4 5 6
+
+ // subway = underground train
+ builder.declare(3..4, 7, &["underground", "train"]);
+ // ^ 7 8
+
+ // great awesome = good
+ builder.declare(0..2, 9, &["good"]);
+ // ^ 9
+
+ // awesome NYC = NY
+ builder.declare(1..3, 10, &["NY"]);
+ // ^^ 10
+
+ // NYC subway = metro
+ builder.declare(2..4, 11, &["metro"]);
+ // ^^ 11
+
+ let enhancer = builder.build();
+
+ assert_eq!(enhancer.replacement(0), 0..1); // great
+ assert_eq!(enhancer.replacement(1), 1..2); // awesome
+ assert_eq!(enhancer.replacement(2), 2..5); // NYC
+ assert_eq!(enhancer.replacement(3), 5..7); // subway
+ assert_eq!(enhancer.replacement(4), 2..3); // new
+ assert_eq!(enhancer.replacement(5), 3..4); // york
+ assert_eq!(enhancer.replacement(6), 4..5); // city
+ assert_eq!(enhancer.replacement(7), 5..6); // underground
+ assert_eq!(enhancer.replacement(8), 6..7); // train
+ assert_eq!(enhancer.replacement(9), 0..2); // good
+ assert_eq!(enhancer.replacement(10), 1..5); // NY
+ assert_eq!(enhancer.replacement(11), 2..5); // metro
+ }
+}
diff --git a/meilidb-core/src/criterion/document_id.rs b/meilidb-core/src/criterion/document_id.rs
index 34d0bd7f5..15549da24 100644
--- a/meilidb-core/src/criterion/document_id.rs
+++ b/meilidb-core/src/criterion/document_id.rs
@@ -10,7 +10,7 @@ impl Criterion for DocumentId {
lhs.id.cmp(&rhs.id)
}
- fn name(&self) -> &'static str {
+ fn name(&self) -> &str {
"DocumentId"
}
}
diff --git a/meilidb-core/src/criterion/exact.rs b/meilidb-core/src/criterion/exact.rs
index bde3ca733..820c35aa0 100644
--- a/meilidb-core/src/criterion/exact.rs
+++ b/meilidb-core/src/criterion/exact.rs
@@ -37,7 +37,7 @@ impl Criterion for Exact {
lhs.cmp(&rhs).reverse()
}
- fn name(&self) -> &'static str {
+ fn name(&self) -> &str {
"Exact"
}
}
diff --git a/meilidb-core/src/criterion/mod.rs b/meilidb-core/src/criterion/mod.rs
index 6ce42007c..ad02d3023 100644
--- a/meilidb-core/src/criterion/mod.rs
+++ b/meilidb-core/src/criterion/mod.rs
@@ -4,6 +4,7 @@ mod words_proximity;
mod sum_of_words_attribute;
mod sum_of_words_position;
mod exact;
+mod sort_by_attr;
mod document_id;
use std::cmp::Ordering;
@@ -16,13 +17,14 @@ pub use self::{
sum_of_words_attribute::SumOfWordsAttribute,
sum_of_words_position::SumOfWordsPosition,
exact::Exact,
+ sort_by_attr::SortByAttr,
document_id::DocumentId,
};
pub trait Criterion: Send + Sync {
fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering;
- fn name(&self) -> &'static str;
+ fn name(&self) -> &str;
#[inline]
fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
@@ -35,7 +37,7 @@ impl<'a, T: Criterion + ?Sized + Send + Sync> Criterion for &'a T {
(**self).evaluate(lhs, rhs)
}
- fn name(&self) -> &'static str {
+ fn name(&self) -> &str {
(**self).name()
}
@@ -49,7 +51,7 @@ impl Criterion for Box {
(**self).evaluate(lhs, rhs)
}
- fn name(&self) -> &'static str {
+ fn name(&self) -> &str {
(**self).name()
}
diff --git a/meilidb-core/src/criterion/number_of_words.rs b/meilidb-core/src/criterion/number_of_words.rs
index 43095a066..641385fb1 100644
--- a/meilidb-core/src/criterion/number_of_words.rs
+++ b/meilidb-core/src/criterion/number_of_words.rs
@@ -25,7 +25,7 @@ impl Criterion for NumberOfWords {
lhs.cmp(&rhs).reverse()
}
- fn name(&self) -> &'static str {
+ fn name(&self) -> &str {
"NumberOfWords"
}
}
diff --git a/meilidb/src/sort_by_attr.rs b/meilidb-core/src/criterion/sort_by_attr.rs
similarity index 96%
rename from meilidb/src/sort_by_attr.rs
rename to meilidb-core/src/criterion/sort_by_attr.rs
index 83577df13..c19062dd6 100644
--- a/meilidb/src/sort_by_attr.rs
+++ b/meilidb-core/src/criterion/sort_by_attr.rs
@@ -2,9 +2,9 @@ use std::cmp::Ordering;
use std::error::Error;
use std::fmt;
-use meilidb_core::{criterion::Criterion, RawDocument};
-use meilidb_data::RankedMap;
use meilidb_schema::{Schema, SchemaAttr};
+use crate::criterion::Criterion;
+use crate::{RawDocument, RankedMap};
/// An helper struct that permit to sort documents by
/// some of their stored attributes.
@@ -101,7 +101,7 @@ impl<'a> Criterion for SortByAttr<'a> {
}
}
- fn name(&self) -> &'static str {
+ fn name(&self) -> &str {
"SortByAttr"
}
}
diff --git a/meilidb-core/src/criterion/sum_of_typos.rs b/meilidb-core/src/criterion/sum_of_typos.rs
index 6736e6caa..9fbf0dab9 100644
--- a/meilidb-core/src/criterion/sum_of_typos.rs
+++ b/meilidb-core/src/criterion/sum_of_typos.rs
@@ -54,7 +54,7 @@ impl Criterion for SumOfTypos {
lhs.cmp(&rhs).reverse()
}
- fn name(&self) -> &'static str {
+ fn name(&self) -> &str {
"SumOfTypos"
}
}
diff --git a/meilidb-core/src/criterion/sum_of_words_attribute.rs b/meilidb-core/src/criterion/sum_of_words_attribute.rs
index d5787ef3a..2bf052159 100644
--- a/meilidb-core/src/criterion/sum_of_words_attribute.rs
+++ b/meilidb-core/src/criterion/sum_of_words_attribute.rs
@@ -36,7 +36,7 @@ impl Criterion for SumOfWordsAttribute {
lhs.cmp(&rhs)
}
- fn name(&self) -> &'static str {
+ fn name(&self) -> &str {
"SumOfWordsAttribute"
}
}
diff --git a/meilidb-core/src/criterion/sum_of_words_position.rs b/meilidb-core/src/criterion/sum_of_words_position.rs
index 13f26774c..d5dd10ab7 100644
--- a/meilidb-core/src/criterion/sum_of_words_position.rs
+++ b/meilidb-core/src/criterion/sum_of_words_position.rs
@@ -36,7 +36,7 @@ impl Criterion for SumOfWordsPosition {
lhs.cmp(&rhs)
}
- fn name(&self) -> &'static str {
+ fn name(&self) -> &str {
"SumOfWordsPosition"
}
}
diff --git a/meilidb-core/src/criterion/words_proximity.rs b/meilidb-core/src/criterion/words_proximity.rs
index 10f167bef..ed3775b50 100644
--- a/meilidb-core/src/criterion/words_proximity.rs
+++ b/meilidb-core/src/criterion/words_proximity.rs
@@ -99,7 +99,7 @@ impl Criterion for WordsProximity {
lhs.cmp(&rhs)
}
- fn name(&self) -> &'static str {
+ fn name(&self) -> &str {
"WordsProximity"
}
}
diff --git a/meilidb-core/src/database.rs b/meilidb-core/src/database.rs
new file mode 100644
index 000000000..c74bfcc7a
--- /dev/null
+++ b/meilidb-core/src/database.rs
@@ -0,0 +1,177 @@
+use std::collections::HashMap;
+use std::path::Path;
+use std::sync::{Arc, RwLock};
+use std::{fs, thread};
+
+use crossbeam_channel::Receiver;
+use log::{debug, error};
+
+use crate::{store, update, Index, MResult};
+
+pub type BoxUpdateFn = Box;
+type ArcSwapFn = arc_swap::ArcSwapOption;
+
+pub struct Database {
+ pub rkv: Arc>,
+ main_store: rkv::SingleStore,
+ indexes_store: rkv::SingleStore,
+ indexes: RwLock, thread::JoinHandle<()>)>>,
+}
+
+fn update_awaiter(
+ receiver: Receiver<()>,
+ rkv: Arc>,
+ update_fn: Arc,
+ index: Index,
+)
+{
+ for () in receiver {
+ // consume all updates in order (oldest first)
+ loop {
+ let rkv = match rkv.read() {
+ Ok(rkv) => rkv,
+ Err(e) => { error!("rkv RwLock read failed: {}", e); break }
+ };
+
+ let mut writer = match rkv.write() {
+ Ok(writer) => writer,
+ Err(e) => { error!("LMDB writer transaction begin failed: {}", e); break }
+ };
+
+ match update::update_task(&mut writer, index.clone()) {
+ Ok(Some(status)) => {
+ if let Err(e) = writer.commit() { error!("update transaction failed: {}", e) }
+
+ if let Some(ref callback) = *update_fn.load() {
+ (callback)(status);
+ }
+ },
+ // no more updates to handle for now
+ Ok(None) => { debug!("no more updates"); writer.abort(); break },
+ Err(e) => { error!("update task failed: {}", e); writer.abort() },
+ }
+ }
+ }
+}
+
+impl Database {
+ pub fn open_or_create(path: impl AsRef) -> MResult {
+ let manager = rkv::Manager::singleton();
+ let mut rkv_write = manager.write().unwrap();
+
+ fs::create_dir_all(path.as_ref())?;
+
+ let rkv = rkv_write
+ .get_or_create(path.as_ref(), |path| {
+ let mut builder = rkv::Rkv::environment_builder();
+ builder.set_max_dbs(3000).set_map_size(10 * 1024 * 1024 * 1024); // 10GB
+ rkv::Rkv::from_env(path, builder)
+ })?;
+
+ drop(rkv_write);
+
+ let rkv_read = rkv.read().unwrap();
+ let create_options = rkv::store::Options::create();
+ let main_store = rkv_read.open_single("main", create_options)?;
+ let indexes_store = rkv_read.open_single("indexes", create_options)?;
+
+ // list all indexes that needs to be opened
+ let mut must_open = Vec::new();
+ let reader = rkv_read.read()?;
+ for result in indexes_store.iter_start(&reader)? {
+ let (key, _) = result?;
+ if let Ok(index_name) = std::str::from_utf8(key) {
+ must_open.push(index_name.to_owned());
+ }
+ }
+
+ drop(reader);
+
+ // open the previously aggregated indexes
+ let mut indexes = HashMap::new();
+ for index_name in must_open {
+
+ let (sender, receiver) = crossbeam_channel::bounded(100);
+ let index = store::open(&rkv_read, &index_name, sender.clone())?;
+ let update_fn = Arc::new(ArcSwapFn::empty());
+
+ let rkv_clone = rkv.clone();
+ let index_clone = index.clone();
+ let update_fn_clone = update_fn.clone();
+
+ let handle = thread::spawn(move || {
+ update_awaiter(receiver, rkv_clone, update_fn_clone, index_clone)
+ });
+
+ // send an update notification to make sure that
+ // possible previous boot updates are consumed
+ sender.send(()).unwrap();
+
+ let result = indexes.insert(index_name, (index, update_fn, handle));
+ assert!(result.is_none(), "The index should not have been already open");
+ }
+
+ drop(rkv_read);
+
+ Ok(Database { rkv, main_store, indexes_store, indexes: RwLock::new(indexes) })
+ }
+
+ pub fn open_index(
+ &self,
+ name: impl Into,
+ update_fn: Option,
+ ) -> MResult
+ {
+ let indexes_lock = self.indexes.read().unwrap();
+ let name = name.into();
+
+ match indexes_lock.get(&name) {
+ Some((index, old_update_fn, _)) => {
+ old_update_fn.swap(update_fn.map(Arc::new));
+ Ok(index.clone())
+ },
+ None => {
+ drop(indexes_lock);
+
+ let rkv_lock = self.rkv.read().unwrap();
+ let (sender, receiver) = crossbeam_channel::bounded(100);
+ let index = store::create(&rkv_lock, &name, sender)?;
+
+ let mut writer = rkv_lock.write()?;
+ let value = rkv::Value::Blob(&[]);
+ self.indexes_store.put(&mut writer, &name, &value)?;
+
+ {
+ let mut indexes_write = self.indexes.write().unwrap();
+ indexes_write.entry(name).or_insert_with(|| {
+ let rkv_clone = self.rkv.clone();
+ let index_clone = index.clone();
+
+ let update_fn = update_fn.map(Arc::new);
+ let update_fn = Arc::new(ArcSwapFn::new(update_fn));
+ let update_fn_clone = update_fn.clone();
+
+ let handle = thread::spawn(move || {
+ update_awaiter(receiver, rkv_clone, update_fn_clone, index_clone)
+ });
+
+ (index.clone(), update_fn, handle)
+ });
+ }
+
+ writer.commit()?;
+
+ Ok(index)
+ },
+ }
+ }
+
+ pub fn indexes_names(&self) -> MResult> {
+ let indexes = self.indexes.read().unwrap();
+ Ok(indexes.keys().cloned().collect())
+ }
+
+ pub fn main_store(&self) -> rkv::SingleStore {
+ self.main_store
+ }
+}
diff --git a/meilidb-core/src/error.rs b/meilidb-core/src/error.rs
new file mode 100644
index 000000000..db83e39fd
--- /dev/null
+++ b/meilidb-core/src/error.rs
@@ -0,0 +1,112 @@
+use std::{error, fmt, io};
+use crate::serde::{SerializerError, DeserializerError};
+
+pub type MResult = Result;
+
+#[derive(Debug)]
+pub enum Error {
+ Io(io::Error),
+ SchemaDiffer,
+ SchemaMissing,
+ WordIndexMissing,
+ MissingDocumentId,
+ Rkv(rkv::StoreError),
+ Fst(fst::Error),
+ RmpDecode(rmp_serde::decode::Error),
+ RmpEncode(rmp_serde::encode::Error),
+ Bincode(bincode::Error),
+ Serializer(SerializerError),
+ Deserializer(DeserializerError),
+ UnsupportedOperation(UnsupportedOperation),
+}
+
+impl From for Error {
+ fn from(error: io::Error) -> Error {
+ Error::Io(error)
+ }
+}
+
+impl From for Error {
+ fn from(error: rkv::StoreError) -> Error {
+ Error::Rkv(error)
+ }
+}
+
+impl From for Error {
+ fn from(error: fst::Error) -> Error {
+ Error::Fst(error)
+ }
+}
+
+impl From for Error {
+ fn from(error: rmp_serde::decode::Error) -> Error {
+ Error::RmpDecode(error)
+ }
+}
+
+impl From for Error {
+ fn from(error: rmp_serde::encode::Error) -> Error {
+ Error::RmpEncode(error)
+ }
+}
+
+impl From for Error {
+ fn from(error: bincode::Error) -> Error {
+ Error::Bincode(error)
+ }
+}
+
+impl From for Error {
+ fn from(error: SerializerError) -> Error {
+ Error::Serializer(error)
+ }
+}
+
+impl From for Error {
+ fn from(error: DeserializerError) -> Error {
+ Error::Deserializer(error)
+ }
+}
+
+impl From for Error {
+ fn from(op: UnsupportedOperation) -> Error {
+ Error::UnsupportedOperation(op)
+ }
+}
+
+impl fmt::Display for Error {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ use self::Error::*;
+ match self {
+ Io(e) => write!(f, "{}", e),
+ SchemaDiffer => write!(f, "schemas differ"),
+ SchemaMissing => write!(f, "this index does not have a schema"),
+ WordIndexMissing => write!(f, "this index does not have a word index"),
+ MissingDocumentId => write!(f, "document id is missing"),
+ Rkv(e) => write!(f, "rkv error; {}", e),
+ Fst(e) => write!(f, "fst error; {}", e),
+ RmpDecode(e) => write!(f, "rmp decode error; {}", e),
+ RmpEncode(e) => write!(f, "rmp encode error; {}", e),
+ Bincode(e) => write!(f, "bincode error; {}", e),
+ Serializer(e) => write!(f, "serializer error; {}", e),
+ Deserializer(e) => write!(f, "deserializer error; {}", e),
+ UnsupportedOperation(op) => write!(f, "unsupported operation; {}", op),
+ }
+ }
+}
+
+impl error::Error for Error { }
+
+#[derive(Debug)]
+pub enum UnsupportedOperation {
+ SchemaAlreadyExists,
+}
+
+impl fmt::Display for UnsupportedOperation {
+ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+ use self::UnsupportedOperation::*;
+ match self {
+ SchemaAlreadyExists => write!(f, "Cannot update index which already have a schema"),
+ }
+ }
+}
diff --git a/meilidb-core/src/lib.rs b/meilidb-core/src/lib.rs
index 0a7844292..83b0d9424 100644
--- a/meilidb-core/src/lib.rs
+++ b/meilidb-core/src/lib.rs
@@ -1,25 +1,31 @@
-#![feature(checked_duration_since)]
-
#[cfg(test)]
#[macro_use] extern crate assert_matches;
mod automaton;
+mod database;
mod distinct_map;
+mod error;
+mod number;
mod query_builder;
-mod query_enhancer;
+mod ranked_map;
mod raw_document;
mod reordered_attrs;
-mod store;
+mod update;
pub mod criterion;
+pub mod raw_indexer;
+pub mod serde;
+pub mod store;
-use serde::{Serialize, Deserialize};
-use zerocopy::{AsBytes, FromBytes};
-
-use self::raw_document::raw_documents_from;
-
-pub use self::query_builder::{QueryBuilder, DistinctQueryBuilder, normalize_str};
+pub use self::database::{Database, BoxUpdateFn};
+pub use self::error::{Error, MResult};
+pub use self::number::{Number, ParseNumberError};
+pub use self::ranked_map::RankedMap;
pub use self::raw_document::RawDocument;
-pub use self::store::Store;
+pub use self::store::Index;
+pub use self::update::{UpdateStatus, UpdateResult};
+
+use zerocopy::{AsBytes, FromBytes};
+use ::serde::{Serialize, Deserialize};
/// Represent an internally generated document unique identifier.
///
diff --git a/meilidb-data/src/number.rs b/meilidb-core/src/number.rs
similarity index 100%
rename from meilidb-data/src/number.rs
rename to meilidb-core/src/number.rs
diff --git a/meilidb-core/src/query_builder.rs b/meilidb-core/src/query_builder.rs
index d0c117cad..7bbcf94fb 100644
--- a/meilidb-core/src/query_builder.rs
+++ b/meilidb-core/src/query_builder.rs
@@ -1,248 +1,40 @@
+use hashbrown::HashMap;
use std::hash::Hash;
+use std::mem;
use std::ops::Range;
use std::rc::Rc;
use std::time::{Instant, Duration};
-use std::{mem, cmp, cmp::Reverse};
-use fst::{Streamer, IntoStreamer};
-use hashbrown::HashMap;
-use levenshtein_automata::DFA;
-use log::trace;
-use meilidb_tokenizer::{is_cjk, split_query_string};
-use rayon::slice::ParallelSliceMut;
-use rayon::iter::{ParallelIterator, ParallelBridge};
+use fst::{IntoStreamer, Streamer};
use sdset::SetBuf;
use slice_group_by::{GroupBy, GroupByMut};
-use crate::automaton::{build_dfa, build_prefix_dfa};
-use crate::criterion::Criteria;
+use crate::automaton::{Automaton, AutomatonProducer, QueryEnhancer};
use crate::distinct_map::{DistinctMap, BufferedDistinctMap};
-use crate::query_enhancer::{QueryEnhancerBuilder, QueryEnhancer};
-use crate::raw_documents_from;
-use crate::reordered_attrs::ReorderedAttrs;
-use crate::{TmpMatch, Highlight, DocumentId, Store, RawDocument, Document};
+use crate::raw_document::{RawDocument, raw_documents_from};
+use crate::{Document, DocumentId, Highlight, TmpMatch, criterion::Criteria};
+use crate::{store, MResult, reordered_attrs::ReorderedAttrs};
-const NGRAMS: usize = 3;
-
-struct Automaton {
- index: usize,
- ngram: usize,
- query_len: usize,
- is_exact: bool,
- is_prefix: bool,
- query: String,
-}
-
-impl Automaton {
- fn dfa(&self) -> DFA {
- if self.is_prefix {
- build_prefix_dfa(&self.query)
- } else {
- build_dfa(&self.query)
- }
- }
-
- fn exact(index: usize, ngram: usize, query: &str) -> Automaton {
- Automaton {
- index,
- ngram,
- query_len: query.len(),
- is_exact: true,
- is_prefix: false,
- query: query.to_string(),
- }
- }
-
- fn prefix_exact(index: usize, ngram: usize, query: &str) -> Automaton {
- Automaton {
- index,
- ngram,
- query_len: query.len(),
- is_exact: true,
- is_prefix: true,
- query: query.to_string(),
- }
- }
-
- fn non_exact(index: usize, ngram: usize, query: &str) -> Automaton {
- Automaton {
- index,
- ngram,
- query_len: query.len(),
- is_exact: false,
- is_prefix: false,
- query: query.to_string(),
- }
- }
-}
-
-pub fn normalize_str(string: &str) -> String {
- let mut string = string.to_lowercase();
-
- if !string.contains(is_cjk) {
- string = deunicode::deunicode_with_tofu(&string, "");
- }
-
- string
-}
-
-fn generate_automatons(query: &str, store: &S) -> Result<(Vec, QueryEnhancer), S::Error> {
- let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace);
- let query_words: Vec<_> = split_query_string(query).map(str::to_lowercase).collect();
- let synonyms = store.synonyms()?;
-
- let mut automatons = Vec::new();
- let mut enhancer_builder = QueryEnhancerBuilder::new(&query_words);
-
- // We must not declare the original words to the query enhancer
- // *but* we need to push them in the automatons list first
- let mut original_words = query_words.iter().peekable();
- while let Some(word) = original_words.next() {
-
- let has_following_word = original_words.peek().is_some();
- let not_prefix_dfa = has_following_word || has_end_whitespace || word.chars().all(is_cjk);
-
- let automaton = if not_prefix_dfa {
- Automaton::exact(automatons.len(), 1, word)
- } else {
- Automaton::prefix_exact(automatons.len(), 1, word)
- };
- automatons.push(automaton);
- }
-
- for n in 1..=NGRAMS {
-
- let mut ngrams = query_words.windows(n).enumerate().peekable();
- while let Some((query_index, ngram_slice)) = ngrams.next() {
-
- let query_range = query_index..query_index + n;
- let ngram_nb_words = ngram_slice.len();
- let ngram = ngram_slice.join(" ");
-
- let has_following_word = ngrams.peek().is_some();
- let not_prefix_dfa = has_following_word || has_end_whitespace || ngram.chars().all(is_cjk);
-
- // automaton of synonyms of the ngrams
- let normalized = normalize_str(&ngram);
- let lev = if not_prefix_dfa { build_dfa(&normalized) } else { build_prefix_dfa(&normalized) };
-
- let mut stream = synonyms.search(&lev).into_stream();
- while let Some(base) = stream.next() {
-
- // only trigger alternatives when the last word has been typed
- // i.e. "new " do not but "new yo" triggers alternatives to "new york"
- let base = std::str::from_utf8(base).unwrap();
- let base_nb_words = split_query_string(base).count();
- if ngram_nb_words != base_nb_words { continue }
-
- if let Some(synonyms) = store.alternatives_to(base.as_bytes())? {
-
- let mut stream = synonyms.into_stream();
- while let Some(synonyms) = stream.next() {
- let synonyms = std::str::from_utf8(synonyms).unwrap();
- let synonyms_words: Vec<_> = split_query_string(synonyms).collect();
- let nb_synonym_words = synonyms_words.len();
-
- let real_query_index = automatons.len();
- enhancer_builder.declare(query_range.clone(), real_query_index, &synonyms_words);
-
- for synonym in synonyms_words {
- let automaton = if nb_synonym_words == 1 {
- Automaton::exact(automatons.len(), n, synonym)
- } else {
- Automaton::non_exact(automatons.len(), n, synonym)
- };
- automatons.push(automaton);
- }
- }
- }
- }
-
- if n != 1 {
- // automaton of concatenation of query words
- let concat = ngram_slice.concat();
- let normalized = normalize_str(&concat);
-
- let real_query_index = automatons.len();
- enhancer_builder.declare(query_range.clone(), real_query_index, &[&normalized]);
-
- let automaton = Automaton::exact(automatons.len(), n, &normalized);
- automatons.push(automaton);
- }
- }
- }
-
- // order automatons, the most important first,
- // we keep the original automatons at the front.
- let original_len = query_words.len();
- automatons[original_len..].sort_unstable_by_key(|a| (Reverse(a.is_exact), Reverse(a.ngram)));
-
- Ok((automatons, enhancer_builder.build()))
-}
-
-pub struct QueryBuilder<'c, S, FI = fn(DocumentId) -> bool> {
- store: S,
+pub struct QueryBuilder<'c, FI = fn(DocumentId) -> bool> {
criteria: Criteria<'c>,
searchable_attrs: Option,
filter: Option,
- fetch_timeout: Option,
-}
-
-impl<'c, S> QueryBuilder<'c, S, fn(DocumentId) -> bool> {
- pub fn new(store: S) -> Self {
- QueryBuilder::with_criteria(store, Criteria::default())
- }
-
- pub fn with_criteria(store: S, criteria: Criteria<'c>) -> Self {
- QueryBuilder { store, criteria, searchable_attrs: None, filter: None, fetch_timeout: None }
- }
-}
-
-impl<'c, S, FI> QueryBuilder<'c, S, FI>
-{
- pub fn with_filter(self, function: F) -> QueryBuilder<'c, S, F>
- where F: Fn(DocumentId) -> bool,
- {
- QueryBuilder {
- store: self.store,
- criteria: self.criteria,
- searchable_attrs: self.searchable_attrs,
- filter: Some(function),
- fetch_timeout: self.fetch_timeout,
- }
- }
-
- pub fn with_fetch_timeout(self, timeout: Duration) -> QueryBuilder<'c, S, FI> {
- QueryBuilder { fetch_timeout: Some(timeout), ..self }
- }
-
- pub fn with_distinct(self, function: F, size: usize) -> DistinctQueryBuilder<'c, S, FI, F>
- where F: Fn(DocumentId) -> Option,
- K: Hash + Eq,
- {
- DistinctQueryBuilder { inner: self, function, size }
- }
-
- pub fn add_searchable_attribute(&mut self, attribute: u16) {
- let reorders = self.searchable_attrs.get_or_insert_with(ReorderedAttrs::new);
- reorders.insert_attribute(attribute);
- }
+ timeout: Duration,
+ main_store: store::Main,
+ postings_lists_store: store::PostingsLists,
+ synonyms_store: store::Synonyms,
}
fn multiword_rewrite_matches(
mut matches: Vec<(DocumentId, TmpMatch)>,
query_enhancer: &QueryEnhancer,
- timeout: Option,
) -> SetBuf<(DocumentId, TmpMatch)>
{
let mut padded_matches = Vec::with_capacity(matches.len());
// we sort the matches by word index to make them rewritable
- let start = Instant::now();
- matches.par_sort_unstable_by_key(|(id, match_)| (*id, match_.attribute, match_.word_index));
- trace!("rewrite sort by word_index took {:.2?}", start.elapsed());
+ matches.sort_unstable_by_key(|(id, match_)| (*id, match_.attribute, match_.word_index));
- let start = Instant::now();
// for each attribute of each document
for same_document_attribute in matches.linear_group_by_key(|(id, m)| (*id, m.attribute)) {
@@ -322,194 +114,248 @@ fn multiword_rewrite_matches(
padding += biggest;
}
-
- // check the timeout *after* having processed at least one element
- if timeout.map_or(false, |timeout| start.elapsed() > timeout) { break }
}
- trace!("main multiword rewrite took {:.2?}", start.elapsed());
- let start = Instant::now();
for document_matches in padded_matches.linear_group_by_key_mut(|(id, _)| *id) {
document_matches.sort_unstable();
}
- trace!("final rewrite sort took {:.2?}", start.elapsed());
SetBuf::new_unchecked(padded_matches)
}
-impl<'c, S, FI> QueryBuilder<'c, S, FI>
-where S: Store + Sync,
- S::Error: Send,
+fn fetch_raw_documents(
+ reader: &impl rkv::Readable,
+ automatons: &[Automaton],
+ query_enhancer: &QueryEnhancer,
+ searchables: Option<&ReorderedAttrs>,
+ main_store: &store::Main,
+ postings_lists_store: &store::PostingsLists,
+) -> MResult>
{
- fn query_all(&self, query: &str) -> Result, S::Error> {
- let (automatons, query_enhancer) = generate_automatons(query, &self.store)?;
- let searchables = self.searchable_attrs.as_ref();
- let store = &self.store;
- let fetch_timeout = &self.fetch_timeout;
+ let mut matches = Vec::new();
+ let mut highlights = Vec::new();
- let mut matches = Vec::new();
- let mut highlights = Vec::new();
+ for automaton in automatons {
+ let Automaton { index, is_exact, query_len, .. } = automaton;
+ let dfa = automaton.dfa();
- let timeout = fetch_timeout.map(|d| d * 75 / 100);
- let start = Instant::now();
+ let words = match main_store.words_fst(reader)? {
+ Some(words) => words,
+ None => return Ok(Vec::new()),
+ };
- let results: Vec<_> = automatons
- .into_iter()
- .par_bridge()
- .map_with((store, searchables), |(store, searchables), automaton| {
- let Automaton { index, is_exact, query_len, .. } = automaton;
- let dfa = automaton.dfa();
+ let mut stream = words.search(&dfa).into_stream();
+ while let Some(input) = stream.next() {
+ let distance = dfa.eval(input).to_u8();
+ let is_exact = *is_exact && distance == 0 && input.len() == *query_len;
- let words = match store.words() {
- Ok(words) => words,
- Err(err) => return Some(Err(err)),
- };
+ let doc_indexes = match postings_lists_store.postings_list(reader, input)? {
+ Some(doc_indexes) => doc_indexes,
+ None => continue,
+ };
- let mut stream = words.search(&dfa).into_stream();
- let mut matches = Vec::new();
- let mut highlights = Vec::new();
+ matches.reserve(doc_indexes.len());
+ highlights.reserve(doc_indexes.len());
- while let Some(input) = stream.next() {
- let distance = dfa.eval(input).to_u8();
- let is_exact = is_exact && distance == 0 && input.len() == query_len;
-
- let doc_indexes = match store.word_indexes(input) {
- Ok(Some(doc_indexes)) => doc_indexes,
- Ok(None) => continue,
- Err(err) => return Some(Err(err)),
+ for di in doc_indexes.as_ref() {
+ let attribute = searchables.map_or(Some(di.attribute), |r| r.get(di.attribute));
+ if let Some(attribute) = attribute {
+ let match_ = TmpMatch {
+ query_index: *index as u32,
+ distance,
+ attribute,
+ word_index: di.word_index,
+ is_exact,
};
- matches.reserve(doc_indexes.len());
- highlights.reserve(doc_indexes.len());
+ let highlight = Highlight {
+ attribute: di.attribute,
+ char_index: di.char_index,
+ char_length: di.char_length,
+ };
- for di in doc_indexes.as_slice() {
- let attribute = searchables.map_or(Some(di.attribute), |r| r.get(di.attribute));
- if let Some(attribute) = attribute {
- let match_ = TmpMatch {
- query_index: index as u32,
- distance,
- attribute,
- word_index: di.word_index,
- is_exact,
- };
-
- let highlight = Highlight {
- attribute: di.attribute,
- char_index: di.char_index,
- char_length: di.char_length,
- };
-
- matches.push((di.document_id, match_));
- highlights.push((di.document_id, highlight));
- }
- }
-
- // check the timeout *after* having processed at least one element
- if timeout.map_or(false, |timeout| start.elapsed() > timeout) { break }
+ matches.push((di.document_id, match_));
+ highlights.push((di.document_id, highlight));
}
-
- Some(Ok((matches, highlights)))
- })
- .while_some()
- .collect();
-
- for result in results {
- let (mut rcv_matches, mut rcv_highlights) = result?;
- matches.append(&mut rcv_matches);
- highlights.append(&mut rcv_highlights);
+ }
}
+ }
- trace!("main query all took {:.2?}", start.elapsed());
- trace!("{} total matches to rewrite", matches.len());
+ let matches = multiword_rewrite_matches(matches, &query_enhancer);
+ let highlights = {
+ highlights.sort_unstable_by_key(|(id, _)| *id);
+ SetBuf::new_unchecked(highlights)
+ };
- let start = Instant::now();
- let timeout = fetch_timeout.map(|d| d * 25 / 100);
- let matches = multiword_rewrite_matches(matches, &query_enhancer, timeout);
- trace!("multiword rewrite took {:.2?}", start.elapsed());
+ Ok(raw_documents_from(matches, highlights))
+}
- let start = Instant::now();
- let highlights = {
- highlights.par_sort_unstable_by_key(|(id, _)| *id);
- SetBuf::new_unchecked(highlights)
- };
- trace!("sorting highlights took {:.2?}", start.elapsed());
+impl<'c> QueryBuilder<'c> {
+ pub fn new(
+ main: store::Main,
+ postings_lists: store::PostingsLists,
+ synonyms: store::Synonyms,
+ ) -> QueryBuilder<'c>
+ {
+ QueryBuilder::with_criteria(main, postings_lists, synonyms, Criteria::default())
+ }
- trace!("{} total matches to classify", matches.len());
-
- let start = Instant::now();
- let raw_documents = raw_documents_from(matches, highlights);
- trace!("making raw documents took {:.2?}", start.elapsed());
-
- trace!("{} total documents to classify", raw_documents.len());
-
- Ok(raw_documents)
+ pub fn with_criteria(
+ main: store::Main,
+ postings_lists: store::PostingsLists,
+ synonyms: store::Synonyms,
+ criteria: Criteria<'c>,
+ ) -> QueryBuilder<'c>
+ {
+ QueryBuilder {
+ criteria,
+ searchable_attrs: None,
+ filter: None,
+ timeout: Duration::from_millis(30),
+ main_store: main,
+ postings_lists_store: postings_lists,
+ synonyms_store: synonyms,
+ }
}
}
-impl<'c, S, FI> QueryBuilder<'c, S, FI>
-where S: Store + Sync,
- S::Error: Send,
- FI: Fn(DocumentId) -> bool,
-{
- pub fn query(self, query: &str, range: Range) -> Result, S::Error> {
+impl<'c, FI> QueryBuilder<'c, FI> {
+ pub fn with_filter(self, function: F) -> QueryBuilder<'c, F>
+ where F: Fn(DocumentId) -> bool,
+ {
+ QueryBuilder {
+ criteria: self.criteria,
+ searchable_attrs: self.searchable_attrs,
+ filter: Some(function),
+ timeout: self.timeout,
+ main_store: self.main_store,
+ postings_lists_store: self.postings_lists_store,
+ synonyms_store: self.synonyms_store,
+ }
+ }
+
+ pub fn with_fetch_timeout(self, timeout: Duration) -> QueryBuilder<'c, FI> {
+ QueryBuilder { timeout, ..self }
+ }
+
+ pub fn with_distinct(self, function: F, size: usize) -> DistinctQueryBuilder<'c, FI, F>
+ where F: Fn(DocumentId) -> Option,
+ K: Hash + Eq,
+ {
+ DistinctQueryBuilder { inner: self, function, size }
+ }
+
+ pub fn add_searchable_attribute(&mut self, attribute: u16) {
+ let reorders = self.searchable_attrs.get_or_insert_with(ReorderedAttrs::new);
+ reorders.insert_attribute(attribute);
+ }
+}
+
+impl QueryBuilder<'_, FI> where FI: Fn(DocumentId) -> bool {
+ pub fn query(
+ self,
+ reader: &impl rkv::Readable,
+ query: &str,
+ range: Range,
+ ) -> MResult>
+ {
// We delegate the filter work to the distinct query builder,
// specifying a distinct rule that has no effect.
if self.filter.is_some() {
let builder = self.with_distinct(|_| None as Option<()>, 1);
- return builder.query(query, range);
+ return builder.query(reader, query, range);
}
- let start = Instant::now();
- let mut documents = self.query_all(query)?;
- trace!("query_all took {:.2?}", start.elapsed());
+ let start_processing = Instant::now();
+ let mut raw_documents_processed = Vec::with_capacity(range.len());
- let mut groups = vec![documents.as_mut_slice()];
+ let (automaton_producer, query_enhancer) = AutomatonProducer::new(
+ reader,
+ query,
+ self.main_store,
+ self.synonyms_store,
+ )?;
- 'criteria: for criterion in self.criteria.as_ref() {
- let tmp_groups = mem::replace(&mut groups, Vec::new());
- let mut documents_seen = 0;
+ let mut automaton_producer = automaton_producer.into_iter();
+ let mut automatons = Vec::new();
- for group in tmp_groups {
- // if this group does not overlap with the requested range,
- // push it without sorting and splitting it
- if documents_seen + group.len() < range.start {
- documents_seen += group.len();
- groups.push(group);
- continue;
- }
+ // aggregate automatons groups by groups after time
+ while let Some(auts) = automaton_producer.next() {
+ automatons.extend(auts);
- let start = Instant::now();
- group.par_sort_unstable_by(|a, b| criterion.evaluate(a, b));
- trace!("criterion {} sort took {:.2?}", criterion.name(), start.elapsed());
+ // we must retrieve the documents associated
+ // with the current automatons
+ let mut raw_documents = fetch_raw_documents(
+ reader,
+ &automatons,
+ &query_enhancer,
+ self.searchable_attrs.as_ref(),
+ &self.main_store,
+ &self.postings_lists_store,
+ )?;
- for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b)) {
- trace!("criterion {} produced a group of size {}", criterion.name(), group.len());
+ // stop processing when time is running out
+ if !raw_documents_processed.is_empty() && start_processing.elapsed() > self.timeout {
+ break
+ }
- documents_seen += group.len();
- groups.push(group);
+ let mut groups = vec![raw_documents.as_mut_slice()];
- // we have sort enough documents if the last document sorted is after
- // the end of the requested range, we can continue to the next criterion
- if documents_seen >= range.end { continue 'criteria }
+ 'criteria: for criterion in self.criteria.as_ref() {
+ let tmp_groups = mem::replace(&mut groups, Vec::new());
+ let mut documents_seen = 0;
+
+ for group in tmp_groups {
+ // if this group does not overlap with the requested range,
+ // push it without sorting and splitting it
+ if documents_seen + group.len() < range.start {
+ documents_seen += group.len();
+ groups.push(group);
+ continue;
+ }
+
+ group.sort_unstable_by(|a, b| criterion.evaluate(a, b));
+
+ for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b)) {
+ documents_seen += group.len();
+ groups.push(group);
+
+ // we have sort enough documents if the last document sorted is after
+ // the end of the requested range, we can continue to the next criterion
+ if documents_seen >= range.end { continue 'criteria }
+ }
}
}
+
+ // once we classified the documents related to the current
+ // automatons we save that as the next valid result
+ let iter = raw_documents.into_iter().skip(range.start).take(range.len());
+ raw_documents_processed.clear();
+ raw_documents_processed.extend(iter);
+
+ // stop processing when time is running out
+ if start_processing.elapsed() > self.timeout { break }
}
- let offset = cmp::min(documents.len(), range.start);
- let iter = documents.into_iter().skip(offset).take(range.len());
- Ok(iter.map(|d| Document::from_raw(d)).collect())
+ // make real documents now that we know
+ // those must be returned
+ let documents = raw_documents_processed
+ .into_iter()
+ .map(|d| Document::from_raw(d))
+ .collect();
+
+ Ok(documents)
}
}
-pub struct DistinctQueryBuilder<'c, I, FI, FD> {
- inner: QueryBuilder<'c, I, FI>,
+pub struct DistinctQueryBuilder<'c, FI, FD> {
+ inner: QueryBuilder<'c, FI>,
function: FD,
size: usize,
}
-impl<'c, I, FI, FD> DistinctQueryBuilder<'c, I, FI, FD>
-{
- pub fn with_filter(self, function: F) -> DistinctQueryBuilder<'c, I, F, FD>
+impl<'c, FI, FD> DistinctQueryBuilder<'c, FI, FD> {
+ pub fn with_filter(self, function: F) -> DistinctQueryBuilder<'c, F, FD>
where F: Fn(DocumentId) -> bool,
{
DistinctQueryBuilder {
@@ -519,7 +365,7 @@ impl<'c, I, FI, FD> DistinctQueryBuilder<'c, I, FI, FD>
}
}
- pub fn with_fetch_timeout(self, timeout: Duration) -> DistinctQueryBuilder<'c, I, FI, FD> {
+ pub fn with_fetch_timeout(self, timeout: Duration) -> DistinctQueryBuilder<'c, FI, FD> {
DistinctQueryBuilder {
inner: self.inner.with_fetch_timeout(timeout),
function: self.function,
@@ -532,114 +378,156 @@ impl<'c, I, FI, FD> DistinctQueryBuilder<'c, I, FI, FD>
}
}
-impl<'c, S, FI, FD, K> DistinctQueryBuilder<'c, S, FI, FD>
-where S: Store + Sync,
- S::Error: Send,
- FI: Fn(DocumentId) -> bool,
+impl<'c, FI, FD, K> DistinctQueryBuilder<'c, FI, FD>
+where FI: Fn(DocumentId) -> bool,
FD: Fn(DocumentId) -> Option,
K: Hash + Eq,
{
- pub fn query(self, query: &str, range: Range) -> Result, S::Error> {
- let start = Instant::now();
- let mut documents = self.inner.query_all(query)?;
- trace!("query_all took {:.2?}", start.elapsed());
+ pub fn query(
+ self,
+ reader: &impl rkv::Readable,
+ query: &str,
+ range: Range,
+ ) -> MResult>
+ {
+ let start_processing = Instant::now();
+ let mut raw_documents_processed = Vec::new();
- let mut groups = vec![documents.as_mut_slice()];
- let mut key_cache = HashMap::new();
+ let (automaton_producer, query_enhancer) = AutomatonProducer::new(
+ reader,
+ query,
+ self.inner.main_store,
+ self.inner.synonyms_store,
+ )?;
- let mut filter_map = HashMap::new();
- // these two variables informs on the current distinct map and
- // on the raw offset of the start of the group where the
- // range.start bound is located according to the distinct function
- let mut distinct_map = DistinctMap::new(self.size);
- let mut distinct_raw_offset = 0;
+ let mut automaton_producer = automaton_producer.into_iter();
+ let mut automatons = Vec::new();
- 'criteria: for criterion in self.inner.criteria.as_ref() {
- let tmp_groups = mem::replace(&mut groups, Vec::new());
- let mut buf_distinct = BufferedDistinctMap::new(&mut distinct_map);
- let mut documents_seen = 0;
+ // aggregate automatons group by group over time
+ while let Some(auts) = automaton_producer.next() {
+ automatons.extend(auts);
- for group in tmp_groups {
- // if this group does not overlap with the requested range,
- // push it without sorting and splitting it
- if documents_seen + group.len() < distinct_raw_offset {
- documents_seen += group.len();
- groups.push(group);
- continue;
- }
+ // we must retrieve the documents associated
+ // with the current automatons
+ let mut raw_documents = fetch_raw_documents(
+ reader,
+ &automatons,
+ &query_enhancer,
+ self.inner.searchable_attrs.as_ref(),
+ &self.inner.main_store,
+ &self.inner.postings_lists_store,
+ )?;
- let start = Instant::now();
- group.par_sort_unstable_by(|a, b| criterion.evaluate(a, b));
- trace!("criterion {} sort took {:.2?}", criterion.name(), start.elapsed());
+ // stop processing when time is running out
+ if !raw_documents_processed.is_empty() && start_processing.elapsed() > self.inner.timeout {
+ break
+ }
- for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b)) {
- // we must compute the real distinguished len of this sub-group
- for document in group.iter() {
- let filter_accepted = match &self.inner.filter {
- Some(filter) => {
- let entry = filter_map.entry(document.id);
- *entry.or_insert_with(|| (filter)(document.id))
- },
- None => true,
- };
+ let mut groups = vec![raw_documents.as_mut_slice()];
+ let mut key_cache = HashMap::new();
- if filter_accepted {
- let entry = key_cache.entry(document.id);
- let key = entry.or_insert_with(|| (self.function)(document.id).map(Rc::new));
+ let mut filter_map = HashMap::new();
+ // these two variables inform on the current distinct map and
+ // on the raw offset of the start of the group where the
+ // range.start bound is located according to the distinct function
+ let mut distinct_map = DistinctMap::new(self.size);
+ let mut distinct_raw_offset = 0;
- match key.clone() {
- Some(key) => buf_distinct.register(key),
- None => buf_distinct.register_without_key(),
+ 'criteria: for criterion in self.inner.criteria.as_ref() {
+ let tmp_groups = mem::replace(&mut groups, Vec::new());
+ let mut buf_distinct = BufferedDistinctMap::new(&mut distinct_map);
+ let mut documents_seen = 0;
+
+ for group in tmp_groups {
+ // if this group does not overlap with the requested range,
+ // push it without sorting and splitting it
+ if documents_seen + group.len() < distinct_raw_offset {
+ documents_seen += group.len();
+ groups.push(group);
+ continue;
+ }
+
+ group.sort_unstable_by(|a, b| criterion.evaluate(a, b));
+
+ for group in group.binary_group_by_mut(|a, b| criterion.eq(a, b)) {
+ // we must compute the real distinguished len of this sub-group
+ for document in group.iter() {
+ let filter_accepted = match &self.inner.filter {
+ Some(filter) => {
+ let entry = filter_map.entry(document.id);
+ *entry.or_insert_with(|| (filter)(document.id))
+ },
+ None => true,
};
+
+ if filter_accepted {
+ let entry = key_cache.entry(document.id);
+ let key = entry.or_insert_with(|| (self.function)(document.id).map(Rc::new));
+
+ match key.clone() {
+ Some(key) => buf_distinct.register(key),
+ None => buf_distinct.register_without_key(),
+ };
+ }
+
+ // the requested range end is reached: stop computing distinct
+ if buf_distinct.len() >= range.end { break }
}
- // the requested range end is reached: stop computing distinct
- if buf_distinct.len() >= range.end { break }
+ documents_seen += group.len();
+ groups.push(group);
+
+ // if this sub-group does not overlap with the requested range
+ // we must update the distinct map and its start index
+ if buf_distinct.len() < range.start {
+ buf_distinct.transfert_to_internal();
+ distinct_raw_offset = documents_seen;
+ }
+
+ // we have sorted enough documents if the last document sorted is after
+ // the end of the requested range; we can continue to the next criterion
+ if buf_distinct.len() >= range.end { continue 'criteria }
}
-
- trace!("criterion {} produced a group of size {}", criterion.name(), group.len());
-
- documents_seen += group.len();
- groups.push(group);
-
- // if this sub-group does not overlap with the requested range
- // we must update the distinct map and its start index
- if buf_distinct.len() < range.start {
- buf_distinct.transfert_to_internal();
- distinct_raw_offset = documents_seen;
- }
-
- // we have sort enough documents if the last document sorted is after
- // the end of the requested range, we can continue to the next criterion
- if buf_distinct.len() >= range.end { continue 'criteria }
}
}
- }
- let mut out_documents = Vec::with_capacity(range.len());
- let mut seen = BufferedDistinctMap::new(&mut distinct_map);
+ // once we have classified the documents related to the current
+ // automatons, we save that as the next valid result
+ let mut seen = BufferedDistinctMap::new(&mut distinct_map);
+ raw_documents_processed.clear();
- for document in documents.into_iter().skip(distinct_raw_offset) {
- let filter_accepted = match &self.inner.filter {
- Some(_) => filter_map.remove(&document.id).expect("BUG: filtered not found"),
- None => true,
- };
-
- if filter_accepted {
- let key = key_cache.remove(&document.id).expect("BUG: cached key not found");
- let distinct_accepted = match key {
- Some(key) => seen.register(key),
- None => seen.register_without_key(),
+ for document in raw_documents.into_iter().skip(distinct_raw_offset) {
+ let filter_accepted = match &self.inner.filter {
+ Some(_) => filter_map.remove(&document.id).unwrap(),
+ None => true,
};
- if distinct_accepted && seen.len() > range.start {
- out_documents.push(Document::from_raw(document));
- if out_documents.len() == range.len() { break }
+ if filter_accepted {
+ let key = key_cache.remove(&document.id).unwrap();
+ let distinct_accepted = match key {
+ Some(key) => seen.register(key),
+ None => seen.register_without_key(),
+ };
+
+ if distinct_accepted && seen.len() > range.start {
+ raw_documents_processed.push(document);
+ if raw_documents_processed.len() == range.len() { break }
+ }
}
}
+
+ // stop processing when time is running out
+ if start_processing.elapsed() > self.inner.timeout { break }
}
- Ok(out_documents)
+ // make real documents now that we know
+ // those must be returned
+ let documents = raw_documents_processed
+ .into_iter()
+ .map(|d| Document::from_raw(d))
+ .collect();
+
+ Ok(documents)
}
}
@@ -650,19 +538,14 @@ mod tests {
use std::collections::{BTreeSet, HashMap};
use std::iter::FromIterator;
- use sdset::SetBuf;
use fst::{Set, IntoStreamer};
+ use sdset::SetBuf;
+ use tempfile::TempDir;
+ use crate::automaton::normalize_str;
+ use crate::database::{Database, BoxUpdateFn};
use crate::DocIndex;
- use crate::store::Store;
-
- #[derive(Default)]
- struct InMemorySetStore {
- set: Set,
- synonyms: Set,
- indexes: HashMap, SetBuf>,
- alternatives: HashMap, Set>,
- }
+ use crate::store::Index;
fn set_from_stream<'f, I, S>(stream: I) -> Set
where
@@ -693,57 +576,6 @@ mod tests {
builder.into_inner().and_then(Set::from_bytes).unwrap()
}
- impl InMemorySetStore {
- pub fn add_synonym(&mut self, word: &str, new: SetBuf<&str>) {
- let word = word.to_lowercase();
- let alternatives = self.alternatives.entry(word.as_bytes().to_vec()).or_default();
- let new = sdset_into_fstset(&new);
- *alternatives = set_from_stream(alternatives.op().add(new.into_stream()).r#union());
-
- self.synonyms = insert_key(&self.synonyms, word.as_bytes());
- }
- }
-
- impl<'a> FromIterator<(&'a str, &'a [DocIndex])> for InMemorySetStore {
- fn from_iter>(iter: I) -> Self {
- let mut tree = BTreeSet::new();
- let mut map = HashMap::new();
-
- for (word, indexes) in iter {
- let word = word.to_lowercase().into_bytes();
- tree.insert(word.clone());
- map.entry(word).or_insert_with(Vec::new).extend_from_slice(indexes);
- }
-
- InMemorySetStore {
- set: Set::from_iter(tree).unwrap(),
- synonyms: Set::default(),
- indexes: map.into_iter().map(|(k, v)| (k, SetBuf::from_dirty(v))).collect(),
- alternatives: HashMap::new(),
- }
- }
- }
-
- impl Store for InMemorySetStore {
- type Error = std::io::Error;
-
- fn words(&self) -> Result<&Set, Self::Error> {
- Ok(&self.set)
- }
-
- fn word_indexes(&self, word: &[u8]) -> Result