mirror of
https://github.com/meilisearch/meilisearch.git
synced 2024-11-23 10:37:41 +08:00
Merge pull request #336 from meilisearch/rename-to-meilisearch
Rename MeiliDB into MeiliSearch
This commit is contained in:
commit
fcc154da1c
8
.github/workflows/publish-binaries.yml
vendored
8
.github/workflows/publish-binaries.yml
vendored
@ -14,11 +14,11 @@ jobs:
|
|||||||
os: [ubuntu-latest, macos-latest]
|
os: [ubuntu-latest, macos-latest]
|
||||||
include:
|
include:
|
||||||
- os: ubuntu-latest
|
- os: ubuntu-latest
|
||||||
artifact_name: meilidb-http
|
artifact_name: meilisearch
|
||||||
asset_name: meilidb-http-linux-amd64
|
asset_name: meilisearch-linux-amd64
|
||||||
- os: macos-latest
|
- os: macos-latest
|
||||||
artifact_name: meilidb-http
|
artifact_name: meilisearch
|
||||||
asset_name: meilidb-http-macos-amd64
|
asset_name: meilisearch-macos-amd64
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: hecrj/setup-rust-action@master
|
- uses: hecrj/setup-rust-action@master
|
||||||
|
2
.github/workflows/test.yml
vendored
2
.github/workflows/test.yml
vendored
@ -5,7 +5,7 @@ name: Cargo test
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
check:
|
check:
|
||||||
name: MeiliDB
|
name: MeiliSearch
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v1
|
- uses: actions/checkout@v1
|
||||||
|
20
Cargo.lock
generated
20
Cargo.lock
generated
@ -930,7 +930,7 @@ version = "2.0.0"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "meilidb-core"
|
name = "meilisearch-core"
|
||||||
version = "0.8.0"
|
version = "0.8.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arc-swap 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
"arc-swap 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
@ -948,9 +948,9 @@ dependencies = [
|
|||||||
"indexmap 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"indexmap 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"levenshtein_automata 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
"levenshtein_automata 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"meilidb-schema 0.8.0",
|
"meilisearch-schema 0.8.0",
|
||||||
"meilidb-tokenizer 0.8.0",
|
"meilisearch-tokenizer 0.8.0",
|
||||||
"meilidb-types 0.8.0",
|
"meilisearch-types 0.8.0",
|
||||||
"once_cell 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"once_cell 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"ordered-float 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
"ordered-float 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"rustyline 5.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
"rustyline 5.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
@ -967,7 +967,7 @@ dependencies = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "meilidb-http"
|
name = "meilisearch-http"
|
||||||
version = "0.8.0"
|
version = "0.8.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"async-compression 0.1.0-alpha.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
"async-compression 0.1.0-alpha.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
@ -982,8 +982,8 @@ dependencies = [
|
|||||||
"jemallocator 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
"jemallocator 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"main_error 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"main_error 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"meilidb-core 0.8.0",
|
"meilisearch-core 0.8.0",
|
||||||
"meilidb-schema 0.8.0",
|
"meilisearch-schema 0.8.0",
|
||||||
"pretty-bytes 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
"pretty-bytes 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
"rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"rayon 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"rayon 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
@ -1003,7 +1003,7 @@ dependencies = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "meilidb-schema"
|
name = "meilisearch-schema"
|
||||||
version = "0.8.0"
|
version = "0.8.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bincode 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"bincode 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
@ -1014,7 +1014,7 @@ dependencies = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "meilidb-tokenizer"
|
name = "meilisearch-tokenizer"
|
||||||
version = "0.8.0"
|
version = "0.8.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"deunicode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"deunicode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
@ -1022,7 +1022,7 @@ dependencies = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "meilidb-types"
|
name = "meilisearch-types"
|
||||||
version = "0.8.0"
|
version = "0.8.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)",
|
"serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
10
Cargo.toml
10
Cargo.toml
@ -1,10 +1,10 @@
|
|||||||
[workspace]
|
[workspace]
|
||||||
members = [
|
members = [
|
||||||
"meilidb-core",
|
"meilisearch-core",
|
||||||
"meilidb-http",
|
"meilisearch-http",
|
||||||
"meilidb-schema",
|
"meilisearch-schema",
|
||||||
"meilidb-tokenizer",
|
"meilisearch-tokenizer",
|
||||||
"meilidb-types",
|
"meilisearch-types",
|
||||||
]
|
]
|
||||||
|
|
||||||
[profile.release]
|
[profile.release]
|
||||||
|
@ -23,7 +23,7 @@ RUN apk update --quiet
|
|||||||
RUN apk add libressl
|
RUN apk add libressl
|
||||||
RUN apk add build-base
|
RUN apk add build-base
|
||||||
|
|
||||||
COPY --from=compiler /meilisearch/target/release/meilidb-http .
|
COPY --from=compiler /meilisearch/target/release/meilisearch .
|
||||||
|
|
||||||
ENV MEILI_HTTP_ADDR 0.0.0.0:8080
|
ENV MEILI_HTTP_ADDR 0.0.0.0:8080
|
||||||
CMD ./meilidb-http
|
CMD ./meilisearch
|
||||||
|
38
README.md
38
README.md
@ -1,7 +1,7 @@
|
|||||||
# MeiliDB
|
# MeiliSearch
|
||||||
|
|
||||||
[![Build Status](https://github.com/meilisearch/MeiliDB/workflows/Cargo%20test/badge.svg)](https://github.com/meilisearch/MeiliDB/actions)
|
[![Build Status](https://github.com/meilisearch/MeiliSearch/workflows/Cargo%20test/badge.svg)](https://github.com/meilisearch/MeiliSearch/actions)
|
||||||
[![dependency status](https://deps.rs/repo/github/meilisearch/MeiliDB/status.svg)](https://deps.rs/repo/github/meilisearch/MeiliDB)
|
[![dependency status](https://deps.rs/repo/github/meilisearch/MeiliSearch/status.svg)](https://deps.rs/repo/github/meilisearch/MeiliSearch)
|
||||||
[![License](https://img.shields.io/badge/license-commons%20clause-lightgrey)](https://commonsclause.com/)
|
[![License](https://img.shields.io/badge/license-commons%20clause-lightgrey)](https://commonsclause.com/)
|
||||||
|
|
||||||
Ultra relevant and instant full-text search API.
|
Ultra relevant and instant full-text search API.
|
||||||
@ -10,24 +10,24 @@ MeiliSearch is a powerful, fast, open-source, easy to use and deploy search engi
|
|||||||
|
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- Provides [6 default ranking criteria](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/criterion/mod.rs#L107-L113) used to [bucket sort](https://en.wikipedia.org/wiki/Bucket_sort) documents
|
- Provides [6 default ranking criteria](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-core/src/criterion/mod.rs#L107-L113) used to [bucket sort](https://en.wikipedia.org/wiki/Bucket_sort) documents
|
||||||
- Accepts [custom criteria](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/criterion/mod.rs#L24-L33) and can apply them in any custom order
|
- Accepts [custom criteria](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-core/src/criterion/mod.rs#L24-L33) and can apply them in any custom order
|
||||||
- Support [ranged queries](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L283), useful for paginating results
|
- Support [ranged queries](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-core/src/query_builder.rs#L283), useful for paginating results
|
||||||
- Can [distinct](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L265-L270) and [filter](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L246-L259) returned documents based on context defined rules
|
- Can [distinct](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-core/src/query_builder.rs#L265-L270) and [filter](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-core/src/query_builder.rs#L246-L259) returned documents based on context defined rules
|
||||||
- Searches for [concatenated](https://github.com/meilisearch/MeiliDB/pull/164) and [split query words](https://github.com/meilisearch/MeiliDB/pull/232) to improve the search quality.
|
- Searches for [concatenated](https://github.com/meilisearch/MeiliSearch/pull/164) and [split query words](https://github.com/meilisearch/MeiliSearch/pull/232) to improve the search quality.
|
||||||
- Can store complete documents or only [user schema specified fields](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-schema/src/lib.rs#L265-L279)
|
- Can store complete documents or only [user schema specified fields](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-schema/src/lib.rs#L265-L279)
|
||||||
- The [default tokenizer](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-tokenizer/src/lib.rs) can index latin and kanji based languages
|
- The [default tokenizer](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-tokenizer/src/lib.rs) can index latin and kanji based languages
|
||||||
- Returns [the matching text areas](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/lib.rs#L66-L88), useful to highlight matched words in results
|
- Returns [the matching text areas](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-core/src/lib.rs#L66-L88), useful to highlight matched words in results
|
||||||
- Accepts query time search config like the [searchable attributes](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/query_builder.rs#L272-L275)
|
- Accepts query time search config like the [searchable attributes](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-core/src/query_builder.rs#L272-L275)
|
||||||
- Supports [runtime incremental indexing](https://github.com/meilisearch/MeiliDB/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilidb-core/src/store/mod.rs#L143-L173)
|
- Supports [runtime incremental indexing](https://github.com/meilisearch/MeiliSearch/blob/dc5c42821e1340e96cb90a3da472264624a26326/meilisearch-core/src/store/mod.rs#L143-L173)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
It uses [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database) as the internal key-value store. The key-value store allows us to handle updates and queries with small memory and CPU overheads. The whole ranking system is [data oriented](https://github.com/meilisearch/MeiliDB/issues/82) and provides great performances.
|
It uses [LMDB](https://en.wikipedia.org/wiki/Lightning_Memory-Mapped_Database) as the internal key-value store. The key-value store allows us to handle updates and queries with small memory and CPU overheads. The whole ranking system is [data oriented](https://github.com/meilisearch/MeiliSearch/issues/82) and provides great performances.
|
||||||
|
|
||||||
You can [read the deep dive](deep-dive.md) if you want more information on the engine, it describes the whole process of generating updates and handling queries or you can take a look at the [typos and ranking rules](typos-ranking-rules.md) if you want to know the default rules used to sort the documents.
|
You can [read the deep dive](deep-dive.md) if you want more information on the engine, it describes the whole process of generating updates and handling queries or you can take a look at the [typos and ranking rules](typos-ranking-rules.md) if you want to know the default rules used to sort the documents.
|
||||||
|
|
||||||
We will be glad if you submit issues and pull requests. You can help to grow this project and start contributing by checking [issues tagged "good-first-issue"](https://github.com/meilisearch/MeiliDB/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22). It is a good start!
|
We will be glad if you submit issues and pull requests. You can help to grow this project and start contributing by checking [issues tagged "good-first-issue"](https://github.com/meilisearch/MeiliSearch/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22). It is a good start!
|
||||||
|
|
||||||
[![crates.io demo gif](misc/crates-io-demo.gif)](https://crates.meilisearch.com)
|
[![crates.io demo gif](misc/crates-io-demo.gif)](https://crates.meilisearch.com)
|
||||||
|
|
||||||
@ -37,7 +37,7 @@ We will be glad if you submit issues and pull requests. You can help to grow thi
|
|||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
|
|
||||||
You can deploy your own instant, relevant and typo-tolerant MeiliDB search engine by yourself too.
|
You can deploy your own instant, relevant and typo-tolerant MeiliSearch search engine by yourself too.
|
||||||
Something similar to the demo above can be achieved by following these three little steps first.
|
Something similar to the demo above can be achieved by following these three little steps first.
|
||||||
You will need to create your own web front display to make it pretty though.
|
You will need to create your own web front display to make it pretty though.
|
||||||
|
|
||||||
@ -57,7 +57,7 @@ RUST_LOG=info cargo run --release
|
|||||||
|
|
||||||
### Create an Index and Upload Some Documents
|
### Create an Index and Upload Some Documents
|
||||||
|
|
||||||
MeiliDB can serve multiple indexes, with different kinds of documents,
|
MeiliSearch can serve multiple indexes, with different kinds of documents,
|
||||||
therefore, it is required to create the index before sending documents to it.
|
therefore, it is required to create the index before sending documents to it.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@ -137,10 +137,10 @@ We have seen much better performances when [using jemalloc as the global allocat
|
|||||||
|
|
||||||
## Usage and Examples
|
## Usage and Examples
|
||||||
|
|
||||||
MeiliDB also provides an example binary that is mostly used for features testing.
|
MeiliSearch also provides an example binary that is mostly used for features testing.
|
||||||
Note that the example binary indexes data faster because it reads CSV files directly instead of JSON HTTP payloads.
|
Note that the example binary indexes data faster because it reads CSV files directly instead of JSON HTTP payloads.
|
||||||
|
|
||||||
The _index_ subcommand has been made to create an index and inject documents into it. Using the command line below, the index will be named _movies_ and the _19 700_ movies of the `datasets/` will be injected in MeiliDB.
|
The _index_ subcommand has been made to create an index and inject documents into it. Using the command line below, the index will be named _movies_ and the _19 700_ movies of the `datasets/` will be injected in MeiliSearch.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
cargo run --release --example from_file -- \
|
cargo run --release --example from_file -- \
|
||||||
|
36
deep-dive.md
36
deep-dive.md
@ -1,8 +1,8 @@
|
|||||||
# A deep dive in MeiliDB
|
# A deep dive in MeiliSearch
|
||||||
|
|
||||||
On the 15 of May 2019.
|
On the 15 of May 2019.
|
||||||
|
|
||||||
MeiliDB is a full text search engine based on a finite state transducer named [fst](https://github.com/BurntSushi/fst) and a key-value store named [sled](https://github.com/spacejam/sled). The goal of a search engine is to store data and to respond to queries as accurately and quickly as possible. To achieve this it must save the matching words in an [inverted index](https://en.wikipedia.org/wiki/Inverted_index).
|
MeiliSearch is a full text search engine based on a finite state transducer named [fst](https://github.com/BurntSushi/fst) and a key-value store named [sled](https://github.com/spacejam/sled). The goal of a search engine is to store data and to respond to queries as accurately and quickly as possible. To achieve this it must save the matching words in an [inverted index](https://en.wikipedia.org/wiki/Inverted_index).
|
||||||
|
|
||||||
<!-- MarkdownTOC autolink="true" -->
|
<!-- MarkdownTOC autolink="true" -->
|
||||||
|
|
||||||
@ -22,7 +22,7 @@ MeiliDB is a full text search engine based on a final state transducer named [fs
|
|||||||
|
|
||||||
## Where is the data stored?
|
## Where is the data stored?
|
||||||
|
|
||||||
MeiliDB is entirely backed by a key-value store like any good database (i.e. Postgres, MySQL). This brings a great flexibility in the way documents can be stored and updates handled along time.
|
MeiliSearch is entirely backed by a key-value store like any good database (i.e. Postgres, MySQL). This brings a great flexibility in the way documents can be stored and updates handled along time.
|
||||||
|
|
||||||
[sled will bring some](https://github.com/spacejam/sled/tree/434533332a3f485e6d2e467023be0a0b55d3a1af#plans) of the [A.C.I.D. properties](https://en.wikipedia.org/wiki/ACID_(computer_science)) to help us be sure the saved data is consistent.
|
[sled will bring some](https://github.com/spacejam/sled/tree/434533332a3f485e6d2e467023be0a0b55d3a1af#plans) of the [A.C.I.D. properties](https://en.wikipedia.org/wiki/ACID_(computer_science)) to help us be sure the saved data is consistent.
|
||||||
|
|
||||||
@ -34,7 +34,7 @@ It contain the inverted word index, the schema and the documents fields.
|
|||||||
|
|
||||||
### The inverted word index
|
### The inverted word index
|
||||||
|
|
||||||
[The inverted word index](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/words_index.rs) is a sled Tree dedicated to store and give access to all documents that contains a specific word. The information stored under the word is simply a big ordered array of where in the document the word has been found. In other word, a big list of [`DocIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/lib.rs#L35-L51).
|
[The inverted word index](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-data/src/database/words_index.rs) is a sled Tree dedicated to store and give access to all documents that contains a specific word. The information stored under the word is simply a big ordered array of where in the document the word has been found. In other word, a big list of [`DocIndex`](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-core/src/lib.rs#L35-L51).
|
||||||
|
|
||||||
#### A final state transducer
|
#### A final state transducer
|
||||||
|
|
||||||
@ -42,27 +42,27 @@ _...also abbreviated fst_
|
|||||||
|
|
||||||
This is the first entry point of the engine. You can read more about how it works in the beautiful blog post by @BurntSushi, [Index 1,600,000,000 Keys with Automata and Rust](https://blog.burntsushi.net/transducers/).
|
This is the first entry point of the engine. You can read more about how it works in the beautiful blog post by @BurntSushi, [Index 1,600,000,000 Keys with Automata and Rust](https://blog.burntsushi.net/transducers/).
|
||||||
|
|
||||||
To make it short it is a powerful way to store all the words that are present in the indexed documents. You construct it by giving it all the words you want to index. When you want to search in it you can provide any automaton you want, in MeiliDB [a custom levenshtein automaton](https://github.com/tantivy-search/levenshtein-automata/) is used.
|
To make it short it is a powerful way to store all the words that are present in the indexed documents. You construct it by giving it all the words you want to index. When you want to search in it you can provide any automaton you want, in MeiliSearch [a custom levenshtein automaton](https://github.com/tantivy-search/levenshtein-automata/) is used.
|
||||||
|
|
||||||
#### Document indexes
|
#### Document indexes
|
||||||
|
|
||||||
The `fst` will only return the words that match with the search automaton but the goal of the search engine is to retrieve all matches in all the documents when a query is made. You want it to return some sort of position in an attribute in a document, an information about where the given word matched.
|
The `fst` will only return the words that match with the search automaton but the goal of the search engine is to retrieve all matches in all the documents when a query is made. You want it to return some sort of position in an attribute in a document, an information about where the given word matched.
|
||||||
|
|
||||||
To make it possible we retrieve all of the `DocIndex` corresponding to all the matching words in the fst, we use the [`WordsIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/words_index.rs#L11-L21) Tree to get the `DocIndexes` corresponding the words.
|
To make it possible we retrieve all of the `DocIndex` corresponding to all the matching words in the fst, we use the [`WordsIndex`](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-data/src/database/words_index.rs#L11-L21) Tree to get the `DocIndexes` corresponding the words.
|
||||||
|
|
||||||
### The schema
|
### The schema
|
||||||
|
|
||||||
The schema is a data structure that represents which documents attributes should be stored and which should be indexed. It is stored under a the [`MainIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/main_index.rs#L12) Tree and given to MeiliDB only at the creation of an index.
|
The schema is a data structure that represents which documents attributes should be stored and which should be indexed. It is stored under a the [`MainIndex`](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-data/src/database/main_index.rs#L12) Tree and given to MeiliSearch only at the creation of an index.
|
||||||
|
|
||||||
Each document attribute is associated to a unique 16 bit number named [`SchemaAttr`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/schema.rs#L186).
|
Each document attribute is associated to a unique 16 bit number named [`SchemaAttr`](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-data/src/schema.rs#L186).
|
||||||
|
|
||||||
In the future, this schema type could be given along with updates; the database would then be able to handle a new schema and reindex itself according to the new one.
|
In the future, this schema type could be given along with updates; the database would then be able to handle a new schema and reindex itself according to the new one.
|
||||||
|
|
||||||
### Document attributes
|
### Document attributes
|
||||||
|
|
||||||
When the engine handle a query the result that the requester want is a document, not only the [`Matches`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/lib.rs#L62-L88) associated to it, fields of the original document must be returned too.
|
When the engine handle a query the result that the requester want is a document, not only the [`Matches`](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-core/src/lib.rs#L62-L88) associated to it, fields of the original document must be returned too.
|
||||||
|
|
||||||
So MeiliDB again uses the power of the underlying key-value store and save the documents attributes marked as _STORE_ in the schema. The dedicated Tree for this information is the [`DocumentsIndex`](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/documents_index.rs#L11).
|
So MeiliSearch again uses the power of the underlying key-value store and save the documents attributes marked as _STORE_ in the schema. The dedicated Tree for this information is the [`DocumentsIndex`](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-data/src/database/documents_index.rs#L11).
|
||||||
|
|
||||||
When a document field is saved in the key-value store its value is binary encoded using [message pack](https://github.com/3Hren/msgpack-rust), so a document must be serializable using serde.
|
When a document field is saved in the key-value store its value is binary encoded using [message pack](https://github.com/3Hren/msgpack-rust), so a document must be serializable using serde.
|
||||||
|
|
||||||
@ -70,26 +70,26 @@ When a document field is saved in the key-value store its value is binary encode
|
|||||||
|
|
||||||
## How is a request processed?
|
## How is a request processed?
|
||||||
|
|
||||||
Now that we have our inverted index we are able to return results based on a query. In the MeiliDB universe a query is a simple string containing words.
|
Now that we have our inverted index we are able to return results based on a query. In the MeiliSearch universe a query is a simple string containing words.
|
||||||
|
|
||||||
### Query lexemes
|
### Query lexemes
|
||||||
|
|
||||||
The first step to be able to call the underlying structures is to split the query in words, for that we use a [custom tokenizer](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-tokenizer/src/lib.rs#L82-L84). Note that a tokenizer is specialized for a human language, this is the hard part.
|
The first step to be able to call the underlying structures is to split the query in words, for that we use a [custom tokenizer](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-tokenizer/src/lib.rs#L82-L84). Note that a tokenizer is specialized for a human language, this is the hard part.
|
||||||
|
|
||||||
### Automatons and query index
|
### Automatons and query index
|
||||||
|
|
||||||
So to query the fst we need an automaton, in MeiliDB we use a [levenshtein automaton](https://en.wikipedia.org/wiki/Levenshtein_automaton), this automaton is constructed using a string and a maximum distance. According to the [Algolia's blog post](https://blog.algolia.com/inside-the-algolia-engine-part-3-query-processing/#algolia%e2%80%99s-way-of-searching-for-alternatives) we [created the DFAs](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/automaton.rs#L59-L78) with different settings.
|
So to query the fst we need an automaton, in MeiliSearch we use a [levenshtein automaton](https://en.wikipedia.org/wiki/Levenshtein_automaton), this automaton is constructed using a string and a maximum distance. According to the [Algolia's blog post](https://blog.algolia.com/inside-the-algolia-engine-part-3-query-processing/#algolia%e2%80%99s-way-of-searching-for-alternatives) we [created the DFAs](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-core/src/automaton.rs#L59-L78) with different settings.
|
||||||
|
|
||||||
Thanks to the power of the fst library [it is possible to union multiple automatons](https://docs.rs/fst/0.3.2/fst/map/struct.OpBuilder.html#method.union) on the same fst set. The `Stream` is able to return all the matching words. We use these words to find the whole list of `DocIndexes` associated.
|
Thanks to the power of the fst library [it is possible to union multiple automatons](https://docs.rs/fst/0.3.2/fst/map/struct.OpBuilder.html#method.union) on the same fst set. The `Stream` is able to return all the matching words. We use these words to find the whole list of `DocIndexes` associated.
|
||||||
|
|
||||||
With all these informations it is possible [to reconstruct a list of all the `DocIndexes` associated](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/query_builder.rs#L103-L130) with the words queried.
|
With all these informations it is possible [to reconstruct a list of all the `DocIndexes` associated](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-core/src/query_builder.rs#L103-L130) with the words queried.
|
||||||
|
|
||||||
### Sort by criteria
|
### Sort by criteria
|
||||||
|
|
||||||
Now that we are able to get a big list of [DocIndexes](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/lib.rs#L21-L36) it is not enough to sort them by criteria, we need more informations like the levenshtein distance or the fact that a query word match exactly the word stored in the fst. So [we stuff it a little bit](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/rank/query_builder.rs#L86-L93), and aggregate all these [Matches](https://github.com/Kerollmops/MeiliDB/blob/550dc1e99224e386516877450320f694947332d4/src/lib.rs#L47-L74) for each document. This way it will be easy to sort a simple vector of document using a bunch of functions.
|
Now that we are able to get a big list of [DocIndexes](https://github.com/Kerollmops/MeiliSearch/blob/550dc1e99224e386516877450320f694947332d4/src/lib.rs#L21-L36) it is not enough to sort them by criteria, we need more informations like the levenshtein distance or the fact that a query word match exactly the word stored in the fst. So [we stuff it a little bit](https://github.com/Kerollmops/MeiliSearch/blob/550dc1e99224e386516877450320f694947332d4/src/rank/query_builder.rs#L86-L93), and aggregate all these [Matches](https://github.com/Kerollmops/MeiliSearch/blob/550dc1e99224e386516877450320f694947332d4/src/lib.rs#L47-L74) for each document. This way it will be easy to sort a simple vector of document using a bunch of functions.
|
||||||
|
|
||||||
With this big list of documents and associated matches [we are able to sort only the part of the slice that we want](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/query_builder.rs#L160-L188) using bucket sorting. [Each criterion](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-core/src/criterion/mod.rs#L95-L101) is evaluated on each subslice without copy, thanks to [GroupByMut](https://docs.rs/slice-group-by/0.2.4/slice_group_by/) which, I hope [will soon be merged](https://github.com/rust-lang/rfcs/pull/2477).
|
With this big list of documents and associated matches [we are able to sort only the part of the slice that we want](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-core/src/query_builder.rs#L160-L188) using bucket sorting. [Each criterion](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-core/src/criterion/mod.rs#L95-L101) is evaluated on each subslice without copy, thanks to [GroupByMut](https://docs.rs/slice-group-by/0.2.4/slice_group_by/) which, I hope [will soon be merged](https://github.com/rust-lang/rfcs/pull/2477).
|
||||||
|
|
||||||
Note that it is possible to customize the criteria used by using the `QueryBuilder::with_criteria` constructor, this way you can implement some custom ranking based on the document attributes using the appropriate structure and the [`document` method](https://github.com/meilisearch/MeiliDB/blob/3db823de002243004612e36a19b4578d800dab97/meilidb-data/src/database/index.rs#L86).
|
Note that it is possible to customize the criteria used by using the `QueryBuilder::with_criteria` constructor, this way you can implement some custom ranking based on the document attributes using the appropriate structure and the [`document` method](https://github.com/meilisearch/MeiliSearch/blob/3db823de002243004612e36a19b4578d800dab97/meilisearch-data/src/database/index.rs#L86).
|
||||||
|
|
||||||
At this point, MeiliDB work is over 🎉
|
At this point, MeiliSearch work is over 🎉
|
||||||
|
@ -1,2 +0,0 @@
|
|||||||
pub mod meilidb;
|
|
||||||
pub mod tide;
|
|
@ -1,5 +1,5 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "meilidb-core"
|
name = "meilisearch-core"
|
||||||
version = "0.8.0"
|
version = "0.8.0"
|
||||||
authors = ["Kerollmops <clement@meilisearch.com>"]
|
authors = ["Kerollmops <clement@meilisearch.com>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
@ -17,9 +17,9 @@ hashbrown = { version = "0.6.0", features = ["serde"] }
|
|||||||
heed = "0.5.0"
|
heed = "0.5.0"
|
||||||
levenshtein_automata = { version = "0.1.1", features = ["fst_automaton"] }
|
levenshtein_automata = { version = "0.1.1", features = ["fst_automaton"] }
|
||||||
log = "0.4.8"
|
log = "0.4.8"
|
||||||
meilidb-schema = { path = "../meilidb-schema", version = "0.8.0" }
|
meilisearch-schema = { path = "../meilisearch-schema", version = "0.8.0" }
|
||||||
meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.8.0" }
|
meilisearch-tokenizer = { path = "../meilisearch-tokenizer", version = "0.8.0" }
|
||||||
meilidb-types = { path = "../meilidb-types", version = "0.8.0" }
|
meilisearch-types = { path = "../meilisearch-types", version = "0.8.0" }
|
||||||
once_cell = "1.2.0"
|
once_cell = "1.2.0"
|
||||||
ordered-float = { version = "1.0.2", features = ["serde"] }
|
ordered-float = { version = "1.0.2", features = ["serde"] }
|
||||||
sdset = "0.3.3"
|
sdset = "0.3.3"
|
@ -12,8 +12,8 @@ use serde::{Deserialize, Serialize};
|
|||||||
use structopt::StructOpt;
|
use structopt::StructOpt;
|
||||||
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
|
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
|
||||||
|
|
||||||
use meilidb_core::{Database, Highlight, ProcessedUpdateResult};
|
use meilisearch_core::{Database, Highlight, ProcessedUpdateResult};
|
||||||
use meilidb_schema::SchemaAttr;
|
use meilisearch_schema::SchemaAttr;
|
||||||
|
|
||||||
#[derive(Debug, StructOpt)]
|
#[derive(Debug, StructOpt)]
|
||||||
struct IndexCommand {
|
struct IndexCommand {
|
||||||
@ -124,7 +124,7 @@ fn index_command(command: IndexCommand, database: Database) -> Result<(), Box<dy
|
|||||||
match index.main.schema(&writer)? {
|
match index.main.schema(&writer)? {
|
||||||
Some(current_schema) => {
|
Some(current_schema) => {
|
||||||
if current_schema != schema {
|
if current_schema != schema {
|
||||||
return Err(meilidb_core::Error::SchemaDiffer.into());
|
return Err(meilisearch_core::Error::SchemaDiffer.into());
|
||||||
}
|
}
|
||||||
writer.abort();
|
writer.abort();
|
||||||
}
|
}
|
||||||
@ -325,7 +325,7 @@ fn search_command(command: SearchCommand, database: Database) -> Result<(), Box<
|
|||||||
let schema = index.main.schema(&reader)?;
|
let schema = index.main.schema(&reader)?;
|
||||||
reader.abort();
|
reader.abort();
|
||||||
|
|
||||||
let schema = schema.ok_or(meilidb_core::Error::SchemaMissing)?;
|
let schema = schema.ok_or(meilisearch_core::Error::SchemaMissing)?;
|
||||||
|
|
||||||
let fields = command.displayed_fields.iter().map(String::as_str);
|
let fields = command.displayed_fields.iter().map(String::as_str);
|
||||||
let fields = HashSet::from_iter(fields);
|
let fields = HashSet::from_iter(fields);
|
@ -6,7 +6,7 @@ use std::{cmp, vec};
|
|||||||
|
|
||||||
use fst::{IntoStreamer, Streamer};
|
use fst::{IntoStreamer, Streamer};
|
||||||
use levenshtein_automata::DFA;
|
use levenshtein_automata::DFA;
|
||||||
use meilidb_tokenizer::{is_cjk, split_query_string};
|
use meilisearch_tokenizer::{is_cjk, split_query_string};
|
||||||
|
|
||||||
use crate::error::MResult;
|
use crate::error::MResult;
|
||||||
use crate::store;
|
use crate::store;
|
@ -1,6 +1,6 @@
|
|||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
|
|
||||||
use meilidb_schema::SchemaAttr;
|
use meilisearch_schema::SchemaAttr;
|
||||||
use sdset::Set;
|
use sdset::Set;
|
||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
|
|
@ -4,7 +4,7 @@ use std::fmt;
|
|||||||
|
|
||||||
use crate::criterion::Criterion;
|
use crate::criterion::Criterion;
|
||||||
use crate::{RankedMap, RawDocument};
|
use crate::{RankedMap, RawDocument};
|
||||||
use meilidb_schema::{Schema, SchemaAttr};
|
use meilisearch_schema::{Schema, SchemaAttr};
|
||||||
|
|
||||||
/// An helper struct that permit to sort documents by
|
/// An helper struct that permit to sort documents by
|
||||||
/// some of their stored attributes.
|
/// some of their stored attributes.
|
||||||
@ -23,7 +23,7 @@ use meilidb_schema::{Schema, SchemaAttr};
|
|||||||
///
|
///
|
||||||
/// ```ignore
|
/// ```ignore
|
||||||
/// use serde_derive::Deserialize;
|
/// use serde_derive::Deserialize;
|
||||||
/// use meilidb::rank::criterion::*;
|
/// use meilisearch::rank::criterion::*;
|
||||||
///
|
///
|
||||||
/// let custom_ranking = SortByAttr::lower_is_better(&ranked_map, &schema, "published_at")?;
|
/// let custom_ranking = SortByAttr::lower_is_better(&ranked_map, &schema, "published_at")?;
|
||||||
///
|
///
|
@ -25,7 +25,7 @@ pub use self::ranked_map::RankedMap;
|
|||||||
pub use self::raw_document::RawDocument;
|
pub use self::raw_document::RawDocument;
|
||||||
pub use self::store::Index;
|
pub use self::store::Index;
|
||||||
pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus, UpdateType};
|
pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus, UpdateType};
|
||||||
pub use meilidb_types::{DocIndex, DocumentId, Highlight};
|
pub use meilisearch_types::{DocIndex, DocumentId, Highlight};
|
||||||
|
|
||||||
#[doc(hidden)]
|
#[doc(hidden)]
|
||||||
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
@ -695,7 +695,7 @@ mod tests {
|
|||||||
use std::iter::FromIterator;
|
use std::iter::FromIterator;
|
||||||
|
|
||||||
use fst::{IntoStreamer, Set};
|
use fst::{IntoStreamer, Set};
|
||||||
use meilidb_schema::SchemaAttr;
|
use meilisearch_schema::SchemaAttr;
|
||||||
use sdset::SetBuf;
|
use sdset::SetBuf;
|
||||||
use tempfile::TempDir;
|
use tempfile::TempDir;
|
||||||
|
|
||||||
@ -1672,7 +1672,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn deunicoded_synonyms() {
|
fn deunicoded_synonyms() {
|
||||||
let mut store = TempDatabase::from_iter(vec![
|
let mut store = TempDatabase::from_iter(vec![
|
||||||
("telephone", &[doc_index(0, 0)][..]), // meilidb indexes the unidecoded
|
("telephone", &[doc_index(0, 0)][..]), // meilisearch indexes the unidecoded
|
||||||
("téléphone", &[doc_index(0, 0)][..]), // and the original words on the same DocIndex
|
("téléphone", &[doc_index(0, 0)][..]), // and the original words on the same DocIndex
|
||||||
("iphone", &[doc_index(1, 0)][..]),
|
("iphone", &[doc_index(1, 0)][..]),
|
||||||
]);
|
]);
|
@ -1,7 +1,7 @@
|
|||||||
use std::io::{Read, Write};
|
use std::io::{Read, Write};
|
||||||
|
|
||||||
use hashbrown::HashMap;
|
use hashbrown::HashMap;
|
||||||
use meilidb_schema::SchemaAttr;
|
use meilisearch_schema::SchemaAttr;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::{DocumentId, Number};
|
use crate::{DocumentId, Number};
|
@ -1,7 +1,7 @@
|
|||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use meilidb_schema::SchemaAttr;
|
use meilisearch_schema::SchemaAttr;
|
||||||
use sdset::SetBuf;
|
use sdset::SetBuf;
|
||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
|
|
@ -3,8 +3,8 @@ use std::convert::TryFrom;
|
|||||||
|
|
||||||
use crate::{DocIndex, DocumentId};
|
use crate::{DocIndex, DocumentId};
|
||||||
use deunicode::deunicode_with_tofu;
|
use deunicode::deunicode_with_tofu;
|
||||||
use meilidb_schema::SchemaAttr;
|
use meilisearch_schema::SchemaAttr;
|
||||||
use meilidb_tokenizer::{is_cjk, SeqTokenizer, Token, Tokenizer};
|
use meilisearch_tokenizer::{is_cjk, SeqTokenizer, Token, Tokenizer};
|
||||||
use sdset::SetBuf;
|
use sdset::SetBuf;
|
||||||
|
|
||||||
const WORD_LENGTH_LIMIT: usize = 80;
|
const WORD_LENGTH_LIMIT: usize = 80;
|
@ -2,7 +2,7 @@ use std::collections::HashSet;
|
|||||||
use std::io::Cursor;
|
use std::io::Cursor;
|
||||||
use std::{error::Error, fmt};
|
use std::{error::Error, fmt};
|
||||||
|
|
||||||
use meilidb_schema::{Schema, SchemaAttr};
|
use meilisearch_schema::{Schema, SchemaAttr};
|
||||||
use serde::{de, forward_to_deserialize_any};
|
use serde::{de, forward_to_deserialize_any};
|
||||||
use serde_json::de::IoRead as SerdeJsonIoRead;
|
use serde_json::de::IoRead as SerdeJsonIoRead;
|
||||||
use serde_json::Deserializer as SerdeJsonDeserializer;
|
use serde_json::Deserializer as SerdeJsonDeserializer;
|
@ -1,4 +1,4 @@
|
|||||||
use meilidb_schema::SchemaAttr;
|
use meilisearch_schema::SchemaAttr;
|
||||||
use serde::ser;
|
use serde::ser;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
|
@ -1,4 +1,4 @@
|
|||||||
use meilidb_schema::{Schema, SchemaAttr, SchemaProps};
|
use meilisearch_schema::{Schema, SchemaAttr, SchemaProps};
|
||||||
use serde::ser;
|
use serde::ser;
|
||||||
|
|
||||||
use crate::raw_indexer::RawIndexer;
|
use crate::raw_indexer::RawIndexer;
|
@ -1,6 +1,6 @@
|
|||||||
use heed::types::{ByteSlice, OwnedType};
|
use heed::types::{ByteSlice, OwnedType};
|
||||||
use heed::Result as ZResult;
|
use heed::Result as ZResult;
|
||||||
use meilidb_schema::SchemaAttr;
|
use meilisearch_schema::SchemaAttr;
|
||||||
|
|
||||||
use super::DocumentAttrKey;
|
use super::DocumentAttrKey;
|
||||||
use crate::DocumentId;
|
use crate::DocumentId;
|
@ -2,7 +2,7 @@ use super::DocumentAttrKey;
|
|||||||
use crate::DocumentId;
|
use crate::DocumentId;
|
||||||
use heed::types::OwnedType;
|
use heed::types::OwnedType;
|
||||||
use heed::Result as ZResult;
|
use heed::Result as ZResult;
|
||||||
use meilidb_schema::SchemaAttr;
|
use meilisearch_schema::SchemaAttr;
|
||||||
|
|
||||||
#[derive(Copy, Clone)]
|
#[derive(Copy, Clone)]
|
||||||
pub struct DocumentsFieldsCounts {
|
pub struct DocumentsFieldsCounts {
|
@ -2,7 +2,7 @@ use crate::RankedMap;
|
|||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str};
|
use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str};
|
||||||
use heed::Result as ZResult;
|
use heed::Result as ZResult;
|
||||||
use meilidb_schema::Schema;
|
use meilisearch_schema::Schema;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
@ -21,7 +21,7 @@ pub use self::updates_results::UpdatesResults;
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use heed::Result as ZResult;
|
use heed::Result as ZResult;
|
||||||
use meilidb_schema::{Schema, SchemaAttr};
|
use meilisearch_schema::{Schema, SchemaAttr};
|
||||||
use serde::de::{self, Deserialize};
|
use serde::de::{self, Deserialize};
|
||||||
use zerocopy::{AsBytes, FromBytes};
|
use zerocopy::{AsBytes, FromBytes};
|
||||||
|
|
@ -1,7 +1,7 @@
|
|||||||
use std::collections::{BTreeSet, HashMap, HashSet};
|
use std::collections::{BTreeSet, HashMap, HashSet};
|
||||||
|
|
||||||
use fst::{SetBuilder, Streamer};
|
use fst::{SetBuilder, Streamer};
|
||||||
use meilidb_schema::Schema;
|
use meilisearch_schema::Schema;
|
||||||
use sdset::{duo::DifferenceByKey, SetBuf, SetOperation};
|
use sdset::{duo::DifferenceByKey, SetBuf, SetOperation};
|
||||||
|
|
||||||
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
use crate::database::{UpdateEvent, UpdateEventsEmitter};
|
@ -30,7 +30,7 @@ use log::debug;
|
|||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::{store, DocumentId, MResult};
|
use crate::{store, DocumentId, MResult};
|
||||||
use meilidb_schema::Schema;
|
use meilisearch_schema::Schema;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct Update {
|
pub struct Update {
|
@ -1,4 +1,4 @@
|
|||||||
use meilidb_schema::{Diff, Schema};
|
use meilisearch_schema::{Diff, Schema};
|
||||||
|
|
||||||
use crate::update::documents_addition::reindex_all_documents;
|
use crate::update::documents_addition::reindex_all_documents;
|
||||||
use crate::update::{next_update_id, Update};
|
use crate::update::{next_update_id, Update};
|
||||||
@ -21,7 +21,7 @@ pub fn apply_schema_update(
|
|||||||
let mut need_full_reindexing = false;
|
let mut need_full_reindexing = false;
|
||||||
|
|
||||||
if let Some(old_schema) = main_store.schema(writer)? {
|
if let Some(old_schema) = main_store.schema(writer)? {
|
||||||
for diff in meilidb_schema::diff(&old_schema, new_schema) {
|
for diff in meilisearch_schema::diff(&old_schema, new_schema) {
|
||||||
match diff {
|
match diff {
|
||||||
Diff::IdentChange { .. } => return Err(CannotUpdateSchemaIdentifier.into()),
|
Diff::IdentChange { .. } => return Err(CannotUpdateSchemaIdentifier.into()),
|
||||||
Diff::AttrMove { .. } => return Err(CannotReorderSchemaAttribute.into()),
|
Diff::AttrMove { .. } => return Err(CannotReorderSchemaAttribute.into()),
|
@ -1,5 +1,5 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "meilidb-http"
|
name = "meilisearch-http"
|
||||||
version = "0.8.0"
|
version = "0.8.0"
|
||||||
authors = [
|
authors = [
|
||||||
"Quentin de Quelen <quentin@dequelen.me>",
|
"Quentin de Quelen <quentin@dequelen.me>",
|
||||||
@ -7,6 +7,10 @@ authors = [
|
|||||||
]
|
]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "meilisearch"
|
||||||
|
path = "src/main.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
bincode = "1.2.0"
|
bincode = "1.2.0"
|
||||||
chrono = { version = "0.4.9", features = ["serde"] }
|
chrono = { version = "0.4.9", features = ["serde"] }
|
||||||
@ -18,8 +22,8 @@ indexmap = { version = "1.3.0", features = ["serde-1"] }
|
|||||||
isahc = "0.7.6"
|
isahc = "0.7.6"
|
||||||
log = "0.4.8"
|
log = "0.4.8"
|
||||||
main_error = "0.1.0"
|
main_error = "0.1.0"
|
||||||
meilidb-core = { path = "../meilidb-core", version = "0.8.0" }
|
meilisearch-core = { path = "../meilisearch-core", version = "0.8.0" }
|
||||||
meilidb-schema = { path = "../meilidb-schema", version = "0.8.0" }
|
meilisearch-schema = { path = "../meilisearch-schema", version = "0.8.0" }
|
||||||
pretty-bytes = "0.2.2"
|
pretty-bytes = "0.2.2"
|
||||||
rand = "0.7.2"
|
rand = "0.7.2"
|
||||||
rayon = "1.2.0"
|
rayon = "1.2.0"
|
@ -5,7 +5,7 @@ use std::sync::Arc;
|
|||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use heed::types::{SerdeBincode, Str};
|
use heed::types::{SerdeBincode, Str};
|
||||||
use log::error;
|
use log::error;
|
||||||
use meilidb_core::{Database, Error as MError, MResult};
|
use meilisearch_core::{Database, Error as MError, MResult};
|
||||||
use sysinfo::Pid;
|
use sysinfo::Pid;
|
||||||
|
|
||||||
use crate::option::Opt;
|
use crate::option::Opt;
|
@ -1,10 +1,10 @@
|
|||||||
use crate::routes::setting::{RankingOrdering, SettingBody};
|
use crate::routes::setting::{RankingOrdering, SettingBody};
|
||||||
use indexmap::IndexMap;
|
use indexmap::IndexMap;
|
||||||
use log::error;
|
use log::error;
|
||||||
use meilidb_core::criterion::*;
|
use meilisearch_core::criterion::*;
|
||||||
use meilidb_core::Highlight;
|
use meilisearch_core::Highlight;
|
||||||
use meilidb_core::{Index, RankedMap};
|
use meilisearch_core::{Index, RankedMap};
|
||||||
use meilidb_schema::{Schema, SchemaAttr};
|
use meilisearch_schema::{Schema, SchemaAttr};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
@ -57,8 +57,8 @@ impl fmt::Display for Error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<meilidb_core::Error> for Error {
|
impl From<meilisearch_core::Error> for Error {
|
||||||
fn from(error: meilidb_core::Error) -> Self {
|
fn from(error: meilisearch_core::Error) -> Self {
|
||||||
Error::Internal(error.to_string())
|
Error::Internal(error.to_string())
|
||||||
}
|
}
|
||||||
}
|
}
|
2
meilisearch-http/src/helpers/mod.rs
Normal file
2
meilisearch-http/src/helpers/mod.rs
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
pub mod meilisearch;
|
||||||
|
pub mod tide;
|
@ -3,7 +3,7 @@ use crate::models::token::*;
|
|||||||
use crate::Data;
|
use crate::Data;
|
||||||
use chrono::Utc;
|
use chrono::Utc;
|
||||||
use heed::types::{SerdeBincode, Str};
|
use heed::types::{SerdeBincode, Str};
|
||||||
use meilidb_core::Index;
|
use meilisearch_core::Index;
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use tide::Context;
|
use tide::Context;
|
||||||
|
|
||||||
@ -106,7 +106,7 @@ impl ContextExt for Context<Data> {
|
|||||||
let name = self
|
let name = self
|
||||||
.param::<Value>("identifier")
|
.param::<Value>("identifier")
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.map(meilidb_core::serde::value_to_string)
|
.map(meilisearch_core::serde::value_to_string)
|
||||||
.map_err(|e| ResponseError::bad_parameter("identifier", e))?
|
.map_err(|e| ResponseError::bad_parameter("identifier", e))?
|
||||||
.ok_or(ResponseError::bad_parameter(
|
.ok_or(ResponseError::bad_parameter(
|
||||||
"identifier",
|
"identifier",
|
@ -8,10 +8,10 @@ use structopt::StructOpt;
|
|||||||
use tide::middleware::{CorsMiddleware, CorsOrigin};
|
use tide::middleware::{CorsMiddleware, CorsOrigin};
|
||||||
use tide_log::RequestLogger;
|
use tide_log::RequestLogger;
|
||||||
|
|
||||||
use meilidb_http::data::Data;
|
use meilisearch_http::data::Data;
|
||||||
use meilidb_http::option::Opt;
|
use meilisearch_http::option::Opt;
|
||||||
use meilidb_http::routes;
|
use meilisearch_http::routes;
|
||||||
use meilidb_http::routes::index::index_update_callback;
|
use meilisearch_http::routes::index::index_update_callback;
|
||||||
|
|
||||||
mod analytics;
|
mod analytics;
|
||||||
|
|
@ -1,7 +1,7 @@
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use indexmap::IndexMap;
|
use indexmap::IndexMap;
|
||||||
use meilidb_schema::{Schema, SchemaBuilder, SchemaProps};
|
use meilisearch_schema::{Schema, SchemaBuilder, SchemaProps};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Eq, PartialEq, Hash, Serialize, Deserialize)]
|
@ -3,7 +3,7 @@ use structopt::StructOpt;
|
|||||||
#[derive(Debug, Clone, StructOpt)]
|
#[derive(Debug, Clone, StructOpt)]
|
||||||
pub struct Opt {
|
pub struct Opt {
|
||||||
/// The destination where the database must be created.
|
/// The destination where the database must be created.
|
||||||
#[structopt(long, env = "MEILI_DB_PATH", default_value = "/tmp/meilidb")]
|
#[structopt(long, env = "MEILI_DB_PATH", default_value = "/tmp/meilisearch")]
|
||||||
pub db_path: String,
|
pub db_path: String,
|
||||||
|
|
||||||
/// The address on which the http server will listen.
|
/// The address on which the http server will listen.
|
@ -19,7 +19,7 @@ pub async fn get_document(ctx: Context<Data>) -> SResult<Response> {
|
|||||||
let index = ctx.index()?;
|
let index = ctx.index()?;
|
||||||
|
|
||||||
let identifier = ctx.identifier()?;
|
let identifier = ctx.identifier()?;
|
||||||
let document_id = meilidb_core::serde::compute_document_id(identifier.clone());
|
let document_id = meilisearch_core::serde::compute_document_id(identifier.clone());
|
||||||
|
|
||||||
let env = &ctx.state().db.env;
|
let env = &ctx.state().db.env;
|
||||||
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
let reader = env.read_txn().map_err(ResponseError::internal)?;
|
||||||
@ -47,7 +47,7 @@ pub async fn delete_document(ctx: Context<Data>) -> SResult<Response> {
|
|||||||
|
|
||||||
let index = ctx.index()?;
|
let index = ctx.index()?;
|
||||||
let identifier = ctx.identifier()?;
|
let identifier = ctx.identifier()?;
|
||||||
let document_id = meilidb_core::serde::compute_document_id(identifier.clone());
|
let document_id = meilisearch_core::serde::compute_document_id(identifier.clone());
|
||||||
|
|
||||||
let env = &ctx.state().db.env;
|
let env = &ctx.state().db.env;
|
||||||
let mut writer = env.write_txn().map_err(ResponseError::internal)?;
|
let mut writer = env.write_txn().map_err(ResponseError::internal)?;
|
||||||
@ -117,8 +117,8 @@ pub async fn get_all_documents(ctx: Context<Data>) -> SResult<Response> {
|
|||||||
Ok(tide::response::json(response_body))
|
Ok(tide::response::json(response_body))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn infered_schema(document: &IndexMap<String, Value>) -> Option<meilidb_schema::Schema> {
|
fn infered_schema(document: &IndexMap<String, Value>) -> Option<meilisearch_schema::Schema> {
|
||||||
use meilidb_schema::{SchemaBuilder, DISPLAYED, INDEXED};
|
use meilisearch_schema::{SchemaBuilder, DISPLAYED, INDEXED};
|
||||||
|
|
||||||
let mut identifier = None;
|
let mut identifier = None;
|
||||||
for key in document.keys() {
|
for key in document.keys() {
|
||||||
@ -206,9 +206,9 @@ pub async fn delete_multiple_documents(mut ctx: Context<Data>) -> SResult<Respon
|
|||||||
let mut documents_deletion = index.documents_deletion();
|
let mut documents_deletion = index.documents_deletion();
|
||||||
|
|
||||||
for identifier in data {
|
for identifier in data {
|
||||||
if let Some(identifier) = meilidb_core::serde::value_to_string(&identifier) {
|
if let Some(identifier) = meilisearch_core::serde::value_to_string(&identifier) {
|
||||||
documents_deletion
|
documents_deletion
|
||||||
.delete_document_by_id(meilidb_core::serde::compute_document_id(identifier));
|
.delete_document_by_id(meilisearch_core::serde::compute_document_id(identifier));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -1,8 +1,8 @@
|
|||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use http::StatusCode;
|
use http::StatusCode;
|
||||||
use log::error;
|
use log::error;
|
||||||
use meilidb_core::ProcessedUpdateResult;
|
use meilisearch_core::ProcessedUpdateResult;
|
||||||
use meilidb_schema::{Schema, SchemaBuilder};
|
use meilisearch_schema::{Schema, SchemaBuilder};
|
||||||
use rand::seq::SliceRandom;
|
use rand::seq::SliceRandom;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_json::json;
|
use serde_json::json;
|
@ -2,14 +2,14 @@ use std::collections::HashMap;
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use meilidb_core::Index;
|
use meilisearch_core::Index;
|
||||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use tide::querystring::ContextExt as QSContextExt;
|
use tide::querystring::ContextExt as QSContextExt;
|
||||||
use tide::{Context, Response};
|
use tide::{Context, Response};
|
||||||
|
|
||||||
use crate::error::{ResponseError, SResult};
|
use crate::error::{ResponseError, SResult};
|
||||||
use crate::helpers::meilidb::{Error, IndexSearchExt, SearchHit};
|
use crate::helpers::meilisearch::{Error, IndexSearchExt, SearchHit};
|
||||||
use crate::helpers::tide::ContextExt;
|
use crate::helpers::tide::ContextExt;
|
||||||
use crate::Data;
|
use crate::Data;
|
||||||
|
|
@ -1,5 +1,5 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "meilidb-schema"
|
name = "meilisearch-schema"
|
||||||
version = "0.8.0"
|
version = "0.8.0"
|
||||||
authors = ["Kerollmops <renault.cle@gmail.com>"]
|
authors = ["Kerollmops <renault.cle@gmail.com>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
@ -1,5 +1,5 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "meilidb-tokenizer"
|
name = "meilisearch-tokenizer"
|
||||||
version = "0.8.0"
|
version = "0.8.0"
|
||||||
authors = ["Kerollmops <renault.cle@gmail.com>"]
|
authors = ["Kerollmops <renault.cle@gmail.com>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
@ -1,5 +1,5 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "meilidb-types"
|
name = "meilisearch-types"
|
||||||
version = "0.8.0"
|
version = "0.8.0"
|
||||||
authors = ["Clément Renault <renault.cle@gmail.com>"]
|
authors = ["Clément Renault <renault.cle@gmail.com>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
@ -1,6 +1,6 @@
|
|||||||
# Typo and Ranking rules
|
# Typo and Ranking rules
|
||||||
|
|
||||||
This is an explanation of the default rules used in MeiliDB.
|
This is an explanation of the default rules used in MeiliSearch.
|
||||||
|
|
||||||
First we have to explain some terms that are used in this reading.
|
First we have to explain some terms that are used in this reading.
|
||||||
|
|
||||||
@ -44,7 +44,7 @@ This means that "satuday", which is 7 characters long, use the second rule and e
|
|||||||
|
|
||||||
## Ranking rules
|
## Ranking rules
|
||||||
|
|
||||||
All documents that have been aggregated using the typo rules above can now be sorted. MeiliDB uses a bucket sort.
|
All documents that have been aggregated using the typo rules above can now be sorted. MeiliSearch uses a bucket sort.
|
||||||
|
|
||||||
What is a bucket sort? We sort all the documents with the first rule, for all documents that can't be separated we create a group and sort it using the second rule, and so on.
|
What is a bucket sort? We sort all the documents with the first rule, for all documents that can't be separated we create a group and sort it using the second rule, and so on.
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user