Merge #3570

3570: Get documents by filter r=irevoire a=dureuill

# Pull Request

## Related issue

Associated spec: https://github.com/meilisearch/specifications/pull/234

None really, this is more of an extension of #3477: since after this issue we'll be able to delete documents by filter, it makes sense to also be able to get documents by filter.

## What does this PR do?

### User standpoint

- Add a new `filter` URL parameter to `GET /indexes/{:indexUid}/documents` and a new `POST /indexes/{:indexUid}/documents/fetch` route accepting the same `offset`, `limit`, `fields`, and `filter` parameters.

### Implementation standpoint

- Add a new `Index::iter_documents` method to iterate over a set of documents rather than returning a vector of them.
- Rewrite the other `Index::*documents` methods to use the new `Index::iter_documents` method.

## Usage

<details>
<summary>Sample request and response</summary>

```
curl -X POST 'http://localhost:7700/indexes/index-1101/documents/fetch' -H 'Content-Type: application/json' --data-binary '{ "filter": "genres = Comedy", "limit": 3, "offset": 8000}' | jsonxf
```

```json
{
  "results": [
    {
      "id": 326126,
      "title": "Bad Exorcists",
      "overview": "A trio of awkward teens intend to win a horror festival by making their own movie, but wind up getting their actress possessed in the process.",
      "genres": [
        "Horror",
        "Comedy"
      ],
      "poster": "https://image.tmdb.org/t/p/w500/lwd65kPbjFacAw3QSXiwSsW6cFU.jpg",
      "release_date": 1425081600
    },
    {
      "id": 326215,
      "title": "Ooops! Noah is Gone...",
      "overview": "It's the end of the world. A flood is coming. Luckily for Dave and his son Finny, a couple of clumsy Nestrians, an Ark has been built to save all animals. But as it turns out, Nestrians aren't allowed. Sneaking on board with the involuntary help of Hazel and her daughter Leah, two Grymps, they think they're safe. Until the curious kids fall off the Ark. Now Finny and Leah struggle to survive the flood and hungry predators and attempt to reach the top of a mountain, while Dave and Hazel must put aside their differences, turn the Ark around and save their kids. It's definitely not going to be smooth sailing.",
      "genres": [
        "Animation",
        "Adventure",
        "Comedy",
        "Family"
      ],
      "poster": "https://image.tmdb.org/t/p/w500/gEJXHgpiKh89Vwjc4XUY5CIgUdB.jpg",
      "release_date": 1427328000
    },
    {
      "id": 326241,
      "title": "For Here or to Go?",
      "overview": "An aspiring Indian tech entrepreneur in the Silicon Valley finds himself unexpectedly battling the bizarre American immigration system to keep his dream alive or prepare to return home forever.",
      "genres": [
        "Drama",
        "Comedy"
      ],
      "poster": "https://image.tmdb.org/t/p/w500/ff8WaA7ItBgl36kdT232i0d0Fnq.jpg",
      "release_date": 1490918400
    }
  ],
  "offset": 8000,
  "limit": 3,
  "total": 9331
}
```

<img width="1348" alt="Capture d’écran 2023-03-08 à 10 09 04" src="https://user-images.githubusercontent.com/41078892/223670905-6932b79b-f9b8-4a41-b59e-be2171705b7d.png">

</details>

# Draft status

- [ ] Route naming: having one route be `GET /indexes/{:indexUid}/documents` and the other `POST /indexes/{:indexUid}/documents/fetch` is suboptimal (also, technically a breaking change for documents with `fetch` as uid?), but `POST /indexes/{:indexUid}/documents` is already used to insert documents.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
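For readers who prefer code over raw `curl`, an equivalent client-side call might look like the sketch below. It is purely illustrative and not part of this PR; it assumes the `reqwest` (with its `json` feature), `tokio`, and `serde_json` crates and a Meilisearch instance running locally.

```rust
use serde_json::json;

// Hypothetical client for the new `POST /indexes/{indexUid}/documents/fetch` route.
#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    let client = reqwest::Client::new();
    let response = client
        .post("http://localhost:7700/indexes/index-1101/documents/fetch")
        .json(&json!({ "filter": "genres = Comedy", "limit": 3, "offset": 8000 }))
        .send()
        .await?;

    // The body has the same `results` / `offset` / `limit` / `total` shape as shown above.
    println!("{}", response.text().await?);
    Ok(())
}
```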
Commit 4919774f2e
```diff
@@ -214,6 +214,7 @@ InvalidApiKeyUid                 , InvalidRequest      , BAD_REQUEST ;
 InvalidContentType               , InvalidRequest      , UNSUPPORTED_MEDIA_TYPE ;
 InvalidDocumentCsvDelimiter      , InvalidRequest      , BAD_REQUEST ;
 InvalidDocumentFields            , InvalidRequest      , BAD_REQUEST ;
+InvalidDocumentFilter            , InvalidRequest      , BAD_REQUEST ;
 InvalidDocumentGeoField          , InvalidRequest      , BAD_REQUEST ;
 InvalidDocumentId                , InvalidRequest      , BAD_REQUEST ;
 InvalidDocumentLimit             , InvalidRequest      , BAD_REQUEST ;
```
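As a reading aid (an illustrative sketch, not the `meilisearch-types` macro output): the new table entry means filter errors on the documents routes surface under the `invalid_document_filter` error code with an HTTP 400 Bad Request status, which the tests further down exercise.

```rust
// Illustrative only: what the new error-code table row amounts to at runtime.
enum Code {
    InvalidDocumentFilter,
}

impl Code {
    fn name(&self) -> &'static str {
        match self {
            Code::InvalidDocumentFilter => "invalid_document_filter",
        }
    }

    fn http_status(&self) -> u16 {
        match self {
            Code::InvalidDocumentFilter => 400, // BAD_REQUEST
        }
    }
}

fn main() {
    let code = Code::InvalidDocumentFilter;
    println!("{} -> HTTP {}", code.name(), code.http_status());
}
```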
```diff
@@ -17,6 +17,7 @@ use meilisearch_types::error::{Code, ResponseError};
 use meilisearch_types::heed::RoTxn;
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::milli::update::IndexDocumentsMethod;
+use meilisearch_types::milli::DocumentId;
 use meilisearch_types::star_or::OptionStarOrList;
 use meilisearch_types::tasks::KindWithContent;
 use meilisearch_types::{milli, Document, Index};
@@ -36,6 +37,7 @@ use crate::extractors::authentication::GuardedData;
 use crate::extractors::payload::Payload;
 use crate::extractors::sequential_extractor::SeqHandler;
 use crate::routes::{PaginationView, SummarizedTaskView, PAGINATION_DEFAULT_LIMIT};
+use crate::search::parse_filter;
 
 static ACCEPTED_CONTENT_TYPE: Lazy<Vec<String>> = Lazy::new(|| {
     vec!["application/json".to_string(), "application/x-ndjson".to_string(), "text/csv".to_string()]
@@ -66,7 +68,7 @@ pub struct DocumentParam {
 pub fn configure(cfg: &mut web::ServiceConfig) {
     cfg.service(
         web::resource("")
-            .route(web::get().to(SeqHandler(get_all_documents)))
+            .route(web::get().to(SeqHandler(get_documents)))
            .route(web::post().to(SeqHandler(replace_documents)))
            .route(web::put().to(SeqHandler(update_documents)))
            .route(web::delete().to(SeqHandler(clear_all_documents))),
@@ -76,6 +78,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
         web::resource("/delete-batch").route(web::post().to(SeqHandler(delete_documents_batch))),
     )
     .service(web::resource("/delete").route(web::post().to(SeqHandler(delete_documents_by_filter))))
+    .service(web::resource("/fetch").route(web::post().to(SeqHandler(documents_by_query_post))))
     .service(
         web::resource("/{document_id}")
             .route(web::get().to(SeqHandler(get_document)))
```
```diff
@@ -130,29 +133,79 @@ pub async fn delete_document(
 
 #[derive(Debug, Deserr)]
 #[deserr(error = DeserrQueryParamError, rename_all = camelCase, deny_unknown_fields)]
-pub struct BrowseQuery {
+pub struct BrowseQueryGet {
     #[deserr(default, error = DeserrQueryParamError<InvalidDocumentOffset>)]
     offset: Param<usize>,
     #[deserr(default = Param(PAGINATION_DEFAULT_LIMIT), error = DeserrQueryParamError<InvalidDocumentLimit>)]
     limit: Param<usize>,
     #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFields>)]
     fields: OptionStarOrList<String>,
+    #[deserr(default, error = DeserrQueryParamError<InvalidDocumentFilter>)]
+    filter: Option<String>,
 }
 
-pub async fn get_all_documents(
+#[derive(Debug, Deserr)]
+#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
+pub struct BrowseQuery {
+    #[deserr(default, error = DeserrJsonError<InvalidDocumentOffset>)]
+    offset: usize,
+    #[deserr(default = PAGINATION_DEFAULT_LIMIT, error = DeserrJsonError<InvalidDocumentLimit>)]
+    limit: usize,
+    #[deserr(default, error = DeserrJsonError<InvalidDocumentFields>)]
+    fields: Option<Vec<String>>,
+    #[deserr(default, error = DeserrJsonError<InvalidDocumentFilter>)]
+    filter: Option<Value>,
+}
+
+pub async fn documents_by_query_post(
     index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
     index_uid: web::Path<String>,
-    params: AwebQueryParameter<BrowseQuery, DeserrQueryParamError>,
+    body: AwebJson<BrowseQuery, DeserrJsonError>,
 ) -> Result<HttpResponse, ResponseError> {
+    debug!("called with body: {:?}", body);
+
+    documents_by_query(&index_scheduler, index_uid, body.into_inner())
+}
+
+pub async fn get_documents(
+    index_scheduler: GuardedData<ActionPolicy<{ actions::DOCUMENTS_GET }>, Data<IndexScheduler>>,
+    index_uid: web::Path<String>,
+    params: AwebQueryParameter<BrowseQueryGet, DeserrQueryParamError>,
+) -> Result<HttpResponse, ResponseError> {
+    debug!("called with params: {:?}", params);
+
+    let BrowseQueryGet { limit, offset, fields, filter } = params.into_inner();
+
+    let filter = match filter {
+        Some(f) => match serde_json::from_str(&f) {
+            Ok(v) => Some(v),
+            _ => Some(Value::String(f)),
+        },
+        None => None,
+    };
+
+    let query = BrowseQuery {
+        offset: offset.0,
+        limit: limit.0,
+        fields: fields.merge_star_and_none(),
+        filter,
+    };
+
+    documents_by_query(&index_scheduler, index_uid, query)
+}
+
+fn documents_by_query(
+    index_scheduler: &IndexScheduler,
+    index_uid: web::Path<String>,
+    query: BrowseQuery,
+) -> Result<HttpResponse, ResponseError> {
     let index_uid = IndexUid::try_from(index_uid.into_inner())?;
-    debug!("called with params: {:?}", params);
-    let BrowseQuery { limit, offset, fields } = params.into_inner();
-    let attributes_to_retrieve = fields.merge_star_and_none();
+    let BrowseQuery { offset, limit, fields, filter } = query;
 
     let index = index_scheduler.index(&index_uid)?;
-    let (total, documents) = retrieve_documents(&index, offset.0, limit.0, attributes_to_retrieve)?;
+    let (total, documents) = retrieve_documents(&index, offset, limit, filter, fields)?;
 
-    let ret = PaginationView::new(offset.0, limit.0, total as usize, documents);
+    let ret = PaginationView::new(offset, limit, total as usize, documents);
 
     debug!("returns: {:?}", ret);
     Ok(HttpResponse::Ok().json(ret))
```
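Since the GET route cannot carry a JSON body, the `filter` query parameter is first tried as JSON and otherwise kept as a raw filter string before being wrapped into a `BrowseQuery`. A minimal standalone sketch of that fallback, assuming only the `serde_json` crate (this is not the route code itself):

```rust
use serde_json::Value;

// Mirrors the fallback in `get_documents` above: try to parse the query-string
// filter as JSON; if that fails, treat it as a plain filter expression.
fn filter_from_query_param(filter: Option<String>) -> Option<Value> {
    match filter {
        Some(f) => match serde_json::from_str(&f) {
            Ok(v) => Some(v),
            Err(_) => Some(Value::String(f)),
        },
        None => None,
    }
}

fn main() {
    // `?filter=color = blue` is not valid JSON, so it stays a string expression.
    assert_eq!(
        filter_from_query_param(Some("color = blue".into())),
        Some(Value::String("color = blue".into()))
    );
    // A JSON array filter passed in the query string is parsed into a `Value::Array`.
    assert_eq!(
        filter_from_query_param(Some(r#"[["color = blue", "color = red"]]"#.into())),
        Some(serde_json::json!([["color = blue", "color = red"]]))
    );
    assert_eq!(filter_from_query_param(None), None);
}
```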
```diff
@@ -455,14 +508,15 @@ pub async fn clear_all_documents(
     Ok(HttpResponse::Accepted().json(task))
 }
 
-fn all_documents<'a>(
-    index: &Index,
-    rtxn: &'a RoTxn,
+fn some_documents<'a, 't: 'a>(
+    index: &'a Index,
+    rtxn: &'t RoTxn,
+    doc_ids: impl IntoIterator<Item = DocumentId> + 'a,
 ) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
     let fields_ids_map = index.fields_ids_map(rtxn)?;
     let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
 
-    Ok(index.all_documents(rtxn)?.map(move |ret| {
+    Ok(index.iter_documents(rtxn, doc_ids)?.map(move |ret| {
         ret.map_err(ResponseError::from).and_then(|(_key, document)| -> Result<_, ResponseError> {
             Ok(milli::obkv_to_json(&all_fields, &fields_ids_map, document)?)
         })
@@ -473,24 +527,45 @@ fn retrieve_documents<S: AsRef<str>>(
     index: &Index,
     offset: usize,
     limit: usize,
+    filter: Option<Value>,
     attributes_to_retrieve: Option<Vec<S>>,
 ) -> Result<(u64, Vec<Document>), ResponseError> {
     let rtxn = index.read_txn()?;
+    let filter = &filter;
+    let filter = if let Some(filter) = filter {
+        parse_filter(filter)
+            .map_err(|err| ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter))?
+    } else {
+        None
+    };
 
-    let mut documents = Vec::new();
-    for document in all_documents(index, &rtxn)?.skip(offset).take(limit) {
-        let document = match &attributes_to_retrieve {
+    let candidates = if let Some(filter) = filter {
+        filter.evaluate(&rtxn, index)?
+    } else {
+        index.documents_ids(&rtxn)?
+    };
+
+    let (it, number_of_documents) = {
+        let number_of_documents = candidates.len();
+        (
+            some_documents(index, &rtxn, candidates.into_iter().skip(offset).take(limit))?,
+            number_of_documents,
+        )
+    };
+
+    let documents: Result<Vec<_>, ResponseError> = it
+        .map(|document| {
+            Ok(match &attributes_to_retrieve {
                 Some(attributes_to_retrieve) => permissive_json_pointer::select_values(
                     &document?,
                     attributes_to_retrieve.iter().map(|s| s.as_ref()),
                 ),
                 None => document?,
-        };
-        documents.push(document);
-    }
+            })
+        })
+        .collect();
 
-    let number_of_documents = index.number_of_documents(&rtxn)?;
-    Ok((number_of_documents, documents))
+    Ok((number_of_documents, documents?))
 }
 
 fn retrieve_document<S: AsRef<str>>(
```
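Note the ordering in the new `retrieve_documents`: the filter is evaluated to a bitmap of candidate ids first, `total` is the number of candidates, and only the `offset`/`limit` window of ids is actually fetched and deserialized. A rough standalone illustration of that ordering, assuming the `roaring` crate (milli's bitmaps are based on it); this is not the actual meilisearch code:

```rust
use roaring::RoaringBitmap;

// Sketch of the ordering used above: `total` counts every candidate matching the
// filter, while only the requested page of ids is kept for document retrieval.
fn page_candidates(candidates: &RoaringBitmap, offset: usize, limit: usize) -> (u64, Vec<u32>) {
    let total = candidates.len();
    let page: Vec<u32> = candidates.iter().skip(offset).take(limit).collect();
    (total, page)
}

fn main() {
    // Pretend the filter matched documents 0..100.
    let candidates: RoaringBitmap = (0u32..100).collect();
    let (total, page) = page_candidates(&candidates, 95, 10);
    assert_eq!(total, 100); // `total` reflects all matches, not the page size
    assert_eq!(page, vec![95, 96, 97, 98, 99]);
}
```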
```diff
@@ -745,7 +745,7 @@ fn format_value<A: AsRef<[u8]>>(
     }
 }
 
-pub fn parse_filter(facets: &Value) -> Result<Option<Filter>, MeilisearchHttpError> {
+pub(crate) fn parse_filter(facets: &Value) -> Result<Option<Filter>, MeilisearchHttpError> {
     match facets {
         Value::String(expr) => {
             let condition = Filter::from_str(expr)?;
```
```diff
@@ -198,6 +198,11 @@ impl Index<'_> {
         self.service.get(url).await
     }
 
+    pub async fn get_document_by_filter(&self, payload: Value) -> (Value, StatusCode) {
+        let url = format!("/indexes/{}/documents/fetch", urlencode(self.uid.as_ref()));
+        self.service.post(url, payload).await
+    }
+
     pub async fn get_all_documents_raw(&self, options: &str) -> (Value, StatusCode) {
         let url = format!("/indexes/{}/documents{}", urlencode(self.uid.as_ref()), options);
         self.service.get(url).await
```
```diff
@@ -82,6 +82,111 @@ async fn get_all_documents_bad_limit() {
     "###);
 }
 
+#[actix_rt::test]
+async fn get_all_documents_bad_filter() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    // Since the filter can't be parsed automatically by deserr, we have the wrong error message
+    // if the index does not exist: we could expect to get an error message about the invalid filter before
+    // the existence of the index is checked, but it is not the case.
+    let (response, code) = index.get_all_documents_raw("?filter").await;
+    snapshot!(code, @"404 Not Found");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Index `test` not found.",
+      "code": "index_not_found",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#index_not_found"
+    }
+    "###);
+
+    let (response, code) = index.get_all_documents_raw("?filter=doggo").await;
+    snapshot!(code, @"404 Not Found");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Index `test` not found.",
+      "code": "index_not_found",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#index_not_found"
+    }
+    "###);
+
+    let (response, code) = index.get_all_documents_raw("?filter=doggo=bernese").await;
+    snapshot!(code, @"404 Not Found");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Index `test` not found.",
+      "code": "index_not_found",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#index_not_found"
+    }
+    "###);
+
+    let (response, code) = index.create(None).await;
+    snapshot!(code, @"202 Accepted");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "taskUid": 0,
+      "indexUid": "test",
+      "status": "enqueued",
+      "type": "indexCreation",
+      "enqueuedAt": "[date]"
+    }
+    "###);
+    let response = server.wait_task(0).await;
+    snapshot!(json_string!(response, { ".duration" => "[duration]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]" }), @r###"
+    {
+      "uid": 0,
+      "indexUid": "test",
+      "status": "succeeded",
+      "type": "indexCreation",
+      "canceledBy": null,
+      "details": {
+        "primaryKey": null
+      },
+      "error": null,
+      "duration": "[duration]",
+      "enqueuedAt": "[date]",
+      "startedAt": "[date]",
+      "finishedAt": "[date]"
+    }
+    "###);
+
+    let (response, code) = index.get_all_documents_raw("?filter").await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response), @r###"
+    {
+      "results": [],
+      "offset": 0,
+      "limit": 20,
+      "total": 0
+    }
+    "###);
+
+    let (response, code) = index.get_all_documents_raw("?filter=doggo").await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
+      "code": "invalid_document_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
+    }
+    "###);
+
+    let (response, code) = index.get_all_documents_raw("?filter=doggo=bernese").await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Attribute `doggo` is not filterable. This index does not have configured filterable attributes.\n1:6 doggo=bernese",
+      "code": "invalid_search_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
+    }
+    "###);
+}
+
 #[actix_rt::test]
 async fn delete_documents_batch() {
     let server = Server::new().await;
@@ -570,3 +675,77 @@ async fn delete_document_by_filter() {
     }
     "###);
 }
+
+#[actix_rt::test]
+async fn fetch_document_by_filter() {
+    let server = Server::new().await;
+    let index = server.index("doggo");
+    index.update_settings_filterable_attributes(json!(["color"])).await;
+    index
+        .add_documents(
+            json!([
+                { "id": 0, "color": "red" },
+                { "id": 1, "color": "blue" },
+                { "id": 2, "color": "blue" },
+                { "id": 3 },
+            ]),
+            Some("id"),
+        )
+        .await;
+    index.wait_task(1).await;
+
+    let (response, code) = index.get_document_by_filter(json!(null)).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Invalid value type: expected an object, but found null",
+      "code": "bad_request",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#bad_request"
+    }
+    "###);
+
+    let (response, code) = index.get_document_by_filter(json!({ "offset": "doggo" })).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Invalid value type at `.offset`: expected a positive integer, but found a string: `\"doggo\"`",
+      "code": "invalid_document_offset",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_document_offset"
+    }
+    "###);
+
+    let (response, code) = index.get_document_by_filter(json!({ "limit": "doggo" })).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Invalid value type at `.limit`: expected a positive integer, but found a string: `\"doggo\"`",
+      "code": "invalid_document_limit",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_document_limit"
+    }
+    "###);
+
+    let (response, code) = index.get_document_by_filter(json!({ "fields": "doggo" })).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Invalid value type at `.fields`: expected an array, but found a string: `\"doggo\"`",
+      "code": "invalid_document_fields",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_document_fields"
+    }
+    "###);
+
+    let (response, code) = index.get_document_by_filter(json!({ "filter": true })).await;
+    snapshot!(code, @"400 Bad Request");
+    snapshot!(json_string!(response), @r###"
+    {
+      "message": "Invalid syntax for the filter parameter: `expected String, Array, found: true`.",
+      "code": "invalid_document_filter",
+      "type": "invalid_request",
+      "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
+    }
+    "###);
+}
```
```diff
@@ -1,5 +1,6 @@
 use actix_web::test;
 use http::header::ACCEPT_ENCODING;
+use meili_snap::*;
 use serde_json::{json, Value};
 use urlencoding::encode as urlencode;
 
@@ -378,3 +379,164 @@ async fn get_documents_displayed_attributes_is_ignored() {
     assert_eq!(response.as_object().unwrap().keys().count(), 16);
     assert!(response.as_object().unwrap().get("gender").is_some());
 }
+
+#[actix_rt::test]
+async fn get_document_by_filter() {
+    let server = Server::new().await;
+    let index = server.index("doggo");
+    index.update_settings_filterable_attributes(json!(["color"])).await;
+    index
+        .add_documents(
+            json!([
+                { "id": 0, "color": "red" },
+                { "id": 1, "color": "blue" },
+                { "id": 2, "color": "blue" },
+                { "id": 3 },
+            ]),
+            Some("id"),
+        )
+        .await;
+    index.wait_task(1).await;
+
+    let (response, code) = index.get_document_by_filter(json!({})).await;
+    let (response2, code2) = index.get_all_documents_raw("").await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "results": [
+        {
+          "id": 0,
+          "color": "red"
+        },
+        {
+          "id": 1,
+          "color": "blue"
+        },
+        {
+          "id": 2,
+          "color": "blue"
+        },
+        {
+          "id": 3
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 4
+    }
+    "###);
+    assert_eq!(code, code2);
+    assert_eq!(response, response2);
+
+    let (response, code) = index.get_document_by_filter(json!({ "filter": "color = blue" })).await;
+    let (response2, code2) = index.get_all_documents_raw("?filter=color=blue").await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "results": [
+        {
+          "id": 1,
+          "color": "blue"
+        },
+        {
+          "id": 2,
+          "color": "blue"
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 2
+    }
+    "###);
+    assert_eq!(code, code2);
+    assert_eq!(response, response2);
+
+    let (response, code) = index
+        .get_document_by_filter(json!({ "offset": 1, "limit": 1, "filter": "color != blue" }))
+        .await;
+    let (response2, code2) =
+        index.get_all_documents_raw("?filter=color!=blue&offset=1&limit=1").await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "results": [
+        {
+          "id": 3
+        }
+      ],
+      "offset": 1,
+      "limit": 1,
+      "total": 2
+    }
+    "###);
+    assert_eq!(code, code2);
+    assert_eq!(response, response2);
+
+    let (response, code) = index
+        .get_document_by_filter(
+            json!({ "limit": 1, "filter": "color != blue", "fields": ["color"] }),
+        )
+        .await;
+    let (response2, code2) =
+        index.get_all_documents_raw("?limit=1&filter=color!=blue&fields=color").await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "results": [
+        {
+          "color": "red"
+        }
+      ],
+      "offset": 0,
+      "limit": 1,
+      "total": 2
+    }
+    "###);
+    assert_eq!(code, code2);
+    assert_eq!(response, response2);
+
+    // Now testing more complex filter that the get route can't represent
+
+    let (response, code) =
+        index.get_document_by_filter(json!({ "filter": [["color = blue", "color = red"]] })).await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "results": [
+        {
+          "id": 0,
+          "color": "red"
+        },
+        {
+          "id": 1,
+          "color": "blue"
+        },
+        {
+          "id": 2,
+          "color": "blue"
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 3
+    }
+    "###);
+
+    let (response, code) = index
+        .get_document_by_filter(json!({ "filter": [["color != blue"], "color EXISTS"] }))
+        .await;
+    snapshot!(code, @"200 OK");
+    snapshot!(json_string!(response, { ".enqueuedAt" => "[date]" }), @r###"
+    {
+      "results": [
+        {
+          "id": 0,
+          "color": "red"
+        }
+      ],
+      "offset": 0,
+      "limit": 20,
+      "total": 1
+    }
+    "###);
+}
```
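As a reading aid for the last two snapshots: in Meilisearch filter arrays, inner arrays are combined with OR and top-level elements with AND, which is why `[["color = blue", "color = red"]]` matches ids 0, 1 and 2 while `[["color != blue"], "color EXISTS"]` only matches id 0. A tiny illustrative snippet, assuming `serde_json` (not part of the test suite):

```rust
use serde_json::json;

fn main() {
    // Inner array = OR: color = blue OR color = red -> ids 0, 1, 2 in the test above.
    let or_filter = json!({ "filter": [["color = blue", "color = red"]] });
    // Top-level elements = AND: (color != blue) AND (color EXISTS) -> id 0 only.
    let and_filter = json!({ "filter": [["color != blue"], "color EXISTS"] });
    println!("{or_filter}\n{and_filter}");
}
```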
```diff
@@ -1032,16 +1032,15 @@ impl Index {
 
     /* documents */
 
-    /// Returns a [`Vec`] of the requested documents. Returns an error if a document is missing.
-    pub fn documents<'t>(
-        &self,
+    /// Returns an iterator over the requested documents. The next item will be an error if a document is missing.
+    pub fn iter_documents<'a, 't: 'a>(
+        &'a self,
         rtxn: &'t RoTxn,
-        ids: impl IntoIterator<Item = DocumentId>,
-    ) -> Result<Vec<(DocumentId, obkv::KvReaderU16<'t>)>> {
+        ids: impl IntoIterator<Item = DocumentId> + 'a,
+    ) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
         let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?;
-        let mut documents = Vec::new();
 
-        for id in ids {
+        Ok(ids.into_iter().map(move |id| {
             if soft_deleted_documents.contains(id) {
                 return Err(UserError::AccessingSoftDeletedDocument { document_id: id })?;
             }
@@ -1049,27 +1048,25 @@ impl Index {
                 .documents
                 .get(rtxn, &BEU32::new(id))?
                 .ok_or(UserError::UnknownInternalDocumentId { document_id: id })?;
-            documents.push((id, kv));
-        }
-
-        Ok(documents)
+            Ok((id, kv))
+        }))
     }
 
+    /// Returns a [`Vec`] of the requested documents. Returns an error if a document is missing.
+    pub fn documents<'t>(
+        &self,
+        rtxn: &'t RoTxn,
+        ids: impl IntoIterator<Item = DocumentId>,
+    ) -> Result<Vec<(DocumentId, obkv::KvReaderU16<'t>)>> {
+        self.iter_documents(rtxn, ids)?.collect()
+    }
+
     /// Returns an iterator over all the documents in the index.
-    pub fn all_documents<'t>(
-        &self,
+    pub fn all_documents<'a, 't: 'a>(
+        &'a self,
         rtxn: &'t RoTxn,
-    ) -> Result<impl Iterator<Item = heed::Result<(DocumentId, obkv::KvReaderU16<'t>)>>> {
-        let soft_deleted_docids = self.soft_deleted_documents_ids(rtxn)?;
-
-        Ok(self
-            .documents
-            .iter(rtxn)?
-            // we cast the BEU32 to a DocumentId
-            .map(|document| document.map(|(id, obkv)| (id.get(), obkv)))
-            .filter(move |document| {
-                document.as_ref().map_or(true, |(id, _)| !soft_deleted_docids.contains(*id))
-            }))
+    ) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
+        self.iter_documents(rtxn, self.documents_ids(rtxn)?)
+    }
 
     pub fn facets_distribution<'a>(&'a self, rtxn: &'a RoTxn) -> FacetDistribution<'a> {
```
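The milli change follows a simple pattern: the lazy `iter_documents` becomes the primitive, the `Vec`-returning `documents` is reduced to a `collect()` over it, and `all_documents` just feeds it every live document id. A generic sketch of that shape using only the standard library (not the milli types):

```rust
use std::collections::BTreeMap;

// Generic illustration of the refactor above: expose a lazy iterator as the
// primitive, and keep the Vec-returning accessor as a thin wrapper over it.
struct Store {
    docs: BTreeMap<u32, String>,
}

impl Store {
    fn iter_documents<'a>(
        &'a self,
        ids: impl IntoIterator<Item = u32> + 'a,
    ) -> impl Iterator<Item = Result<(u32, &'a str), String>> + 'a {
        ids.into_iter().map(move |id| {
            self.docs
                .get(&id)
                .map(|doc| (id, doc.as_str()))
                .ok_or_else(|| format!("unknown document id {id}"))
        })
    }

    fn documents<'a>(
        &'a self,
        ids: impl IntoIterator<Item = u32> + 'a,
    ) -> Result<Vec<(u32, &'a str)>, String> {
        // The eager accessor is now just a collect over the lazy one.
        self.iter_documents(ids).collect()
    }
}

fn main() {
    let store = Store {
        docs: BTreeMap::from([(0, "red".to_string()), (1, "blue".to_string())]),
    };
    assert!(store.documents(vec![0, 1]).is_ok());
    assert!(store.documents(vec![0, 7]).is_err()); // an unknown id surfaces as an error
}
```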