Merge branch 'main' into tmp-release-v1.5.0

2024-11-22 18:17:39 +08:00 · 2023-11-21 16:30:46 +01:00 · 2023-11-21 16:30:46 +01:00 · 7cb7e37ba8
commit 7cb7e37ba8
parent b11f85a635 33b7c574ea
184 changed files with 3972 additions and 7137 deletions
--- a/.github/workflows/benchmarks-pr.yml
+++ b/.github/workflows/benchmarks-pr.yml
@ -90,7 +90,8 @@ jobs:
          set -x
          export base_ref=$(git merge-base origin/main ${{ steps.comment-branch.outputs.head_ref }} | head -c8)
          export base_filename=$(echo ${{ steps.command.outputs.command-arguments }}_main_${base_ref}.json)
-          echo 'Here are your benchmarks diff 👊' >> body.txt
+          export bench_name=$(echo ${{ steps.command.outputs.command-arguments }})
+          echo "Here are your $bench_name benchmarks diff 👊" >> body.txt
          echo '```' >> body.txt
          ./benchmarks/scripts/compare.sh $base_filename ${{ steps.file.outputs.basename }}.json >> body.txt
          echo '```' >> body.txt
--- a/.github/workflows/publish-apt-brew-pkg.yml
+++ b/.github/workflows/publish-apt-brew-pkg.yml
@ -50,7 +50,7 @@ jobs:
    needs: check-version
    steps:
      - name: Create PR to Homebrew
-        uses: mislav/bump-homebrew-formula-action@v2
+        uses: mislav/bump-homebrew-formula-action@v3
        with:
          formula-name: meilisearch
          formula-path: Formula/m/meilisearch.rb
--- a/.github/workflows/publish-docker-images.yml
+++ b/.github/workflows/publish-docker-images.yml
@ -63,7 +63,7 @@ jobs:
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
-        uses: docker/login-action@v2
+        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
--- a/.github/workflows/sdks-tests.yml
+++ b/.github/workflows/sdks-tests.yml
@ -160,7 +160,7 @@ jobs:
        with:
          repository: meilisearch/meilisearch-js
      - name: Setup node
-        uses: actions/setup-node@v3
+        uses: actions/setup-node@v4
        with:
          cache: 'yarn'
      - name: Install dependencies
@ -318,7 +318,7 @@ jobs:
        with:
          repository: meilisearch/meilisearch-js-plugins
      - name: Setup node
-        uses: actions/setup-node@v3
+        uses: actions/setup-node@v4
        with:
          cache: yarn
      - name: Install dependencies
--- a/.github/workflows/test-suite.yml
+++ b/.github/workflows/test-suite.yml
@ -43,7 +43,7 @@ jobs:
          toolchain: nightly
          override: true
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.7.1
      - name: Run cargo check without any default features
        uses: actions-rs/cargo@v1
        with:
@ -65,7 +65,7 @@ jobs:
    steps:
      - uses: actions/checkout@v3
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.7.1
      - name: Run cargo check without any default features
        uses: actions-rs/cargo@v1
        with:
@ -149,7 +149,7 @@ jobs:
          toolchain: stable
          override: true
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.7.1
      - name: Run tests in debug
        uses: actions-rs/cargo@v1
        with:
@ -168,7 +168,7 @@ jobs:
          override: true
          components: clippy
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.7.1
      - name: Run cargo clippy
        uses: actions-rs/cargo@v1
        with:
@ -187,7 +187,7 @@ jobs:
          override: true
          components: rustfmt
      - name: Cache dependencies
-        uses: Swatinem/rust-cache@v2.6.2
+        uses: Swatinem/rust-cache@v2.7.1
      - name: Run cargo fmt
        # Since we never ran the `build.rs` script in the benchmark directory we are missing one auto-generated import file.
        # Since we want to trigger (and fail) this action as fast as possible, instead of building the benchmark crate
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1731,12 +1731,13 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"

 [[package]]
 name = "grenad"
-version = "0.4.4"
+version = "0.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5232b2d157b7bf63d7abe1b12177039e58db2f29e377517c0cdee1578cca4c93"
+checksum = "6a007932af5475ebb5c63bef8812bb1c36f317983bb4ca663e9d6dd58d6a0f8c"
 dependencies = [
 "bytemuck",
 "byteorder",
+ "rayon",
 "tempfile",
 ]

@ -3281,6 +3282,7 @@ dependencies = [
 "logging_timer",
 "maplit",
 "md5",
+ "meili-snap",
 "memmap2",
 "mimalloc",
 "obkv",
@ -3443,9 +3445,9 @@ dependencies = [

 [[package]]
 name = "obkv"
-version = "0.2.0"
+version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f69e48cd7c8e5bb52a1da1287fdbfd877c32673176583ce664cd63b201aba385"
+checksum = "6c459142426056c639ff88d053ebaaaeca0ee1411c94362892398ef4ccd81080"

 [[package]]
 name = "once_cell"
--- a/README.md
+++ b/README.md
@ -25,12 +25,6 @@

 <p align="center">⚡ A lightning-fast search engine that fits effortlessly into your apps, websites, and workflow 🔍</p>

---
-
-### 🔥 On November 2nd, we are hosting our first-ever live demo and product updates for [Meilisearch Cloud](https://www.meilisearch.com/cloud?utm_campaign=oss&utm_source=github&utm_medium=meilisearch). Make sure to [register here](https://us06web.zoom.us/meeting/register/tZMlc-mqrjIsH912-HTRe-AaT-pp41bDe81a#/registration) and bring your questions for live Q&A!
-
---
-
 Meilisearch helps you shape a delightful search experience in a snap, offering features that work out-of-the-box to speed up your workflow.

 <p align="center" name="demo">
--- a/benchmarks/benches/indexing.rs
+++ b/benchmarks/benches/indexing.rs
@ -6,9 +6,7 @@ use std::path::Path;

 use criterion::{criterion_group, criterion_main, Criterion};
 use milli::heed::{EnvOpenOptions, RwTxn};
-use milli::update::{
-    DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings,
-};
+use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
 use milli::Index;
 use rand::seq::SliceRandom;
 use rand_chacha::rand_core::SeedableRng;
@ -266,17 +264,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
                (index, document_ids_to_delete)
            },
            move |(index, document_ids_to_delete)| {
-                let mut wtxn = index.write_txn().unwrap();
-
-                for ids in document_ids_to_delete {
-                    let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-                    builder.delete_documents(&ids);
-                    builder.execute().unwrap();
-                }
-
-                wtxn.commit().unwrap();
-
-                index.prepare_for_closing().wait();
+                delete_documents_from_ids(index, document_ids_to_delete)
            },
        )
    });
@ -613,17 +601,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
                (index, document_ids_to_delete)
            },
            move |(index, document_ids_to_delete)| {
-                let mut wtxn = index.write_txn().unwrap();
-
-                for ids in document_ids_to_delete {
-                    let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-                    builder.delete_documents(&ids);
-                    builder.execute().unwrap();
-                }
-
-                wtxn.commit().unwrap();
-
-                index.prepare_for_closing().wait();
+                delete_documents_from_ids(index, document_ids_to_delete)
            },
        )
    });
@ -875,22 +853,31 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
                (index, document_ids_to_delete)
            },
            move |(index, document_ids_to_delete)| {
-                let mut wtxn = index.write_txn().unwrap();
-
-                for ids in document_ids_to_delete {
-                    let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-                    builder.delete_documents(&ids);
-                    builder.execute().unwrap();
-                }
-
-                wtxn.commit().unwrap();
-
-                index.prepare_for_closing().wait();
+                delete_documents_from_ids(index, document_ids_to_delete)
            },
        )
    });
 }

+fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBitmap>) {
+    let mut wtxn = index.write_txn().unwrap();
+
+    let indexer_config = IndexerConfig::default();
+    for ids in document_ids_to_delete {
+        let config = IndexDocumentsConfig::default();
+
+        let mut builder =
+            IndexDocuments::new(&mut wtxn, &index, &indexer_config, config, |_| (), || false)
+                .unwrap();
+        (builder, _) = builder.remove_documents_from_db_no_batch(&ids).unwrap();
+        builder.execute().unwrap();
+    }
+
+    wtxn.commit().unwrap();
+
+    index.prepare_for_closing().wait();
+}
+
 fn indexing_movies_in_three_batches(c: &mut Criterion) {
    let mut group = c.benchmark_group("indexing");
    group.sample_size(BENCHMARK_ITERATION);
@ -1112,17 +1099,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
                (index, document_ids_to_delete)
            },
            move |(index, document_ids_to_delete)| {
-                let mut wtxn = index.write_txn().unwrap();
-
-                for ids in document_ids_to_delete {
-                    let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-                    builder.delete_documents(&ids);
-                    builder.execute().unwrap();
-                }
-
-                wtxn.commit().unwrap();
-
-                index.prepare_for_closing().wait();
+                delete_documents_from_ids(index, document_ids_to_delete)
            },
        )
    });
@ -1338,17 +1315,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
                (index, document_ids_to_delete)
            },
            move |(index, document_ids_to_delete)| {
-                let mut wtxn = index.write_txn().unwrap();
-
-                for ids in document_ids_to_delete {
-                    let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-                    builder.delete_documents(&ids);
-                    builder.execute().unwrap();
-                }
-
-                wtxn.commit().unwrap();
-
-                index.prepare_for_closing().wait();
+                delete_documents_from_ids(index, document_ids_to_delete)
            },
        )
    });
--- a/dump/src/reader/mod.rs
+++ b/dump/src/reader/mod.rs
@ -526,12 +526,12 @@ pub(crate) mod test {
        assert!(indexes.is_empty());

        // products
-        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:22.688964637Z",
+          "updatedAt": "2022-10-09T20:27:23.951017769Z"
        }
        "###);

@ -541,12 +541,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
-        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:22.197788495Z",
+          "updatedAt": "2022-10-09T20:28:01.93111053Z"
        }
        "###);

@ -571,12 +571,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");

        // spells
-        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:24.242683494Z",
+          "updatedAt": "2022-10-09T20:27:24.312809641Z"
        }
        "###);

@ -617,12 +617,12 @@ pub(crate) mod test {
        assert!(indexes.is_empty());

        // products
-        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:56.595257Z",
+          "updatedAt": "2023-01-30T16:25:58.70348Z"
        }
        "###);

@ -632,12 +632,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
-        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:56.192178Z",
+          "updatedAt": "2023-01-30T16:25:56.455714Z"
        }
        "###);

@ -647,12 +647,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");

        // spells
-        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:58.876405Z",
+          "updatedAt": "2023-01-30T16:25:59.079906Z"
        }
        "###);

--- a/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-11.snap
+++ b/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-11.snap
@ -1,24 +0,0 @@
---
-source: dump/src/reader/mod.rs
-expression: spells.settings().unwrap()
---
-{
-  "displayedAttributes": [
-    "*"
-  ],
-  "searchableAttributes": [
-    "*"
-  ],
-  "filterableAttributes": [],
-  "sortableAttributes": [],
-  "rankingRules": [
-    "typo",
-    "words",
-    "proximity",
-    "attribute",
-    "exactness"
-  ],
-  "stopWords": [],
-  "synonyms": {},
-  "distinctAttribute": null
-}
--- a/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-5.snap
+++ b/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-5.snap
@ -1,38 +0,0 @@
---
-source: dump/src/reader/mod.rs
-expression: products.settings().unwrap()
---
-{
-  "displayedAttributes": [
-    "*"
-  ],
-  "searchableAttributes": [
-    "*"
-  ],
-  "filterableAttributes": [],
-  "sortableAttributes": [],
-  "rankingRules": [
-    "typo",
-    "words",
-    "proximity",
-    "attribute",
-    "exactness"
-  ],
-  "stopWords": [],
-  "synonyms": {
-    "android": [
-      "phone",
-      "smartphone"
-    ],
-    "iphone": [
-      "phone",
-      "smartphone"
-    ],
-    "phone": [
-      "android",
-      "iphone",
-      "smartphone"
-    ]
-  },
-  "distinctAttribute": null
-}
--- a/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-8.snap
+++ b/dump/src/reader/snapshots/dumpreadertest__import_dump_v1-8.snap
@ -1,31 +0,0 @@
---
-source: dump/src/reader/mod.rs
-expression: movies.settings().unwrap()
---
-{
-  "displayedAttributes": [
-    "*"
-  ],
-  "searchableAttributes": [
-    "*"
-  ],
-  "filterableAttributes": [
-    "genres",
-    "id"
-  ],
-  "sortableAttributes": [
-    "genres",
-    "id"
-  ],
-  "rankingRules": [
-    "typo",
-    "words",
-    "proximity",
-    "attribute",
-    "exactness",
-    "release_date:asc"
-  ],
-  "stopWords": [],
-  "synonyms": {},
-  "distinctAttribute": null
-}
--- a/dump/src/reader/v2/mod.rs
+++ b/dump/src/reader/v2/mod.rs
@ -46,6 +46,7 @@ pub type Checked = settings::Checked;
 pub type Unchecked = settings::Unchecked;

 pub type Task = updates::UpdateEntry;
+pub type Kind = updates::UpdateMeta;

 // everything related to the errors
 pub type ResponseError = errors::ResponseError;
@ -107,8 +108,11 @@ impl V2Reader {
    pub fn indexes(&self) -> Result<impl Iterator<Item = Result<V2IndexReader>> + '_> {
        Ok(self.index_uuid.iter().map(|index| -> Result<_> {
            V2IndexReader::new(
-                index.uid.clone(),
                &self.dump.path().join("indexes").join(format!("index-{}", index.uuid)),
+                index,
+                BufReader::new(
+                    File::open(self.dump.path().join("updates").join("data.jsonl")).unwrap(),
+                ),
            )
        }))
    }
@ -143,16 +147,41 @@ pub struct V2IndexReader {
 }

 impl V2IndexReader {
-    pub fn new(name: String, path: &Path) -> Result<Self> {
+    pub fn new(path: &Path, index_uuid: &IndexUuid, tasks: BufReader<File>) -> Result<Self> {
        let meta = File::open(path.join("meta.json"))?;
        let meta: DumpMeta = serde_json::from_reader(meta)?;

+        let mut created_at = None;
+        let mut updated_at = None;
+
+        for line in tasks.lines() {
+            let task: Task = serde_json::from_str(&line?)?;
+            if !(task.uuid == index_uuid.uuid && task.is_finished()) {
+                continue;
+            }
+
+            let new_created_at = match task.update.meta() {
+                Kind::DocumentsAddition { .. } | Kind::Settings(_) => task.update.finished_at(),
+                _ => None,
+            };
+            let new_updated_at = task.update.finished_at();
+
+            if created_at.is_none() || created_at > new_created_at {
+                created_at = new_created_at;
+            }
+
+            if updated_at.is_none() || updated_at < new_updated_at {
+                updated_at = new_updated_at;
+            }
+        }
+
+        let current_time = OffsetDateTime::now_utc();
+
        let metadata = IndexMetadata {
-            uid: name,
+            uid: index_uuid.uid.clone(),
            primary_key: meta.primary_key,
-            // FIXME: Iterate over the whole task queue to find the creation and last update date.
-            created_at: OffsetDateTime::now_utc(),
-            updated_at: OffsetDateTime::now_utc(),
+            created_at: created_at.unwrap_or(current_time),
+            updated_at: updated_at.unwrap_or(current_time),
        };

        let ret = V2IndexReader {
@ -248,12 +277,12 @@ pub(crate) mod test {
        assert!(indexes.is_empty());

        // products
-        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:22.688964637Z",
+          "updatedAt": "2022-10-09T20:27:23.951017769Z"
        }
        "###);

@ -263,12 +292,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
-        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:22.197788495Z",
+          "updatedAt": "2022-10-09T20:28:01.93111053Z"
        }
        "###);

@ -293,12 +322,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"d751713988987e9331980363e24189ce");

        // spells
-        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2022-10-09T20:27:24.242683494Z",
+          "updatedAt": "2022-10-09T20:27:24.312809641Z"
        }
        "###);

@ -340,12 +369,12 @@ pub(crate) mod test {
        assert!(indexes.is_empty());

        // products
-        insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(products.metadata(), @r###"
        {
          "uid": "products",
          "primaryKey": "sku",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:56.595257Z",
+          "updatedAt": "2023-01-30T16:25:58.70348Z"
        }
        "###);

@ -355,12 +384,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");

        // movies
-        insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(movies.metadata(), @r###"
        {
          "uid": "movies",
          "primaryKey": "id",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:56.192178Z",
+          "updatedAt": "2023-01-30T16:25:56.455714Z"
        }
        "###);

@ -370,12 +399,12 @@ pub(crate) mod test {
        meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");

        // spells
-        insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
+        insta::assert_json_snapshot!(spells.metadata(), @r###"
        {
          "uid": "dnd_spells",
          "primaryKey": "index",
-          "createdAt": "[now]",
-          "updatedAt": "[now]"
+          "createdAt": "2023-01-30T16:25:58.876405Z",
+          "updatedAt": "2023-01-30T16:25:59.079906Z"
        }
        "###);

--- a/dump/src/reader/v2/updates.rs
+++ b/dump/src/reader/v2/updates.rs
@ -227,4 +227,14 @@ impl UpdateStatus {
            _ => None,
        }
    }
+
+    pub fn finished_at(&self) -> Option<OffsetDateTime> {
+        match self {
+            UpdateStatus::Processing(_) => None,
+            UpdateStatus::Enqueued(_) => None,
+            UpdateStatus::Processed(u) => Some(u.processed_at),
+            UpdateStatus::Aborted(_) => None,
+            UpdateStatus::Failed(u) => Some(u.failed_at),
+        }
+    }
 }
--- a/index-scheduler/src/batch.rs
+++ b/index-scheduler/src/batch.rs
@ -24,14 +24,13 @@ use std::fs::{self, File};
 use std::io::BufWriter;

 use dump::IndexMetadata;
-use log::{debug, error, info};
+use log::{debug, error, info, trace};
 use meilisearch_types::error::Code;
 use meilisearch_types::heed::{RoTxn, RwTxn};
 use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader};
 use meilisearch_types::milli::heed::CompactionOption;
 use meilisearch_types::milli::update::{
-    DeleteDocuments, DocumentDeletionResult, IndexDocumentsConfig, IndexDocumentsMethod,
-    Settings as MilliSettings,
+    IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings as MilliSettings,
 };
 use meilisearch_types::milli::{self, Filter, BEU32};
 use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
@ -44,7 +43,7 @@ use uuid::Uuid;

 use crate::autobatcher::{self, BatchKind};
 use crate::utils::{self, swap_index_uid_in_task};
-use crate::{Error, IndexScheduler, ProcessingTasks, Result, TaskId};
+use crate::{Error, IndexScheduler, MustStopProcessing, ProcessingTasks, Result, TaskId};

 /// Represents a combination of tasks that can all be processed at the same time.
 ///
@ -105,12 +104,6 @@ pub(crate) enum IndexOperation {
        operations: Vec<DocumentOperation>,
        tasks: Vec<Task>,
    },
-    DocumentDeletion {
-        index_uid: String,
-        // The vec associated with each document deletion tasks.
-        documents: Vec<Vec<String>>,
-        tasks: Vec<Task>,
-    },
    IndexDocumentDeletionByFilter {
        index_uid: String,
        task: Task,
@ -162,7 +155,6 @@ impl Batch {
            }
            Batch::IndexOperation { op, .. } => match op {
                IndexOperation::DocumentOperation { tasks, .. }
-                | IndexOperation::DocumentDeletion { tasks, .. }
                | IndexOperation::Settings { tasks, .. }
                | IndexOperation::DocumentClear { tasks, .. } => {
                    tasks.iter().map(|task| task.uid).collect()
@ -227,7 +219,6 @@ impl IndexOperation {
    pub fn index_uid(&self) -> &str {
        match self {
            IndexOperation::DocumentOperation { index_uid, .. }
-            | IndexOperation::DocumentDeletion { index_uid, .. }
            | IndexOperation::IndexDocumentDeletionByFilter { index_uid, .. }
            | IndexOperation::DocumentClear { index_uid, .. }
            | IndexOperation::Settings { index_uid, .. }
@ -243,9 +234,6 @@ impl fmt::Display for IndexOperation {
            IndexOperation::DocumentOperation { .. } => {
                f.write_str("IndexOperation::DocumentOperation")
            }
-            IndexOperation::DocumentDeletion { .. } => {
-                f.write_str("IndexOperation::DocumentDeletion")
-            }
            IndexOperation::IndexDocumentDeletionByFilter { .. } => {
                f.write_str("IndexOperation::IndexDocumentDeletionByFilter")
            }
@ -348,18 +336,27 @@ impl IndexScheduler {
            BatchKind::DocumentDeletion { deletion_ids } => {
                let tasks = self.get_existing_tasks(rtxn, deletion_ids)?;

-                let mut documents = Vec::new();
+                let mut operations = Vec::with_capacity(tasks.len());
+                let mut documents_counts = Vec::with_capacity(tasks.len());
                for task in &tasks {
                    match task.kind {
                        KindWithContent::DocumentDeletion { ref documents_ids, .. } => {
-                            documents.push(documents_ids.clone())
+                            operations.push(DocumentOperation::Delete(documents_ids.clone()));
+                            documents_counts.push(documents_ids.len() as u64);
                        }
                        _ => unreachable!(),
                    }
                }

                Ok(Some(Batch::IndexOperation {
-                    op: IndexOperation::DocumentDeletion { index_uid, documents, tasks },
+                    op: IndexOperation::DocumentOperation {
+                        index_uid,
+                        primary_key: None,
+                        method: IndexDocumentsMethod::ReplaceDocuments,
+                        documents_counts,
+                        operations,
+                        tasks,
+                    },
                    must_create_index,
                }))
            }
@ -825,6 +822,10 @@ impl IndexScheduler {
                // 2. dump the tasks
                let mut dump_tasks = dump.create_tasks_queue()?;
                for ret in self.all_tasks.iter(&rtxn)? {
+                    if self.must_stop_processing.get() {
+                        return Err(Error::AbortedTask);
+                    }
+
                    let (_, mut t) = ret?;
                    let status = t.status;
                    let content_file = t.content_uuid();
@ -845,6 +846,9 @@ impl IndexScheduler {

                    // 2.1. Dump the `content_file` associated with the task if there is one and the task is not finished yet.
                    if let Some(content_file) = content_file {
+                        if self.must_stop_processing.get() {
+                            return Err(Error::AbortedTask);
+                        }
                        if status == Status::Enqueued {
                            let content_file = self.file_store.get_update(content_file)?;

@ -884,6 +888,9 @@ impl IndexScheduler {

                    // 3.1. Dump the documents
                    for ret in index.all_documents(&rtxn)? {
+                        if self.must_stop_processing.get() {
+                            return Err(Error::AbortedTask);
+                        }
                        let (_id, doc) = ret?;
                        let document = milli::obkv_to_json(&all_fields, &fields_ids_map, doc)?;
                        index_dumper.push_document(&document)?;
@ -903,6 +910,9 @@ impl IndexScheduler {
                    "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
                )).unwrap();

+                if self.must_stop_processing.get() {
+                    return Err(Error::AbortedTask);
+                }
                let path = self.dumps_path.join(format!("{}.dump", dump_uid));
                let file = File::create(path)?;
                dump.persist_to(BufWriter::new(file))?;
@ -1195,7 +1205,7 @@ impl IndexScheduler {
                    index,
                    indexer_config,
                    config,
-                    |indexing_step| debug!("update: {:?}", indexing_step),
+                    |indexing_step| trace!("update: {:?}", indexing_step),
                    || must_stop_processing.get(),
                )?;

@ -1242,7 +1252,8 @@ impl IndexScheduler {
                            let (new_builder, user_result) =
                                builder.remove_documents(document_ids)?;
                            builder = new_builder;
-
+                            // Uses Invariant: remove documents actually always returns Ok for the inner result
+                            let count = user_result.unwrap();
                            let provided_ids =
                                if let Some(Details::DocumentDeletion { provided_ids, .. }) =
                                    task.details
@ -1253,23 +1264,11 @@ impl IndexScheduler {
                                    unreachable!();
                                };

-                            match user_result {
-                                Ok(count) => {
-                                    task.status = Status::Succeeded;
-                                    task.details = Some(Details::DocumentDeletion {
-                                        provided_ids,
-                                        deleted_documents: Some(count),
-                                    });
-                                }
-                                Err(e) => {
-                                    task.status = Status::Failed;
-                                    task.details = Some(Details::DocumentDeletion {
-                                        provided_ids,
-                                        deleted_documents: Some(0),
-                                    });
-                                    task.error = Some(milli::Error::from(e).into());
-                                }
-                            }
+                            task.status = Status::Succeeded;
+                            task.details = Some(Details::DocumentDeletion {
+                                provided_ids,
+                                deleted_documents: Some(count),
+                            });
                        }
                    }
                }
@ -1284,31 +1283,13 @@ impl IndexScheduler {
                        milli::update::Settings::new(index_wtxn, index, indexer_config);
                    builder.reset_primary_key();
                    builder.execute(
-                        |indexing_step| debug!("update: {:?}", indexing_step),
+                        |indexing_step| trace!("update: {:?}", indexing_step),
                        || must_stop_processing.clone().get(),
                    )?;
                }

                Ok(tasks)
            }
-            IndexOperation::DocumentDeletion { index_uid: _, documents, mut tasks } => {
-                let mut builder = milli::update::DeleteDocuments::new(index_wtxn, index)?;
-                documents.iter().flatten().for_each(|id| {
-                    builder.delete_external_id(id);
-                });
-
-                let DocumentDeletionResult { deleted_documents, .. } = builder.execute()?;
-
-                for (task, documents) in tasks.iter_mut().zip(documents) {
-                    task.status = Status::Succeeded;
-                    task.details = Some(Details::DocumentDeletion {
-                        provided_ids: documents.len(),
-                        deleted_documents: Some(deleted_documents.min(documents.len() as u64)),
-                    });
-                }
-
-                Ok(tasks)
-            }
            IndexOperation::IndexDocumentDeletionByFilter { mut task, index_uid: _ } => {
                let filter =
                    if let KindWithContent::DocumentDeletionByFilter { filter_expr, .. } =
@ -1318,7 +1299,13 @@ impl IndexScheduler {
                    } else {
                        unreachable!()
                    };
-                let deleted_documents = delete_document_by_filter(index_wtxn, filter, index);
+                let deleted_documents = delete_document_by_filter(
+                    index_wtxn,
+                    filter,
+                    self.index_mapper.indexer_config(),
+                    self.must_stop_processing.clone(),
+                    index,
+                );
                let original_filter = if let Some(Details::DocumentDeletionByFilter {
                    original_filter,
                    deleted_documents: _,
@ -1552,6 +1539,8 @@ impl IndexScheduler {
 fn delete_document_by_filter<'a>(
    wtxn: &mut RwTxn<'a, '_>,
    filter: &serde_json::Value,
+    indexer_config: &IndexerConfig,
+    must_stop_processing: MustStopProcessing,
    index: &'a Index,
 ) -> Result<u64> {
    let filter = Filter::from_json(filter)?;
@ -1562,9 +1551,26 @@ fn delete_document_by_filter<'a>(
            }
            e => e.into(),
        })?;
-        let mut delete_operation = DeleteDocuments::new(wtxn, index)?;
-        delete_operation.delete_documents(&candidates);
-        delete_operation.execute().map(|result| result.deleted_documents)?
+
+        let config = IndexDocumentsConfig {
+            update_method: IndexDocumentsMethod::ReplaceDocuments,
+            ..Default::default()
+        };
+
+        let mut builder = milli::update::IndexDocuments::new(
+            wtxn,
+            index,
+            indexer_config,
+            config,
+            |indexing_step| debug!("update: {:?}", indexing_step),
+            || must_stop_processing.get(),
+        )?;
+
+        let (new_builder, count) = builder.remove_documents_from_db_no_batch(&candidates)?;
+        builder = new_builder;
+
+        let _ = builder.execute()?;
+        count
    } else {
        0
    })
--- a/index-scheduler/src/error.rs
+++ b/index-scheduler/src/error.rs
@ -108,6 +108,8 @@ pub enum Error {
    TaskDeletionWithEmptyQuery,
    #[error("Query parameters to filter the tasks to cancel are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.")]
    TaskCancelationWithEmptyQuery,
+    #[error("Aborted task")]
+    AbortedTask,

    #[error(transparent)]
    Dump(#[from] dump::Error),
@ -175,6 +177,7 @@ impl Error {
            | Error::TaskNotFound(_)
            | Error::TaskDeletionWithEmptyQuery
            | Error::TaskCancelationWithEmptyQuery
+            | Error::AbortedTask
            | Error::Dump(_)
            | Error::Heed(_)
            | Error::Milli(_)
@ -236,6 +239,9 @@ impl ErrorCode for Error {
            Error::TaskDatabaseUpdate(_) => Code::Internal,
            Error::CreateBatch(_) => Code::Internal,

+            // This one should never be seen by the end user
+            Error::AbortedTask => Code::Internal,
+
            #[cfg(test)]
            Error::PlannedFailure => Code::Internal,
        }
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@ -1183,7 +1183,8 @@ impl IndexScheduler {
            // If we have an abortion error we must stop the tick here and re-schedule tasks.
            Err(Error::Milli(milli::Error::InternalError(
                milli::InternalError::AbortedIndexation,
-            ))) => {
+            )))
+            | Err(Error::AbortedTask) => {
                #[cfg(test)]
                self.breakpoint(Breakpoint::AbortedIndexation);
                wtxn.abort().map_err(Error::HeedTransaction)?;
@ -4339,4 +4340,26 @@ mod tests {
        }
        "###);
    }
+
+    #[test]
+    fn cancel_processing_dump() {
+        let (index_scheduler, mut handle) = IndexScheduler::test(true, vec![]);
+
+        let dump_creation = KindWithContent::DumpCreation { keys: Vec::new(), instance_uid: None };
+        let dump_cancellation = KindWithContent::TaskCancelation {
+            query: "cancel dump".to_owned(),
+            tasks: RoaringBitmap::from_iter([0]),
+        };
+        let _ = index_scheduler.register(dump_creation).unwrap();
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_dump_register");
+        handle.advance_till([Start, BatchCreated, InsideProcessBatch]);
+
+        let _ = index_scheduler.register(dump_cancellation).unwrap();
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_registered");
+
+        snapshot!(format!("{:?}", handle.advance()), @"AbortedIndexation");
+
+        handle.advance_one_successful_batch();
+        snapshot!(snapshot_index_scheduler(&index_scheduler), name: "cancel_processed");
+    }
 }
--- a/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/after_dump_register.snap
+++ b/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/after_dump_register.snap
@ -0,0 +1,35 @@
+---
+source: index-scheduler/src/lib.rs
+---
+### Autobatching Enabled = true
+### Processing Tasks:
+[]
+----------------------------------------------------------------------
+### All Tasks:
+0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
+----------------------------------------------------------------------
+### Status:
+enqueued [0,]
+----------------------------------------------------------------------
+### Kind:
+"dumpCreation" [0,]
+----------------------------------------------------------------------
+### Index Tasks:
+----------------------------------------------------------------------
+### Index Mapper:
+
+----------------------------------------------------------------------
+### Canceled By:
+
+----------------------------------------------------------------------
+### Enqueued At:
+[timestamp] [0,]
+----------------------------------------------------------------------
+### Started At:
+----------------------------------------------------------------------
+### Finished At:
+----------------------------------------------------------------------
+### File Store:
+
+----------------------------------------------------------------------
+
--- a/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_processed.snap
+++ b/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_processed.snap
@ -0,0 +1,45 @@
+---
+source: index-scheduler/src/lib.rs
+---
+### Autobatching Enabled = true
+### Processing Tasks:
+[]
+----------------------------------------------------------------------
+### All Tasks:
+0 {uid: 0, status: canceled, canceled_by: 1, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
+1 {uid: 1, status: succeeded, details: { matched_tasks: 1, canceled_tasks: Some(0), original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
+----------------------------------------------------------------------
+### Status:
+enqueued []
+succeeded [1,]
+canceled [0,]
+----------------------------------------------------------------------
+### Kind:
+"taskCancelation" [1,]
+"dumpCreation" [0,]
+----------------------------------------------------------------------
+### Index Tasks:
+----------------------------------------------------------------------
+### Index Mapper:
+
+----------------------------------------------------------------------
+### Canceled By:
+1 [0,]
+
+----------------------------------------------------------------------
+### Enqueued At:
+[timestamp] [0,]
+[timestamp] [1,]
+----------------------------------------------------------------------
+### Started At:
+[timestamp] [0,]
+[timestamp] [1,]
+----------------------------------------------------------------------
+### Finished At:
+[timestamp] [0,]
+[timestamp] [1,]
+----------------------------------------------------------------------
+### File Store:
+
+----------------------------------------------------------------------
+
--- a/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_registered.snap
+++ b/index-scheduler/src/snapshots/lib.rs/cancel_processing_dump/cancel_registered.snap
@ -0,0 +1,38 @@
+---
+source: index-scheduler/src/lib.rs
+---
+### Autobatching Enabled = true
+### Processing Tasks:
+[0,]
+----------------------------------------------------------------------
+### All Tasks:
+0 {uid: 0, status: enqueued, details: { dump_uid: None }, kind: DumpCreation { keys: [], instance_uid: None }}
+1 {uid: 1, status: enqueued, details: { matched_tasks: 1, canceled_tasks: None, original_filter: "cancel dump" }, kind: TaskCancelation { query: "cancel dump", tasks: RoaringBitmap<[0]> }}
+----------------------------------------------------------------------
+### Status:
+enqueued [0,1,]
+----------------------------------------------------------------------
+### Kind:
+"taskCancelation" [1,]
+"dumpCreation" [0,]
+----------------------------------------------------------------------
+### Index Tasks:
+----------------------------------------------------------------------
+### Index Mapper:
+
+----------------------------------------------------------------------
+### Canceled By:
+
+----------------------------------------------------------------------
+### Enqueued At:
+[timestamp] [0,]
+[timestamp] [1,]
+----------------------------------------------------------------------
+### Started At:
+----------------------------------------------------------------------
+### Finished At:
+----------------------------------------------------------------------
+### File Store:
+
+----------------------------------------------------------------------
+
--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@ -324,7 +324,6 @@ impl ErrorCode for milli::Error {
                    UserError::SerdeJson(_)
                    | UserError::InvalidLmdbOpenOptions
                    | UserError::DocumentLimitReached
-                    | UserError::AccessingSoftDeletedDocument { .. }
                    | UserError::UnknownInternalDocumentId { .. } => Code::Internal,
                    UserError::InvalidStoreFile => Code::InvalidStoreFile,
                    UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice,
--- a/meilisearch/src/lib.rs
+++ b/meilisearch/src/lib.rs
@ -362,7 +362,7 @@ fn import_dump(
                update_method: IndexDocumentsMethod::ReplaceDocuments,
                ..Default::default()
            },
-            |indexing_step| log::debug!("update: {:?}", indexing_step),
+            |indexing_step| log::trace!("update: {:?}", indexing_step),
            || false,
        )?;

--- a/meilisearch/src/routes/indexes/documents.rs
+++ b/meilisearch/src/routes/indexes/documents.rs
@ -612,8 +612,8 @@ fn retrieve_document<S: AsRef<str>>(
    let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();

    let internal_id = index
-        .external_documents_ids(&txn)?
-        .get(doc_id.as_bytes())
+        .external_documents_ids()
+        .get(&txn, doc_id)?
        .ok_or_else(|| MeilisearchHttpError::DocumentNotFound(doc_id.to_string()))?;

    let document = index
--- a/meilisearch/tests/documents/delete_documents.rs
+++ b/meilisearch/tests/documents/delete_documents.rs
@ -397,7 +397,7 @@ async fn delete_document_by_complex_filter() {
      "canceledBy": null,
      "details": {
        "providedIds": 0,
-        "deletedDocuments": 4,
+        "deletedDocuments": 2,
        "originalFilter": "[[\"color = green\",\"color NOT EXISTS\"]]"
      },
      "error": null,
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@ -26,8 +26,8 @@ flatten-serde-json = { path = "../flatten-serde-json" }
 fst = "0.4.7"
 fxhash = "0.2.1"
 geoutils = "0.5.1"
-grenad = { version = "0.4.4", default-features = false, features = [
-    "tempfile",
+grenad = { version = "0.4.5", default-features = false, features = [
+    "rayon", "tempfile"
 ] }
 heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [
    "lmdb", "read-txn-no-tls"
@ -79,6 +79,7 @@ big_s = "1.0.2"
 insta = "1.29.0"
 maplit = "1.0.2"
 md5 = "0.7.0"
+meili-snap = { path = "../meili-snap" }
 rand = { version = "0.8.5", features = ["small_rng"] }

 [features]
--- a/milli/src/documents/mod.rs
+++ b/milli/src/documents/mod.rs
@ -1,5 +1,6 @@
 mod builder;
 mod enriched;
+mod primary_key;
 mod reader;
 mod serde_impl;

@ -11,6 +12,7 @@ use bimap::BiHashMap;
 pub use builder::DocumentsBatchBuilder;
 pub use enriched::{EnrichedDocument, EnrichedDocumentsBatchCursor, EnrichedDocumentsBatchReader};
 use obkv::KvReader;
+pub use primary_key::{DocumentIdExtractionError, FieldIdMapper, PrimaryKey, DEFAULT_PRIMARY_KEY};
 pub use reader::{DocumentsBatchCursor, DocumentsBatchCursorError, DocumentsBatchReader};
 use serde::{Deserialize, Serialize};

@ -87,6 +89,12 @@ impl DocumentsBatchIndex {
    }
 }

+impl FieldIdMapper for DocumentsBatchIndex {
+    fn id(&self, name: &str) -> Option<FieldId> {
+        self.id(name)
+    }
+}
+
 #[derive(Debug, thiserror::Error)]
 pub enum Error {
    #[error("Error parsing number {value:?} at line {line}: {error}")]
--- a/milli/src/documents/primary_key.rs
+++ b/milli/src/documents/primary_key.rs
@ -0,0 +1,172 @@
+use std::iter;
+use std::result::Result as StdResult;
+
+use serde_json::Value;
+
+use crate::{FieldId, InternalError, Object, Result, UserError};
+
+/// The symbol used to define levels in a nested primary key.
+const PRIMARY_KEY_SPLIT_SYMBOL: char = '.';
+
+/// The default primary key name, used when none is specified.
+pub const DEFAULT_PRIMARY_KEY: &str = "id";
+
+/// Trait for objects that can map the name of a field to its [`FieldId`].
+pub trait FieldIdMapper {
+    /// Attempts to map the passed name to its [`FieldId`].
+    ///
+    /// `None` if the field with this name was not found.
+    fn id(&self, name: &str) -> Option<FieldId>;
+}
+
+/// A type that represents the kind of primary key that has been set
+/// for this index: either a classic flat one or a nested one.
+#[derive(Debug, Clone, Copy)]
+pub enum PrimaryKey<'a> {
+    Flat { name: &'a str, field_id: FieldId },
+    Nested { name: &'a str },
+}
+
+pub enum DocumentIdExtractionError {
+    InvalidDocumentId(UserError),
+    MissingDocumentId,
+    TooManyDocumentIds(usize),
+}
+
+impl<'a> PrimaryKey<'a> {
+    pub fn new(path: &'a str, fields: &impl FieldIdMapper) -> Option<Self> {
+        Some(if path.contains(PRIMARY_KEY_SPLIT_SYMBOL) {
+            Self::Nested { name: path }
+        } else {
+            let field_id = fields.id(path)?;
+            Self::Flat { name: path, field_id }
+        })
+    }
+
+    pub fn name(&self) -> &str {
+        match self {
+            PrimaryKey::Flat { name, .. } => name,
+            PrimaryKey::Nested { name } => name,
+        }
+    }
+
+    pub fn document_id(
+        &self,
+        document: &obkv::KvReader<FieldId>,
+        fields: &impl FieldIdMapper,
+    ) -> Result<StdResult<String, DocumentIdExtractionError>> {
+        match self {
+            PrimaryKey::Flat { name: _, field_id } => match document.get(*field_id) {
+                Some(document_id_bytes) => {
+                    let document_id = serde_json::from_slice(document_id_bytes)
+                        .map_err(InternalError::SerdeJson)?;
+                    match validate_document_id_value(document_id)? {
+                        Ok(document_id) => Ok(Ok(document_id)),
+                        Err(user_error) => {
+                            Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
+                        }
+                    }
+                }
+                None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
+            },
+            nested @ PrimaryKey::Nested { .. } => {
+                let mut matching_documents_ids = Vec::new();
+                for (first_level_name, right) in nested.possible_level_names() {
+                    if let Some(field_id) = fields.id(first_level_name) {
+                        if let Some(value_bytes) = document.get(field_id) {
+                            let object = serde_json::from_slice(value_bytes)
+                                .map_err(InternalError::SerdeJson)?;
+                            fetch_matching_values(object, right, &mut matching_documents_ids);
+
+                            if matching_documents_ids.len() >= 2 {
+                                return Ok(Err(DocumentIdExtractionError::TooManyDocumentIds(
+                                    matching_documents_ids.len(),
+                                )));
+                            }
+                        }
+                    }
+                }
+
+                match matching_documents_ids.pop() {
+                    Some(document_id) => match validate_document_id_value(document_id)? {
+                        Ok(document_id) => Ok(Ok(document_id)),
+                        Err(user_error) => {
+                            Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
+                        }
+                    },
+                    None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
+                }
+            }
+        }
+    }
+
+    /// Returns an `Iterator` that gives all the possible field names the primary key
+    /// can have, depending on the first level name and the depth of the objects.
+    pub fn possible_level_names(&self) -> impl Iterator<Item = (&str, &str)> + '_ {
+        let name = self.name();
+        name.match_indices(PRIMARY_KEY_SPLIT_SYMBOL)
+            .map(move |(i, _)| (&name[..i], &name[i + PRIMARY_KEY_SPLIT_SYMBOL.len_utf8()..]))
+            .chain(iter::once((name, "")))
+    }
+}
+
+fn fetch_matching_values(value: Value, selector: &str, output: &mut Vec<Value>) {
+    match value {
+        Value::Object(object) => fetch_matching_values_in_object(object, selector, "", output),
+        otherwise => output.push(otherwise),
+    }
+}
+
+fn fetch_matching_values_in_object(
+    object: Object,
+    selector: &str,
+    base_key: &str,
+    output: &mut Vec<Value>,
+) {
+    for (key, value) in object {
+        let base_key = if base_key.is_empty() {
+            key.to_string()
+        } else {
+            format!("{}{}{}", base_key, PRIMARY_KEY_SPLIT_SYMBOL, key)
+        };
+
+        if starts_with(selector, &base_key) {
+            match value {
+                Value::Object(object) => {
+                    fetch_matching_values_in_object(object, selector, &base_key, output)
+                }
+                value => output.push(value),
+            }
+        }
+    }
+}
+
+fn starts_with(selector: &str, key: &str) -> bool {
+    selector.strip_prefix(key).map_or(false, |tail| {
+        tail.chars().next().map(|c| c == PRIMARY_KEY_SPLIT_SYMBOL).unwrap_or(true)
+    })
+}
+
+// FIXME: move to a DocumentId struct
+
+fn validate_document_id(document_id: &str) -> Option<&str> {
+    if !document_id.is_empty()
+        && document_id.chars().all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_'))
+    {
+        Some(document_id)
+    } else {
+        None
+    }
+}
+
+pub fn validate_document_id_value(document_id: Value) -> Result<StdResult<String, UserError>> {
+    match document_id {
+        Value::String(string) => match validate_document_id(&string) {
+            Some(s) if s.len() == string.len() => Ok(Ok(string)),
+            Some(s) => Ok(Ok(s.to_string())),
+            None => Ok(Err(UserError::InvalidDocumentId { document_id: Value::String(string) })),
+        },
+        Value::Number(number) if number.is_i64() => Ok(Ok(number.to_string())),
+        content => Ok(Err(UserError::InvalidDocumentId { document_id: content })),
+    }
+}
--- a/milli/src/error.rs
+++ b/milli/src/error.rs
@ -89,8 +89,6 @@ pub enum FieldIdMapMissingEntry {

 #[derive(Error, Debug)]
 pub enum UserError {
-    #[error("A soft deleted internal document id have been used: `{document_id}`.")]
-    AccessingSoftDeletedDocument { document_id: DocumentId },
    #[error("A document cannot contain more than 65,535 fields.")]
    AttributeLimitReached,
    #[error(transparent)]
--- a/milli/src/external_documents_ids.rs
+++ b/milli/src/external_documents_ids.rs
@ -1,159 +1,75 @@
-use std::borrow::Cow;
 use std::collections::HashMap;
-use std::convert::TryInto;
-use std::{fmt, str};

-use fst::map::IndexedValue;
-use fst::{IntoStreamer, Streamer};
-use roaring::RoaringBitmap;
+use heed::types::{OwnedType, Str};
+use heed::{Database, RoIter, RoTxn, RwTxn};

-const DELETED_ID: u64 = u64::MAX;
+use crate::{DocumentId, BEU32};

-pub struct ExternalDocumentsIds<'a> {
-    pub(crate) hard: fst::Map<Cow<'a, [u8]>>,
-    pub(crate) soft: fst::Map<Cow<'a, [u8]>>,
-    soft_deleted_docids: RoaringBitmap,
+pub enum DocumentOperationKind {
+    Create,
+    Delete,
 }

-impl<'a> ExternalDocumentsIds<'a> {
-    pub fn new(
-        hard: fst::Map<Cow<'a, [u8]>>,
-        soft: fst::Map<Cow<'a, [u8]>>,
-        soft_deleted_docids: RoaringBitmap,
-    ) -> ExternalDocumentsIds<'a> {
-        ExternalDocumentsIds { hard, soft, soft_deleted_docids }
-    }
+pub struct DocumentOperation {
+    pub external_id: String,
+    pub internal_id: DocumentId,
+    pub kind: DocumentOperationKind,
+}

-    pub fn into_static(self) -> ExternalDocumentsIds<'static> {
-        ExternalDocumentsIds {
-            hard: self.hard.map_data(|c| Cow::Owned(c.into_owned())).unwrap(),
-            soft: self.soft.map_data(|c| Cow::Owned(c.into_owned())).unwrap(),
-            soft_deleted_docids: self.soft_deleted_docids,
-        }
+pub struct ExternalDocumentsIds(Database<Str, OwnedType<BEU32>>);
+
+impl ExternalDocumentsIds {
+    pub fn new(db: Database<Str, OwnedType<BEU32>>) -> ExternalDocumentsIds {
+        ExternalDocumentsIds(db)
    }

    /// Returns `true` if hard and soft external documents lists are empty.
-    pub fn is_empty(&self) -> bool {
-        self.hard.is_empty() && self.soft.is_empty()
+    pub fn is_empty(&self, rtxn: &RoTxn) -> heed::Result<bool> {
+        self.0.is_empty(rtxn).map_err(Into::into)
    }

-    pub fn get<A: AsRef<[u8]>>(&self, external_id: A) -> Option<u32> {
-        let external_id = external_id.as_ref();
-        match self.soft.get(external_id).or_else(|| self.hard.get(external_id)) {
-            Some(id) if id != DELETED_ID && !self.soft_deleted_docids.contains(id as u32) => {
-                Some(id.try_into().unwrap())
-            }
-            _otherwise => None,
-        }
-    }
-
-    /// Rebuild the internal FSTs in the ExternalDocumentsIds structure such that they
-    /// don't contain any soft deleted document id.
-    pub fn delete_soft_deleted_documents_ids_from_fsts(&mut self) -> fst::Result<()> {
-        let mut new_hard_builder = fst::MapBuilder::memory();
-
-        let union_op = self.hard.op().add(&self.soft).r#union();
-        let mut iter = union_op.into_stream();
-        while let Some((external_id, docids)) = iter.next() {
-            // prefer selecting the ids from soft, always
-            let id = indexed_last_value(docids).unwrap();
-            if id != DELETED_ID && !self.soft_deleted_docids.contains(id as u32) {
-                new_hard_builder.insert(external_id, id)?;
-            }
-        }
-        drop(iter);
-
-        // Delete soft map completely
-        self.soft = fst::Map::default().map_data(Cow::Owned)?;
-        // We save the new map as the new hard map.
-        self.hard = new_hard_builder.into_map().map_data(Cow::Owned)?;
-
-        Ok(())
-    }
-
-    pub fn insert_ids<A: AsRef<[u8]>>(&mut self, other: &fst::Map<A>) -> fst::Result<()> {
-        let union_op = self.soft.op().add(other).r#union();
-
-        let mut new_soft_builder = fst::MapBuilder::memory();
-        let mut iter = union_op.into_stream();
-        while let Some((external_id, marked_docids)) = iter.next() {
-            let id = indexed_last_value(marked_docids).unwrap();
-            new_soft_builder.insert(external_id, id)?;
-        }
-
-        drop(iter);
-
-        // We save the new map as the new soft map.
-        self.soft = new_soft_builder.into_map().map_data(Cow::Owned)?;
-        self.merge_soft_into_hard()
+    pub fn get<A: AsRef<str>>(&self, rtxn: &RoTxn, external_id: A) -> heed::Result<Option<u32>> {
+        Ok(self.0.get(rtxn, external_id.as_ref())?.map(|x| x.get()))
    }

    /// An helper function to debug this type, returns an `HashMap` of both,
    /// soft and hard fst maps, combined.
-    pub fn to_hash_map(&self) -> HashMap<String, u32> {
-        let mut map = HashMap::new();
-
-        let union_op = self.hard.op().add(&self.soft).r#union();
-        let mut iter = union_op.into_stream();
-        while let Some((external_id, marked_docids)) = iter.next() {
-            let id = indexed_last_value(marked_docids).unwrap();
-            if id != DELETED_ID {
-                let external_id = str::from_utf8(external_id).unwrap();
-                map.insert(external_id.to_owned(), id.try_into().unwrap());
-            }
+    pub fn to_hash_map(&self, rtxn: &RoTxn) -> heed::Result<HashMap<String, u32>> {
+        let mut map = HashMap::default();
+        for result in self.0.iter(rtxn)? {
+            let (external, internal) = result?;
+            map.insert(external.to_owned(), internal.get());
        }
-
-        map
+        Ok(map)
    }

-    /// Return an fst of the combined hard and soft deleted ID.
-    pub fn to_fst<'b>(&'b self) -> fst::Result<Cow<'b, fst::Map<Cow<'a, [u8]>>>> {
-        if self.soft.is_empty() {
-            return Ok(Cow::Borrowed(&self.hard));
-        }
-        let union_op = self.hard.op().add(&self.soft).r#union();
-
-        let mut iter = union_op.into_stream();
-        let mut new_hard_builder = fst::MapBuilder::memory();
-        while let Some((external_id, marked_docids)) = iter.next() {
-            let value = indexed_last_value(marked_docids).unwrap();
-            if value != DELETED_ID {
-                new_hard_builder.insert(external_id, value)?;
+    /// Applies the list of operations passed as argument, modifying the current external to internal id mapping.
+    ///
+    /// If the list contains multiple operations on the same external id, then the result is unspecified.
+    ///
+    /// # Panics
+    ///
+    /// - If attempting to delete a document that doesn't exist
+    /// - NOTE(review): creating a document that already exists is not checked here (`put` is called unconditionally) — confirm whether a panic is intended
+    pub fn apply(&self, wtxn: &mut RwTxn, operations: Vec<DocumentOperation>) -> heed::Result<()> {
+        for DocumentOperation { external_id, internal_id, kind } in operations {
+            match kind {
+                DocumentOperationKind::Create => {
+                    self.0.put(wtxn, &external_id, &BEU32::new(internal_id))?;
+                }
+                DocumentOperationKind::Delete => {
+                    if !self.0.delete(wtxn, &external_id)? {
+                        panic!("Attempting to delete a non-existing document")
+                    }
+                }
            }
        }

-        drop(iter);
-
-        Ok(Cow::Owned(new_hard_builder.into_map().map_data(Cow::Owned)?))
-    }
-
-    fn merge_soft_into_hard(&mut self) -> fst::Result<()> {
-        if self.soft.len() >= self.hard.len() / 2 {
-            self.hard = self.to_fst()?.into_owned();
-            self.soft = fst::Map::default().map_data(Cow::Owned)?;
-        }
-
        Ok(())
    }
-}

-impl fmt::Debug for ExternalDocumentsIds<'_> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        f.debug_tuple("ExternalDocumentsIds").field(&self.to_hash_map()).finish()
+    /// Returns an iterator over all the external ids.
+    pub fn iter<'t>(&self, rtxn: &'t RoTxn) -> heed::Result<RoIter<'t, Str, OwnedType<BEU32>>> {
+        self.0.iter(rtxn)
    }
 }
-
-impl Default for ExternalDocumentsIds<'static> {
-    fn default() -> Self {
-        ExternalDocumentsIds {
-            hard: fst::Map::default().map_data(Cow::Owned).unwrap(),
-            soft: fst::Map::default().map_data(Cow::Owned).unwrap(),
-            soft_deleted_docids: RoaringBitmap::new(),
-        }
-    }
-}
-
-/// Returns the value of the `IndexedValue` with the highest _index_.
-fn indexed_last_value(indexed_values: &[IndexedValue]) -> Option<u64> {
-    indexed_values.iter().copied().max_by_key(|iv| iv.index).map(|iv| iv.value)
-}
--- a/milli/src/fields_ids_map.rs
+++ b/milli/src/fields_ids_map.rs
@ -81,6 +81,12 @@ impl Default for FieldsIdsMap {
    }
 }

+impl crate::documents::FieldIdMapper for FieldsIdsMap {
+    fn id(&self, name: &str) -> Option<FieldId> {
+        self.id(name)
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
--- a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs
@ -6,6 +6,7 @@ use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
 use roaring::RoaringBitmap;

 use crate::heed_codec::BytesDecodeOwned;
+use crate::update::del_add::{DelAdd, KvReaderDelAdd};

 /// This is the limit where using a byteorder became less size efficient
 /// than using a direct roaring encoding, it is also the point where we are able
@ -60,12 +61,16 @@ impl CboRoaringBitmapCodec {
    /// if the merged values length is under the threshold, values are directly
    /// serialized in the buffer else a RoaringBitmap is created from the
    /// values and is serialized in the buffer.
-    pub fn merge_into(slices: &[Cow<[u8]>], buffer: &mut Vec<u8>) -> io::Result<()> {
+    pub fn merge_into<I, A>(slices: I, buffer: &mut Vec<u8>) -> io::Result<()>
+    where
+        I: IntoIterator<Item = A>,
+        A: AsRef<[u8]>,
+    {
        let mut roaring = RoaringBitmap::new();
        let mut vec = Vec::new();

        for bytes in slices {
-            if bytes.len() <= THRESHOLD * size_of::<u32>() {
+            if bytes.as_ref().len() <= THRESHOLD * size_of::<u32>() {
                let mut reader = bytes.as_ref();
                while let Ok(integer) = reader.read_u32::<NativeEndian>() {
                    vec.push(integer);
@ -85,7 +90,7 @@ impl CboRoaringBitmapCodec {
                }
            } else {
                // We can unwrap safely because the vector is sorted upper.
-                let roaring = RoaringBitmap::from_sorted_iter(vec.into_iter()).unwrap();
+                let roaring = RoaringBitmap::from_sorted_iter(vec).unwrap();
                roaring.serialize_into(buffer)?;
            }
        } else {
@ -95,6 +100,33 @@ impl CboRoaringBitmapCodec {

        Ok(())
    }
+
+    /// Applies a DelAdd delta to a serialized CboRoaringBitmap; yields `None` if the result is empty.
+    pub fn merge_deladd_into<'a>(
+        deladd: KvReaderDelAdd<'_>,
+        previous: &[u8],
+        buffer: &'a mut Vec<u8>,
+    ) -> io::Result<Option<&'a [u8]>> {
+        // Deserialize the bitmap that is already there (the `previous` bytes)
+        let mut previous = Self::deserialize_from(previous)?;
+
+        // Remove the integers we no longer want from the previous bitmap
+        if let Some(value) = deladd.get(DelAdd::Deletion) {
+            previous -= Self::deserialize_from(value)?;
+        }
+
+        // Insert the new integers; done after the deletion, so an id present on both sides is kept
+        if let Some(value) = deladd.get(DelAdd::Addition) {
+            previous |= Self::deserialize_from(value)?;
+        }
+
+        if previous.is_empty() {
+            return Ok(None); // nothing left to store; `buffer` has not been written to
+        }
+
+        Self::serialize_into(&previous, buffer);
+        Ok(Some(&buffer[..])) // NOTE(review): returns the whole buffer — assumes the caller passed it empty; confirm
+    }
 }

 impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
--- a/milli/src/search/facet/facet_sort_ascending.rs
+++ b/milli/src/search/facet/facet_sort_ascending.rs
@ -13,7 +13,7 @@ use crate::heed_codec::ByteSliceRefCodec;
 /// The documents returned by the iterator are grouped by the facet values that
 /// determined their rank. For example, given the documents:
 ///
-/// ```ignore
+/// ```text
 /// 0: { "colour": ["blue", "green"] }
 /// 1: { "colour": ["blue", "red"] }
 /// 2: { "colour": ["orange", "red"] }
@ -22,7 +22,7 @@ use crate::heed_codec::ByteSliceRefCodec;
 /// ```
 /// Then calling the function on the candidates `[0, 2, 3, 4]` will return an iterator
 /// over the following elements:
-/// ```ignore
+/// ```text
 /// [0, 4]  // corresponds to all the documents within the candidates that have the facet value "blue"
 /// [3]     // same for "green"
 /// [2]     // same for "orange"
--- a/milli/src/search/facet/filter.rs
+++ b/milli/src/search/facet/filter.rs
@ -223,12 +223,9 @@ impl<'a> Filter<'a> {
 impl<'a> Filter<'a> {
    pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
        // to avoid doing this for each recursive call we're going to do it ONCE ahead of time
-        let soft_deleted_documents = index.soft_deleted_documents_ids(rtxn)?;
        let filterable_fields = index.filterable_fields(rtxn)?;

-        // and finally we delete all the soft_deleted_documents, again, only once at the very end
        self.inner_evaluate(rtxn, index, &filterable_fields)
-            .map(|result| result - soft_deleted_documents)
    }

    fn evaluate_operator(
--- a/milli/src/search/new/db_cache.rs
+++ b/milli/src/search/new/db_cache.rs
@ -12,7 +12,7 @@ use super::Word;
 use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
 use crate::update::{merge_cbo_roaring_bitmaps, MergeFn};
 use crate::{
-    CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, RoaringBitmapCodec, SearchContext,
+    CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
 };

 /// A cache storing pointers to values in the LMDB databases.
@ -25,7 +25,7 @@ pub struct DatabaseCache<'ctx> {
    pub word_pair_proximity_docids:
        FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
    pub word_prefix_pair_proximity_docids:
-        FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
+        FxHashMap<(u8, Interned<String>, Interned<String>), Option<RoaringBitmap>>,
    pub prefix_word_pair_proximity_docids:
        FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
    pub word_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
@ -168,7 +168,7 @@ impl<'ctx> SearchContext<'ctx> {
                    merge_cbo_roaring_bitmaps,
                )
            }
-            None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
+            None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
                self.txn,
                word,
                self.word_interner.get(word).as_str(),
@ -182,7 +182,7 @@ impl<'ctx> SearchContext<'ctx> {
        &mut self,
        word: Interned<String>,
    ) -> Result<Option<RoaringBitmap>> {
-        DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
+        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
            self.txn,
            word,
            self.word_interner.get(word).as_str(),
@ -230,7 +230,7 @@ impl<'ctx> SearchContext<'ctx> {
                    merge_cbo_roaring_bitmaps,
                )
            }
-            None => DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
+            None => DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
                self.txn,
                prefix,
                self.word_interner.get(prefix).as_str(),
@ -244,7 +244,7 @@ impl<'ctx> SearchContext<'ctx> {
        &mut self,
        prefix: Interned<String>,
    ) -> Result<Option<RoaringBitmap>> {
-        DatabaseCache::get_value::<_, _, RoaringBitmapCodec>(
+        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
            self.txn,
            prefix,
            self.word_interner.get(prefix).as_str(),
@ -297,35 +297,47 @@ impl<'ctx> SearchContext<'ctx> {
        prefix2: Interned<String>,
        proximity: u8,
    ) -> Result<Option<RoaringBitmap>> {
-        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
-            self.txn,
-            (proximity, word1, prefix2),
-            &(
-                proximity,
-                self.word_interner.get(word1).as_str(),
-                self.word_interner.get(prefix2).as_str(),
-            ),
-            &mut self.db_cache.word_prefix_pair_proximity_docids,
-            self.index.word_prefix_pair_proximity_docids.remap_data_type::<ByteSlice>(),
-        )
+        let docids = match self
+            .db_cache
+            .word_prefix_pair_proximity_docids
+            .entry((proximity, word1, prefix2))
+        {
+            Entry::Occupied(docids) => docids.get().clone(),
+            Entry::Vacant(entry) => {
+                // compute docids using prefix iter and store the result in the cache.
+                let key = U8StrStrCodec::bytes_encode(&(
+                    proximity,
+                    self.word_interner.get(word1).as_str(),
+                    self.word_interner.get(prefix2).as_str(),
+                ))
+                .unwrap()
+                .into_owned();
+                let mut prefix_docids = RoaringBitmap::new();
+                let remap_key_type = self
+                    .index
+                    .word_pair_proximity_docids
+                    .remap_key_type::<ByteSlice>()
+                    .prefix_iter(self.txn, &key)?;
+                for result in remap_key_type {
+                    let (_, docids) = result?;
+
+                    prefix_docids |= docids;
+                }
+                entry.insert(Some(prefix_docids.clone()));
+                Some(prefix_docids)
+            }
+        };
+        Ok(docids)
    }
+
    pub fn get_db_prefix_word_pair_proximity_docids(
        &mut self,
        left_prefix: Interned<String>,
        right: Interned<String>,
        proximity: u8,
    ) -> Result<Option<RoaringBitmap>> {
-        DatabaseCache::get_value::<_, _, CboRoaringBitmapCodec>(
-            self.txn,
-            (proximity, left_prefix, right),
-            &(
-                proximity,
-                self.word_interner.get(left_prefix).as_str(),
-                self.word_interner.get(right).as_str(),
-            ),
-            &mut self.db_cache.prefix_word_pair_proximity_docids,
-            self.index.prefix_word_pair_proximity_docids.remap_data_type::<ByteSlice>(),
-        )
+        // only accept exact matches on reverted positions
+        self.get_db_word_pair_proximity_docids(left_prefix, right, proximity)
    }

    pub fn get_db_word_fid_docids(
--- a/milli/src/search/new/tests/proximity.rs
+++ b/milli/src/search/new/tests/proximity.rs
@ -371,7 +371,7 @@ fn test_proximity_prefix_db() {
    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
    s.query("best s");
    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 6, 7, 8, 11, 15]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 9, 6, 7, 8, 11, 12, 13, 15]");
    insta::assert_snapshot!(format!("{document_scores:#?}"));
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);

@ -379,13 +379,13 @@ fn test_proximity_prefix_db() {
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"this is the best summer meal\"",
-        "\"summer best\"",
        "\"this is the best meal of summer\"",
-        "\"summer x best\"",
        "\"this is the best meal I have ever had in such a beautiful summer day\"",
        "\"this is the best cooked meal of the summer\"",
        "\"this is the best meal of the summer\"",
        "\"summer x y best\"",
+        "\"summer x best\"",
+        "\"summer best\"",
        "\"this is the best meal I have ever had in such a beautiful winter day\"",
    ]
    "###);
@ -423,17 +423,17 @@ fn test_proximity_prefix_db() {
    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
    s.query("best win");
    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[15, 16, 17, 18, 19, 20, 21, 22]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 18, 15, 16, 17, 20, 21, 22]");
    insta::assert_snapshot!(format!("{document_scores:#?}"));
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);

    insta::assert_debug_snapshot!(texts, @r###"
    [
+        "\"this is the best winter meal\"",
+        "\"this is the best meal of winter\"",
        "\"this is the best meal I have ever had in such a beautiful winter day\"",
        "\"this is the best cooked meal of the winter\"",
        "\"this is the best meal of the winter\"",
-        "\"this is the best meal of winter\"",
-        "\"this is the best winter meal\"",
        "\"winter x y best\"",
        "\"winter x best\"",
        "\"winter best\"",
@ -471,20 +471,20 @@ fn test_proximity_prefix_db() {
    s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
    s.query("best wi");
    let SearchResult { documents_ids, document_scores, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 15, 16, 17, 20]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 18, 15, 16, 17, 20, 21, 22]");
    insta::assert_snapshot!(format!("{document_scores:#?}"));
    let texts = collect_field_values(&index, &txn, "text", &documents_ids);

    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"this is the best winter meal\"",
-        "\"winter best\"",
        "\"this is the best meal of winter\"",
-        "\"winter x best\"",
        "\"this is the best meal I have ever had in such a beautiful winter day\"",
        "\"this is the best cooked meal of the winter\"",
        "\"this is the best meal of the winter\"",
        "\"winter x y best\"",
+        "\"winter x best\"",
+        "\"winter best\"",
    ]
    "###);
 }
--- a/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_prefix_db-14.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_prefix_db-14.snap
@ -11,14 +11,6 @@ expression: "format!(\"{document_scores:#?}\")"
            },
        ),
    ],
-    [
-        Proximity(
-            Rank {
-                rank: 3,
-                max_rank: 4,
-            },
-        ),
-    ],
    [
        Proximity(
            Rank {
@ -30,7 +22,15 @@ expression: "format!(\"{document_scores:#?}\")"
    [
        Proximity(
            Rank {
-                rank: 2,
+                rank: 1,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
                max_rank: 4,
            },
        ),
--- a/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_prefix_db-2.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_prefix_db-2.snap
@ -11,14 +11,6 @@ expression: "format!(\"{document_scores:#?}\")"
            },
        ),
    ],
-    [
-        Proximity(
-            Rank {
-                rank: 3,
-                max_rank: 4,
-            },
-        ),
-    ],
    [
        Proximity(
            Rank {
@ -30,7 +22,15 @@ expression: "format!(\"{document_scores:#?}\")"
    [
        Proximity(
            Rank {
-                rank: 2,
+                rank: 1,
+                max_rank: 4,
+            },
+        ),
+    ],
+    [
+        Proximity(
+            Rank {
+                rank: 1,
                max_rank: 4,
            },
        ),
--- a/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_prefix_db-8.snap
+++ b/milli/src/search/new/tests/snapshots/millisearchnewtestsproximity__proximity_prefix_db-8.snap
@ -6,7 +6,7 @@ expression: "format!(\"{document_scores:#?}\")"
    [
        Proximity(
            Rank {
-                rank: 1,
+                rank: 4,
                max_rank: 4,
            },
        ),
@ -14,7 +14,7 @@ expression: "format!(\"{document_scores:#?}\")"
    [
        Proximity(
            Rank {
-                rank: 1,
+                rank: 2,
                max_rank: 4,
            },
        ),
--- a/milli/src/search/new/tests/sort.rs
+++ b/milli/src/search/new/tests/sort.rs
@ -13,6 +13,7 @@ This module tests the `sort` ranking rule:

 use big_s::S;
 use maplit::hashset;
+use meili_snap::insta;

 use crate::index::tests::TempIndex;
 use crate::search::new::tests::collect_field_values;
--- a/milli/src/snapshot_tests.rs
+++ b/milli/src/snapshot_tests.rs
@ -4,9 +4,8 @@ use std::path::Path;

 use roaring::RoaringBitmap;

-use crate::facet::FacetType;
 use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
-use crate::{make_db_snap_from_iter, obkv_to_json, ExternalDocumentsIds, Index};
+use crate::{make_db_snap_from_iter, obkv_to_json, Index};

 #[track_caller]
 pub fn default_db_snapshot_settings_for_test(name: Option<&str>) -> (insta::Settings, String) {
@ -98,7 +97,6 @@ Create a snapshot test of the given database.
    - `facet_id_string_docids`
    - `documents_ids`
    - `stop_words`
-    - `soft_deleted_documents_ids`
    - `field_distribution`
    - `fields_ids_map`
    - `geo_faceted_documents_ids`
@ -221,22 +219,6 @@ pub fn snap_word_pair_proximity_docids(index: &Index) -> String {
        &format!("{proximity:<2} {word1:<16} {word2:<16} {}", display_bitmap(&b))
    })
 }
-pub fn snap_word_prefix_pair_proximity_docids(index: &Index) -> String {
-    make_db_snap_from_iter!(index, word_prefix_pair_proximity_docids, |(
-        (proximity, word1, prefix),
-        b,
-    )| {
-        &format!("{proximity:<2} {word1:<16} {prefix:<4} {}", display_bitmap(&b))
-    })
-}
-pub fn snap_prefix_word_pair_proximity_docids(index: &Index) -> String {
-    make_db_snap_from_iter!(index, prefix_word_pair_proximity_docids, |(
-        (proximity, prefix, word2),
-        b,
-    )| {
-        &format!("{proximity:<2} {prefix:<4} {word2:<16} {}", display_bitmap(&b))
-    })
-}
 pub fn snap_word_position_docids(index: &Index) -> String {
    make_db_snap_from_iter!(index, word_position_docids, |((word, position), b)| {
        &format!("{word:<16} {position:<6} {}", display_bitmap(&b))
@ -308,12 +290,6 @@ pub fn snap_stop_words(index: &Index) -> String {
    let snap = format!("{stop_words:?}");
    snap
 }
-pub fn snap_soft_deleted_documents_ids(index: &Index) -> String {
-    let rtxn = index.read_txn().unwrap();
-    let soft_deleted_documents_ids = index.soft_deleted_documents_ids(&rtxn).unwrap();
-
-    display_bitmap(&soft_deleted_documents_ids)
-}
 pub fn snap_field_distributions(index: &Index) -> String {
    let rtxn = index.read_txn().unwrap();
    let mut snap = String::new();
@ -340,50 +316,21 @@ pub fn snap_geo_faceted_documents_ids(index: &Index) -> String {
 }
 pub fn snap_external_documents_ids(index: &Index) -> String {
    let rtxn = index.read_txn().unwrap();
-    let ExternalDocumentsIds { soft, hard, .. } = index.external_documents_ids(&rtxn).unwrap();
+    let external_ids = index.external_documents_ids().to_hash_map(&rtxn).unwrap();
+    // ensure fixed order (not guaranteed by hashmap)
+    let mut external_ids: Vec<(String, u32)> = external_ids.into_iter().collect();
+    external_ids.sort_by(|(l, _), (r, _)| l.cmp(r));

    let mut snap = String::new();

-    writeln!(&mut snap, "soft:").unwrap();
-    let stream_soft = soft.stream();
-    let soft_external_ids = stream_soft.into_str_vec().unwrap();
-    for (key, id) in soft_external_ids {
-        writeln!(&mut snap, "{key:<24} {id}").unwrap();
-    }
-    writeln!(&mut snap, "hard:").unwrap();
-    let stream_hard = hard.stream();
-    let hard_external_ids = stream_hard.into_str_vec().unwrap();
-    for (key, id) in hard_external_ids {
+    writeln!(&mut snap, "docids:").unwrap();
+    for (key, id) in external_ids {
        writeln!(&mut snap, "{key:<24} {id}").unwrap();
    }

    snap
 }
-pub fn snap_number_faceted_documents_ids(index: &Index) -> String {
-    let rtxn = index.read_txn().unwrap();
-    let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
-    let mut snap = String::new();
-    for field_id in fields_ids_map.ids() {
-        let number_faceted_documents_ids =
-            index.faceted_documents_ids(&rtxn, field_id, FacetType::Number).unwrap();
-        writeln!(&mut snap, "{field_id:<3} {}", display_bitmap(&number_faceted_documents_ids))
-            .unwrap();
-    }
-    snap
-}
-pub fn snap_string_faceted_documents_ids(index: &Index) -> String {
-    let rtxn = index.read_txn().unwrap();
-    let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();

-    let mut snap = String::new();
-    for field_id in fields_ids_map.ids() {
-        let string_faceted_documents_ids =
-            index.faceted_documents_ids(&rtxn, field_id, FacetType::String).unwrap();
-        writeln!(&mut snap, "{field_id:<3} {}", display_bitmap(&string_faceted_documents_ids))
-            .unwrap();
-    }
-    snap
-}
 pub fn snap_words_fst(index: &Index) -> String {
    let rtxn = index.read_txn().unwrap();
    let words_fst = index.words_fst(&rtxn).unwrap();
@ -516,9 +463,6 @@ macro_rules! full_snap_of_db {
    ($index:ident, stop_words) => {{
        $crate::snapshot_tests::snap_stop_words(&$index)
    }};
-    ($index:ident, soft_deleted_documents_ids) => {{
-        $crate::snapshot_tests::snap_soft_deleted_documents_ids(&$index)
-    }};
    ($index:ident, field_distribution) => {{
        $crate::snapshot_tests::snap_field_distributions(&$index)
    }};
@ -531,12 +475,6 @@ macro_rules! full_snap_of_db {
    ($index:ident, external_documents_ids) => {{
        $crate::snapshot_tests::snap_external_documents_ids(&$index)
    }};
-    ($index:ident, number_faceted_documents_ids) => {{
-        $crate::snapshot_tests::snap_number_faceted_documents_ids(&$index)
-    }};
-    ($index:ident, string_faceted_documents_ids) => {{
-        $crate::snapshot_tests::snap_string_faceted_documents_ids(&$index)
-    }};
    ($index:ident, words_fst) => {{
        $crate::snapshot_tests::snap_words_fst(&$index)
    }};
--- a/milli/src/update/available_documents_ids.rs
+++ b/milli/src/update/available_documents_ids.rs
@ -8,16 +8,11 @@ pub struct AvailableDocumentsIds {
 }

 impl AvailableDocumentsIds {
-    pub fn from_documents_ids(
-        docids: &RoaringBitmap,
-        soft_deleted_docids: &RoaringBitmap,
-    ) -> AvailableDocumentsIds {
-        let used_docids = docids | soft_deleted_docids;
-
-        match used_docids.max() {
+    pub fn from_documents_ids(docids: &RoaringBitmap) -> AvailableDocumentsIds {
+        match docids.max() {
            Some(last_id) => {
                let mut available = RoaringBitmap::from_iter(0..last_id);
-                available -= used_docids;
+                available -= docids;

                let iter = match last_id.checked_add(1) {
                    Some(id) => id..=u32::max_value(),
@ -50,7 +45,7 @@ mod tests {
    #[test]
    fn empty() {
        let base = RoaringBitmap::new();
-        let left = AvailableDocumentsIds::from_documents_ids(&base, &RoaringBitmap::new());
+        let left = AvailableDocumentsIds::from_documents_ids(&base);
        let right = 0..=u32::max_value();
        left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
    }
@ -63,28 +58,8 @@ mod tests {
        base.insert(100);
        base.insert(405);

-        let left = AvailableDocumentsIds::from_documents_ids(&base, &RoaringBitmap::new());
+        let left = AvailableDocumentsIds::from_documents_ids(&base);
        let right = (0..=u32::max_value()).filter(|&n| n != 0 && n != 10 && n != 100 && n != 405);
        left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
    }
-
-    #[test]
-    fn soft_deleted() {
-        let mut base = RoaringBitmap::new();
-        base.insert(0);
-        base.insert(10);
-        base.insert(100);
-        base.insert(405);
-
-        let mut soft_deleted = RoaringBitmap::new();
-        soft_deleted.insert(1);
-        soft_deleted.insert(11);
-        soft_deleted.insert(101);
-        soft_deleted.insert(406);
-
-        let left = AvailableDocumentsIds::from_documents_ids(&base, &soft_deleted);
-        let right =
-            (0..=u32::max_value()).filter(|&n| ![0, 1, 10, 11, 100, 101, 405, 406].contains(&n));
-        left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
-    }
 }
--- a/milli/src/update/clear_documents.rs
+++ b/milli/src/update/clear_documents.rs
@ -1,8 +1,7 @@
 use roaring::RoaringBitmap;
 use time::OffsetDateTime;

-use crate::facet::FacetType;
-use crate::{ExternalDocumentsIds, FieldDistribution, Index, Result};
+use crate::{FieldDistribution, Index, Result};

 pub struct ClearDocuments<'t, 'u, 'i> {
    wtxn: &'t mut heed::RwTxn<'i, 'u>,
@ -21,13 +20,12 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
        let Index {
            env: _env,
            main: _main,
+            external_documents_ids,
            word_docids,
            exact_word_docids,
            word_prefix_docids,
            exact_word_prefix_docids,
            word_pair_proximity_docids,
-            word_prefix_pair_proximity_docids,
-            prefix_word_pair_proximity_docids,
            word_position_docids,
            word_fid_docids,
            field_id_word_count_docids,
@ -51,43 +49,23 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {

        // We retrieve the number of documents ids that we are deleting.
        let number_of_documents = self.index.number_of_documents(self.wtxn)?;
-        let faceted_fields = self.index.faceted_fields_ids(self.wtxn)?;

        // We clean some of the main engine datastructures.
        self.index.put_words_fst(self.wtxn, &fst::Set::default())?;
        self.index.put_words_prefixes_fst(self.wtxn, &fst::Set::default())?;
-        self.index.put_external_documents_ids(self.wtxn, &ExternalDocumentsIds::default())?;
        self.index.put_documents_ids(self.wtxn, &empty_roaring)?;
-        self.index.put_soft_deleted_documents_ids(self.wtxn, &empty_roaring)?;
        self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
        self.index.delete_geo_rtree(self.wtxn)?;
        self.index.delete_geo_faceted_documents_ids(self.wtxn)?;
        self.index.delete_vector_hnsw(self.wtxn)?;

-        // We clean all the faceted documents ids.
-        for field_id in faceted_fields {
-            self.index.put_faceted_documents_ids(
-                self.wtxn,
-                field_id,
-                FacetType::Number,
-                &empty_roaring,
-            )?;
-            self.index.put_faceted_documents_ids(
-                self.wtxn,
-                field_id,
-                FacetType::String,
-                &empty_roaring,
-            )?;
-        }
-
        // Clear the other databases.
+        external_documents_ids.clear(self.wtxn)?;
        word_docids.clear(self.wtxn)?;
        exact_word_docids.clear(self.wtxn)?;
        word_prefix_docids.clear(self.wtxn)?;
        exact_word_prefix_docids.clear(self.wtxn)?;
        word_pair_proximity_docids.clear(self.wtxn)?;
-        word_prefix_pair_proximity_docids.clear(self.wtxn)?;
-        prefix_word_pair_proximity_docids.clear(self.wtxn)?;
        word_position_docids.clear(self.wtxn)?;
        word_fid_docids.clear(self.wtxn)?;
        field_id_word_count_docids.clear(self.wtxn)?;
@ -140,7 +118,7 @@ mod tests {

        assert!(index.words_fst(&rtxn).unwrap().is_empty());
        assert!(index.words_prefixes_fst(&rtxn).unwrap().is_empty());
-        assert!(index.external_documents_ids(&rtxn).unwrap().is_empty());
+        assert!(index.external_documents_ids().is_empty(&rtxn).unwrap());
        assert!(index.documents_ids(&rtxn).unwrap().is_empty());
        assert!(index.field_distribution(&rtxn).unwrap().is_empty());
        assert!(index.geo_rtree(&rtxn).unwrap().is_none());
@ -150,7 +128,6 @@ mod tests {
        assert!(index.word_prefix_docids.is_empty(&rtxn).unwrap());
        assert!(index.word_pair_proximity_docids.is_empty(&rtxn).unwrap());
        assert!(index.field_id_word_count_docids.is_empty(&rtxn).unwrap());
-        assert!(index.word_prefix_pair_proximity_docids.is_empty(&rtxn).unwrap());
        assert!(index.facet_id_f64_docids.is_empty(&rtxn).unwrap());
        assert!(index.facet_id_string_docids.is_empty(&rtxn).unwrap());
        assert!(index.field_id_docid_facet_f64s.is_empty(&rtxn).unwrap());
--- a/milli/src/update/del_add.rs
+++ b/milli/src/update/del_add.rs
@ -0,0 +1,125 @@
+use obkv::Key;
+
+pub type KvWriterDelAdd<W> = obkv::KvWriter<W, DelAdd>;
+pub type KvReaderDelAdd<'a> = obkv::KvReader<'a, DelAdd>;
+
+/// DelAdd tags the two sides of a change: the old value to delete from the database and the new value to add to it.
+///
+/// It's used as the key type of an OBKV that is serialized in grenad files.
+#[repr(u8)]
+#[derive(Clone, Copy, PartialOrd, PartialEq, Debug)]
+pub enum DelAdd {
+    Deletion = 0,
+    Addition = 1,
+}
+
+impl Key for DelAdd {
+    const BYTES_SIZE: usize = std::mem::size_of::<DelAdd>(); // one byte, since the enum is `#[repr(u8)]`
+    type BYTES = [u8; Self::BYTES_SIZE];
+
+    fn to_be_bytes(&self) -> Self::BYTES {
+        u8::to_be_bytes(*self as u8) // the discriminant itself: 0 = Deletion, 1 = Addition
+    }
+
+    fn from_be_bytes(array: Self::BYTES) -> Self {
+        match u8::from_be_bytes(array) { // inverse of `to_be_bytes`; any byte other than 0 or 1 panics
+            0 => Self::Deletion,
+            1 => Self::Addition,
+            otherwise => unreachable!("DelAdd has only 2 variants, unknown variant: {}", otherwise),
+        }
+    }
+}
+
+/// Creates a Kv<K, Kv<DelAdd, value>> from Kv<K, value>, writing the result into `buffer`.
+///
+/// Deletion: put all the values under DelAdd::Deletion,
+/// Addition: put all the values under DelAdd::Addition,
+/// DeletionAndAddition: put all the values under both DelAdd::Deletion and DelAdd::Addition.
+pub fn into_del_add_obkv<K: obkv::Key + PartialOrd>(
+    reader: obkv::KvReader<K>,
+    operation: DelAddOperation,
+    buffer: &mut Vec<u8>,
+) -> Result<(), std::io::Error> {
+    let mut writer = obkv::KvWriter::new(buffer);
+    let mut value_buffer = Vec::new();
+    for (key, value) in reader.iter() {
+        value_buffer.clear(); // the same scratch buffer holds each entry's inner DelAdd obkv in turn
+        let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
+        if matches!(operation, DelAddOperation::Deletion | DelAddOperation::DeletionAndAddition) {
+            value_writer.insert(DelAdd::Deletion, value)?;
+        }
+        if matches!(operation, DelAddOperation::Addition | DelAddOperation::DeletionAndAddition) {
+            value_writer.insert(DelAdd::Addition, value)?;
+        }
+        value_writer.finish()?;
+        writer.insert(key, &value_buffer)?; // the original key now maps to the nested DelAdd obkv
+    }
+
+    writer.finish()
+}
+
+/// Enum controlling the side(s) of the DelAdd obkv in which the provided value will be written.
+#[derive(Debug, Clone, Copy)]
+pub enum DelAddOperation {
+    Deletion, // write the value under DelAdd::Deletion only
+    Addition, // write the value under DelAdd::Addition only
+    DeletionAndAddition, // write the same value under both sides
+}
+
+/// Creates a Kv<K, Kv<DelAdd, value>> from two Kv<K, value>, writing the result into `buffer`.
+///
+/// Each key found only in the deletion obkv gets its value under DelAdd::Deletion,
+/// each key found only in the addition obkv gets its value under DelAdd::Addition,
+/// and a key found in both gets the two values side by side.
+///
+/// Any I/O error raised while writing either obkv is returned to the caller
+/// instead of panicking.
+pub fn del_add_from_two_obkvs<K: obkv::Key + PartialOrd + Ord>(
+    deletion: obkv::KvReader<K>,
+    addition: obkv::KvReader<K>,
+    buffer: &mut Vec<u8>,
+) -> Result<(), std::io::Error> {
+    use itertools::merge_join_by;
+    use itertools::EitherOrBoth::{Both, Left, Right};
+
+    let mut writer = obkv::KvWriter::new(buffer);
+    let mut value_buffer = Vec::new();
+
+    // NOTE(review): `merge_join_by` only pairs equal keys when both iterators are
+    // sorted by key; obkv readers presumably iterate in key order — confirm.
+    for eob in merge_join_by(deletion.iter(), addition.iter(), |(b, _), (u, _)| b.cmp(u)) {
+        // The scratch buffer is reused for the inner DelAdd obkv of every key.
+        value_buffer.clear();
+        match eob {
+            Left((k, v)) => {
+                let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
+                value_writer.insert(DelAdd::Deletion, v)?;
+                writer.insert(k, value_writer.into_inner()?)?;
+            }
+            Right((k, v)) => {
+                let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
+                value_writer.insert(DelAdd::Addition, v)?;
+                writer.insert(k, value_writer.into_inner()?)?;
+            }
+            Both((k, deletion), (_, addition)) => {
+                let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
+                // Deletion (0) must be inserted before Addition (1).
+                value_writer.insert(DelAdd::Deletion, deletion)?;
+                value_writer.insert(DelAdd::Addition, addition)?;
+                writer.insert(k, value_writer.into_inner()?)?;
+            }
+        }
+    }
+
+    writer.finish()
+}
+
+pub fn is_noop_del_add_obkv(del_add: KvReaderDelAdd) -> bool {
+    del_add.get(DelAdd::Deletion) == del_add.get(DelAdd::Addition) // true when both sides are equal, including both absent
+}
+
+/// A function that extracts and returns the Add side of a DelAdd obkv.
+/// This is useful when there is no previous value in the database and
+/// therefore we don't need to do a diff with what's already there.
+///
+/// If there is no Add side we currently return an empty slice,
+/// which is a valid CboRoaringBitmap.
+#[allow(clippy::ptr_arg)] // required to avoid signature mismatch
+pub fn deladd_serialize_add_side<'a>(
+    obkv: &'a [u8],
+    _buffer: &mut Vec<u8>, // unused: the returned slice borrows from `obkv` directly
+) -> crate::Result<&'a [u8]> {
+    Ok(KvReaderDelAdd::new(obkv).get(DelAdd::Addition).unwrap_or_default())
+}
--- a/milli/src/update/delete_documents.rs
+++ b/milli/src/update/delete_documents.rs
--- a/milli/src/update/facet/bulk.rs
+++ b/milli/src/update/facet/bulk.rs
@ -1,10 +1,9 @@
-use std::borrow::Cow;
 use std::fs::File;
 use std::io::BufReader;

 use grenad::CompressionType;
 use heed::types::ByteSlice;
-use heed::{BytesEncode, Error, RoTxn, RwTxn};
+use heed::{BytesDecode, BytesEncode, Error, RoTxn, RwTxn};
 use roaring::RoaringBitmap;

 use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
@ -13,17 +12,15 @@ use crate::heed_codec::facet::{
    FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
 };
 use crate::heed_codec::ByteSliceRefCodec;
+use crate::update::del_add::{DelAdd, KvReaderDelAdd};
 use crate::update::index_documents::{create_writer, valid_lmdb_key, writer_into_reader};
-use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
+use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldId, Index, Result};

 /// Algorithm to insert elememts into the `facet_id_(string/f64)_docids` databases
 /// by rebuilding the database "from scratch".
 ///
 /// First, the new elements are inserted into the level 0 of the database. Then, the
 /// higher levels are cleared and recomputed from the content of level 0.
-///
-/// Finally, the `faceted_documents_ids` value in the main database of `Index`
-/// is updated to contain the new set of faceted documents.
 pub struct FacetsUpdateBulk<'i> {
    index: &'i Index,
    group_size: u8,
@ -31,7 +28,7 @@ pub struct FacetsUpdateBulk<'i> {
    facet_type: FacetType,
    field_ids: Vec<FieldId>,
    // None if level 0 does not need to be updated
-    new_data: Option<grenad::Reader<BufReader<File>>>,
+    delta_data: Option<grenad::Reader<BufReader<File>>>,
 }

 impl<'i> FacetsUpdateBulk<'i> {
@ -39,7 +36,7 @@ impl<'i> FacetsUpdateBulk<'i> {
        index: &'i Index,
        field_ids: Vec<FieldId>,
        facet_type: FacetType,
-        new_data: grenad::Reader<BufReader<File>>,
+        delta_data: grenad::Reader<BufReader<File>>,
        group_size: u8,
        min_level_size: u8,
    ) -> FacetsUpdateBulk<'i> {
@ -49,7 +46,7 @@ impl<'i> FacetsUpdateBulk<'i> {
            group_size,
            min_level_size,
            facet_type,
-            new_data: Some(new_data),
+            delta_data: Some(delta_data),
        }
    }

@ -64,13 +61,13 @@ impl<'i> FacetsUpdateBulk<'i> {
            group_size: FACET_GROUP_SIZE,
            min_level_size: FACET_MIN_LEVEL_SIZE,
            facet_type,
-            new_data: None,
+            delta_data: None,
        }
    }

    #[logging_timer::time("FacetsUpdateBulk::{}")]
    pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> {
-        let Self { index, field_ids, group_size, min_level_size, facet_type, new_data } = self;
+        let Self { index, field_ids, group_size, min_level_size, facet_type, delta_data } = self;

        let db = match facet_type {
            FacetType::String => index
@ -81,12 +78,9 @@ impl<'i> FacetsUpdateBulk<'i> {
            }
        };

-        let inner = FacetsUpdateBulkInner { db, new_data, group_size, min_level_size };
+        let inner = FacetsUpdateBulkInner { db, delta_data, group_size, min_level_size };

-        inner.update(wtxn, &field_ids, |wtxn, field_id, all_docids| {
-            index.put_faceted_documents_ids(wtxn, field_id, facet_type, &all_docids)?;
-            Ok(())
-        })?;
+        inner.update(wtxn, &field_ids)?;

        Ok(())
    }
@ -95,26 +89,19 @@ impl<'i> FacetsUpdateBulk<'i> {
 /// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type
 pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
    pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
-    pub new_data: Option<grenad::Reader<R>>,
+    pub delta_data: Option<grenad::Reader<R>>,
    pub group_size: u8,
    pub min_level_size: u8,
 }
 impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
-    pub fn update(
-        mut self,
-        wtxn: &mut RwTxn,
-        field_ids: &[u16],
-        mut handle_all_docids: impl FnMut(&mut RwTxn, FieldId, RoaringBitmap) -> Result<()>,
-    ) -> Result<()> {
+    pub fn update(mut self, wtxn: &mut RwTxn, field_ids: &[u16]) -> Result<()> {
        self.update_level0(wtxn)?;
        for &field_id in field_ids.iter() {
            self.clear_levels(wtxn, field_id)?;
        }

        for &field_id in field_ids.iter() {
-            let (level_readers, all_docids) = self.compute_levels_for_field_id(field_id, wtxn)?;
-
-            handle_all_docids(wtxn, field_id, all_docids)?;
+            let level_readers = self.compute_levels_for_field_id(field_id, wtxn)?;

            for level_reader in level_readers {
                let mut cursor = level_reader.into_cursor()?;
@ -133,19 +120,27 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
        self.db.delete_range(wtxn, &range).map(drop)?;
        Ok(())
    }
+
    fn update_level0(&mut self, wtxn: &mut RwTxn) -> Result<()> {
-        let new_data = match self.new_data.take() {
+        let delta_data = match self.delta_data.take() {
            Some(x) => x,
            None => return Ok(()),
        };
        if self.db.is_empty(wtxn)? {
            let mut buffer = Vec::new();
            let mut database = self.db.iter_mut(wtxn)?.remap_types::<ByteSlice, ByteSlice>();
-            let mut cursor = new_data.into_cursor()?;
+            let mut cursor = delta_data.into_cursor()?;
            while let Some((key, value)) = cursor.move_on_next()? {
                if !valid_lmdb_key(key) {
                    continue;
                }
+                let value = KvReaderDelAdd::new(value);
+
+                // DB is empty, it is safe to ignore Del operations
+                let Some(value) = value.get(DelAdd::Addition) else {
+                    continue;
+                };
+
                buffer.clear();
                // the group size for level 0
                buffer.push(1);
@ -157,11 +152,14 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
            let mut buffer = Vec::new();
            let database = self.db.remap_types::<ByteSlice, ByteSlice>();

-            let mut cursor = new_data.into_cursor()?;
+            let mut cursor = delta_data.into_cursor()?;
            while let Some((key, value)) = cursor.move_on_next()? {
                if !valid_lmdb_key(key) {
                    continue;
                }
+
+                let value = KvReaderDelAdd::new(value);
+
                // the value is a CboRoaringBitmap, but I still need to prepend the
                // group size for level 0 (= 1) to it
                buffer.clear();
@ -169,17 +167,27 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
                // then we extend the buffer with the docids bitmap
                match database.get(wtxn, key)? {
                    Some(prev_value) => {
+                        // prev_value is the group size for level 0, followed by the previous bitmap.
                        let old_bitmap = &prev_value[1..];
-                        CboRoaringBitmapCodec::merge_into(
-                            &[Cow::Borrowed(value), Cow::Borrowed(old_bitmap)],
-                            &mut buffer,
-                        )?;
+                        CboRoaringBitmapCodec::merge_deladd_into(value, old_bitmap, &mut buffer)?;
                    }
                    None => {
+                        // it is safe to ignore the del in that case.
+                        let Some(value) = value.get(DelAdd::Addition) else {
+                            // won't put the key in DB as the value would be empty
+                            continue;
+                        };
+
                        buffer.extend_from_slice(value);
                    }
                };
-                database.put(wtxn, key, &buffer)?;
+                let new_bitmap = &buffer[1..];
+                // if the new bitmap is empty, let's remove it
+                if CboRoaringBitmapLenCodec::bytes_decode(new_bitmap).unwrap_or_default() == 0 {
+                    database.delete(wtxn, key)?;
+                } else {
+                    database.put(wtxn, key, &buffer)?;
+                }
            }
        }
        Ok(())
@ -188,16 +196,10 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
        &self,
        field_id: FieldId,
        txn: &RoTxn,
-    ) -> Result<(Vec<grenad::Reader<BufReader<File>>>, RoaringBitmap)> {
-        let mut all_docids = RoaringBitmap::new();
-        let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |bitmaps, _| {
-            for bitmap in bitmaps {
-                all_docids |= bitmap;
-            }
-            Ok(())
-        })?;
+    ) -> Result<Vec<grenad::Reader<BufReader<File>>>> {
+        let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |_, _| Ok(()))?;

-        Ok((subwriters, all_docids))
+        Ok(subwriters)
    }
    #[allow(clippy::type_complexity)]
    fn read_level_0<'t>(
@ -491,7 +493,6 @@ mod tests {
        index.add_documents(documents).unwrap();

        db_snap!(index, facet_id_f64_docids, "initial", @"c34f499261f3510d862fa0283bbe843a");
-        db_snap!(index, number_faceted_documents_ids, "initial", @"01594fecbb316798ce3651d6730a4521");
    }

    #[test]
--- a/milli/src/update/facet/delete.rs
+++ b/milli/src/update/facet/delete.rs
@ -1,360 +0,0 @@
-use std::collections::{HashMap, HashSet};
-
-use heed::RwTxn;
-use log::debug;
-use roaring::RoaringBitmap;
-use time::OffsetDateTime;
-
-use super::{FACET_GROUP_SIZE, FACET_MAX_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
-use crate::facet::FacetType;
-use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
-use crate::heed_codec::ByteSliceRefCodec;
-use crate::update::{FacetsUpdateBulk, FacetsUpdateIncrementalInner};
-use crate::{FieldId, Index, Result};
-
-/// A builder used to remove elements from the `facet_id_string_docids` or `facet_id_f64_docids` databases.
-///
-/// Depending on the number of removed elements and the existing size of the database, we use either
-/// a bulk delete method or an incremental delete method.
-pub struct FacetsDelete<'i, 'b> {
-    index: &'i Index,
-    database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
-    facet_type: FacetType,
-    affected_facet_values: HashMap<FieldId, HashSet<Vec<u8>>>,
-    docids_to_delete: &'b RoaringBitmap,
-    group_size: u8,
-    max_group_size: u8,
-    min_level_size: u8,
-}
-impl<'i, 'b> FacetsDelete<'i, 'b> {
-    pub fn new(
-        index: &'i Index,
-        facet_type: FacetType,
-        affected_facet_values: HashMap<FieldId, HashSet<Vec<u8>>>,
-        docids_to_delete: &'b RoaringBitmap,
-    ) -> Self {
-        let database = match facet_type {
-            FacetType::String => index
-                .facet_id_string_docids
-                .remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
-            FacetType::Number => {
-                index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
-            }
-        };
-        Self {
-            index,
-            database,
-            facet_type,
-            affected_facet_values,
-            docids_to_delete,
-            group_size: FACET_GROUP_SIZE,
-            max_group_size: FACET_MAX_GROUP_SIZE,
-            min_level_size: FACET_MIN_LEVEL_SIZE,
-        }
-    }
-
-    pub fn execute(self, wtxn: &mut RwTxn) -> Result<()> {
-        debug!("Computing and writing the facet values levels docids into LMDB on disk...");
-        self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
-
-        for (field_id, affected_facet_values) in self.affected_facet_values {
-            // This is an incorrect condition, since we assume that the length of the database is equal
-            // to the number of facet values for the given field_id. It means that in some cases, we might
-            // wrongly choose the incremental indexer over the bulk indexer. But the only case where that could
-            // really be a performance problem is when we fully delete a large ratio of all facet values for
-            // each field id. This would almost never happen. Still, to be overly cautious, I have added a
-            // 2x penalty to the incremental indexer. That is, instead of assuming a 70x worst-case performance
-            // penalty to the incremental indexer, we assume a 150x worst-case performance penalty instead.
-            if affected_facet_values.len() >= (self.database.len(wtxn)? / 150) {
-                // Bulk delete
-                let mut modified = false;
-
-                for facet_value in affected_facet_values {
-                    let key =
-                        FacetGroupKey { field_id, level: 0, left_bound: facet_value.as_slice() };
-                    let mut old = self.database.get(wtxn, &key)?.unwrap();
-                    let previous_len = old.bitmap.len();
-                    old.bitmap -= self.docids_to_delete;
-                    if old.bitmap.is_empty() {
-                        modified = true;
-                        self.database.delete(wtxn, &key)?;
-                    } else if old.bitmap.len() != previous_len {
-                        modified = true;
-                        self.database.put(wtxn, &key, &old)?;
-                    }
-                }
-                if modified {
-                    let builder = FacetsUpdateBulk::new_not_updating_level_0(
-                        self.index,
-                        vec![field_id],
-                        self.facet_type,
-                    );
-                    builder.execute(wtxn)?;
-                }
-            } else {
-                // Incremental
-                let inc = FacetsUpdateIncrementalInner {
-                    db: self.database,
-                    group_size: self.group_size,
-                    min_level_size: self.min_level_size,
-                    max_group_size: self.max_group_size,
-                };
-                for facet_value in affected_facet_values {
-                    inc.delete(wtxn, field_id, facet_value.as_slice(), self.docids_to_delete)?;
-                }
-            }
-        }
-        Ok(())
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use std::iter::FromIterator;
-
-    use big_s::S;
-    use maplit::hashset;
-    use rand::seq::SliceRandom;
-    use rand::SeedableRng;
-    use roaring::RoaringBitmap;
-
-    use crate::db_snap;
-    use crate::documents::documents_batch_reader_from_objects;
-    use crate::index::tests::TempIndex;
-    use crate::update::facet::test_helpers::ordered_string;
-    use crate::update::{DeleteDocuments, DeletionStrategy};
-
-    #[test]
-    fn delete_mixed_incremental_and_bulk() {
-        // The point of this test is to create an index populated with documents
-        // containing different filterable attributes. Then, we delete a bunch of documents
-        // such that a mix of the incremental and bulk indexer is used (depending on the field id)
-        let index = TempIndex::new_with_map_size(4096 * 1000 * 100);
-
-        index
-            .update_settings(|settings| {
-                settings.set_filterable_fields(
-                    hashset! { S("id"), S("label"), S("timestamp"), S("colour") },
-                );
-            })
-            .unwrap();
-
-        let mut documents = vec![];
-        for i in 0..1000 {
-            documents.push(
-                serde_json::json! {
-                    {
-                        "id": i,
-                        "label": i / 10,
-                        "colour": i / 100,
-                        "timestamp": i / 2,
-                    }
-                }
-                .as_object()
-                .unwrap()
-                .clone(),
-            );
-        }
-
-        let documents = documents_batch_reader_from_objects(documents);
-        index.add_documents(documents).unwrap();
-
-        db_snap!(index, facet_id_f64_docids, 1, @"550cd138d6fe31ccdd42cd5392fbd576");
-        db_snap!(index, number_faceted_documents_ids, 1, @"9a0ea88e7c9dcf6dc0ef0b601736ffcf");
-
-        let mut wtxn = index.env.write_txn().unwrap();
-
-        let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-        builder.strategy(DeletionStrategy::AlwaysHard);
-        builder.delete_documents(&RoaringBitmap::from_iter(0..100));
-        // by deleting the first 100 documents, we expect that:
-        // - the "id" part of the DB will be updated in bulk, since #affected_facet_value = 100 which is > database_len / 150 (= 13)
-        // - the "label" part will be updated incrementally, since #affected_facet_value = 10 which is < 13
-        // - the "colour" part will also be updated incrementally, since #affected_values = 1 which is < 13
-        // - the "timestamp" part will be updated in bulk, since #affected_values = 50 which is > 13
-        // This has to be verified manually by inserting breakpoint/adding print statements to the code when running the test
-        builder.execute().unwrap();
-        wtxn.commit().unwrap();
-
-        db_snap!(index, soft_deleted_documents_ids, @"[]");
-        db_snap!(index, facet_id_f64_docids, 2, @"d4d5f14e7f1e1f09b86821a0b6defcc6");
-        db_snap!(index, number_faceted_documents_ids, 2, @"3570e0ac0fdb21be9ebe433f59264b56");
-    }
-
-    // Same test as above but working with string values for the facets
-    #[test]
-    fn delete_mixed_incremental_and_bulk_string() {
-        // The point of this test is to create an index populated with documents
-        // containing different filterable attributes. Then, we delete a bunch of documents
-        // such that a mix of the incremental and bulk indexer is used (depending on the field id)
-        let index = TempIndex::new_with_map_size(4096 * 1000 * 100);
-
-        index
-            .update_settings(|settings| {
-                settings.set_filterable_fields(
-                    hashset! { S("id"), S("label"), S("timestamp"), S("colour") },
-                );
-            })
-            .unwrap();
-
-        let mut documents = vec![];
-        for i in 0..1000 {
-            documents.push(
-                serde_json::json! {
-                    {
-                        "id": i,
-                        "label": ordered_string(i / 10),
-                        "colour": ordered_string(i / 100),
-                        "timestamp": ordered_string(i / 2),
-                    }
-                }
-                .as_object()
-                .unwrap()
-                .clone(),
-            );
-        }
-
-        let documents = documents_batch_reader_from_objects(documents);
-        index.add_documents(documents).unwrap();
-
-        // Note that empty strings are not stored in the facet db due to commit 4860fd452965 (comment written on 29 Nov 2022)
-        db_snap!(index, facet_id_string_docids, 1, @"5fd1bd0724c65a6dc1aafb6db93c7503");
-        db_snap!(index, string_faceted_documents_ids, 1, @"54bc15494fa81d93339f43c08fd9d8f5");
-
-        let mut wtxn = index.env.write_txn().unwrap();
-
-        let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-        builder.strategy(DeletionStrategy::AlwaysHard);
-        builder.delete_documents(&RoaringBitmap::from_iter(0..100));
-        // by deleting the first 100 documents, we expect that:
-        // - the "id" part of the DB will be updated in bulk, since #affected_facet_value = 100 which is > database_len / 150 (= 13)
-        // - the "label" part will be updated incrementally, since #affected_facet_value = 10 which is < 13
-        // - the "colour" part will also be updated incrementally, since #affected_values = 1 which is < 13
-        // - the "timestamp" part will be updated in bulk, since #affected_values = 50 which is > 13
-        // This has to be verified manually by inserting breakpoint/adding print statements to the code when running the test
-        builder.execute().unwrap();
-        wtxn.commit().unwrap();
-
-        db_snap!(index, soft_deleted_documents_ids, @"[]");
-        db_snap!(index, facet_id_string_docids, 2, @"7f9c00b29e04d58c1821202a5dda0ebc");
-        db_snap!(index, string_faceted_documents_ids, 2, @"504152afa5c94fd4e515dcdfa4c7161f");
-    }
-
-    #[test]
-    fn delete_almost_all_incrementally_string() {
-        let index = TempIndex::new_with_map_size(4096 * 1000 * 100);
-
-        index
-            .update_settings(|settings| {
-                settings.set_filterable_fields(
-                    hashset! { S("id"), S("label"), S("timestamp"), S("colour") },
-                );
-            })
-            .unwrap();
-
-        let mut documents = vec![];
-        for i in 0..1000 {
-            documents.push(
-                serde_json::json! {
-                    {
-                        "id": i,
-                        "label": ordered_string(i / 10),
-                        "colour": ordered_string(i / 100),
-                        "timestamp": ordered_string(i / 2),
-                    }
-                }
-                .as_object()
-                .unwrap()
-                .clone(),
-            );
-        }
-
-        let documents = documents_batch_reader_from_objects(documents);
-        index.add_documents(documents).unwrap();
-
-        // Note that empty strings are not stored in the facet db due to commit 4860fd452965 (comment written on 29 Nov 2022)
-        db_snap!(index, facet_id_string_docids, 1, @"5fd1bd0724c65a6dc1aafb6db93c7503");
-        db_snap!(index, string_faceted_documents_ids, 1, @"54bc15494fa81d93339f43c08fd9d8f5");
-
-        let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
-
-        let mut docids_to_delete = (0..1000).collect::<Vec<_>>();
-        docids_to_delete.shuffle(&mut rng);
-        for docid in docids_to_delete.into_iter().take(990) {
-            let mut wtxn = index.env.write_txn().unwrap();
-            let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-            builder.strategy(DeletionStrategy::AlwaysHard);
-            builder.delete_documents(&RoaringBitmap::from_iter([docid]));
-            builder.execute().unwrap();
-            wtxn.commit().unwrap();
-        }
-
-        db_snap!(index, soft_deleted_documents_ids, @"[]");
-        db_snap!(index, facet_id_string_docids, 2, @"ece56086e76d50e661fb2b58475b9f7d");
-        db_snap!(index, string_faceted_documents_ids, 2, @r###"
-        0   []
-        1   [11, 20, 73, 292, 324, 358, 381, 493, 839, 852, ]
-        2   [292, 324, 358, 381, 493, 839, 852, ]
-        3   [11, 20, 73, 292, 324, 358, 381, 493, 839, 852, ]
-        "###);
-    }
-}
-
-#[allow(unused)]
-#[cfg(test)]
-mod comparison_bench {
-    use std::iter::once;
-
-    use rand::Rng;
-    use roaring::RoaringBitmap;
-
-    use crate::heed_codec::facet::OrderedF64Codec;
-    use crate::update::facet::test_helpers::FacetIndex;
-
-    // This is a simple test to get an intuition on the relative speed
-    // of the incremental vs. bulk indexer.
-    //
-    // The benchmark shows the worst-case scenario for the incremental indexer, since
-    // each facet value contains only one document ID.
-    //
-    // In that scenario, it appears that the incremental indexer is about 70 times slower than the
-    // bulk indexer.
-    // #[test]
-    fn benchmark_facet_indexing_delete() {
-        let mut r = rand::thread_rng();
-
-        for i in 1..=20 {
-            let size = 50_000 * i;
-            let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
-
-            let mut txn = index.env.write_txn().unwrap();
-            let mut elements = Vec::<((u16, f64), RoaringBitmap)>::new();
-            for i in 0..size {
-                // field id = 0, left_bound = i, docids = [i]
-                elements.push(((0, i as f64), once(i).collect()));
-            }
-            let timer = std::time::Instant::now();
-            index.bulk_insert(&mut txn, &[0], elements.iter());
-            let time_spent = timer.elapsed().as_millis();
-            println!("bulk {size} : {time_spent}ms");
-
-            txn.commit().unwrap();
-
-            for nbr_doc in [1, 100, 1000, 10_000] {
-                let mut txn = index.env.write_txn().unwrap();
-                let timer = std::time::Instant::now();
-                //
-                // delete one document
-                //
-                for _ in 0..nbr_doc {
-                    let deleted_u32 = r.gen::<u32>() % size;
-                    let deleted_f64 = deleted_u32 as f64;
-                    index.delete_single_docid(&mut txn, 0, &deleted_f64, deleted_u32)
-                }
-                let time_spent = timer.elapsed().as_millis();
-                println!("    delete {nbr_doc} : {time_spent}ms");
-                txn.abort().unwrap();
-            }
-        }
-    }
-}
--- a/milli/src/update/facet/incremental.rs
+++ b/milli/src/update/facet/incremental.rs
@ -1,9 +1,9 @@
-use std::collections::HashMap;
 use std::fs::File;
 use std::io::BufReader;

 use heed::types::{ByteSlice, DecodeIgnore};
 use heed::{BytesDecode, Error, RoTxn, RwTxn};
+use obkv::KvReader;
 use roaring::RoaringBitmap;

 use crate::facet::FacetType;
@ -12,8 +12,9 @@ use crate::heed_codec::facet::{
 };
 use crate::heed_codec::ByteSliceRefCodec;
 use crate::search::facet::get_highest_level;
+use crate::update::del_add::DelAdd;
 use crate::update::index_documents::valid_lmdb_key;
-use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
+use crate::{CboRoaringBitmapCodec, Index, Result};

 enum InsertionResult {
    InPlace,
@ -28,27 +29,21 @@ enum DeletionResult {

 /// Algorithm to incrementally insert and delete elememts into the
 /// `facet_id_(string/f64)_docids` databases.
-///
-/// Rhe `faceted_documents_ids` value in the main database of `Index`
-/// is also updated to contain the new set of faceted documents.
-pub struct FacetsUpdateIncremental<'i> {
-    index: &'i Index,
+pub struct FacetsUpdateIncremental {
    inner: FacetsUpdateIncrementalInner,
-    facet_type: FacetType,
-    new_data: grenad::Reader<BufReader<File>>,
+    delta_data: grenad::Reader<BufReader<File>>,
 }

-impl<'i> FacetsUpdateIncremental<'i> {
+impl FacetsUpdateIncremental {
    pub fn new(
-        index: &'i Index,
+        index: &Index,
        facet_type: FacetType,
-        new_data: grenad::Reader<BufReader<File>>,
+        delta_data: grenad::Reader<BufReader<File>>,
        group_size: u8,
        min_level_size: u8,
        max_group_size: u8,
    ) -> Self {
        FacetsUpdateIncremental {
-            index,
            inner: FacetsUpdateIncrementalInner {
                db: match facet_type {
                    FacetType::String => index
@ -62,31 +57,41 @@ impl<'i> FacetsUpdateIncremental<'i> {
                max_group_size,
                min_level_size,
            },
-            facet_type,
-            new_data,
+            delta_data,
        }
    }

-    pub fn execute(self, wtxn: &'i mut RwTxn) -> crate::Result<()> {
-        let mut new_faceted_docids = HashMap::<FieldId, RoaringBitmap>::default();
-
-        let mut cursor = self.new_data.into_cursor()?;
+    pub fn execute(self, wtxn: &mut RwTxn) -> crate::Result<()> {
+        let mut cursor = self.delta_data.into_cursor()?;
        while let Some((key, value)) = cursor.move_on_next()? {
            if !valid_lmdb_key(key) {
                continue;
            }
            let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key)
                .ok_or(heed::Error::Encoding)?;
-            let docids = CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
-            self.inner.insert(wtxn, key.field_id, key.left_bound, &docids)?;
-            *new_faceted_docids.entry(key.field_id).or_default() |= docids;
+            let value = KvReader::new(value);
+
+            let docids_to_delete = value
+                .get(DelAdd::Deletion)
+                .map(CboRoaringBitmapCodec::bytes_decode)
+                .map(|o| o.ok_or(heed::Error::Encoding));
+
+            let docids_to_add = value
+                .get(DelAdd::Addition)
+                .map(CboRoaringBitmapCodec::bytes_decode)
+                .map(|o| o.ok_or(heed::Error::Encoding));
+
+            if let Some(docids_to_delete) = docids_to_delete {
+                let docids_to_delete = docids_to_delete?;
+                self.inner.delete(wtxn, key.field_id, key.left_bound, &docids_to_delete)?;
+            }
+
+            if let Some(docids_to_add) = docids_to_add {
+                let docids_to_add = docids_to_add?;
+                self.inner.insert(wtxn, key.field_id, key.left_bound, &docids_to_add)?;
+            }
        }

-        for (field_id, new_docids) in new_faceted_docids {
-            let mut docids = self.index.faceted_documents_ids(wtxn, field_id, self.facet_type)?;
-            docids |= new_docids;
-            self.index.put_faceted_documents_ids(wtxn, field_id, self.facet_type, &docids)?;
-        }
        Ok(())
    }
 }
--- a/milli/src/update/facet/mod.rs
+++ b/milli/src/update/facet/mod.rs
@ -14,7 +14,7 @@ The databases must be able to return results for queries such as:
 The algorithms that implement these queries are found in the `src/search/facet` folder.

 To make these queries fast to compute, the database adopts a tree structure:
-```ignore
+```text
            ┌───────────────────────────────┬───────────────────────────────┬───────────────┐
 ┌───────┐   │           "ab" (2)            │           "gaf" (2)           │   "woz" (1)   │
 │Level 2│   │                               │                               │               │
@ -41,7 +41,7 @@ These documents all contain a facet value that is contained within `ab .. gaf`.
 In the database, each node is represented by a key/value pair encoded as a [`FacetGroupKey`] and a
 [`FacetGroupValue`], which have the following format:

-```ignore
+```text
 FacetGroupKey:
 - field id  : u16
 - level     : u8
@ -98,7 +98,6 @@ use crate::update::merge_btreeset_string;
 use crate::{BEU16StrCodec, Index, Result, BEU16, MAX_FACET_VALUE_LENGTH};

 pub mod bulk;
-pub mod delete;
 pub mod incremental;

 /// A builder used to add new elements to the `facet_id_string_docids` or `facet_id_f64_docids` databases.
@ -109,7 +108,7 @@ pub struct FacetsUpdate<'i> {
    index: &'i Index,
    database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
    facet_type: FacetType,
-    new_data: grenad::Reader<BufReader<File>>,
+    delta_data: grenad::Reader<BufReader<File>>,
    group_size: u8,
    max_group_size: u8,
    min_level_size: u8,
@ -118,7 +117,7 @@ impl<'i> FacetsUpdate<'i> {
    pub fn new(
        index: &'i Index,
        facet_type: FacetType,
-        new_data: grenad::Reader<BufReader<File>>,
+        delta_data: grenad::Reader<BufReader<File>>,
    ) -> Self {
        let database = match facet_type {
            FacetType::String => index
@ -135,26 +134,26 @@ impl<'i> FacetsUpdate<'i> {
            max_group_size: FACET_MAX_GROUP_SIZE,
            min_level_size: FACET_MIN_LEVEL_SIZE,
            facet_type,
-            new_data,
+            delta_data,
        }
    }

    pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> {
-        if self.new_data.is_empty() {
+        if self.delta_data.is_empty() {
            return Ok(());
        }
        debug!("Computing and writing the facet values levels docids into LMDB on disk...");
        self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;

        // See self::comparison_bench::benchmark_facet_indexing
-        if self.new_data.len() >= (self.database.len(wtxn)? as u64 / 50) {
+        if self.delta_data.len() >= (self.database.len(wtxn)? as u64 / 50) {
            let field_ids =
                self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>();
            let bulk_update = FacetsUpdateBulk::new(
                self.index,
                field_ids,
                self.facet_type,
-                self.new_data,
+                self.delta_data,
                self.group_size,
                self.min_level_size,
            );
@ -163,7 +162,7 @@ impl<'i> FacetsUpdate<'i> {
            let incremental_update = FacetsUpdateIncremental::new(
                self.index,
                self.facet_type,
-                self.new_data,
+                self.delta_data,
                self.group_size,
                self.min_level_size,
                self.max_group_size,
@ -279,6 +278,7 @@ pub(crate) mod test_helpers {
    use crate::heed_codec::ByteSliceRefCodec;
    use crate::search::facet::get_highest_level;
    use crate::snapshot_tests::display_bitmap;
+    use crate::update::del_add::{DelAdd, KvWriterDelAdd};
    use crate::update::FacetsUpdateIncrementalInner;
    use crate::CboRoaringBitmapCodec;

@ -455,20 +455,22 @@ pub(crate) mod test_helpers {
                let key: FacetGroupKey<&[u8]> =
                    FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes };
                let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key).unwrap();
+                let mut inner_writer = KvWriterDelAdd::memory();
                let value = CboRoaringBitmapCodec::bytes_encode(docids).unwrap();
-                writer.insert(&key, &value).unwrap();
+                inner_writer.insert(DelAdd::Addition, value).unwrap();
+                writer.insert(&key, inner_writer.into_inner().unwrap()).unwrap();
            }
            writer.finish().unwrap();
            let reader = grenad::Reader::new(std::io::Cursor::new(new_data)).unwrap();

            let update = FacetsUpdateBulkInner {
                db: self.content,
-                new_data: Some(reader),
+                delta_data: Some(reader),
                group_size: self.group_size.get(),
                min_level_size: self.min_level_size.get(),
            };

-            update.update(wtxn, field_ids, |_, _, _| Ok(())).unwrap();
+            update.update(wtxn, field_ids).unwrap();
        }

        pub fn verify_structure_validity(&self, txn: &RoTxn, field_id: u16) {
@ -556,101 +558,6 @@ pub(crate) mod test_helpers {
    }
 }

-#[cfg(test)]
-mod tests {
-    use big_s::S;
-    use maplit::hashset;
-
-    use crate::db_snap;
-    use crate::documents::documents_batch_reader_from_objects;
-    use crate::index::tests::TempIndex;
-    use crate::update::DeletionStrategy;
-
-    #[test]
-    fn replace_all_identical_soft_deletion_then_hard_deletion() {
-        let mut index = TempIndex::new_with_map_size(4096 * 1000 * 100);
-
-        index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
-
-        index
-            .update_settings(|settings| {
-                settings.set_primary_key("id".to_owned());
-                settings.set_filterable_fields(hashset! { S("size") });
-            })
-            .unwrap();
-
-        let mut documents = vec![];
-        for i in 0..1000 {
-            documents.push(
-                serde_json::json! {
-                    {
-                        "id": i,
-                        "size": i % 250,
-                    }
-                }
-                .as_object()
-                .unwrap()
-                .clone(),
-            );
-        }
-
-        let documents = documents_batch_reader_from_objects(documents);
-        index.add_documents(documents).unwrap();
-
-        db_snap!(index, facet_id_f64_docids, "initial", @"777e0e221d778764b472c512617eeb3b");
-        db_snap!(index, number_faceted_documents_ids, "initial", @"bd916ef32b05fd5c3c4c518708f431a9");
-        db_snap!(index, soft_deleted_documents_ids, "initial", @"[]");
-
-        let mut documents = vec![];
-        for i in 0..999 {
-            documents.push(
-                serde_json::json! {
-                    {
-                        "id": i,
-                        "size": i % 250,
-                        "other": 0,
-                    }
-                }
-                .as_object()
-                .unwrap()
-                .clone(),
-            );
-        }
-
-        let documents = documents_batch_reader_from_objects(documents);
-        index.add_documents(documents).unwrap();
-
-        db_snap!(index, facet_id_f64_docids, "replaced_1_soft", @"abba175d7bed727d0efadaef85a4388f");
-        db_snap!(index, number_faceted_documents_ids, "replaced_1_soft", @"de76488bd05ad94c6452d725acf1bd06");
-        db_snap!(index, soft_deleted_documents_ids, "replaced_1_soft", @"6c975deb900f286d2f6456d2d5c3a123");
-
-        // Then replace the last document while disabling soft_deletion
-        index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard;
-        let mut documents = vec![];
-        for i in 999..1000 {
-            documents.push(
-                serde_json::json! {
-                    {
-                        "id": i,
-                        "size": i % 250,
-                        "other": 0,
-                    }
-                }
-                .as_object()
-                .unwrap()
-                .clone(),
-            );
-        }
-
-        let documents = documents_batch_reader_from_objects(documents);
-        index.add_documents(documents).unwrap();
-
-        db_snap!(index, facet_id_f64_docids, "replaced_2_hard", @"029e27a46d09c574ae949aa4289b45e6");
-        db_snap!(index, number_faceted_documents_ids, "replaced_2_hard", @"60b19824f136affe6b240a7200779028");
-        db_snap!(index, soft_deleted_documents_ids, "replaced_2_hard", @"[]");
-    }
-}
-
 #[allow(unused)]
 #[cfg(test)]
 mod comparison_bench {
--- a/milli/src/update/index_documents/enrich.rs
+++ b/milli/src/update/index_documents/enrich.rs
@ -1,20 +1,17 @@
+use std::fmt;
 use std::io::{BufWriter, Read, Seek};
 use std::result::Result as StdResult;
-use std::{fmt, iter};

 use serde::{Deserialize, Serialize};
 use serde_json::Value;

-use crate::documents::{DocumentsBatchIndex, DocumentsBatchReader, EnrichedDocumentsBatchReader};
+use crate::documents::{
+    DocumentIdExtractionError, DocumentsBatchIndex, DocumentsBatchReader,
+    EnrichedDocumentsBatchReader, PrimaryKey, DEFAULT_PRIMARY_KEY,
+};
 use crate::error::{GeoError, InternalError, UserError};
 use crate::update::index_documents::{obkv_to_object, writer_into_reader};
-use crate::{FieldId, Index, Object, Result};
-
-/// The symbol used to define levels in a nested primary key.
-const PRIMARY_KEY_SPLIT_SYMBOL: char = '.';
-
-/// The default primary that is used when not specified.
-const DEFAULT_PRIMARY_KEY: &str = "id";
+use crate::{FieldId, Index, Result};

 /// This function validates and enriches the documents by checking that:
 ///  - we can infer a primary key,
@ -41,14 +38,12 @@ pub fn enrich_documents_batch<R: Read + Seek>(
    // The primary key *field id* that has already been set for this index or the one
    // we will guess by searching for the first key that contains "id" as a substring.
    let primary_key = match index.primary_key(rtxn)? {
-        Some(primary_key) if primary_key.contains(PRIMARY_KEY_SPLIT_SYMBOL) => {
-            PrimaryKey::nested(primary_key)
-        }
-        Some(primary_key) => match documents_batch_index.id(primary_key) {
-            Some(id) => PrimaryKey::flat(primary_key, id),
-            None if autogenerate_docids => {
-                PrimaryKey::flat(primary_key, documents_batch_index.insert(primary_key))
-            }
+        Some(primary_key) => match PrimaryKey::new(primary_key, &documents_batch_index) {
+            Some(primary_key) => primary_key,
+            None if autogenerate_docids => PrimaryKey::Flat {
+                name: primary_key,
+                field_id: documents_batch_index.insert(primary_key),
+            },
            None => {
                return match cursor.next_document()? {
                    Some(first_document) => Ok(Err(UserError::MissingDocumentId {
@ -76,14 +71,14 @@ pub fn enrich_documents_batch<R: Read + Seek>(
            });

            match guesses.as_slice() {
-                [] if autogenerate_docids => PrimaryKey::flat(
-                    DEFAULT_PRIMARY_KEY,
-                    documents_batch_index.insert(DEFAULT_PRIMARY_KEY),
-                ),
+                [] if autogenerate_docids => PrimaryKey::Flat {
+                    name: DEFAULT_PRIMARY_KEY,
+                    field_id: documents_batch_index.insert(DEFAULT_PRIMARY_KEY),
+                },
                [] => return Ok(Err(UserError::NoPrimaryKeyCandidateFound)),
                [(field_id, name)] => {
                    log::info!("Primary key was not specified in index. Inferred to '{name}'");
-                    PrimaryKey::flat(name, *field_id)
+                    PrimaryKey::Flat { name, field_id: *field_id }
                }
                multiple => {
                    return Ok(Err(UserError::MultiplePrimaryKeyCandidatesFound {
@ -156,92 +151,24 @@ fn fetch_or_generate_document_id(
    uuid_buffer: &mut [u8; uuid::fmt::Hyphenated::LENGTH],
    count: u32,
 ) -> Result<StdResult<DocumentId, UserError>> {
-    match primary_key {
-        PrimaryKey::Flat { name: primary_key, field_id: primary_key_id } => {
-            match document.get(primary_key_id) {
-                Some(document_id_bytes) => {
-                    let document_id = serde_json::from_slice(document_id_bytes)
-                        .map_err(InternalError::SerdeJson)?;
-                    match validate_document_id_value(document_id)? {
-                        Ok(document_id) => Ok(Ok(DocumentId::retrieved(document_id))),
-                        Err(user_error) => Ok(Err(user_error)),
-                    }
-                }
-                None if autogenerate_docids => {
-                    let uuid = uuid::Uuid::new_v4().as_hyphenated().encode_lower(uuid_buffer);
-                    Ok(Ok(DocumentId::generated(uuid.to_string(), count)))
-                }
-                None => Ok(Err(UserError::MissingDocumentId {
-                    primary_key: primary_key.to_string(),
-                    document: obkv_to_object(document, documents_batch_index)?,
-                })),
-            }
+    Ok(match primary_key.document_id(document, documents_batch_index)? {
+        Ok(document_id) => Ok(DocumentId::Retrieved { value: document_id }),
+        Err(DocumentIdExtractionError::InvalidDocumentId(user_error)) => Err(user_error),
+        Err(DocumentIdExtractionError::MissingDocumentId) if autogenerate_docids => {
+            let uuid = uuid::Uuid::new_v4().as_hyphenated().encode_lower(uuid_buffer);
+            Ok(DocumentId::Generated { value: uuid.to_string(), document_nth: count })
        }
-        nested @ PrimaryKey::Nested { .. } => {
-            let mut matching_documents_ids = Vec::new();
-            for (first_level_name, right) in nested.possible_level_names() {
-                if let Some(field_id) = documents_batch_index.id(first_level_name) {
-                    if let Some(value_bytes) = document.get(field_id) {
-                        let object = serde_json::from_slice(value_bytes)
-                            .map_err(InternalError::SerdeJson)?;
-                        fetch_matching_values(object, right, &mut matching_documents_ids);
-
-                        if matching_documents_ids.len() >= 2 {
-                            return Ok(Err(UserError::TooManyDocumentIds {
-                                primary_key: nested.name().to_string(),
-                                document: obkv_to_object(document, documents_batch_index)?,
-                            }));
-                        }
-                    }
-                }
-            }
-
-            match matching_documents_ids.pop() {
-                Some(document_id) => match validate_document_id_value(document_id)? {
-                    Ok(document_id) => Ok(Ok(DocumentId::retrieved(document_id))),
-                    Err(user_error) => Ok(Err(user_error)),
-                },
-                None => Ok(Err(UserError::MissingDocumentId {
-                    primary_key: nested.name().to_string(),
-                    document: obkv_to_object(document, documents_batch_index)?,
-                })),
-            }
+        Err(DocumentIdExtractionError::MissingDocumentId) => Err(UserError::MissingDocumentId {
+            primary_key: primary_key.name().to_string(),
+            document: obkv_to_object(document, documents_batch_index)?,
+        }),
+        Err(DocumentIdExtractionError::TooManyDocumentIds(_)) => {
+            Err(UserError::TooManyDocumentIds {
+                primary_key: primary_key.name().to_string(),
+                document: obkv_to_object(document, documents_batch_index)?,
+            })
        }
-    }
-}
-
-/// A type that represent the type of primary key that has been set
-/// for this index, a classic flat one or a nested one.
-#[derive(Debug, Clone, Copy)]
-enum PrimaryKey<'a> {
-    Flat { name: &'a str, field_id: FieldId },
-    Nested { name: &'a str },
-}
-
-impl PrimaryKey<'_> {
-    fn flat(name: &str, field_id: FieldId) -> PrimaryKey {
-        PrimaryKey::Flat { name, field_id }
-    }
-
-    fn nested(name: &str) -> PrimaryKey {
-        PrimaryKey::Nested { name }
-    }
-
-    fn name(&self) -> &str {
-        match self {
-            PrimaryKey::Flat { name, .. } => name,
-            PrimaryKey::Nested { name } => name,
-        }
-    }
-
-    /// Returns an `Iterator` that gives all the possible fields names the primary key
-    /// can have depending of the first level name and deepnes of the objects.
-    fn possible_level_names(&self) -> impl Iterator<Item = (&str, &str)> + '_ {
-        let name = self.name();
-        name.match_indices(PRIMARY_KEY_SPLIT_SYMBOL)
-            .map(move |(i, _)| (&name[..i], &name[i + PRIMARY_KEY_SPLIT_SYMBOL.len_utf8()..]))
-            .chain(iter::once((name, "")))
-    }
+    })
 }

 /// A type that represents a document id that has been retrieved from a document or auto-generated.
@ -255,14 +182,6 @@ pub enum DocumentId {
 }

 impl DocumentId {
-    fn retrieved(value: String) -> DocumentId {
-        DocumentId::Retrieved { value }
-    }
-
-    fn generated(value: String, document_nth: u32) -> DocumentId {
-        DocumentId::Generated { value, document_nth }
-    }
-
    fn debug(&self) -> String {
        format!("{:?}", self)
    }
@ -290,66 +209,6 @@ impl fmt::Debug for DocumentId {
    }
 }

-fn starts_with(selector: &str, key: &str) -> bool {
-    selector.strip_prefix(key).map_or(false, |tail| {
-        tail.chars().next().map(|c| c == PRIMARY_KEY_SPLIT_SYMBOL).unwrap_or(true)
-    })
-}
-
-pub fn fetch_matching_values(value: Value, selector: &str, output: &mut Vec<Value>) {
-    match value {
-        Value::Object(object) => fetch_matching_values_in_object(object, selector, "", output),
-        otherwise => output.push(otherwise),
-    }
-}
-
-pub fn fetch_matching_values_in_object(
-    object: Object,
-    selector: &str,
-    base_key: &str,
-    output: &mut Vec<Value>,
-) {
-    for (key, value) in object {
-        let base_key = if base_key.is_empty() {
-            key.to_string()
-        } else {
-            format!("{}{}{}", base_key, PRIMARY_KEY_SPLIT_SYMBOL, key)
-        };
-
-        if starts_with(selector, &base_key) {
-            match value {
-                Value::Object(object) => {
-                    fetch_matching_values_in_object(object, selector, &base_key, output)
-                }
-                value => output.push(value),
-            }
-        }
-    }
-}
-
-pub fn validate_document_id(document_id: &str) -> Option<&str> {
-    if !document_id.is_empty()
-        && document_id.chars().all(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '-' | '_'))
-    {
-        Some(document_id)
-    } else {
-        None
-    }
-}
-
-/// Parses a Json encoded document id and validate it, returning a user error when it is one.
-pub fn validate_document_id_value(document_id: Value) -> Result<StdResult<String, UserError>> {
-    match document_id {
-        Value::String(string) => match validate_document_id(&string) {
-            Some(s) if s.len() == string.len() => Ok(Ok(string)),
-            Some(s) => Ok(Ok(s.to_string())),
-            None => Ok(Err(UserError::InvalidDocumentId { document_id: Value::String(string) })),
-        },
-        Value::Number(number) if number.is_i64() => Ok(Ok(number.to_string())),
-        content => Ok(Err(UserError::InvalidDocumentId { document_id: content })),
-    }
-}
-
 /// Try to extract an `f64` from a JSON `Value` and return the `Value`
 /// in the `Err` variant if it failed.
 pub fn extract_finite_float_from_value(value: Value) -> StdResult<f64, Value> {
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@ -5,18 +5,16 @@ use std::io::BufReader;
 use std::{io, mem, str};

 use charabia::{Language, Script, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
-use obkv::KvReader;
+use obkv::{KvReader, KvWriterU16};
 use roaring::RoaringBitmap;
 use serde_json::Value;

-use super::helpers::{concat_u32s_array, create_sorter, sorter_into_reader, GrenadParameters};
+use super::helpers::{create_sorter, keep_latest_obkv, sorter_into_reader, GrenadParameters};
 use crate::error::{InternalError, SerializationError};
-use crate::update::index_documents::MergeFn;
-use crate::{
-    absolute_from_relative_position, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH,
-};
+use crate::update::del_add::{del_add_from_two_obkvs, DelAdd, KvReaderDelAdd};
+use crate::{FieldId, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH};

-pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), RoaringBitmap>;
+pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>;

 /// Extracts the word and positions where this word appears and
 /// prefixes it by the document id.
@ -32,25 +30,162 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
    allowed_separators: Option<&[&str]>,
    dictionary: Option<&[&str]>,
    max_positions_per_attributes: Option<u32>,
-) -> Result<(RoaringBitmap, grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
+) -> Result<(grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
    puffin::profile_function!();

    let max_positions_per_attributes = max_positions_per_attributes
        .map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
    let max_memory = indexer.max_memory_by_thread();

+    // initialize destination values.
    let mut documents_ids = RoaringBitmap::new();
    let mut script_language_docids = HashMap::new();
    let mut docid_word_positions_sorter = create_sorter(
        grenad::SortAlgorithm::Stable,
-        concat_u32s_array,
+        keep_latest_obkv,
        indexer.chunk_compression_type,
        indexer.chunk_compression_level,
        indexer.max_nb_chunks,
        max_memory,
    );

-    let mut buffers = Buffers::default();
+    // initialize buffers.
+    let mut del_buffers = Buffers::default();
+    let mut add_buffers = Buffers::default();
+    let mut key_buffer = Vec::new();
+    let mut value_buffer = Vec::new();
+
+    // initialize tokenizer.
+    let mut builder = tokenizer_builder(stop_words, allowed_separators, dictionary, None);
+    let tokenizer = builder.build();
+
+    // iterate over documents.
+    let mut cursor = obkv_documents.into_cursor()?;
+    while let Some((key, value)) = cursor.move_on_next()? {
+        let document_id = key
+            .try_into()
+            .map(u32::from_be_bytes)
+            .map_err(|_| SerializationError::InvalidNumberSerialization)?;
+        let obkv = KvReader::<FieldId>::new(value);
+
+        // if the searchable fields didn't change, skip the searchable indexing for this document.
+        if !searchable_fields_changed(&KvReader::<FieldId>::new(value), searchable_fields) {
+            continue;
+        }
+
+        documents_ids.push(document_id);
+
+        // Update key buffer prefix.
+        key_buffer.clear();
+        key_buffer.extend_from_slice(&document_id.to_be_bytes());
+
+        // Tokenize deletions and additions in 2 different threads.
+        let (del, add): (Result<_>, Result<_>) = rayon::join(
+            || {
+                // deletions
+                lang_safe_tokens_from_document(
+                    &obkv,
+                    searchable_fields,
+                    &tokenizer,
+                    stop_words,
+                    allowed_separators,
+                    dictionary,
+                    max_positions_per_attributes,
+                    DelAdd::Deletion,
+                    &mut del_buffers,
+                )
+            },
+            || {
+                // additions
+                lang_safe_tokens_from_document(
+                    &obkv,
+                    searchable_fields,
+                    &tokenizer,
+                    stop_words,
+                    allowed_separators,
+                    dictionary,
+                    max_positions_per_attributes,
+                    DelAdd::Addition,
+                    &mut add_buffers,
+                )
+            },
+        );
+
+        let (del_obkv, del_script_language_word_count) = del?;
+        let (add_obkv, add_script_language_word_count) = add?;
+
+        // merge deletions and additions.
+        // transforming two KV<FieldId, KV<u16, String>> into one KV<FieldId, KV<DelAdd, KV<u16, String>>>
+        value_buffer.clear();
+        del_add_from_two_obkvs(
+            KvReader::<FieldId>::new(del_obkv),
+            KvReader::<FieldId>::new(add_obkv),
+            &mut value_buffer,
+        )?;
+
+        // write each KV<DelAdd, KV<u16, String>> into the sorter, field by field.
+        let obkv = KvReader::<FieldId>::new(&value_buffer);
+        for (field_id, value) in obkv.iter() {
+            key_buffer.truncate(mem::size_of::<u32>());
+            key_buffer.extend_from_slice(&field_id.to_be_bytes());
+            docid_word_positions_sorter.insert(&key_buffer, value)?;
+        }
+
+        // update script_language_docids deletions.
+        for (script, languages_frequency) in del_script_language_word_count {
+            for (language, _) in languages_frequency {
+                let entry = script_language_docids
+                    .entry((script, language))
+                    .or_insert_with(|| (RoaringBitmap::new(), RoaringBitmap::new()));
+                entry.0.push(document_id);
+            }
+        }
+
+        // update script_language_docids additions.
+        for (script, languages_frequency) in add_script_language_word_count {
+            for (language, _) in languages_frequency {
+                let entry = script_language_docids
+                    .entry((script, language))
+                    .or_insert_with(|| (RoaringBitmap::new(), RoaringBitmap::new()));
+                entry.1.push(document_id);
+            }
+        }
+    }
+
+    // the returned sorter is serialized as: key: (DocId, FieldId), value: KV<DelAdd, KV<u16, String>>.
+    sorter_into_reader(docid_word_positions_sorter, indexer)
+        .map(|reader| (reader, script_language_docids))
+}
+
+/// Check if any searchable fields of a document changed.
+fn searchable_fields_changed(
+    obkv: &KvReader<FieldId>,
+    searchable_fields: &Option<HashSet<FieldId>>,
+) -> bool {
+    for (field_id, field_bytes) in obkv.iter() {
+        if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
+            let del_add = KvReaderDelAdd::new(field_bytes);
+            match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) {
+                // if both fields are None, check the next field.
+                (None, None) => (),
+                // if both contain a value and the values are the same, check the next field.
+                (Some(del), Some(add)) if del == add => (),
+                // otherwise the fields are different, return true.
+                _otherwise => return true,
+            }
+        }
+    }
+
+    false
+}
+
+/// Factorize tokenizer building.
+fn tokenizer_builder<'a>(
+    stop_words: Option<&'a fst::Set<&[u8]>>,
+    allowed_separators: Option<&'a [&str]>,
+    dictionary: Option<&'a [&str]>,
+    script_language: Option<&'a HashMap<Script, Vec<Language>>>,
+) -> TokenizerBuilder<'a, &'a [u8]> {
    let mut tokenizer_builder = TokenizerBuilder::new();
    if let Some(stop_words) = stop_words {
        tokenizer_builder.stop_words(stop_words);
@ -61,130 +196,147 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
    if let Some(separators) = allowed_separators {
        tokenizer_builder.separators(separators);
    }
-    let tokenizer = tokenizer_builder.build();

-    let mut cursor = obkv_documents.into_cursor()?;
-    while let Some((key, value)) = cursor.move_on_next()? {
-        let document_id = key
-            .try_into()
-            .map(u32::from_be_bytes)
-            .map_err(|_| SerializationError::InvalidNumberSerialization)?;
-        let obkv = KvReader::<FieldId>::new(value);
+    if let Some(script_language) = script_language {
+        tokenizer_builder.allow_list(script_language);
+    }

-        documents_ids.push(document_id);
-        buffers.key_buffer.clear();
-        buffers.key_buffer.extend_from_slice(&document_id.to_be_bytes());
+    tokenizer_builder
+}

-        let mut script_language_word_count = HashMap::new();
+/// Extract words mapped with their positions of a document,
+/// ensuring no Language detection mistake was made.
+#[allow(clippy::too_many_arguments)] // FIXME: consider grouping arguments in a struct
+fn lang_safe_tokens_from_document<'a>(
+    obkv: &KvReader<FieldId>,
+    searchable_fields: &Option<HashSet<FieldId>>,
+    tokenizer: &Tokenizer,
+    stop_words: Option<&fst::Set<&[u8]>>,
+    allowed_separators: Option<&[&str]>,
+    dictionary: Option<&[&str]>,
+    max_positions_per_attributes: u32,
+    del_add: DelAdd,
+    buffers: &'a mut Buffers,
+) -> Result<(&'a [u8], HashMap<Script, Vec<(Language, usize)>>)> {
+    let mut script_language_word_count = HashMap::new();

-        extract_tokens_from_document(
-            &obkv,
-            searchable_fields,
-            &tokenizer,
-            max_positions_per_attributes,
-            &mut buffers,
-            &mut script_language_word_count,
-            &mut docid_word_positions_sorter,
-        )?;
+    tokens_from_document(
+        obkv,
+        searchable_fields,
+        tokenizer,
+        max_positions_per_attributes,
+        del_add,
+        buffers,
+        &mut script_language_word_count,
+    )?;

-        // if we detect a potetial mistake in the language detection,
-        // we rerun the extraction forcing the tokenizer to detect the most frequently detected Languages.
-        // context: https://github.com/meilisearch/meilisearch/issues/3565
-        if script_language_word_count
-            .values()
-            .map(Vec::as_slice)
-            .any(potential_language_detection_error)
-        {
-            // build an allow list with the most frequent detected languages in the document.
-            let script_language: HashMap<_, _> =
-                script_language_word_count.iter().filter_map(most_frequent_languages).collect();
+    // if we detect a potential mistake in the language detection,
+    // we rerun the extraction forcing the tokenizer to detect the most frequently detected Languages.
+    // context: https://github.com/meilisearch/meilisearch/issues/3565
+    if script_language_word_count
+        .values()
+        .map(Vec::as_slice)
+        .any(potential_language_detection_error)
+    {
+        // build an allow list with the most frequent detected languages in the document.
+        let script_language: HashMap<_, _> =
+            script_language_word_count.iter().filter_map(most_frequent_languages).collect();

-            // if the allow list is empty, meaning that no Language is considered frequent,
-            // then we don't rerun the extraction.
-            if !script_language.is_empty() {
-                // build a new temporary tokenizer including the allow list.
-                let mut tokenizer_builder = TokenizerBuilder::new();
-                if let Some(stop_words) = stop_words {
-                    tokenizer_builder.stop_words(stop_words);
-                }
-                tokenizer_builder.allow_list(&script_language);
-                let tokenizer = tokenizer_builder.build();
+        // if the allow list is empty, meaning that no Language is considered frequent,
+        // then we don't rerun the extraction.
+        if !script_language.is_empty() {
+            // build a new temporary tokenizer including the allow list.
+            let mut builder = tokenizer_builder(
+                stop_words,
+                allowed_separators,
+                dictionary,
+                Some(&script_language),
+            );
+            let tokenizer = builder.build();

-                script_language_word_count.clear();
+            script_language_word_count.clear();

-                // rerun the extraction.
-                extract_tokens_from_document(
-                    &obkv,
-                    searchable_fields,
-                    &tokenizer,
-                    max_positions_per_attributes,
-                    &mut buffers,
-                    &mut script_language_word_count,
-                    &mut docid_word_positions_sorter,
-                )?;
-            }
-        }
-
-        for (script, languages_frequency) in script_language_word_count {
-            for (language, _) in languages_frequency {
-                let entry = script_language_docids
-                    .entry((script, language))
-                    .or_insert_with(RoaringBitmap::new);
-                entry.push(document_id);
-            }
+            // rerun the extraction.
+            tokens_from_document(
+                obkv,
+                searchable_fields,
+                &tokenizer,
+                max_positions_per_attributes,
+                del_add,
+                buffers,
+                &mut script_language_word_count,
+            )?;
        }
    }

-    sorter_into_reader(docid_word_positions_sorter, indexer)
-        .map(|reader| (documents_ids, reader, script_language_docids))
+    // returns a (KV<FieldId, KV<u16, String>>, HashMap<Script, Vec<(Language, usize)>>)
+    Ok((&buffers.obkv_buffer, script_language_word_count))
 }

-fn extract_tokens_from_document(
+/// Extract words mapped with their positions of a document.
+fn tokens_from_document<'a>(
    obkv: &KvReader<FieldId>,
    searchable_fields: &Option<HashSet<FieldId>>,
    tokenizer: &Tokenizer,
    max_positions_per_attributes: u32,
-    buffers: &mut Buffers,
+    del_add: DelAdd,
+    buffers: &'a mut Buffers,
    script_language_word_count: &mut HashMap<Script, Vec<(Language, usize)>>,
-    docid_word_positions_sorter: &mut grenad::Sorter<MergeFn>,
-) -> Result<()> {
+) -> Result<&'a [u8]> {
+    buffers.obkv_buffer.clear();
+    let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer);
    for (field_id, field_bytes) in obkv.iter() {
+        // if field is searchable.
        if searchable_fields.as_ref().map_or(true, |sf| sf.contains(&field_id)) {
-            let value = serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
-            buffers.field_buffer.clear();
-            if let Some(field) = json_to_string(&value, &mut buffers.field_buffer) {
-                let tokens = process_tokens(tokenizer.tokenize(field))
-                    .take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
+            // extract deletion or addition only.
+            if let Some(field_bytes) = KvReaderDelAdd::new(field_bytes).get(del_add) {
+                // parse json.
+                let value =
+                    serde_json::from_slice(field_bytes).map_err(InternalError::SerdeJson)?;

-                for (index, token) in tokens {
-                    // if a language has been detected for the token, we update the counter.
-                    if let Some(language) = token.language {
-                        let script = token.script;
-                        let entry =
-                            script_language_word_count.entry(script).or_insert_with(Vec::new);
-                        match entry.iter_mut().find(|(l, _)| *l == language) {
-                            Some((_, n)) => *n += 1,
-                            None => entry.push((language, 1)),
+                // prepare writing destination.
+                buffers.obkv_positions_buffer.clear();
+                let mut writer = KvWriterU16::new(&mut buffers.obkv_positions_buffer);
+
+                // convert json into a unique string.
+                buffers.field_buffer.clear();
+                if let Some(field) = json_to_string(&value, &mut buffers.field_buffer) {
+                    // create an iterator of token with their positions.
+                    let tokens = process_tokens(tokenizer.tokenize(field))
+                        .take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
+
+                    for (index, token) in tokens {
+                        // if a language has been detected for the token, we update the counter.
+                        if let Some(language) = token.language {
+                            let script = token.script;
+                            let entry =
+                                script_language_word_count.entry(script).or_insert_with(Vec::new);
+                            match entry.iter_mut().find(|(l, _)| *l == language) {
+                                Some((_, n)) => *n += 1,
+                                None => entry.push((language, 1)),
+                            }
+                        }
+
+                        // keep a word only if it is not empty and fits in an LMDB key.
+                        let token = token.lemma().trim();
+                        if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
+                            let position: u16 = index
+                                .try_into()
+                                .map_err(|_| SerializationError::InvalidNumberSerialization)?;
+                            writer.insert(position, token.as_bytes())?;
                        }
                    }
-                    let token = token.lemma().trim();
-                    if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
-                        buffers.key_buffer.truncate(mem::size_of::<u32>());
-                        buffers.key_buffer.extend_from_slice(token.as_bytes());

-                        let position: u16 = index
-                            .try_into()
-                            .map_err(|_| SerializationError::InvalidNumberSerialization)?;
-                        let position = absolute_from_relative_position(field_id, position);
-                        docid_word_positions_sorter
-                            .insert(&buffers.key_buffer, position.to_ne_bytes())?;
-                    }
+                    // write positions into document.
+                    let positions = writer.into_inner()?;
+                    document_writer.insert(field_id, positions)?;
                }
            }
        }
    }

-    Ok(())
+    // returns a KV<FieldId, KV<u16, String>>
+    Ok(document_writer.into_inner().map(|v| v.as_slice())?)
 }

 /// Transform a JSON value into a string that can be indexed.
@ -287,10 +439,10 @@ fn compute_language_frequency_threshold(languages_frequency: &[(Language, usize)

 #[derive(Default)]
 struct Buffers {
-    // the key buffer is the concatenation of the internal document id with the field id.
-    // The buffer has to be completelly cleared between documents,
-    // and the field id part must be cleared between each field.
-    key_buffer: Vec<u8>,
    // the field buffer for each fields desserialization, and must be cleared between each field.
    field_buffer: String,
+    // buffer used to store the value data containing an obkv.
+    obkv_buffer: Vec<u8>,
+    // buffer used to store the value data containing an obkv of tokens with their positions.
+    obkv_positions_buffer: Vec<u8>,
 }
--- a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs
@ -4,11 +4,12 @@ use std::io::{self, BufReader};
 use heed::{BytesDecode, BytesEncode};

 use super::helpers::{
-    create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
+    create_sorter, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
 };
 use crate::heed_codec::facet::{
    FacetGroupKey, FacetGroupKeyCodec, FieldDocIdFacetF64Codec, OrderedF64Codec,
 };
+use crate::update::del_add::{KvReaderDelAdd, KvWriterDelAdd};
 use crate::Result;

 /// Extracts the facet number and the documents ids where this facet number appear.
@ -17,7 +18,7 @@ use crate::Result;
 /// documents ids from the given chunk of docid facet number positions.
 #[logging_timer::time]
 pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
-    docid_fid_facet_number: grenad::Reader<R>,
+    fid_docid_facet_number: grenad::Reader<R>,
    indexer: GrenadParameters,
 ) -> Result<grenad::Reader<BufReader<File>>> {
    puffin::profile_function!();
@ -26,21 +27,30 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(

    let mut facet_number_docids_sorter = create_sorter(
        grenad::SortAlgorithm::Unstable,
-        merge_cbo_roaring_bitmaps,
+        merge_deladd_cbo_roaring_bitmaps,
        indexer.chunk_compression_type,
        indexer.chunk_compression_level,
        indexer.max_nb_chunks,
        max_memory,
    );

-    let mut cursor = docid_fid_facet_number.into_cursor()?;
-    while let Some((key_bytes, _)) = cursor.move_on_next()? {
+    let mut buffer = Vec::new();
+    let mut cursor = fid_docid_facet_number.into_cursor()?;
+    while let Some((key_bytes, deladd_obkv_bytes)) = cursor.move_on_next()? {
        let (field_id, document_id, number) =
            FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();

        let key = FacetGroupKey { field_id, level: 0, left_bound: number };
        let key_bytes = FacetGroupKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap();
-        facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
+
+        buffer.clear();
+        let mut obkv = KvWriterDelAdd::new(&mut buffer);
+        for (deladd_key, _) in KvReaderDelAdd::new(deladd_obkv_bytes).iter() {
+            obkv.insert(deladd_key, document_id.to_ne_bytes())?;
+        }
+        obkv.finish()?;
+
+        facet_number_docids_sorter.insert(key_bytes, &buffer)?;
    }

    sorter_into_reader(facet_number_docids_sorter, indexer)
--- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
@ -1,13 +1,15 @@
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io::BufReader;
+use std::{io, str};

 use heed::BytesEncode;

 use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
 use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
 use crate::heed_codec::StrRefCodec;
-use crate::update::index_documents::merge_cbo_roaring_bitmaps;
-use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
+use crate::update::del_add::{KvReaderDelAdd, KvWriterDelAdd};
+use crate::update::index_documents::helpers::merge_deladd_cbo_roaring_bitmaps;
+use crate::{FieldId, Result};

 /// Extracts the facet string and the documents ids where this facet string appear.
 ///
@ -24,15 +26,16 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(

    let mut facet_string_docids_sorter = create_sorter(
        grenad::SortAlgorithm::Stable,
-        merge_cbo_roaring_bitmaps,
+        merge_deladd_cbo_roaring_bitmaps,
        indexer.chunk_compression_type,
        indexer.chunk_compression_level,
        indexer.max_nb_chunks,
        max_memory,
    );

+    let mut buffer = Vec::new();
    let mut cursor = docid_fid_facet_string.into_cursor()?;
-    while let Some((key, _original_value_bytes)) = cursor.move_on_next()? {
+    while let Some((key, deladd_original_value_bytes)) = cursor.move_on_next()? {
        let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
        let field_id = FieldId::from_be_bytes(field_id_bytes);

@ -40,21 +43,17 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
            try_split_array_at::<_, 4>(bytes).unwrap();
        let document_id = u32::from_be_bytes(document_id_bytes);

-        let mut normalised_value = std::str::from_utf8(normalized_value_bytes)?;
-
-        let normalised_truncated_value: String;
-        if normalised_value.len() > MAX_FACET_VALUE_LENGTH {
-            normalised_truncated_value = normalised_value
-                .char_indices()
-                .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
-                .map(|(_, c)| c)
-                .collect();
-            normalised_value = normalised_truncated_value.as_str();
-        }
-        let key = FacetGroupKey { field_id, level: 0, left_bound: normalised_value };
+        let normalized_value = str::from_utf8(normalized_value_bytes)?;
+        let key = FacetGroupKey { field_id, level: 0, left_bound: normalized_value };
        let key_bytes = FacetGroupKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
-        // document id is encoded in native-endian because of the CBO roaring bitmap codec
-        facet_string_docids_sorter.insert(&key_bytes, document_id.to_ne_bytes())?;
+
+        buffer.clear();
+        let mut obkv = KvWriterDelAdd::new(&mut buffer);
+        for (deladd_key, _) in KvReaderDelAdd::new(deladd_original_value_bytes).iter() {
+            obkv.insert(deladd_key, document_id.to_ne_bytes())?;
+        }
+        obkv.finish()?;
+        facet_string_docids_sorter.insert(&key_bytes, &buffer)?;
    }

    sorter_into_reader(facet_string_docids_sorter, indexer)
--- a/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs
@ -1,24 +1,36 @@
+use std::borrow::Cow;
 use std::collections::{BTreeMap, HashSet};
 use std::convert::TryInto;
 use std::fs::File;
 use std::io::{self, BufReader};
 use std::mem::size_of;
+use std::result::Result as StdResult;

+use grenad::Sorter;
 use heed::zerocopy::AsBytes;
 use heed::BytesEncode;
+use itertools::EitherOrBoth;
+use ordered_float::OrderedFloat;
 use roaring::RoaringBitmap;
 use serde_json::{from_slice, Value};
+use FilterableValues::{Empty, Null, Values};

 use super::helpers::{create_sorter, keep_first, sorter_into_reader, GrenadParameters};
 use crate::error::InternalError;
 use crate::facet::value_encoding::f64_into_bytes;
+use crate::update::del_add::{DelAdd, KvWriterDelAdd};
 use crate::update::index_documents::{create_writer, writer_into_reader};
-use crate::{CboRoaringBitmapCodec, DocumentId, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH};
+use crate::{
+    CboRoaringBitmapCodec, DocumentId, Error, FieldId, Result, BEU32, MAX_FACET_VALUE_LENGTH,
+};
+
+/// The length of the elements that are always in the buffer when inserting new values.
+const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>();

 /// The extracted facet values stored in grenad files by type.
 pub struct ExtractedFacetValues {
-    pub docid_fid_facet_numbers_chunk: grenad::Reader<BufReader<File>>,
-    pub docid_fid_facet_strings_chunk: grenad::Reader<BufReader<File>>,
+    pub fid_docid_facet_numbers_chunk: grenad::Reader<BufReader<File>>,
+    pub fid_docid_facet_strings_chunk: grenad::Reader<BufReader<File>>,
    pub fid_facet_is_null_docids_chunk: grenad::Reader<BufReader<File>>,
    pub fid_facet_is_empty_docids_chunk: grenad::Reader<BufReader<File>>,
    pub fid_facet_exists_docids_chunk: grenad::Reader<BufReader<File>>,
@ -58,71 +70,150 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
        max_memory.map(|m| m / 2),
    );

-    let mut facet_exists_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
-    let mut facet_is_null_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
-    let mut facet_is_empty_docids = BTreeMap::<FieldId, RoaringBitmap>::new();
+    // The tuples represent the Del and Add sides of a bitmap.
+    let mut facet_exists_docids = BTreeMap::<FieldId, (RoaringBitmap, RoaringBitmap)>::new();
+    let mut facet_is_null_docids = BTreeMap::<FieldId, (RoaringBitmap, RoaringBitmap)>::new();
+    let mut facet_is_empty_docids = BTreeMap::<FieldId, (RoaringBitmap, RoaringBitmap)>::new();
+
+    // We use two separate key buffers so each closure below can take its own mutable borrow.
+    let mut numbers_key_buffer = Vec::new();
+    let mut strings_key_buffer = Vec::new();

-    let mut key_buffer = Vec::new();
    let mut cursor = obkv_documents.into_cursor()?;
    while let Some((docid_bytes, value)) = cursor.move_on_next()? {
        let obkv = obkv::KvReader::new(value);

        for (field_id, field_bytes) in obkv.iter() {
            if faceted_fields.contains(&field_id) {
-                key_buffer.clear();
+                numbers_key_buffer.clear();
+                strings_key_buffer.clear();

                // Set key to the field_id
                // Note: this encoding is consistent with FieldIdCodec
-                key_buffer.extend_from_slice(&field_id.to_be_bytes());
+                numbers_key_buffer.extend_from_slice(&field_id.to_be_bytes());
+                strings_key_buffer.extend_from_slice(&field_id.to_be_bytes());

-                // Here, we know already that the document must be added to the “field id exists” database
                let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap();
                let document = BEU32::from(document).get();

-                facet_exists_docids.entry(field_id).or_default().insert(document);
-
                // For the other extraction tasks, prefix the key with the field_id and the document_id
-                key_buffer.extend_from_slice(docid_bytes);
+                numbers_key_buffer.extend_from_slice(docid_bytes);
+                strings_key_buffer.extend_from_slice(docid_bytes);

-                let value = from_slice(field_bytes).map_err(InternalError::SerdeJson)?;
+                let del_add_obkv = obkv::KvReader::new(field_bytes);
+                let del_value = match del_add_obkv.get(DelAdd::Deletion) {
+                    Some(bytes) => Some(from_slice(bytes).map_err(InternalError::SerdeJson)?),
+                    None => None,
+                };
+                let add_value = match del_add_obkv.get(DelAdd::Addition) {
+                    Some(bytes) => Some(from_slice(bytes).map_err(InternalError::SerdeJson)?),
+                    None => None,
+                };

-                match extract_facet_values(
-                    &value,
-                    geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng),
-                ) {
-                    FilterableValues::Null => {
-                        facet_is_null_docids.entry(field_id).or_default().insert(document);
-                    }
-                    FilterableValues::Empty => {
-                        facet_is_empty_docids.entry(field_id).or_default().insert(document);
-                    }
-                    FilterableValues::Values { numbers, strings } => {
-                        // insert facet numbers in sorter
-                        for number in numbers {
-                            key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>());
-                            if let Some(value_bytes) = f64_into_bytes(number) {
-                                key_buffer.extend_from_slice(&value_bytes);
-                                key_buffer.extend_from_slice(&number.to_be_bytes());
+                // We insert the document id on the Del and the Add side if the field exists.
+                let (ref mut del_exists, ref mut add_exists) =
+                    facet_exists_docids.entry(field_id).or_default();
+                let (ref mut del_is_null, ref mut add_is_null) =
+                    facet_is_null_docids.entry(field_id).or_default();
+                let (ref mut del_is_empty, ref mut add_is_empty) =
+                    facet_is_empty_docids.entry(field_id).or_default();

-                                fid_docid_facet_numbers_sorter
-                                    .insert(&key_buffer, ().as_bytes())?;
-                            }
+                if del_value.is_some() {
+                    del_exists.insert(document);
+                }
+                if add_value.is_some() {
+                    add_exists.insert(document);
+                }
+
+                let geo_support =
+                    geo_fields_ids.map_or(false, |(lat, lng)| field_id == lat || field_id == lng);
+                let del_filterable_values =
+                    del_value.map(|value| extract_facet_values(&value, geo_support));
+                let add_filterable_values =
+                    add_value.map(|value| extract_facet_values(&value, geo_support));
+
+                // Those closures are just here to simplify things a bit.
+                let mut insert_numbers_diff = |del_numbers, add_numbers| {
+                    insert_numbers_diff(
+                        &mut fid_docid_facet_numbers_sorter,
+                        &mut numbers_key_buffer,
+                        del_numbers,
+                        add_numbers,
+                    )
+                };
+                let mut insert_strings_diff = |del_strings, add_strings| {
+                    insert_strings_diff(
+                        &mut fid_docid_facet_strings_sorter,
+                        &mut strings_key_buffer,
+                        del_strings,
+                        add_strings,
+                    )
+                };
+
+                match (del_filterable_values, add_filterable_values) {
+                    (None, None) => (),
+                    (Some(del_filterable_values), None) => match del_filterable_values {
+                        Null => {
+                            del_is_null.insert(document);
                        }
-
-                        // insert normalized and original facet string in sorter
-                        for (normalized, original) in
-                            strings.into_iter().filter(|(n, _)| !n.is_empty())
-                        {
-                            let normalized_truncated_value: String = normalized
-                                .char_indices()
-                                .take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
-                                .map(|(_, c)| c)
-                                .collect();
-
-                            key_buffer.truncate(size_of::<FieldId>() + size_of::<DocumentId>());
-                            key_buffer.extend_from_slice(normalized_truncated_value.as_bytes());
-                            fid_docid_facet_strings_sorter
-                                .insert(&key_buffer, original.as_bytes())?;
+                        Empty => {
+                            del_is_empty.insert(document);
+                        }
+                        Values { numbers, strings } => {
+                            insert_numbers_diff(numbers, vec![])?;
+                            insert_strings_diff(strings, vec![])?;
+                        }
+                    },
+                    (None, Some(add_filterable_values)) => match add_filterable_values {
+                        Null => {
+                            add_is_null.insert(document);
+                        }
+                        Empty => {
+                            add_is_empty.insert(document);
+                        }
+                        Values { numbers, strings } => {
+                            insert_numbers_diff(vec![], numbers)?;
+                            insert_strings_diff(vec![], strings)?;
+                        }
+                    },
+                    (Some(del_filterable_values), Some(add_filterable_values)) => {
+                        match (del_filterable_values, add_filterable_values) {
+                            (Null, Null) | (Empty, Empty) => (),
+                            (Null, Empty) => {
+                                del_is_null.insert(document);
+                                add_is_empty.insert(document);
+                            }
+                            (Empty, Null) => {
+                                del_is_empty.insert(document);
+                                add_is_null.insert(document);
+                            }
+                            (Null, Values { numbers, strings }) => {
+                                insert_numbers_diff(vec![], numbers)?;
+                                insert_strings_diff(vec![], strings)?;
+                                del_is_null.insert(document);
+                            }
+                            (Empty, Values { numbers, strings }) => {
+                                insert_numbers_diff(vec![], numbers)?;
+                                insert_strings_diff(vec![], strings)?;
+                                del_is_empty.insert(document);
+                            }
+                            (Values { numbers, strings }, Null) => {
+                                add_is_null.insert(document);
+                                insert_numbers_diff(numbers, vec![])?;
+                                insert_strings_diff(strings, vec![])?;
+                            }
+                            (Values { numbers, strings }, Empty) => {
+                                add_is_empty.insert(document);
+                                insert_numbers_diff(numbers, vec![])?;
+                                insert_strings_diff(strings, vec![])?;
+                            }
+                            (
+                                Values { numbers: del_numbers, strings: del_strings },
+                                Values { numbers: add_numbers, strings: add_strings },
+                            ) => {
+                                insert_numbers_diff(del_numbers, add_numbers)?;
+                                insert_strings_diff(del_strings, add_strings)?;
+                            }
                        }
                    }
                }
@ -130,14 +221,15 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
        }
    }

+    let mut buffer = Vec::new();
    let mut facet_exists_docids_writer = create_writer(
        indexer.chunk_compression_type,
        indexer.chunk_compression_level,
        tempfile::tempfile()?,
    );
-    for (fid, bitmap) in facet_exists_docids.into_iter() {
-        let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
-        facet_exists_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?;
+    for (fid, (del_bitmap, add_bitmap)) in facet_exists_docids.into_iter() {
+        deladd_obkv_cbo_roaring_bitmaps(&mut buffer, &del_bitmap, &add_bitmap)?;
+        facet_exists_docids_writer.insert(fid.to_be_bytes(), &buffer)?;
    }
    let facet_exists_docids_reader = writer_into_reader(facet_exists_docids_writer)?;

@ -146,9 +238,9 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
        indexer.chunk_compression_level,
        tempfile::tempfile()?,
    );
-    for (fid, bitmap) in facet_is_null_docids.into_iter() {
-        let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
-        facet_is_null_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?;
+    for (fid, (del_bitmap, add_bitmap)) in facet_is_null_docids.into_iter() {
+        deladd_obkv_cbo_roaring_bitmaps(&mut buffer, &del_bitmap, &add_bitmap)?;
+        facet_is_null_docids_writer.insert(fid.to_be_bytes(), &buffer)?;
    }
    let facet_is_null_docids_reader = writer_into_reader(facet_is_null_docids_writer)?;

@ -157,21 +249,156 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
        indexer.chunk_compression_level,
        tempfile::tempfile()?,
    );
-    for (fid, bitmap) in facet_is_empty_docids.into_iter() {
-        let bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
-        facet_is_empty_docids_writer.insert(fid.to_be_bytes(), &bitmap_bytes)?;
+    for (fid, (del_bitmap, add_bitmap)) in facet_is_empty_docids.into_iter() {
+        deladd_obkv_cbo_roaring_bitmaps(&mut buffer, &del_bitmap, &add_bitmap)?;
+        facet_is_empty_docids_writer.insert(fid.to_be_bytes(), &buffer)?;
    }
    let facet_is_empty_docids_reader = writer_into_reader(facet_is_empty_docids_writer)?;

    Ok(ExtractedFacetValues {
-        docid_fid_facet_numbers_chunk: sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?,
-        docid_fid_facet_strings_chunk: sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?,
+        fid_docid_facet_numbers_chunk: sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?,
+        fid_docid_facet_strings_chunk: sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?,
        fid_facet_is_null_docids_chunk: facet_is_null_docids_reader,
        fid_facet_is_empty_docids_chunk: facet_is_empty_docids_reader,
        fid_facet_exists_docids_chunk: facet_exists_docids_reader,
    })
 }

+/// Generates a vector of bytes containing a DelAdd obkv with two bitmaps.
+fn deladd_obkv_cbo_roaring_bitmaps(
+    buffer: &mut Vec<u8>,
+    del_bitmap: &RoaringBitmap,
+    add_bitmap: &RoaringBitmap,
+) -> io::Result<()> {
+    buffer.clear();
+    let mut obkv = KvWriterDelAdd::new(buffer);
+    let del_bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(del_bitmap).unwrap();
+    let add_bitmap_bytes = CboRoaringBitmapCodec::bytes_encode(add_bitmap).unwrap();
+    obkv.insert(DelAdd::Deletion, del_bitmap_bytes)?;
+    obkv.insert(DelAdd::Addition, add_bitmap_bytes)?;
+    obkv.finish()
+}
+
+/// Truncates a string to the biggest valid LMDB key size.
+fn truncate_string(s: String) -> String {
+    s.char_indices()
+        .take_while(|(idx, _)| idx + 4 < MAX_FACET_VALUE_LENGTH)
+        .map(|(_, c)| c)
+        .collect()
+}
+
+/// Computes the diff between both Del and Add numbers and
+/// only inserts the parts that differ in the sorter.
+fn insert_numbers_diff<MF>(
+    fid_docid_facet_numbers_sorter: &mut Sorter<MF>,
+    key_buffer: &mut Vec<u8>,
+    mut del_numbers: Vec<f64>,
+    mut add_numbers: Vec<f64>,
+) -> Result<()>
+where
+    MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>,
+{
+    // We sort and dedup the float numbers
+    del_numbers.sort_unstable_by_key(|f| OrderedFloat(*f));
+    add_numbers.sort_unstable_by_key(|f| OrderedFloat(*f));
+    del_numbers.dedup_by_key(|f| OrderedFloat(*f));
+    add_numbers.dedup_by_key(|f| OrderedFloat(*f));
+
+    let merged_numbers_iter = itertools::merge_join_by(
+        del_numbers.into_iter().map(OrderedFloat),
+        add_numbers.into_iter().map(OrderedFloat),
+        |del, add| del.cmp(add),
+    );
+
+    // insert facet numbers in sorter
+    for eob in merged_numbers_iter {
+        key_buffer.truncate(TRUNCATE_SIZE);
+        match eob {
+            EitherOrBoth::Both(_, _) => (), // no need to touch anything
+            EitherOrBoth::Left(OrderedFloat(number)) => {
+                if let Some(value_bytes) = f64_into_bytes(number) {
+                    key_buffer.extend_from_slice(&value_bytes);
+                    key_buffer.extend_from_slice(&number.to_be_bytes());
+
+                    // We insert only the Del part of the Obkv to inform
+                    // that we only want to remove all those numbers.
+                    let mut obkv = KvWriterDelAdd::memory();
+                    obkv.insert(DelAdd::Deletion, ().as_bytes())?;
+                    let bytes = obkv.into_inner()?;
+                    fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
+                }
+            }
+            EitherOrBoth::Right(OrderedFloat(number)) => {
+                if let Some(value_bytes) = f64_into_bytes(number) {
+                    key_buffer.extend_from_slice(&value_bytes);
+                    key_buffer.extend_from_slice(&number.to_be_bytes());
+
+                    // We insert only the Add part of the Obkv to inform
+                    // that we only want to add all those numbers.
+                    let mut obkv = KvWriterDelAdd::memory();
+                    obkv.insert(DelAdd::Addition, ().as_bytes())?;
+                    let bytes = obkv.into_inner()?;
+                    fid_docid_facet_numbers_sorter.insert(&key_buffer, bytes)?;
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Computes the diff between both Del and Add strings and
+/// only inserts the parts that differ in the sorter.
+fn insert_strings_diff<MF>(
+    fid_docid_facet_strings_sorter: &mut Sorter<MF>,
+    key_buffer: &mut Vec<u8>,
+    mut del_strings: Vec<(String, String)>,
+    mut add_strings: Vec<(String, String)>,
+) -> Result<()>
+where
+    MF: for<'a> Fn(&[u8], &[Cow<'a, [u8]>]) -> StdResult<Cow<'a, [u8]>, Error>,
+{
+    // We sort and dedup the normalized and original strings
+    del_strings.sort_unstable();
+    add_strings.sort_unstable();
+    del_strings.dedup();
+    add_strings.dedup();
+
+    let merged_strings_iter = itertools::merge_join_by(
+        del_strings.into_iter().filter(|(n, _)| !n.is_empty()),
+        add_strings.into_iter().filter(|(n, _)| !n.is_empty()),
+        |del, add| del.cmp(add),
+    );
+
+    // insert normalized and original facet string in sorter
+    for eob in merged_strings_iter {
+        key_buffer.truncate(TRUNCATE_SIZE);
+        match eob {
+            EitherOrBoth::Both(_, _) => (), // no need to touch anything
+            EitherOrBoth::Left((normalized, original)) => {
+                let truncated = truncate_string(normalized);
+                key_buffer.extend_from_slice(truncated.as_bytes());
+
+                let mut obkv = KvWriterDelAdd::memory();
+                obkv.insert(DelAdd::Deletion, original)?;
+                let bytes = obkv.into_inner()?;
+                fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?;
+            }
+            EitherOrBoth::Right((normalized, original)) => {
+                let truncated = truncate_string(normalized);
+                key_buffer.extend_from_slice(truncated.as_bytes());
+
+                let mut obkv = KvWriterDelAdd::memory();
+                obkv.insert(DelAdd::Addition, original)?;
+                let bytes = obkv.into_inner()?;
+                fid_docid_facet_strings_sorter.insert(&key_buffer, bytes)?;
+            }
+        }
+    }
+
+    Ok(())
+}
+
 /// Represent what a document field contains.
 enum FilterableValues {
    /// Corresponds to the JSON `null` value.
@ -182,6 +409,7 @@ enum FilterableValues {
    Values { numbers: Vec<f64>, strings: Vec<(String, String)> },
 }

+/// Extracts the facet values of a JSON field.
 fn extract_facet_values(value: &Value, geo_field: bool) -> FilterableValues {
    fn inner_extract_facet_values(
        value: &Value,
--- a/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_fid_word_count_docids.rs
@ -1,16 +1,18 @@
-use std::collections::HashMap;
 use std::fs::File;
 use std::io::{self, BufReader};

-use grenad::Sorter;
+use obkv::KvReaderU16;

 use super::helpers::{
-    create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
-    try_split_array_at, GrenadParameters, MergeFn,
+    create_sorter, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader, try_split_array_at,
+    GrenadParameters,
 };
 use crate::error::SerializationError;
 use crate::index::db_name::DOCID_WORD_POSITIONS;
-use crate::{relative_from_absolute_position, DocumentId, FieldId, Result};
+use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
+use crate::Result;
+
+const MAX_COUNTED_WORDS: usize = 30;

 /// Extracts the field id word count and the documents ids where
 /// this field id with this amount of words appear.
@ -28,70 +30,62 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(

    let mut fid_word_count_docids_sorter = create_sorter(
        grenad::SortAlgorithm::Unstable,
-        merge_cbo_roaring_bitmaps,
+        merge_deladd_cbo_roaring_bitmaps,
        indexer.chunk_compression_type,
        indexer.chunk_compression_level,
        indexer.max_nb_chunks,
        max_memory,
    );

-    // This map is assumed to not consume a lot of memory.
-    let mut document_fid_wordcount = HashMap::new();
-    let mut current_document_id = None;
-
+    let mut key_buffer = Vec::new();
+    let mut value_buffer = Vec::new();
    let mut cursor = docid_word_positions.into_cursor()?;
    while let Some((key, value)) = cursor.move_on_next()? {
-        let (document_id_bytes, _word_bytes) = try_split_array_at(key)
+        let (document_id_bytes, fid_bytes) = try_split_array_at(key)
            .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
        let document_id = u32::from_be_bytes(document_id_bytes);

-        let curr_document_id = *current_document_id.get_or_insert(document_id);
-        if curr_document_id != document_id {
-            drain_document_fid_wordcount_into_sorter(
-                &mut fid_word_count_docids_sorter,
-                &mut document_fid_wordcount,
-                curr_document_id,
-            )?;
-            current_document_id = Some(document_id);
+        let del_add_reader = KvReaderDelAdd::new(value);
+        let deletion = del_add_reader
+            // get deleted words
+            .get(DelAdd::Deletion)
+            // count deleted words
+            .map(|deletion| KvReaderU16::new(deletion).iter().take(MAX_COUNTED_WORDS + 1).count())
+            // keep the count if under or equal to MAX_COUNTED_WORDS
+            .filter(|&word_count| word_count <= MAX_COUNTED_WORDS);
+        let addition = del_add_reader
+            // get added words
+            .get(DelAdd::Addition)
+            // count added words
+            .map(|addition| KvReaderU16::new(addition).iter().take(MAX_COUNTED_WORDS + 1).count())
+            // keep the count if under or equal to MAX_COUNTED_WORDS
+            .filter(|&word_count| word_count <= MAX_COUNTED_WORDS);
+
+        if deletion != addition {
+            // Insert the deleted word count in the sorter if it exists.
+            if let Some(word_count) = deletion {
+                value_buffer.clear();
+                let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
+                value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
+                key_buffer.clear();
+                key_buffer.extend_from_slice(fid_bytes);
+                key_buffer.push(word_count as u8);
+                fid_word_count_docids_sorter
+                    .insert(&key_buffer, value_writer.into_inner().unwrap())?;
+            }
+            // Insert the added word count in the sorter if it exists.
+            if let Some(word_count) = addition {
+                value_buffer.clear();
+                let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
+                value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
+                key_buffer.clear();
+                key_buffer.extend_from_slice(fid_bytes);
+                key_buffer.push(word_count as u8);
+                fid_word_count_docids_sorter
+                    .insert(&key_buffer, value_writer.into_inner().unwrap())?;
+            }
        }
-
-        for position in read_u32_ne_bytes(value) {
-            let (field_id, _) = relative_from_absolute_position(position);
-
-            let value = document_fid_wordcount.entry(field_id as FieldId).or_insert(0);
-            *value += 1;
-        }
-    }
-
-    if let Some(document_id) = current_document_id {
-        // We must make sure that don't lose the current document field id
-        // word count map if we break because we reached the end of the chunk.
-        drain_document_fid_wordcount_into_sorter(
-            &mut fid_word_count_docids_sorter,
-            &mut document_fid_wordcount,
-            document_id,
-        )?;
    }

    sorter_into_reader(fid_word_count_docids_sorter, indexer)
 }
-
-fn drain_document_fid_wordcount_into_sorter(
-    fid_word_count_docids_sorter: &mut Sorter<MergeFn>,
-    document_fid_wordcount: &mut HashMap<FieldId, u32>,
-    document_id: DocumentId,
-) -> Result<()> {
-    let mut key_buffer = Vec::new();
-
-    for (fid, count) in document_fid_wordcount.drain() {
-        if count <= 30 {
-            key_buffer.clear();
-            key_buffer.extend_from_slice(&fid.to_be_bytes());
-            key_buffer.push(count as u8);
-
-            fid_word_count_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?;
-        }
-    }
-
-    Ok(())
-}
--- a/milli/src/update/index_documents/extract/extract_geo_points.rs
+++ b/milli/src/update/index_documents/extract/extract_geo_points.rs
@ -6,6 +6,7 @@ use serde_json::Value;

 use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
 use crate::error::GeoError;
+use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 use crate::update::index_documents::extract_finite_float_from_value;
 use crate::{FieldId, InternalError, Result};

@ -30,39 +31,71 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
    let mut cursor = obkv_documents.into_cursor()?;
    while let Some((docid_bytes, value)) = cursor.move_on_next()? {
        let obkv = obkv::KvReader::new(value);
-        // since we only needs the primary key when we throw an error we create this getter to
-        // lazily get it when needed
+        // since we only need the primary key when we throw an error
+        // we create this getter to lazily get it when needed
        let document_id = || -> Value {
            let document_id = obkv.get(primary_key_id).unwrap();
            serde_json::from_slice(document_id).unwrap()
        };

        // first we get the two fields
-        let lat = obkv.get(lat_fid);
-        let lng = obkv.get(lng_fid);
+        match (obkv.get(lat_fid), obkv.get(lng_fid)) {
+            (Some(lat), Some(lng)) => {
+                let deladd_lat_obkv = KvReaderDelAdd::new(lat);
+                let deladd_lng_obkv = KvReaderDelAdd::new(lng);

-        if let Some((lat, lng)) = lat.zip(lng) {
-            // then we extract the values
-            let lat = extract_finite_float_from_value(
-                serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
-            )
-            .map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
+                // then we extract the values
+                let del_lat_lng = deladd_lat_obkv
+                    .get(DelAdd::Deletion)
+                    .zip(deladd_lng_obkv.get(DelAdd::Deletion))
+                    .map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
+                    .transpose()?;
+                let add_lat_lng = deladd_lat_obkv
+                    .get(DelAdd::Addition)
+                    .zip(deladd_lng_obkv.get(DelAdd::Addition))
+                    .map(|(lat, lng)| extract_lat_lng(lat, lng, document_id))
+                    .transpose()?;

-            let lng = extract_finite_float_from_value(
-                serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
-            )
-            .map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
-
-            #[allow(clippy::drop_non_drop)]
-            let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
-            writer.insert(docid_bytes, bytes)?;
-        } else if lat.is_none() && lng.is_some() {
-            return Err(GeoError::MissingLatitude { document_id: document_id() })?;
-        } else if lat.is_some() && lng.is_none() {
-            return Err(GeoError::MissingLongitude { document_id: document_id() })?;
+                if del_lat_lng != add_lat_lng {
+                    let mut obkv = KvWriterDelAdd::memory();
+                    if let Some([lat, lng]) = del_lat_lng {
+                        #[allow(clippy::drop_non_drop)]
+                        let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
+                        obkv.insert(DelAdd::Deletion, bytes)?;
+                    }
+                    if let Some([lat, lng]) = add_lat_lng {
+                        #[allow(clippy::drop_non_drop)]
+                        let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
+                        obkv.insert(DelAdd::Addition, bytes)?;
+                    }
+                    let bytes = obkv.into_inner()?;
+                    writer.insert(docid_bytes, bytes)?;
+                }
+            }
+            (None, Some(_)) => {
+                return Err(GeoError::MissingLatitude { document_id: document_id() }.into())
+            }
+            (Some(_), None) => {
+                return Err(GeoError::MissingLongitude { document_id: document_id() }.into())
+            }
+            (None, None) => (),
        }
-        // else => the _geo object was `null`, there is nothing to do
    }

    writer_into_reader(writer)
 }
+
+/// Extracts the finite floats lat and lng from two JSON byte slices; `document_id` is only called to build a `GeoError`.
+fn extract_lat_lng(lat: &[u8], lng: &[u8], document_id: impl Fn() -> Value) -> Result<[f64; 2]> {
+    let lat = extract_finite_float_from_value(
+        serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
+    )
+    .map_err(|lat| GeoError::BadLatitude { document_id: document_id(), value: lat })?;
+
+    let lng = extract_finite_float_from_value(
+        serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
+    )
+    .map_err(|lng| GeoError::BadLongitude { document_id: document_id(), value: lng })?;
+
+    Ok([lat, lng]) // latitude first, then longitude
+}
--- a/milli/src/update/index_documents/extract/extract_vector_points.rs
+++ b/milli/src/update/index_documents/extract/extract_vector_points.rs
@ -1,13 +1,24 @@
+use std::cmp::Ordering;
 use std::convert::TryFrom;
 use std::fs::File;
-use std::io::{self, BufReader};
+use std::io::{self, BufReader, BufWriter};
+use std::mem::size_of;
+use std::str::from_utf8;

 use bytemuck::cast_slice;
+use grenad::Writer;
+use itertools::EitherOrBoth;
+use ordered_float::OrderedFloat;
 use serde_json::{from_slice, Value};

 use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
 use crate::error::UserError;
-use crate::{FieldId, InternalError, Result, VectorOrArrayOfVectors};
+use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
+use crate::update::index_documents::helpers::try_split_at;
+use crate::{DocumentId, FieldId, InternalError, Result, VectorOrArrayOfVectors};
+
+/// The length of the elements that are always in the buffer when inserting new values.
+const TRUNCATE_SIZE: usize = size_of::<DocumentId>();

 /// Extracts the embedding vector contained in each document under the `_vectors` field.
 ///
@ -16,7 +27,6 @@ use crate::{FieldId, InternalError, Result, VectorOrArrayOfVectors};
 pub fn extract_vector_points<R: io::Read + io::Seek>(
    obkv_documents: grenad::Reader<R>,
    indexer: GrenadParameters,
-    primary_key_id: FieldId,
    vectors_fid: FieldId,
 ) -> Result<grenad::Reader<BufReader<File>>> {
    puffin::profile_function!();
@ -27,43 +37,112 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
        tempfile::tempfile()?,
    );

+    let mut key_buffer = Vec::new();
    let mut cursor = obkv_documents.into_cursor()?;
-    while let Some((docid_bytes, value)) = cursor.move_on_next()? {
+    while let Some((key, value)) = cursor.move_on_next()? {
+        // this must always be serialized as (docid, external_docid);
+        let (docid_bytes, external_id_bytes) =
+            try_split_at(key, std::mem::size_of::<DocumentId>()).unwrap();
+        debug_assert!(from_utf8(external_id_bytes).is_ok());
+
        let obkv = obkv::KvReader::new(value);
+        key_buffer.clear();
+        key_buffer.extend_from_slice(docid_bytes);

        // since we only needs the primary key when we throw an error we create this getter to
        // lazily get it when needed
-        let document_id = || -> Value {
-            let document_id = obkv.get(primary_key_id).unwrap();
-            from_slice(document_id).unwrap()
-        };
+        let document_id = || -> Value { from_utf8(external_id_bytes).unwrap().into() };

        // first we retrieve the _vectors field
-        if let Some(vectors) = obkv.get(vectors_fid) {
-            // extract the vectors
-            let vectors = match from_slice(vectors) {
-                Ok(vectors) => VectorOrArrayOfVectors::into_array_of_vectors(vectors),
-                Err(_) => {
-                    return Err(UserError::InvalidVectorsType {
-                        document_id: document_id(),
-                        value: from_slice(vectors).map_err(InternalError::SerdeJson)?,
-                    }
-                    .into())
-                }
-            };
+        if let Some(value) = obkv.get(vectors_fid) {
+            let vectors_obkv = KvReaderDelAdd::new(value);

-            if let Some(vectors) = vectors {
-                for (i, vector) in vectors.into_iter().enumerate().take(u16::MAX as usize) {
-                    let index = u16::try_from(i).unwrap();
-                    let mut key = docid_bytes.to_vec();
-                    key.extend_from_slice(&index.to_be_bytes());
-                    let bytes = cast_slice(&vector);
-                    writer.insert(key, bytes)?;
-                }
-            }
+            // then we extract the values
+            let del_vectors = vectors_obkv
+                .get(DelAdd::Deletion)
+                .map(|vectors| extract_vectors(vectors, document_id))
+                .transpose()?
+                .flatten();
+            let add_vectors = vectors_obkv
+                .get(DelAdd::Addition)
+                .map(|vectors| extract_vectors(vectors, document_id))
+                .transpose()?
+                .flatten();
+
+            // and we finally push the unique vectors into the writer
+            push_vectors_diff(
+                &mut writer,
+                &mut key_buffer,
+                del_vectors.unwrap_or_default(),
+                add_vectors.unwrap_or_default(),
+            )?;
        }
-        // else => the `_vectors` object was `null`, there is nothing to do
    }

    writer_into_reader(writer)
 }
+
+/// Computes the diff between the Del and Add vectors and
+/// only inserts the parts that differ into the writer.
+fn push_vectors_diff(
+    writer: &mut Writer<BufWriter<File>>,
+    key_buffer: &mut Vec<u8>,
+    mut del_vectors: Vec<Vec<f32>>,
+    mut add_vectors: Vec<Vec<f32>>,
+) -> Result<()> {
+    // We sort and dedup the vectors so that both sides can be merge-joined below
+    del_vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
+    add_vectors.sort_unstable_by(|a, b| compare_vectors(a, b));
+    del_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
+    add_vectors.dedup_by(|a, b| compare_vectors(a, b).is_eq());
+
+    let merged_vectors_iter =
+        itertools::merge_join_by(del_vectors, add_vectors, |del, add| compare_vectors(del, add));
+
+    // insert the differing vectors into the writer (at most u16::MAX merged entries are visited)
+    for (i, eob) in merged_vectors_iter.into_iter().enumerate().take(u16::MAX as usize) {
+        // Reset the key to its first TRUNCATE_SIZE bytes and append the unique index to it.
+        key_buffer.truncate(TRUNCATE_SIZE);
+        let index = u16::try_from(i).unwrap();
+        key_buffer.extend_from_slice(&index.to_be_bytes());
+
+        match eob {
+            EitherOrBoth::Both(_, _) => (), // same vector on both sides: nothing to write
+            EitherOrBoth::Left(vector) => {
+                // We insert only the Del part of the Obkv to inform
+                // that we only want to remove this vector.
+                let mut obkv = KvWriterDelAdd::memory();
+                obkv.insert(DelAdd::Deletion, cast_slice(&vector))?;
+                let bytes = obkv.into_inner()?;
+                writer.insert(&key_buffer, bytes)?;
+            }
+            EitherOrBoth::Right(vector) => {
+                // We insert only the Add part of the Obkv to inform
+                // that we only want to add this vector.
+                let mut obkv = KvWriterDelAdd::memory();
+                obkv.insert(DelAdd::Addition, cast_slice(&vector))?;
+                let bytes = obkv.into_inner()?;
+                writer.insert(&key_buffer, bytes)?;
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Compares two vectors element by element, wrapping each float in `OrderedFloat`.
+fn compare_vectors(a: &[f32], b: &[f32]) -> Ordering {
+    a.iter().copied().map(OrderedFloat).cmp(b.iter().copied().map(OrderedFloat))
+}
+
+/// Extracts the vectors from a JSON value, returning `UserError::InvalidVectorsType` if it cannot be deserialized.
+fn extract_vectors(value: &[u8], document_id: impl Fn() -> Value) -> Result<Option<Vec<Vec<f32>>>> {
+    match from_slice(value) {
+        Ok(vectors) => Ok(VectorOrArrayOfVectors::into_array_of_vectors(vectors)),
+        Err(_) => Err(UserError::InvalidVectorsType {
+            document_id: document_id(),
+            value: from_slice(value).map_err(InternalError::SerdeJson)?, // parse the same bytes again to attach them to the error
+        }
+        .into()),
+    }
+}
--- a/milli/src/update/index_documents/extract/extract_word_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_docids.rs
@ -1,18 +1,20 @@
-use std::collections::HashSet;
+use std::collections::{BTreeSet, HashSet};
 use std::fs::File;
 use std::io::{self, BufReader};
-use std::iter::FromIterator;

-use roaring::RoaringBitmap;
+use heed::BytesDecode;
+use obkv::KvReaderU16;

 use super::helpers::{
-    create_sorter, merge_roaring_bitmaps, serialize_roaring_bitmap, sorter_into_reader,
-    try_split_array_at, GrenadParameters,
+    create_sorter, create_writer, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader,
+    try_split_array_at, writer_into_reader, GrenadParameters,
 };
 use crate::error::SerializationError;
+use crate::heed_codec::StrBEU16Codec;
 use crate::index::db_name::DOCID_WORD_POSITIONS;
-use crate::update::index_documents::helpers::read_u32_ne_bytes;
-use crate::{relative_from_absolute_position, FieldId, Result};
+use crate::update::del_add::{is_noop_del_add_obkv, DelAdd, KvReaderDelAdd, KvWriterDelAdd};
+use crate::update::MergeFn;
+use crate::{DocumentId, FieldId, Result};

 /// Extracts the word and the documents ids where this word appear.
 ///
@ -26,65 +28,152 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
    docid_word_positions: grenad::Reader<R>,
    indexer: GrenadParameters,
    exact_attributes: &HashSet<FieldId>,
-) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
+) -> Result<(
+    grenad::Reader<BufReader<File>>,
+    grenad::Reader<BufReader<File>>,
+    grenad::Reader<BufReader<File>>,
+)> {
    puffin::profile_function!();

    let max_memory = indexer.max_memory_by_thread();

-    let mut word_docids_sorter = create_sorter(
+    let mut word_fid_docids_sorter = create_sorter(
        grenad::SortAlgorithm::Unstable,
-        merge_roaring_bitmaps,
+        merge_deladd_cbo_roaring_bitmaps,
        indexer.chunk_compression_type,
        indexer.chunk_compression_level,
        indexer.max_nb_chunks,
-        max_memory.map(|x| x / 2),
+        max_memory.map(|x| x / 3),
+    );
+    let mut key_buffer = Vec::new();
+    let mut del_words = BTreeSet::new();
+    let mut add_words = BTreeSet::new();
+    let mut cursor = docid_word_positions.into_cursor()?;
+    while let Some((key, value)) = cursor.move_on_next()? {
+        let (document_id_bytes, fid_bytes) = try_split_array_at(key)
+            .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
+        let (fid_bytes, _) = try_split_array_at(fid_bytes)
+            .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
+        let document_id = u32::from_be_bytes(document_id_bytes);
+        let fid = u16::from_be_bytes(fid_bytes);
+
+        let del_add_reader = KvReaderDelAdd::new(value);
+        // extract all unique words to remove.
+        if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
+            for (_pos, word) in KvReaderU16::new(deletion).iter() {
+                del_words.insert(word.to_vec());
+            }
+        }
+
+        // extract all unique additional words.
+        if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
+            for (_pos, word) in KvReaderU16::new(addition).iter() {
+                add_words.insert(word.to_vec());
+            }
+        }
+
+        words_into_sorter(
+            document_id,
+            fid,
+            &mut key_buffer,
+            &del_words,
+            &add_words,
+            &mut word_fid_docids_sorter,
+        )?;
+
+        del_words.clear();
+        add_words.clear();
+    }
+
+    let mut word_docids_sorter = create_sorter(
+        grenad::SortAlgorithm::Unstable,
+        merge_deladd_cbo_roaring_bitmaps,
+        indexer.chunk_compression_type,
+        indexer.chunk_compression_level,
+        indexer.max_nb_chunks,
+        max_memory.map(|x| x / 3),
    );

    let mut exact_word_docids_sorter = create_sorter(
        grenad::SortAlgorithm::Unstable,
-        merge_roaring_bitmaps,
+        merge_deladd_cbo_roaring_bitmaps,
        indexer.chunk_compression_type,
        indexer.chunk_compression_level,
        indexer.max_nb_chunks,
-        max_memory.map(|x| x / 2),
+        max_memory.map(|x| x / 3),
    );

-    let mut value_buffer = Vec::new();
-    let mut cursor = docid_word_positions.into_cursor()?;
-    while let Some((key, positions)) = cursor.move_on_next()? {
-        let (document_id_bytes, word_bytes) = try_split_array_at(key)
+    let mut word_fid_docids_writer = create_writer(
+        indexer.chunk_compression_type,
+        indexer.chunk_compression_level,
+        tempfile::tempfile()?,
+    );
+
+    let mut iter = word_fid_docids_sorter.into_stream_merger_iter()?;
+    // TODO: replace sorters by writers by accumulating values into a buffer before inserting them.
+    while let Some((key, value)) = iter.next()? {
+        // only keep the value if there is a change to apply in the DB.
+        if !is_noop_del_add_obkv(KvReaderDelAdd::new(value)) {
+            word_fid_docids_writer.insert(key, value)?;
+        }
+
+        let (word, fid) = StrBEU16Codec::bytes_decode(key)
            .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
-        let document_id = u32::from_be_bytes(document_id_bytes);

-        let bitmap = RoaringBitmap::from_iter(Some(document_id));
-        serialize_roaring_bitmap(&bitmap, &mut value_buffer)?;
-
-        // If there are no exact attributes, we do not need to iterate over positions.
-        if exact_attributes.is_empty() {
-            word_docids_sorter.insert(word_bytes, &value_buffer)?;
+        // every word contained in an attribute set to exact must be pushed in the exact_words list.
+        if exact_attributes.contains(&fid) {
+            exact_word_docids_sorter.insert(word.as_bytes(), value)?;
        } else {
-            let mut added_to_exact = false;
-            let mut added_to_word_docids = false;
-            for position in read_u32_ne_bytes(positions) {
-                // as soon as we know that this word had been to both readers, we don't need to
-                // iterate over the positions.
-                if added_to_exact && added_to_word_docids {
-                    break;
-                }
-                let (fid, _) = relative_from_absolute_position(position);
-                if exact_attributes.contains(&fid) && !added_to_exact {
-                    exact_word_docids_sorter.insert(word_bytes, &value_buffer)?;
-                    added_to_exact = true;
-                } else if !added_to_word_docids {
-                    word_docids_sorter.insert(word_bytes, &value_buffer)?;
-                    added_to_word_docids = true;
-                }
-            }
+            word_docids_sorter.insert(word.as_bytes(), value)?;
        }
    }

    Ok((
        sorter_into_reader(word_docids_sorter, indexer)?,
        sorter_into_reader(exact_word_docids_sorter, indexer)?,
+        writer_into_reader(word_fid_docids_writer)?,
    ))
 }
+
+fn words_into_sorter(
+    document_id: DocumentId,
+    fid: FieldId,
+    key_buffer: &mut Vec<u8>,
+    del_words: &BTreeSet<Vec<u8>>,
+    add_words: &BTreeSet<Vec<u8>>,
+    word_fid_docids_sorter: &mut grenad::Sorter<MergeFn>,
+) -> Result<()> {
+    puffin::profile_function!();
+
+    use itertools::merge_join_by;
+    use itertools::EitherOrBoth::{Both, Left, Right};
+
+    let mut buffer = Vec::new();
+    for eob in merge_join_by(del_words.iter(), add_words.iter(), |d, a| d.cmp(a)) {
+        buffer.clear(); // the value buffer is reused across iterations
+        let mut value_writer = KvWriterDelAdd::new(&mut buffer);
+        let word_bytes = match eob {
+            Left(word_bytes) => { // word only present in del_words
+                value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
+                word_bytes
+            }
+            Right(word_bytes) => { // word only present in add_words
+                value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
+                word_bytes
+            }
+            Both(word_bytes, _) => { // word present in both sets
+                value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
+                value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
+                word_bytes
+            }
+        };
+
+        key_buffer.clear(); // key layout: word bytes, a 0 byte, then the big-endian field id
+        key_buffer.extend_from_slice(word_bytes);
+        key_buffer.push(0);
+        key_buffer.extend_from_slice(&fid.to_be_bytes());
+        word_fid_docids_sorter.insert(&key_buffer, value_writer.into_inner().unwrap())?;
+    }
+
+    Ok(())
+}
--- a/milli/src/update/index_documents/extract/extract_word_fid_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_fid_docids.rs
@ -1,51 +0,0 @@
-use std::fs::File;
-use std::io::{self, BufReader};
-
-use super::helpers::{
-    create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
-    try_split_array_at, GrenadParameters,
-};
-use crate::error::SerializationError;
-use crate::index::db_name::DOCID_WORD_POSITIONS;
-use crate::{relative_from_absolute_position, DocumentId, Result};
-
-/// Extracts the word, field id, and the documents ids where this word appear at this field id.
-#[logging_timer::time]
-pub fn extract_word_fid_docids<R: io::Read + io::Seek>(
-    docid_word_positions: grenad::Reader<R>,
-    indexer: GrenadParameters,
-) -> Result<grenad::Reader<BufReader<File>>> {
-    puffin::profile_function!();
-
-    let max_memory = indexer.max_memory_by_thread();
-
-    let mut word_fid_docids_sorter = create_sorter(
-        grenad::SortAlgorithm::Unstable,
-        merge_cbo_roaring_bitmaps,
-        indexer.chunk_compression_type,
-        indexer.chunk_compression_level,
-        indexer.max_nb_chunks,
-        max_memory,
-    );
-
-    let mut key_buffer = Vec::new();
-    let mut cursor = docid_word_positions.into_cursor()?;
-    while let Some((key, value)) = cursor.move_on_next()? {
-        let (document_id_bytes, word_bytes) = try_split_array_at(key)
-            .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
-        let document_id = DocumentId::from_be_bytes(document_id_bytes);
-
-        for position in read_u32_ne_bytes(value) {
-            key_buffer.clear();
-            key_buffer.extend_from_slice(word_bytes);
-            key_buffer.push(0);
-            let (fid, _) = relative_from_absolute_position(position);
-            key_buffer.extend_from_slice(&fid.to_be_bytes());
-            word_fid_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?;
-        }
-    }
-
-    let word_fid_docids_reader = sorter_into_reader(word_fid_docids_sorter, indexer)?;
-
-    Ok(word_fid_docids_reader)
-}
--- a/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_pair_proximity_docids.rs
@ -1,16 +1,18 @@
-use std::cmp::Ordering;
-use std::collections::{BinaryHeap, HashMap};
+use std::collections::{BTreeMap, VecDeque};
 use std::fs::File;
 use std::io::BufReader;
-use std::{cmp, io, mem, str, vec};
+use std::{cmp, io};
+
+use obkv::KvReaderU16;

 use super::helpers::{
-    create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
-    try_split_array_at, GrenadParameters, MergeFn,
+    create_sorter, create_writer, merge_deladd_cbo_roaring_bitmaps, try_split_array_at,
+    writer_into_reader, GrenadParameters, MergeFn,
 };
 use crate::error::SerializationError;
 use crate::index::db_name::DOCID_WORD_POSITIONS;
-use crate::proximity::{positions_proximity, MAX_DISTANCE};
+use crate::proximity::{index_proximity, MAX_DISTANCE};
+use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 use crate::{DocumentId, Result};

 /// Extracts the best proximity between pairs of words and the documents ids where this pair appear.
@ -26,58 +28,137 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(

    let max_memory = indexer.max_memory_by_thread();

-    let mut word_pair_proximity_docids_sorter = create_sorter(
-        grenad::SortAlgorithm::Unstable,
-        merge_cbo_roaring_bitmaps,
-        indexer.chunk_compression_type,
-        indexer.chunk_compression_level,
-        indexer.max_nb_chunks,
-        max_memory.map(|m| m / 2),
-    );
+    let mut word_pair_proximity_docids_sorters: Vec<_> = (1..MAX_DISTANCE)
+        .map(|_| {
+            create_sorter(
+                grenad::SortAlgorithm::Unstable,
+                merge_deladd_cbo_roaring_bitmaps,
+                indexer.chunk_compression_type,
+                indexer.chunk_compression_level,
+                indexer.max_nb_chunks,
+                max_memory.map(|m| m / MAX_DISTANCE as usize),
+            )
+        })
+        .collect();

-    // This map is assumed to not consume a lot of memory.
-    let mut document_word_positions_heap = BinaryHeap::new();
+    let mut del_word_positions: VecDeque<(String, u16)> =
+        VecDeque::with_capacity(MAX_DISTANCE as usize);
+    let mut add_word_positions: VecDeque<(String, u16)> =
+        VecDeque::with_capacity(MAX_DISTANCE as usize);
+    let mut del_word_pair_proximity = BTreeMap::new();
+    let mut add_word_pair_proximity = BTreeMap::new();
    let mut current_document_id = None;

    let mut cursor = docid_word_positions.into_cursor()?;
    while let Some((key, value)) = cursor.move_on_next()? {
-        let (document_id_bytes, word_bytes) = try_split_array_at(key)
+        let (document_id_bytes, _fid_bytes) = try_split_array_at(key)
            .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
        let document_id = u32::from_be_bytes(document_id_bytes);
-        let word = str::from_utf8(word_bytes)?;

-        let curr_document_id = *current_document_id.get_or_insert(document_id);
-        if curr_document_id != document_id {
-            let document_word_positions_heap = mem::take(&mut document_word_positions_heap);
+        // if we change document, we fill the sorter
+        if current_document_id.map_or(false, |id| id != document_id) {
+            puffin::profile_scope!("Document into sorter");
+
            document_word_positions_into_sorter(
-                curr_document_id,
-                document_word_positions_heap,
-                &mut word_pair_proximity_docids_sorter,
+                current_document_id.unwrap(),
+                &del_word_pair_proximity,
+                &add_word_pair_proximity,
+                &mut word_pair_proximity_docids_sorters,
            )?;
-            current_document_id = Some(document_id);
+            del_word_pair_proximity.clear();
+            add_word_pair_proximity.clear();
        }

-        let word = word.to_string();
-        let mut positions: Vec<_> = read_u32_ne_bytes(value).collect();
-        positions.sort_unstable();
-        let mut iter = positions.into_iter();
-        if let Some(position) = iter.next() {
-            document_word_positions_heap.push(PeekedWordPosition { word, position, iter });
-        }
+        current_document_id = Some(document_id);
+
+        let (del, add): (Result<_>, Result<_>) = rayon::join(
+            || {
+                // deletions
+                if let Some(deletion) = KvReaderDelAdd::new(value).get(DelAdd::Deletion) {
+                    for (position, word) in KvReaderU16::new(deletion).iter() {
+                        // drain the proximity window until the head word is considered close to the word we are inserting.
+                        while del_word_positions.get(0).map_or(false, |(_w, p)| {
+                            index_proximity(*p as u32, position as u32) >= MAX_DISTANCE
+                        }) {
+                            word_positions_into_word_pair_proximity(
+                                &mut del_word_positions,
+                                &mut del_word_pair_proximity,
+                            )?;
+                        }
+
+                        // insert the new word.
+                        let word = std::str::from_utf8(word)?;
+                        del_word_positions.push_back((word.to_string(), position));
+                    }
+
+                    while !del_word_positions.is_empty() {
+                        word_positions_into_word_pair_proximity(
+                            &mut del_word_positions,
+                            &mut del_word_pair_proximity,
+                        )?;
+                    }
+                }
+
+                Ok(())
+            },
+            || {
+                // additions
+                if let Some(addition) = KvReaderDelAdd::new(value).get(DelAdd::Addition) {
+                    for (position, word) in KvReaderU16::new(addition).iter() {
+                        // drain the proximity window until the head word is considered close to the word we are inserting.
+                        while add_word_positions.get(0).map_or(false, |(_w, p)| {
+                            index_proximity(*p as u32, position as u32) >= MAX_DISTANCE
+                        }) {
+                            word_positions_into_word_pair_proximity(
+                                &mut add_word_positions,
+                                &mut add_word_pair_proximity,
+                            )?;
+                        }
+
+                        // insert the new word.
+                        let word = std::str::from_utf8(word)?;
+                        add_word_positions.push_back((word.to_string(), position));
+                    }
+
+                    while !add_word_positions.is_empty() {
+                        word_positions_into_word_pair_proximity(
+                            &mut add_word_positions,
+                            &mut add_word_pair_proximity,
+                        )?;
+                    }
+                }
+
+                Ok(())
+            },
+        );
+
+        del?;
+        add?;
    }

    if let Some(document_id) = current_document_id {
-        // We must make sure that don't lose the current document field id
-        // word count map if we break because we reached the end of the chunk.
-        let document_word_positions_heap = mem::take(&mut document_word_positions_heap);
+        puffin::profile_scope!("Final document into sorter");
        document_word_positions_into_sorter(
            document_id,
-            document_word_positions_heap,
-            &mut word_pair_proximity_docids_sorter,
+            &del_word_pair_proximity,
+            &add_word_pair_proximity,
+            &mut word_pair_proximity_docids_sorters,
        )?;
    }
+    {
+        puffin::profile_scope!("sorter_into_reader");
+        let mut writer = create_writer(
+            indexer.chunk_compression_type,
+            indexer.chunk_compression_level,
+            tempfile::tempfile()?,
+        );

-    sorter_into_reader(word_pair_proximity_docids_sorter, indexer)
+        for sorter in word_pair_proximity_docids_sorters {
+            sorter.write_into_stream_writer(&mut writer)?;
+        }
+
+        writer_into_reader(writer)
+    }
 }

 /// Fills the list of all pairs of words with the shortest proximity between 1 and 7 inclusive.
@ -86,96 +167,66 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
 /// close to each other.
 fn document_word_positions_into_sorter(
    document_id: DocumentId,
-    mut word_positions_heap: BinaryHeap<PeekedWordPosition<vec::IntoIter<u32>>>,
-    word_pair_proximity_docids_sorter: &mut grenad::Sorter<MergeFn>,
+    del_word_pair_proximity: &BTreeMap<(String, String), u8>,
+    add_word_pair_proximity: &BTreeMap<(String, String), u8>,
+    word_pair_proximity_docids_sorters: &mut [grenad::Sorter<MergeFn>],
 ) -> Result<()> {
-    let mut word_pair_proximity = HashMap::new();
-    let mut ordered_peeked_word_positions = Vec::new();
-    while !word_positions_heap.is_empty() {
-        while let Some(peeked_word_position) = word_positions_heap.pop() {
-            ordered_peeked_word_positions.push(peeked_word_position);
-            if ordered_peeked_word_positions.len() == 7 {
-                break;
-            }
-        }
-
-        if let Some((head, tail)) = ordered_peeked_word_positions.split_first() {
-            for PeekedWordPosition { word, position, .. } in tail {
-                let prox = positions_proximity(head.position, *position);
-                if prox > 0 && prox < MAX_DISTANCE {
-                    word_pair_proximity
-                        .entry((head.word.clone(), word.clone()))
-                        .and_modify(|p| {
-                            *p = cmp::min(*p, prox);
-                        })
-                        .or_insert(prox);
-                }
-            }
-
-            // Push the tail in the heap.
-            let tail_iter = ordered_peeked_word_positions.drain(1..);
-            word_positions_heap.extend(tail_iter);
-
-            // Advance the head and push it in the heap.
-            if let Some(mut head) = ordered_peeked_word_positions.pop() {
-                if let Some(next_position) = head.iter.next() {
-                    let prox = positions_proximity(head.position, next_position);
-
-                    if prox > 0 && prox < MAX_DISTANCE {
-                        word_pair_proximity
-                            .entry((head.word.clone(), head.word.clone()))
-                            .and_modify(|p| {
-                                *p = cmp::min(*p, prox);
-                            })
-                            .or_insert(prox);
-                    }
-
-                    word_positions_heap.push(PeekedWordPosition {
-                        word: head.word,
-                        position: next_position,
-                        iter: head.iter,
-                    });
-                }
-            }
-        }
-    }
+    use itertools::merge_join_by;
+    use itertools::EitherOrBoth::{Both, Left, Right};

+    // `buffer` holds the DelAdd value of the entry being written; it is reused for each entry.
+    let mut buffer = Vec::new();
    let mut key_buffer = Vec::new();
-    for ((w1, w2), prox) in word_pair_proximity {
+    // Walk the deletion and addition maps together, in sorted order.
+    // Items are compared as whole `((w1, w2), prox)` tuples, so a word pair is only reported
+    // as `Both` when its proximity is identical on the two sides; otherwise it is reported
+    // once as `Left` (deletion) and once as `Right` (addition).
+    for eob in
+        merge_join_by(del_word_pair_proximity.iter(), add_word_pair_proximity.iter(), |d, a| {
+            d.cmp(a)
+        })
+    {
+        buffer.clear();
+        // The value is a DelAdd obkv holding the document id (native-endian bytes) under
+        // `Deletion`, `Addition`, or both, depending on which map(s) contain the entry.
+        let mut value_writer = KvWriterDelAdd::new(&mut buffer);
+        let ((w1, w2), prox) = match eob {
+            Left(key_value) => {
+                value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
+                key_value
+            }
+            Right(key_value) => {
+                value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
+                key_value
+            }
+            Both(key_value, _) => {
+                value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
+                value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
+                key_value
+            }
+        };
+
+        // Key layout: proximity byte, first word bytes, a `0` byte, second word bytes.
        key_buffer.clear();
-        key_buffer.push(prox as u8);
+        key_buffer.push(*prox);
        key_buffer.extend_from_slice(w1.as_bytes());
        key_buffer.push(0);
        key_buffer.extend_from_slice(w2.as_bytes());

-        word_pair_proximity_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?;
+        // The sorter is selected by proximity: proximity `p` is written to index `p - 1`.
+        // NOTE(review): relies on `prox >= 1` and on the slice being long enough; confirm that
+        // every producer of these maps filters out a proximity of 0.
+        word_pair_proximity_docids_sorters[*prox as usize - 1]
+            .insert(&key_buffer, value_writer.into_inner().unwrap())?;
    }

    Ok(())
 }

-struct PeekedWordPosition<I> {
-    word: String,
-    position: u32,
-    iter: I,
-}
-
-impl<I> Ord for PeekedWordPosition<I> {
-    fn cmp(&self, other: &Self) -> Ordering {
-        self.position.cmp(&other.position).reverse()
-    }
-}
-
-impl<I> PartialOrd for PeekedWordPosition<I> {
-    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
-        Some(self.cmp(other))
-    }
-}
-
-impl<I> Eq for PeekedWordPosition<I> {}
-
-impl<I> PartialEq for PeekedWordPosition<I> {
-    fn eq(&self, other: &Self) -> bool {
-        self.position == other.position
+/// Pops the front `(word, position)` entry of `word_positions` and, for every entry still
+/// in the queue, records in `word_pair_proximity` the proximity between the popped word and
+/// that word, keyed by `(popped_word, other_word)`.
+///
+/// Only proximities strictly between 0 and `MAX_DISTANCE` are recorded, and when a pair is
+/// already present the smaller proximity is kept.
+///
+/// Panics if `word_positions` is empty (the front entry is unwrapped), so callers must check
+/// that the queue is not empty before calling. As written, it always returns `Ok(())`.
+fn word_positions_into_word_pair_proximity(
+    word_positions: &mut VecDeque<(String, u16)>,
+    word_pair_proximity: &mut BTreeMap<(String, String), u8>,
+) -> Result<()> {
+    // The head is removed from the queue, so it is only compared with the entries after it.
+    let (head_word, head_position) = word_positions.pop_front().unwrap();
+    for (word, position) in word_positions.iter() {
+        let prox = index_proximity(head_position as u32, *position as u32) as u8;
+        if prox > 0 && prox < MAX_DISTANCE as u8 {
+            // Keep the smallest proximity seen so far for this ordered pair of words.
+            word_pair_proximity
+                .entry((head_word.clone(), word.clone()))
+                .and_modify(|p| {
+                    *p = cmp::min(*p, prox);
+                })
+                .or_insert(prox);
+        }
    }
+    Ok(())
 }
--- a/milli/src/update/index_documents/extract/extract_word_position_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_word_position_docids.rs
@ -1,13 +1,18 @@
+use std::collections::BTreeSet;
 use std::fs::File;
 use std::io::{self, BufReader};

+use obkv::KvReaderU16;
+
 use super::helpers::{
-    create_sorter, merge_cbo_roaring_bitmaps, read_u32_ne_bytes, sorter_into_reader,
-    try_split_array_at, GrenadParameters,
+    create_sorter, merge_deladd_cbo_roaring_bitmaps, sorter_into_reader, try_split_array_at,
+    GrenadParameters,
 };
 use crate::error::SerializationError;
 use crate::index::db_name::DOCID_WORD_POSITIONS;
-use crate::{bucketed_position, relative_from_absolute_position, DocumentId, Result};
+use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
+use crate::update::MergeFn;
+use crate::{bucketed_position, DocumentId, Result};

 /// Extracts the word positions and the documents ids where this word appear.
 ///
@ -24,32 +29,111 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(

    let mut word_position_docids_sorter = create_sorter(
        grenad::SortAlgorithm::Unstable,
-        merge_cbo_roaring_bitmaps,
+        merge_deladd_cbo_roaring_bitmaps,
        indexer.chunk_compression_type,
        indexer.chunk_compression_level,
        indexer.max_nb_chunks,
        max_memory,
    );

+    let mut del_word_positions: BTreeSet<(u16, Vec<u8>)> = BTreeSet::new();
+    let mut add_word_positions: BTreeSet<(u16, Vec<u8>)> = BTreeSet::new();
+    let mut current_document_id: Option<u32> = None;
    let mut key_buffer = Vec::new();
    let mut cursor = docid_word_positions.into_cursor()?;
    while let Some((key, value)) = cursor.move_on_next()? {
-        let (document_id_bytes, word_bytes) = try_split_array_at(key)
+        let (document_id_bytes, _fid_bytes) = try_split_array_at(key)
            .ok_or(SerializationError::Decoding { db_name: Some(DOCID_WORD_POSITIONS) })?;
        let document_id = DocumentId::from_be_bytes(document_id_bytes);

-        for position in read_u32_ne_bytes(value) {
-            key_buffer.clear();
-            key_buffer.extend_from_slice(word_bytes);
-            key_buffer.push(0);
-            let (_, position) = relative_from_absolute_position(position);
-            let position = bucketed_position(position);
-            key_buffer.extend_from_slice(&position.to_be_bytes());
-            word_position_docids_sorter.insert(&key_buffer, document_id.to_ne_bytes())?;
+        if current_document_id.map_or(false, |id| document_id != id) {
+            words_position_into_sorter(
+                current_document_id.unwrap(),
+                &mut key_buffer,
+                &del_word_positions,
+                &add_word_positions,
+                &mut word_position_docids_sorter,
+            )?;
+            del_word_positions.clear();
+            add_word_positions.clear();
+        }
+
+        current_document_id = Some(document_id);
+
+        let del_add_reader = KvReaderDelAdd::new(value);
+        // extract all unique words to remove.
+        if let Some(deletion) = del_add_reader.get(DelAdd::Deletion) {
+            for (position, word_bytes) in KvReaderU16::new(deletion).iter() {
+                let position = bucketed_position(position);
+                del_word_positions.insert((position, word_bytes.to_vec()));
+            }
+        }
+
+        // extract all unique additional words.
+        if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
+            for (position, word_bytes) in KvReaderU16::new(addition).iter() {
+                let position = bucketed_position(position);
+                add_word_positions.insert((position, word_bytes.to_vec()));
+            }
        }
    }

+    if let Some(document_id) = current_document_id {
+        words_position_into_sorter(
+            document_id,
+            &mut key_buffer,
+            &del_word_positions,
+            &add_word_positions,
+            &mut word_position_docids_sorter,
+        )?;
+    }
+
+    // TODO remove noop DelAdd OBKV
    let word_position_docids_reader = sorter_into_reader(word_position_docids_sorter, indexer)?;

    Ok(word_position_docids_reader)
 }
+
+/// Writes into `word_position_docids_sorter` one entry per `(position, word)` pair found in
+/// `del_word_positions` and/or `add_word_positions` for `document_id`.
+///
+/// - Key: the word bytes, a `0` byte, then the position as big-endian bytes.
+/// - Value: a DelAdd obkv holding the document id (native-endian bytes) under
+///   `DelAdd::Deletion`, `DelAdd::Addition`, or both, depending on which set(s) contain the pair.
+///
+/// `key_buffer` is scratch space owned by the caller: it is cleared and overwritten for
+/// every entry written.
+fn words_position_into_sorter(
+    document_id: DocumentId,
+    key_buffer: &mut Vec<u8>,
+    del_word_positions: &BTreeSet<(u16, Vec<u8>)>,
+    add_word_positions: &BTreeSet<(u16, Vec<u8>)>,
+    word_position_docids_sorter: &mut grenad::Sorter<MergeFn>,
+) -> Result<()> {
+    puffin::profile_function!();
+
+    use itertools::merge_join_by;
+    use itertools::EitherOrBoth::{Both, Left, Right};
+
+    // `buffer` holds the DelAdd value of the entry being written; it is reused for each entry.
+    let mut buffer = Vec::new();
+    // Walk both sorted sets together: `Left` is a deletion only, `Right` an addition only,
+    // and `Both` a pair present in the two sets.
+    for eob in merge_join_by(del_word_positions.iter(), add_word_positions.iter(), |d, a| d.cmp(a))
+    {
+        buffer.clear();
+        let mut value_writer = KvWriterDelAdd::new(&mut buffer);
+        let (position, word_bytes) = match eob {
+            Left(key) => {
+                value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
+                key
+            }
+            Right(key) => {
+                value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
+                key
+            }
+            Both(key, _) => {
+                // both values need to be kept because they will be used in other extractors.
+                value_writer.insert(DelAdd::Deletion, document_id.to_ne_bytes()).unwrap();
+                value_writer.insert(DelAdd::Addition, document_id.to_ne_bytes()).unwrap();
+                key
+            }
+        };
+
+        // Key layout: word bytes, a `0` byte, then the position in big-endian bytes.
+        key_buffer.clear();
+        key_buffer.extend_from_slice(word_bytes);
+        key_buffer.push(0);
+        key_buffer.extend_from_slice(&position.to_be_bytes());
+        word_position_docids_sorter.insert(&key_buffer, value_writer.into_inner().unwrap())?;
+    }
+
+    Ok(())
+}
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@ -6,7 +6,6 @@ mod extract_fid_word_count_docids;
 mod extract_geo_points;
 mod extract_vector_points;
 mod extract_word_docids;
-mod extract_word_fid_docids;
 mod extract_word_pair_proximity_docids;
 mod extract_word_position_docids;

@ -26,12 +25,11 @@ use self::extract_fid_word_count_docids::extract_fid_word_count_docids;
 use self::extract_geo_points::extract_geo_points;
 use self::extract_vector_points::extract_vector_points;
 use self::extract_word_docids::extract_word_docids;
-use self::extract_word_fid_docids::extract_word_fid_docids;
 use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
 use self::extract_word_position_docids::extract_word_position_docids;
 use super::helpers::{
-    as_cloneable_grenad, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, CursorClonableMmap,
-    GrenadParameters, MergeFn, MergeableReader,
+    as_cloneable_grenad, merge_deladd_cbo_roaring_bitmaps, CursorClonableMmap, GrenadParameters,
+    MergeFn, MergeableReader,
 };
 use super::{helpers, TypedChunk};
 use crate::{FieldId, Result};
@ -65,7 +63,6 @@ pub(crate) fn data_from_obkv_documents(
                indexer,
                lmdb_writer_sx.clone(),
                vectors_field_id,
-                primary_key_id,
            )
        })
        .collect::<Result<()>>()?;
@ -94,9 +91,9 @@ pub(crate) fn data_from_obkv_documents(
    let (
        docid_word_positions_chunks,
        (
-            docid_fid_facet_numbers_chunks,
+            fid_docid_facet_numbers_chunks,
            (
-                docid_fid_facet_strings_chunks,
+                fid_docid_facet_strings_chunks,
                (
                    facet_is_null_docids_chunks,
                    (facet_is_empty_docids_chunks, facet_exists_docids_chunks),
@ -110,7 +107,7 @@ pub(crate) fn data_from_obkv_documents(
        let lmdb_writer_sx = lmdb_writer_sx.clone();
        rayon::spawn(move || {
            debug!("merge {} database", "facet-id-exists-docids");
-            match facet_exists_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) {
+            match facet_exists_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
                Ok(reader) => {
                    let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetExistsDocids(reader)));
                }
@ -126,7 +123,7 @@ pub(crate) fn data_from_obkv_documents(
        let lmdb_writer_sx = lmdb_writer_sx.clone();
        rayon::spawn(move || {
            debug!("merge {} database", "facet-id-is-null-docids");
-            match facet_is_null_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) {
+            match facet_is_null_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
                Ok(reader) => {
                    let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsNullDocids(reader)));
                }
@ -142,7 +139,7 @@ pub(crate) fn data_from_obkv_documents(
        let lmdb_writer_sx = lmdb_writer_sx.clone();
        rayon::spawn(move || {
            debug!("merge {} database", "facet-id-is-empty-docids");
-            match facet_is_empty_docids_chunks.merge(merge_cbo_roaring_bitmaps, &indexer) {
+            match facet_is_empty_docids_chunks.merge(merge_deladd_cbo_roaring_bitmaps, &indexer) {
                Ok(reader) => {
                    let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdFacetIsEmptyDocids(reader)));
                }
@ -158,7 +155,7 @@ pub(crate) fn data_from_obkv_documents(
        indexer,
        lmdb_writer_sx.clone(),
        extract_word_pair_proximity_docids,
-        merge_cbo_roaring_bitmaps,
+        merge_deladd_cbo_roaring_bitmaps,
        TypedChunk::WordPairProximityDocids,
        "word-pair-proximity-docids",
    );
@ -168,24 +165,31 @@ pub(crate) fn data_from_obkv_documents(
        indexer,
        lmdb_writer_sx.clone(),
        extract_fid_word_count_docids,
-        merge_cbo_roaring_bitmaps,
-        TypedChunk::FieldIdWordcountDocids,
+        merge_deladd_cbo_roaring_bitmaps,
+        TypedChunk::FieldIdWordCountDocids,
        "field-id-wordcount-docids",
    );

    spawn_extraction_task::<
        _,
        _,
-        Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)>,
+        Vec<(
+            grenad::Reader<BufReader<File>>,
+            grenad::Reader<BufReader<File>>,
+            grenad::Reader<BufReader<File>>,
+        )>,
    >(
        docid_word_positions_chunks.clone(),
        indexer,
        lmdb_writer_sx.clone(),
        move |doc_word_pos, indexer| extract_word_docids(doc_word_pos, indexer, &exact_attributes),
-        merge_roaring_bitmaps,
-        |(word_docids_reader, exact_word_docids_reader)| TypedChunk::WordDocids {
-            word_docids_reader,
-            exact_word_docids_reader,
+        merge_deladd_cbo_roaring_bitmaps,
+        |(word_docids_reader, exact_word_docids_reader, word_fid_docids_reader)| {
+            TypedChunk::WordDocids {
+                word_docids_reader,
+                exact_word_docids_reader,
+                word_fid_docids_reader,
+            }
        },
        "word-docids",
    );
@ -195,36 +199,27 @@ pub(crate) fn data_from_obkv_documents(
        indexer,
        lmdb_writer_sx.clone(),
        extract_word_position_docids,
-        merge_cbo_roaring_bitmaps,
+        merge_deladd_cbo_roaring_bitmaps,
        TypedChunk::WordPositionDocids,
        "word-position-docids",
    );
-    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
-        docid_word_positions_chunks,
-        indexer,
-        lmdb_writer_sx.clone(),
-        extract_word_fid_docids,
-        merge_cbo_roaring_bitmaps,
-        TypedChunk::WordFidDocids,
-        "word-fid-docids",
-    );

    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
-        docid_fid_facet_strings_chunks,
+        fid_docid_facet_strings_chunks,
        indexer,
        lmdb_writer_sx.clone(),
        extract_facet_string_docids,
-        merge_cbo_roaring_bitmaps,
+        merge_deladd_cbo_roaring_bitmaps,
        TypedChunk::FieldIdFacetStringDocids,
        "field-id-facet-string-docids",
    );

    spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
-        docid_fid_facet_numbers_chunks,
+        fid_docid_facet_numbers_chunks,
        indexer,
        lmdb_writer_sx,
        extract_facet_number_docids,
-        merge_cbo_roaring_bitmaps,
+        merge_deladd_cbo_roaring_bitmaps,
        TypedChunk::FieldIdFacetNumberDocids,
        "field-id-facet-number-docids",
    );
@ -278,7 +273,6 @@ fn send_original_documents_data(
    indexer: GrenadParameters,
    lmdb_writer_sx: Sender<Result<TypedChunk>>,
    vectors_field_id: Option<FieldId>,
-    primary_key_id: FieldId,
 ) -> Result<()> {
    let original_documents_chunk =
        original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
@ -287,12 +281,7 @@ fn send_original_documents_data(
        let documents_chunk_cloned = original_documents_chunk.clone();
        let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
        rayon::spawn(move || {
-            let result = extract_vector_points(
-                documents_chunk_cloned,
-                indexer,
-                primary_key_id,
-                vectors_field_id,
-            );
+            let result = extract_vector_points(documents_chunk_cloned, indexer, vectors_field_id);
            let _ = match result {
                Ok(vector_points) => {
                    lmdb_writer_sx_cloned.send(Ok(TypedChunk::VectorPoints(vector_points)))
@ -356,10 +345,10 @@ fn send_and_extract_flattened_documents_data(
        });
    }

-    let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
+    let (docid_word_positions_chunk, fid_docid_facet_values_chunks): (Result<_>, Result<_>) =
        rayon::join(
            || {
-                let (documents_ids, docid_word_positions_chunk, script_language_pair) =
+                let (docid_word_positions_chunk, script_language_pair) =
                    extract_docid_word_positions(
                        flattened_documents_chunk.clone(),
                        indexer,
@ -370,9 +359,6 @@ fn send_and_extract_flattened_documents_data(
                        max_positions_per_attributes,
                    )?;

-                // send documents_ids to DB writer
-                let _ = lmdb_writer_sx.send(Ok(TypedChunk::NewDocumentsIds(documents_ids)));
-
                // send docid_word_positions_chunk to DB writer
                let docid_word_positions_chunk =
                    unsafe { as_cloneable_grenad(&docid_word_positions_chunk)? };
@ -384,8 +370,8 @@ fn send_and_extract_flattened_documents_data(
            },
            || {
                let ExtractedFacetValues {
-                    docid_fid_facet_numbers_chunk,
-                    docid_fid_facet_strings_chunk,
+                    fid_docid_facet_numbers_chunk,
+                    fid_docid_facet_strings_chunk,
                    fid_facet_is_null_docids_chunk,
                    fid_facet_is_empty_docids_chunk,
                    fid_facet_exists_docids_chunk,
@ -396,26 +382,26 @@ fn send_and_extract_flattened_documents_data(
                    geo_fields_ids,
                )?;

-                // send docid_fid_facet_numbers_chunk to DB writer
-                let docid_fid_facet_numbers_chunk =
-                    unsafe { as_cloneable_grenad(&docid_fid_facet_numbers_chunk)? };
+                // send fid_docid_facet_numbers_chunk to DB writer
+                let fid_docid_facet_numbers_chunk =
+                    unsafe { as_cloneable_grenad(&fid_docid_facet_numbers_chunk)? };

                let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdDocidFacetNumbers(
-                    docid_fid_facet_numbers_chunk.clone(),
+                    fid_docid_facet_numbers_chunk.clone(),
                )));

-                // send docid_fid_facet_strings_chunk to DB writer
-                let docid_fid_facet_strings_chunk =
-                    unsafe { as_cloneable_grenad(&docid_fid_facet_strings_chunk)? };
+                // send fid_docid_facet_strings_chunk to DB writer
+                let fid_docid_facet_strings_chunk =
+                    unsafe { as_cloneable_grenad(&fid_docid_facet_strings_chunk)? };

                let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdDocidFacetStrings(
-                    docid_fid_facet_strings_chunk.clone(),
+                    fid_docid_facet_strings_chunk.clone(),
                )));

                Ok((
-                    docid_fid_facet_numbers_chunk,
+                    fid_docid_facet_numbers_chunk,
                    (
-                        docid_fid_facet_strings_chunk,
+                        fid_docid_facet_strings_chunk,
                        (
                            fid_facet_is_null_docids_chunk,
                            (fid_facet_is_empty_docids_chunk, fid_facet_exists_docids_chunk),
@ -425,5 +411,5 @@ fn send_and_extract_flattened_documents_data(
            },
        );

-    Ok((docid_word_positions_chunk?, docid_fid_facet_values_chunks?))
+    Ok((docid_word_positions_chunk?, fid_docid_facet_values_chunks?))
 }
--- a/milli/src/update/index_documents/helpers/grenad_helpers.rs
+++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs
@ -1,14 +1,12 @@
 use std::borrow::Cow;
 use std::fs::File;
 use std::io::{self, BufReader, BufWriter, Seek};
-use std::time::Instant;

 use grenad::{CompressionType, Sorter};
 use heed::types::ByteSlice;
-use log::debug;

 use super::{ClonableMmap, MergeFn};
-use crate::error::InternalError;
+use crate::update::index_documents::valid_lmdb_key;
 use crate::Result;

 pub type CursorClonableMmap = io::Cursor<ClonableMmap>;
@ -47,6 +45,7 @@ pub fn create_sorter(
        builder.allow_realloc(false);
    }
    builder.sort_algorithm(sort_algorithm);
+    builder.sort_in_parallel(true);
    builder.build()
 }

@ -54,6 +53,7 @@ pub fn sorter_into_reader(
    sorter: grenad::Sorter<MergeFn>,
    indexer: GrenadParameters,
 ) -> Result<grenad::Reader<BufReader<File>>> {
+    puffin::profile_function!();
    let mut writer = create_writer(
        indexer.chunk_compression_type,
        indexer.chunk_compression_level,
@ -115,6 +115,32 @@ impl MergeableReader for Vec<(grenad::Reader<BufReader<File>>, grenad::Reader<Bu
    }
 }

+/// Merges a list of reader triples position by position: all first readers are merged
+/// together, all second readers together and all third readers together, each group with
+/// the same `merge_fn`. The output is the triple of merged readers, in the same order.
+impl MergeableReader
+    for Vec<(
+        grenad::Reader<BufReader<File>>,
+        grenad::Reader<BufReader<File>>,
+        grenad::Reader<BufReader<File>>,
+    )>
+{
+    type Output = (
+        grenad::Reader<BufReader<File>>,
+        grenad::Reader<BufReader<File>>,
+        grenad::Reader<BufReader<File>>,
+    );
+
+    fn merge(self, merge_fn: MergeFn, params: &GrenadParameters) -> Result<Self::Output> {
+        // One merger per tuple position.
+        let mut m1 = MergerBuilder::new(merge_fn);
+        let mut m2 = MergerBuilder::new(merge_fn);
+        let mut m3 = MergerBuilder::new(merge_fn);
+        // Route each reader of a triple to the merger of its position.
+        for (r1, r2, r3) in self.into_iter() {
+            m1.push(r1)?;
+            m2.push(r2)?;
+            m3.push(r3)?;
+        }
+        Ok((m1.finish(params)?, m2.finish(params)?, m3.finish(params)?))
+    }
+}
+
 struct MergerBuilder<R>(grenad::MergerBuilder<R, MergeFn>);

 impl<R: io::Read + io::Seek> MergerBuilder<R> {
@ -195,11 +221,13 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
        );

        while let Some((document_id, obkv)) = cursor.move_on_next()? {
-            obkv_documents.insert(document_id, obkv)?;
-            current_chunk_size += document_id.len() as u64 + obkv.len() as u64;
+            if !obkv.is_empty() {
+                obkv_documents.insert(document_id, obkv)?;
+                current_chunk_size += document_id.len() as u64 + obkv.len() as u64;

-            if current_chunk_size >= documents_chunk_size as u64 {
-                return writer_into_reader(obkv_documents).map(Some);
+                if current_chunk_size >= documents_chunk_size as u64 {
+                    return writer_into_reader(obkv_documents).map(Some);
+                }
            }
        }

@ -210,45 +238,46 @@ pub fn grenad_obkv_into_chunks<R: io::Read + io::Seek>(
    Ok(std::iter::from_fn(move || transposer().transpose()))
 }

-pub fn sorter_into_lmdb_database(
-    wtxn: &mut heed::RwTxn,
-    database: heed::PolyDatabase,
+/// Writes every entry of the provided sorter into `database`.
+///
+/// Entries whose key is rejected by `valid_lmdb_key` are skipped without error.
+///
+/// For each remaining entry, the value to store is computed as follows:
+/// - if `index_is_empty` is true, or if the key is not yet in the database, the value is
+///   produced by `serialize_value(value, &mut buffer)`;
+/// - otherwise `merge_values(value, prev_value, &mut buffer)` is called, with the value coming
+///   from the sorter first and the value already stored in the database second.
+///
+/// When the computed value is `Some`, it is put under the key. When `merge_values` returns
+/// `None`, the key is deleted from the database.
+pub fn write_sorter_into_database<K, V, FS, FM>(
    sorter: Sorter<MergeFn>,
-    merge: MergeFn,
-) -> Result<()> {
+    database: &heed::Database<K, V>,
+    wtxn: &mut heed::RwTxn,
+    index_is_empty: bool,
+    serialize_value: FS,
+    merge_values: FM,
+) -> Result<()>
+where
+    FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
+    FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec<u8>) -> Result<Option<&'a [u8]>>,
+{
    puffin::profile_function!();
-    debug!("Writing MTBL sorter...");
-    let before = Instant::now();
+
+    // Scratch buffer handed to `serialize_value` / `merge_values`; cleared before each entry.
+    let mut buffer = Vec::new();
+    // Keys and values are read and written as raw byte slices from here on.
+    let database = database.remap_types::<ByteSlice, ByteSlice>();

    let mut merger_iter = sorter.into_stream_merger_iter()?;
-    if database.is_empty(wtxn)? {
-        let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
-        while let Some((k, v)) = merger_iter.next()? {
-            // safety: we don't keep references from inside the LMDB database.
-            unsafe { out_iter.append(k, v)? };
-        }
-    } else {
-        while let Some((k, v)) = merger_iter.next()? {
-            let mut iter = database.prefix_iter_mut::<_, ByteSlice, ByteSlice>(wtxn, k)?;
-            match iter.next().transpose()? {
-                Some((key, old_val)) if key == k => {
-                    let vals = vec![Cow::Borrowed(old_val), Cow::Borrowed(v)];
-                    let val = merge(k, &vals).map_err(|_| {
-                        // TODO just wrap this error?
-                        InternalError::IndexingMergingKeys { process: "get-put-merge" }
-                    })?;
-                    // safety: we don't keep references from inside the LMDB database.
-                    unsafe { iter.put_current(k, &val)? };
+    while let Some((key, value)) = merger_iter.next()? {
+        // Keys that `valid_lmdb_key` rejects are dropped silently.
+        if valid_lmdb_key(key) {
+            buffer.clear();
+            // The caller states the index is empty: skip the database lookup entirely.
+            let value = if index_is_empty {
+                Some(serialize_value(value, &mut buffer)?)
+            } else {
+                match database.get(wtxn, key)? {
+                    Some(prev_value) => merge_values(value, prev_value, &mut buffer)?,
+                    None => Some(serialize_value(value, &mut buffer)?),
                }
-                _ => {
-                    drop(iter);
-                    database.put::<_, ByteSlice, ByteSlice>(wtxn, k, v)?;
+            };
+            // `None` means the merge produced nothing to store, so the key is removed.
+            match value {
+                Some(value) => database.put(wtxn, key, value)?,
+                None => {
+                    database.delete(wtxn, key)?;
                }
            }
        }
    }

-    debug!("MTBL sorter writen in {:.02?}!", before.elapsed());
    Ok(())
 }

--- a/milli/src/update/index_documents/helpers/merge_functions.rs
+++ b/milli/src/update/index_documents/helpers/merge_functions.rs
@ -6,22 +6,12 @@ use std::result::Result as StdResult;
 use roaring::RoaringBitmap;

 use crate::heed_codec::CboRoaringBitmapCodec;
+use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 use crate::update::index_documents::transform::Operation;
 use crate::Result;

 pub type MergeFn = for<'a> fn(&[u8], &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>>;

-pub fn concat_u32s_array<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
-    if values.len() == 1 {
-        Ok(values[0].clone())
-    } else {
-        let capacity = values.iter().map(|v| v.len()).sum::<usize>();
-        let mut output = Vec::with_capacity(capacity);
-        values.iter().for_each(|integers| output.extend_from_slice(integers));
-        Ok(Cow::Owned(output))
-    }
-}
-
 pub fn serialize_roaring_bitmap(bitmap: &RoaringBitmap, buffer: &mut Vec<u8>) -> io::Result<()> {
    buffer.clear();
    buffer.reserve(bitmap.serialized_size());
@ -75,57 +65,123 @@ pub fn keep_latest_obkv<'a>(_key: &[u8], obkvs: &[Cow<'a, [u8]>]) -> Result<Cow<
    Ok(obkvs.last().unwrap().clone())
 }

-pub fn merge_two_obkvs(base: obkv::KvReaderU16, update: obkv::KvReaderU16, buffer: &mut Vec<u8>) {
+pub fn merge_two_del_add_obkvs(
+    base: obkv::KvReaderU16,
+    update: obkv::KvReaderU16,
+    merge_additions: bool,
+    buffer: &mut Vec<u8>,
+) {
    use itertools::merge_join_by;
    use itertools::EitherOrBoth::{Both, Left, Right};

    buffer.clear();

    let mut writer = obkv::KvWriter::new(buffer);
+    let mut value_buffer = Vec::new();
    for eob in merge_join_by(base.iter(), update.iter(), |(b, _), (u, _)| b.cmp(u)) {
        match eob {
-            Both(_, (k, v)) | Left((k, v)) | Right((k, v)) => writer.insert(k, v).unwrap(),
+            Left((k, v)) => {
+                if merge_additions {
+                    writer.insert(k, v).unwrap()
+                } else {
+                    // If merge_additions is false, recreate an obkv keeping the deletions only.
+                    value_buffer.clear();
+                    let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
+                    let base_reader = KvReaderDelAdd::new(v);
+
+                    if let Some(deletion) = base_reader.get(DelAdd::Deletion) {
+                        value_writer.insert(DelAdd::Deletion, deletion).unwrap();
+                        value_writer.finish().unwrap();
+                        writer.insert(k, &value_buffer).unwrap()
+                    }
+                }
+            }
+            Right((k, v)) => writer.insert(k, v).unwrap(),
+            Both((k, base), (_, update)) => {
+                // merge deletions and additions.
+                value_buffer.clear();
+                let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
+                let base_reader = KvReaderDelAdd::new(base);
+                let update_reader = KvReaderDelAdd::new(update);
+
+                // keep newest deletion.
+                if let Some(deletion) = update_reader
+                    .get(DelAdd::Deletion)
+                    .or_else(|| base_reader.get(DelAdd::Deletion))
+                {
+                    value_writer.insert(DelAdd::Deletion, deletion).unwrap();
+                }
+
+                // keep base addition only if merge_additions is true.
+                let base_addition =
+                    merge_additions.then(|| base_reader.get(DelAdd::Addition)).flatten();
+                // keep newest addition.
+                // TODO use or_else
+                if let Some(addition) = update_reader.get(DelAdd::Addition).or(base_addition) {
+                    value_writer.insert(DelAdd::Addition, addition).unwrap();
+                }
+
+                value_writer.finish().unwrap();
+                writer.insert(k, &value_buffer).unwrap()
+            }
        }
    }

    writer.finish().unwrap();
 }

-/// Merge all the obks in the order we see them.
-pub fn merge_obkvs_and_operations<'a>(
+/// Merge all the obkvs from the newest to the oldest, stopping once a deletion operation is met.
+fn inner_merge_del_add_obkvs<'a>(
+    obkvs: &[Cow<'a, [u8]>],
+    merge_additions: bool,
+) -> Result<Cow<'a, [u8]>> {
+    // pop the newest operation from the list (panics if `obkvs` is empty).
+    let (newest, obkvs) = obkvs.split_last().unwrap();
+    // the first byte is the operation type, keep it for the returned value.
+    let newest_operation_type = newest[0];
+
+    // treat the newest obkv (without its operation byte) as the starting point of the merge.
+    let mut acc_operation_type = newest_operation_type;
+    let mut acc = newest[1..].to_vec();
+    let mut buffer = Vec::new();
+    // reverse iter from the most recent to the oldest.
+    for current in obkvs.iter().rev() {
+        // if the obkv merged last (initially the newest one) was a deletion
+        // operation, stop the merge process: older obkvs are ignored.
+        if acc_operation_type == Operation::Deletion as u8 {
+            break;
+        }
+
+        let newest = obkv::KvReader::new(&acc);
+        let oldest = obkv::KvReader::new(&current[1..]);
+        merge_two_del_add_obkvs(oldest, newest, merge_additions, &mut buffer);
+
+        // we want the result of the merge into our accumulator.
+        std::mem::swap(&mut acc, &mut buffer);
+        acc_operation_type = current[0];
+    }
+
+    acc.insert(0, newest_operation_type);
+    Ok(Cow::from(acc))
+}
+
+/// Merge all the obkvs from the newest to the oldest.
+pub fn obkvs_merge_additions_and_deletions<'a>(
    _key: &[u8],
    obkvs: &[Cow<'a, [u8]>],
 ) -> Result<Cow<'a, [u8]>> {
-    // [add, add, delete, add, add]
-    // we can ignore everything that happened before the last delete.
-    let starting_position =
-        obkvs.iter().rposition(|obkv| obkv[0] == Operation::Deletion as u8).unwrap_or(0);
-
-    // [add, add, delete]
-    // if the last operation was a deletion then we simply return the deletion
-    if starting_position == obkvs.len() - 1 && obkvs.last().unwrap()[0] == Operation::Deletion as u8
-    {
-        return Ok(obkvs[obkvs.len() - 1].clone());
-    }
-    let mut buffer = Vec::new();
-
-    // (add, add, delete) [add, add]
-    // in the other case, no deletion will be encountered during the merge
-    let mut ret =
-        obkvs[starting_position..].iter().cloned().fold(Vec::new(), |mut acc, current| {
-            let first = obkv::KvReader::new(&acc);
-            let second = obkv::KvReader::new(&current[1..]);
-            merge_two_obkvs(first, second, &mut buffer);
-
-            // we want the result of the merge into our accumulator
-            std::mem::swap(&mut acc, &mut buffer);
-            acc
-        });
-
-    ret.insert(0, Operation::Addition as u8);
-    Ok(Cow::from(ret))
+    inner_merge_del_add_obkvs(obkvs, true)
 }

+/// Merge all the obkvs deletions from the newest to the oldest and keep only the newest additions.
+pub fn obkvs_keep_last_addition_merge_deletions<'a>(
+    _key: &[u8],
+    obkvs: &[Cow<'a, [u8]>],
+) -> Result<Cow<'a, [u8]>> {
+    inner_merge_del_add_obkvs(obkvs, false)
+}
+
+/// Do a union of all the CboRoaringBitmaps in the values.
 pub fn merge_cbo_roaring_bitmaps<'a>(
    _key: &[u8],
    values: &[Cow<'a, [u8]>],
@ -138,3 +194,52 @@ pub fn merge_cbo_roaring_bitmaps<'a>(
        Ok(Cow::from(vec))
    }
 }
+
+/// Do a union of CboRoaringBitmaps on both sides of a DelAdd obkv
+/// separately and output a new DelAdd with both unions.
+pub fn merge_deladd_cbo_roaring_bitmaps<'a>(
+    _key: &[u8],
+    values: &[Cow<'a, [u8]>],
+) -> Result<Cow<'a, [u8]>> {
+    if values.len() == 1 {
+        Ok(values[0].clone())
+    } else {
+        // Collect the serialized bitmaps of each side (deletions and additions)
+        let mut del_bitmaps_bytes = Vec::new();
+        let mut add_bitmaps_bytes = Vec::new();
+        for value in values {
+            let obkv = KvReaderDelAdd::new(value);
+            if let Some(bitmap_bytes) = obkv.get(DelAdd::Deletion) {
+                del_bitmaps_bytes.push(bitmap_bytes);
+            }
+            if let Some(bitmap_bytes) = obkv.get(DelAdd::Addition) {
+                add_bitmaps_bytes.push(bitmap_bytes);
+            }
+        }
+
+        let mut output_deladd_obkv = KvWriterDelAdd::memory();
+        let mut buffer = Vec::new();
+        CboRoaringBitmapCodec::merge_into(del_bitmaps_bytes, &mut buffer)?;
+        output_deladd_obkv.insert(DelAdd::Deletion, &buffer)?;
+        buffer.clear();
+        CboRoaringBitmapCodec::merge_into(add_bitmaps_bytes, &mut buffer)?;
+        output_deladd_obkv.insert(DelAdd::Addition, &buffer)?;
+        output_deladd_obkv.into_inner().map(Cow::from).map_err(Into::into)
+    }
+}
+
+/// A function that merges a DelAdd of bitmaps into an already existing bitmap.
+///
+/// The first argument is the DelAdd obkv of CboRoaringBitmaps and
+/// the second one is the CboRoaringBitmap to merge into.
+pub fn merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap<'a>(
+    deladd_obkv: &[u8],
+    previous: &[u8],
+    buffer: &'a mut Vec<u8>,
+) -> Result<Option<&'a [u8]>> {
+    Ok(CboRoaringBitmapCodec::merge_deladd_into(
+        KvReaderDelAdd::new(deladd_obkv),
+        previous,
+        buffer,
+    )?)
+}
--- a/milli/src/update/index_documents/helpers/mod.rs
+++ b/milli/src/update/index_documents/helpers/mod.rs
@ -9,13 +9,14 @@ pub use clonable_mmap::{ClonableMmap, CursorClonableMmap};
 use fst::{IntoStreamer, Streamer};
 pub use grenad_helpers::{
    as_cloneable_grenad, create_sorter, create_writer, grenad_obkv_into_chunks,
-    merge_ignore_values, sorter_into_lmdb_database, sorter_into_reader, writer_into_reader,
+    merge_ignore_values, sorter_into_reader, write_sorter_into_database, writer_into_reader,
    GrenadParameters, MergeableReader,
 };
 pub use merge_functions::{
-    concat_u32s_array, keep_first, keep_latest_obkv, merge_btreeset_string,
-    merge_cbo_roaring_bitmaps, merge_obkvs_and_operations, merge_roaring_bitmaps,
-    serialize_roaring_bitmap, MergeFn,
+    keep_first, keep_latest_obkv, merge_btreeset_string, merge_cbo_roaring_bitmaps,
+    merge_deladd_cbo_roaring_bitmaps, merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
+    merge_roaring_bitmaps, obkvs_keep_last_addition_merge_deletions,
+    obkvs_merge_additions_and_deletions, serialize_roaring_bitmap, MergeFn,
 };

 use crate::MAX_WORD_LENGTH;
@ -44,10 +45,6 @@ where
    Some((head, tail))
 }

-pub fn read_u32_ne_bytes(bytes: &[u8]) -> impl Iterator<Item = u32> + '_ {
-    bytes.chunks_exact(4).flat_map(TryInto::try_into).map(u32::from_ne_bytes)
-}
-
 /// Converts an fst Stream into an HashSet of Strings.
 pub fn fst_stream_into_hashset<'f, I, S>(stream: I) -> HashSet<Vec<u8>>
 where
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@ -20,11 +20,13 @@ use slice_group_by::GroupBy;
 use typed_chunk::{write_typed_chunk_into_index, TypedChunk};

 use self::enrich::enrich_documents_batch;
-pub use self::enrich::{extract_finite_float_from_value, DocumentId};
+pub use self::enrich::{extract_finite_float_from_value, validate_geo_from_json, DocumentId};
 pub use self::helpers::{
    as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
-    fst_stream_into_vec, merge_btreeset_string, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
-    sorter_into_lmdb_database, valid_lmdb_key, writer_into_reader, ClonableMmap, MergeFn,
+    fst_stream_into_vec, merge_btreeset_string, merge_cbo_roaring_bitmaps,
+    merge_deladd_cbo_roaring_bitmaps, merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
+    merge_roaring_bitmaps, valid_lmdb_key, write_sorter_into_database, writer_into_reader,
+    ClonableMmap, MergeFn,
 };
 use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
 pub use self::transform::{Transform, TransformOutput};
@ -32,13 +34,12 @@ use crate::documents::{obkv_to_object, DocumentsBatchReader};
 use crate::error::{Error, InternalError, UserError};
 pub use crate::update::index_documents::helpers::CursorClonableMmap;
 use crate::update::{
-    self, DeletionStrategy, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep,
-    WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
+    IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
 };
-use crate::{Index, Result, RoaringBitmapCodec};
+use crate::{CboRoaringBitmapCodec, Index, Result};

 static MERGED_DATABASE_COUNT: usize = 7;
-static PREFIX_DATABASE_COUNT: usize = 5;
+static PREFIX_DATABASE_COUNT: usize = 4;
 static TOTAL_POSTING_DATABASE_COUNT: usize = MERGED_DATABASE_COUNT + PREFIX_DATABASE_COUNT;

 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
@ -86,7 +87,6 @@ pub struct IndexDocumentsConfig {
    pub words_positions_level_group_size: Option<NonZeroU32>,
    pub words_positions_min_level_size: Option<NonZeroU32>,
    pub update_method: IndexDocumentsMethod,
-    pub deletion_strategy: DeletionStrategy,
    pub autogenerate_docids: bool,
 }

@ -178,6 +178,7 @@ where

        // Early return when there is no document to add
        if to_delete.is_empty() {
+            // Maintains Invariant: remove documents actually always returns Ok for the inner result
            return Ok((self, Ok(0)));
        }

@ -190,14 +191,48 @@ where

        self.deleted_documents += deleted_documents;

+        // Maintains Invariant: remove documents actually always returns Ok for the inner result
        Ok((self, Ok(deleted_documents)))
    }

+    /// Removes documents from db using their internal document ids.
+    ///
+    /// # Warning
+    ///
+    /// This function is dangerous and will only work correctly if:
+    ///
+    /// - All the passed ids currently exist in the database
+    /// - No batching using the standard `remove_documents` and `add_documents` took place
+    ///
+    /// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function.
+    pub fn remove_documents_from_db_no_batch(
+        mut self,
+        to_delete: &RoaringBitmap,
+    ) -> Result<(Self, u64)> {
+        puffin::profile_function!();
+
+        // Early return when there is no document to delete
+        if to_delete.is_empty() {
+            return Ok((self, 0));
+        }
+
+        let deleted_documents = self
+            .transform
+            .as_mut()
+            .expect("Invalid document deletion state")
+            .remove_documents_from_db_no_batch(to_delete, self.wtxn, &self.should_abort)?
+            as u64;
+
+        self.deleted_documents += deleted_documents;
+
+        Ok((self, deleted_documents))
+    }
+
    #[logging_timer::time("IndexDocuments::{}")]
    pub fn execute(mut self) -> Result<DocumentAdditionResult> {
        puffin::profile_function!();

-        if self.added_documents == 0 {
+        if self.added_documents == 0 && self.deleted_documents == 0 {
            let number_of_documents = self.index.number_of_documents(self.wtxn)?;
            return Ok(DocumentAdditionResult { indexed_documents: 0, number_of_documents });
        }
@ -241,9 +276,6 @@ where
            primary_key,
            fields_ids_map,
            field_distribution,
-            new_external_documents_ids,
-            new_documents_ids,
-            replaced_documents_ids,
            documents_count,
            original_documents,
            flattened_documents,
@ -367,29 +399,12 @@ where
                let _ = lmdb_writer_sx.send(Err(e));
            }

-            // needs to be droped to avoid channel waiting lock.
+            // needs to be dropped to avoid channel waiting lock.
            drop(lmdb_writer_sx)
        });

-        // We delete the documents that this document addition replaces. This way we are
-        // able to simply insert all the documents even if they already exist in the database.
-        if !replaced_documents_ids.is_empty() {
-            let mut deletion_builder = update::DeleteDocuments::new(self.wtxn, self.index)?;
-            deletion_builder.strategy(self.config.deletion_strategy);
-            debug!("documents to delete {:?}", replaced_documents_ids);
-            deletion_builder.delete_documents(&replaced_documents_ids);
-            let deleted_documents_result = deletion_builder.execute_inner()?;
-            debug!("{} documents actually deleted", deleted_documents_result.deleted_documents);
-        }
-
-        let index_documents_ids = self.index.documents_ids(self.wtxn)?;
-        let index_is_empty = index_documents_ids.is_empty();
+        let index_is_empty = self.index.number_of_documents(self.wtxn)? == 0;
        let mut final_documents_ids = RoaringBitmap::new();
-        let mut word_pair_proximity_docids = None;
-        let mut word_position_docids = None;
-        let mut word_fid_docids = None;
-        let mut word_docids = None;
-        let mut exact_word_docids = None;

        let mut databases_seen = 0;
        (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
@ -397,35 +412,40 @@ where
            total_databases: TOTAL_POSTING_DATABASE_COUNT,
        });

+        let mut word_position_docids = None;
+        let mut word_fid_docids = None;
+        let mut word_docids = None;
+        let mut exact_word_docids = None;
+
        for result in lmdb_writer_rx {
            if (self.should_abort)() {
                return Err(Error::InternalError(InternalError::AbortedIndexation));
            }

            let typed_chunk = match result? {
-                TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => {
+                TypedChunk::WordDocids {
+                    word_docids_reader,
+                    exact_word_docids_reader,
+                    word_fid_docids_reader,
+                } => {
                    let cloneable_chunk = unsafe { as_cloneable_grenad(&word_docids_reader)? };
                    word_docids = Some(cloneable_chunk);
                    let cloneable_chunk =
                        unsafe { as_cloneable_grenad(&exact_word_docids_reader)? };
                    exact_word_docids = Some(cloneable_chunk);
-                    TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader }
-                }
-                TypedChunk::WordPairProximityDocids(chunk) => {
-                    let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
-                    word_pair_proximity_docids = Some(cloneable_chunk);
-                    TypedChunk::WordPairProximityDocids(chunk)
+                    let cloneable_chunk = unsafe { as_cloneable_grenad(&word_fid_docids_reader)? };
+                    word_fid_docids = Some(cloneable_chunk);
+                    TypedChunk::WordDocids {
+                        word_docids_reader,
+                        exact_word_docids_reader,
+                        word_fid_docids_reader,
+                    }
                }
                TypedChunk::WordPositionDocids(chunk) => {
                    let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
                    word_position_docids = Some(cloneable_chunk);
                    TypedChunk::WordPositionDocids(chunk)
                }
-                TypedChunk::WordFidDocids(chunk) => {
-                    let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
-                    word_fid_docids = Some(cloneable_chunk);
-                    TypedChunk::WordFidDocids(chunk)
-                }
                otherwise => otherwise,
            };

@ -457,25 +477,16 @@ where

        // We write the primary key field id into the main database
        self.index.put_primary_key(self.wtxn, &primary_key)?;
-
-        // We write the external documents ids into the main database.
-        let mut external_documents_ids = self.index.external_documents_ids(self.wtxn)?;
-        external_documents_ids.insert_ids(&new_external_documents_ids)?;
-        let external_documents_ids = external_documents_ids.into_static();
-        self.index.put_external_documents_ids(self.wtxn, &external_documents_ids)?;
-
-        let all_documents_ids = index_documents_ids | new_documents_ids;
-        self.index.put_documents_ids(self.wtxn, &all_documents_ids)?;
+        let number_of_documents = self.index.number_of_documents(self.wtxn)?;

        self.execute_prefix_databases(
            word_docids,
            exact_word_docids,
-            word_pair_proximity_docids,
            word_position_docids,
            word_fid_docids,
        )?;

-        Ok(all_documents_ids.len())
+        Ok(number_of_documents)
    }

    #[logging_timer::time("IndexDocuments::{}")]
@ -483,7 +494,6 @@ where
        self,
        word_docids: Option<grenad::Reader<CursorClonableMmap>>,
        exact_word_docids: Option<grenad::Reader<CursorClonableMmap>>,
-        word_pair_proximity_docids: Option<grenad::Reader<CursorClonableMmap>>,
        word_position_docids: Option<grenad::Reader<CursorClonableMmap>>,
        word_fid_docids: Option<grenad::Reader<CursorClonableMmap>>,
    ) -> Result<()>
@ -604,32 +614,6 @@ where
            total_databases: TOTAL_POSTING_DATABASE_COUNT,
        });

-        if let Some(word_pair_proximity_docids) = word_pair_proximity_docids {
-            // Run the word prefix pair proximity docids update operation.
-            PrefixWordPairsProximityDocids::new(
-                self.wtxn,
-                self.index,
-                self.indexer_config.chunk_compression_type,
-                self.indexer_config.chunk_compression_level,
-            )
-            .execute(
-                word_pair_proximity_docids,
-                &new_prefix_fst_words,
-                &common_prefix_fst_words,
-                &del_prefix_fst_words,
-            )?;
-        }
-
-        if (self.should_abort)() {
-            return Err(Error::InternalError(InternalError::AbortedIndexation));
-        }
-
-        databases_seen += 1;
-        (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
-            databases_seen,
-            total_databases: TOTAL_POSTING_DATABASE_COUNT,
-        });
-
        if let Some(word_position_docids) = word_position_docids {
            // Run the words prefix position docids update operation.
            let mut builder = WordPrefixIntegerDocids::new(
@ -687,8 +671,8 @@ where
 fn execute_word_prefix_docids(
    txn: &mut heed::RwTxn,
    reader: grenad::Reader<Cursor<ClonableMmap>>,
-    word_docids_db: Database<Str, RoaringBitmapCodec>,
-    word_prefix_docids_db: Database<Str, RoaringBitmapCodec>,
+    word_docids_db: Database<Str, CboRoaringBitmapCodec>,
+    word_prefix_docids_db: Database<Str, CboRoaringBitmapCodec>,
    indexer_config: &IndexerConfig,
    new_prefix_fst_words: &[String],
    common_prefix_fst_words: &[&[String]],
@ -709,14 +693,15 @@ fn execute_word_prefix_docids(
 #[cfg(test)]
 mod tests {
    use big_s::S;
+    use fst::IntoStreamer;
+    use heed::RwTxn;
    use maplit::hashset;

    use super::*;
    use crate::documents::documents_batch_reader_from_objects;
    use crate::index::tests::TempIndex;
    use crate::search::TermsMatchingStrategy;
-    use crate::update::DeleteDocuments;
-    use crate::{db_snap, BEU16};
+    use crate::{db_snap, Filter, Search, BEU16};

    #[test]
    fn simple_document_replacement() {
@ -807,11 +792,10 @@ mod tests {
        assert_eq!(count, 1);

        // Check that we get only one document from the database.
-        // Since the document has been deleted and re-inserted, its internal docid has been incremented to 1
-        let docs = index.documents(&rtxn, Some(1)).unwrap();
+        let docs = index.documents(&rtxn, Some(0)).unwrap();
        assert_eq!(docs.len(), 1);
        let (id, doc) = docs[0];
-        assert_eq!(id, 1);
+        assert_eq!(id, 0);

        // Check that this document is equal to the last one sent.
        let mut doc_iter = doc.iter();
@ -872,7 +856,7 @@ mod tests {
        assert_eq!(count, 3);

        // the document 0 has been deleted and reinserted with the id 3
-        let docs = index.documents(&rtxn, vec![1, 2, 3]).unwrap();
+        let docs = index.documents(&rtxn, vec![1, 2, 0]).unwrap();
        let kevin_position =
            docs.iter().position(|(_, d)| d.get(0).unwrap() == br#""updated kevin""#).unwrap();
        assert_eq!(kevin_position, 2);
@ -1018,7 +1002,6 @@ mod tests {
        assert_eq!(count, 6);

        db_snap!(index, word_docids, "updated");
-        db_snap!(index, soft_deleted_documents_ids, "updated", @"[0, 1, 4, ]");

        drop(rtxn);
    }
@ -1121,17 +1104,15 @@ mod tests {
                { "objectId": 30,  "title": "Hamlet", "_geo": { "lat": 12, "lng": 89 } }
            ]))
            .unwrap();
-        let mut wtxn = index.write_txn().unwrap();
-        assert_eq!(index.primary_key(&wtxn).unwrap(), Some("objectId"));

        // Delete not all of the documents but some of them.
-        let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-        builder.delete_external_id("30");
-        builder.execute().unwrap();
+        index.delete_document("30");

-        let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
-        assert!(external_documents_ids.get("30").is_none());
-        wtxn.commit().unwrap();
+        let txn = index.read_txn().unwrap();
+        assert_eq!(index.primary_key(&txn).unwrap(), Some("objectId"));
+
+        let external_documents_ids = index.external_documents_ids();
+        assert!(external_documents_ids.get(&txn, "30").unwrap().is_none());

        index
            .add_documents(documents!([
@ -1140,8 +1121,8 @@ mod tests {
            .unwrap();

        let wtxn = index.write_txn().unwrap();
-        let external_documents_ids = index.external_documents_ids(&wtxn).unwrap();
-        assert!(external_documents_ids.get("30").is_some());
+        let external_documents_ids = index.external_documents_ids();
+        assert!(external_documents_ids.get(&wtxn, "30").unwrap().is_some());
        wtxn.commit().unwrap();

        index
@ -1435,8 +1416,10 @@ mod tests {
        index.add_documents(documents!({ "a" : { "b" : { "c" :  1 }}})).unwrap();

        let rtxn = index.read_txn().unwrap();
-        let external_documents_ids = index.external_documents_ids(&rtxn).unwrap();
-        assert!(external_documents_ids.get("1").is_some());
+        let all_documents_count = index.all_documents(&rtxn).unwrap().count();
+        assert_eq!(all_documents_count, 1);
+        let external_documents_ids = index.external_documents_ids();
+        assert!(external_documents_ids.get(&rtxn, "1").unwrap().is_some());
    }

    #[test]
@ -1490,12 +1473,6 @@ mod tests {
        3   2    second       second
        3   3    third        third
        "###);
-        db_snap!(index, string_faceted_documents_ids, @r###"
-        0   []
-        1   []
-        2   []
-        3   [0, 1, 2, 3, ]
-        "###);

        let rtxn = index.read_txn().unwrap();

@ -1519,12 +1496,6 @@ mod tests {

        db_snap!(index, facet_id_string_docids, @"");
        db_snap!(index, field_id_docid_facet_strings, @"");
-        db_snap!(index, string_faceted_documents_ids, @r###"
-        0   []
-        1   []
-        2   []
-        3   [0, 1, 2, 3, ]
-        "###);

        let rtxn = index.read_txn().unwrap();

@ -1551,12 +1522,6 @@ mod tests {
        3   2    second       second
        3   3    third        third
        "###);
-        db_snap!(index, string_faceted_documents_ids, @r###"
-        0   []
-        1   []
-        2   []
-        3   [0, 1, 2, 3, ]
-        "###);

        let rtxn = index.read_txn().unwrap();

@ -1719,7 +1684,7 @@ mod tests {

        let wtxn = index.read_txn().unwrap();

-        let map = index.external_documents_ids(&wtxn).unwrap().to_hash_map();
+        let map = index.external_documents_ids().to_hash_map(&wtxn).unwrap();
        let ids = map.values().collect::<HashSet<_>>();

        assert_eq!(ids.len(), map.len());
@ -2531,17 +2496,8 @@ mod tests {
        db_snap!(index, word_fid_docids, 2, @"a48d3f88db33f94bc23110a673ea49e4");
        db_snap!(index, word_position_docids, 2, @"3c9e66c6768ae2cf42b46b2c46e46a83");

-        let mut wtxn = index.write_txn().unwrap();
-
        // Delete not all of the documents but some of them.
-        let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-        builder.strategy(DeletionStrategy::AlwaysHard);
-        builder.delete_external_id("0");
-        builder.delete_external_id("3");
-        let result = builder.execute().unwrap();
-        println!("{result:?}");
-
-        wtxn.commit().unwrap();
+        index.delete_documents(vec!["0".into(), "3".into()]);

        db_snap!(index, word_fid_docids, 3, @"4c2e2a1832e5802796edc1638136d933");
        db_snap!(index, word_position_docids, 3, @"74f556b91d161d997a89468b4da1cb8f");
@ -2596,8 +2552,7 @@ mod tests {
            ),
        ]
        */
-        let mut index = TempIndex::new();
-        index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard;
+        let index = TempIndex::new();

        // START OF BATCH

@ -2637,8 +2592,7 @@ mod tests {
        {"id":1,"doggo":"bernese"}
        "###);
        db_snap!(index, external_documents_ids, @r###"
-        soft:
-        hard:
+        docids:
        1                        0
        "###);

@ -2683,13 +2637,10 @@ mod tests {
        "###);

        db_snap!(index, external_documents_ids, @r###"
-        soft:
-        hard:
+        docids:
        0                        1
        "###);

-        db_snap!(index, soft_deleted_documents_ids, @"[]");
-
        // BATCH 3

        println!("--- ENTERING BATCH 3");
@ -2731,4 +2682,537 @@ mod tests {
        let res = index.search(&rtxn).execute().unwrap();
        index.documents(&rtxn, res.documents_ids).unwrap();
    }
+
+    fn delete_documents<'t>( // Test helper: deletes `external_ids`, returns their internal ids.
+        wtxn: &mut RwTxn<'t, '_>,
+        index: &'t TempIndex,
+        external_ids: &[&str],
+    ) -> Vec<u32> {
+        let external_document_ids = index.external_documents_ids();
+        let ids_to_delete: Vec<u32> = external_ids
+            .iter()
+            .map(|id| external_document_ids.get(wtxn, id).unwrap().unwrap())
+            .collect(); // each `unwrap` panics if the lookup fails or the id is unknown
+
+        // Delete the documents; their internal ids were resolved above, before the deletion.
+        index.delete_documents_using_wtxn(
+            wtxn,
+            external_ids.iter().map(ToString::to_string).collect(),
+        );
+
+        ids_to_delete
+    }
+
+    #[test] // Deleting every document of the index must leave its databases empty.
+    fn delete_documents_with_numbers_as_primary_key() {
+        let index = TempIndex::new();
+
+        let mut wtxn = index.write_txn().unwrap();
+        index
+            .add_documents_using_wtxn(
+                &mut wtxn,
+                documents!([
+                    { "id": 0, "name": "kevin", "object": { "key1": "value1", "key2": "value2" } },
+                    { "id": 1, "name": "kevina", "array": ["I", "am", "fine"] },
+                    { "id": 2, "name": "benoit", "array_of_object": [{ "wow": "amazing" }] }
+                ]),
+            )
+            .unwrap();
+
+        // Delete all three documents by external id ("0", "1", "2"), in the same transaction.
+        index.delete_documents_using_wtxn(&mut wtxn, vec![S("0"), S("1"), S("2")]);
+
+        wtxn.commit().unwrap();
+
+        // All these snapshots should be empty since every document was deleted.
+        db_snap!(index, documents_ids);
+        db_snap!(index, word_docids);
+        db_snap!(index, word_pair_proximity_docids);
+        db_snap!(index, facet_id_exists_docids);
+
+        let rtxn = index.read_txn().unwrap();
+
+        assert!(index.field_distribution(&rtxn).unwrap().is_empty());
+    }
+
+    #[test] // Deletes two of three documents keyed by `mysuperid` and snapshots what remains.
+    fn delete_documents_with_strange_primary_key() {
+        let index = TempIndex::new();
+
+        index
+            .update_settings(|settings| settings.set_searchable_fields(vec!["name".to_string()]))
+            .unwrap();
+
+        let mut wtxn = index.write_txn().unwrap();
+        index
+            .add_documents_using_wtxn(
+                &mut wtxn,
+                documents!([
+                    { "mysuperid": 0, "name": "kevin" },
+                    { "mysuperid": 1, "name": "kevina" },
+                    { "mysuperid": 2, "name": "benoit" }
+                ]),
+            )
+            .unwrap();
+        wtxn.commit().unwrap(); // the addition is committed before the deletion starts
+
+        let mut wtxn = index.write_txn().unwrap();
+
+        // Delete only two of the three documents ("0" and "1"); "2" is left in the index.
+        index.delete_documents_using_wtxn(&mut wtxn, vec![S("0"), S("1")]);
+
+        wtxn.commit().unwrap();
+
+        db_snap!(index, documents_ids);
+        db_snap!(index, word_docids);
+        db_snap!(index, word_pair_proximity_docids);
+    }
+
+    #[test] // A filtered placeholder search must not match a document deleted in the same txn.
+    fn filtered_placeholder_search_should_not_return_deleted_documents() {
+        let index = TempIndex::new();
+
+        let mut wtxn = index.write_txn().unwrap();
+
+        index
+            .update_settings_using_wtxn(&mut wtxn, |settings| {
+                settings.set_primary_key(S("docid"));
+                settings.set_filterable_fields(hashset! { S("label"), S("label2") });
+            })
+            .unwrap();
+
+        index
+            .add_documents_using_wtxn(
+                &mut wtxn,
+                documents!([
+                    { "docid": "1_4",  "label": ["sign"] },
+                    { "docid": "1_5",  "label": ["letter"] },
+                    { "docid": "1_7",  "label": ["abstract","cartoon","design","pattern"] },
+                    { "docid": "1_36", "label": ["drawing","painting","pattern"] },
+                    { "docid": "1_37", "label": ["art","drawing","outdoor"] },
+                    { "docid": "1_38", "label": ["aquarium","art","drawing"] },
+                    { "docid": "1_39", "label": ["abstract"] },
+                    { "docid": "1_40", "label": ["cartoon"] },
+                    { "docid": "1_41", "label": ["art","drawing"] },
+                    { "docid": "1_42", "label": ["art","pattern"] },
+                    { "docid": "1_43", "label": ["abstract","art","drawing","pattern"] },
+                    { "docid": "1_44", "label": ["drawing"] },
+                    { "docid": "1_45", "label": ["art"] },
+                    { "docid": "1_46", "label": ["abstract","colorfulness","pattern"] },
+                    { "docid": "1_47", "label": ["abstract","pattern"] },
+                    { "docid": "1_52", "label": ["abstract","cartoon"] },
+                    { "docid": "1_57", "label": ["abstract","drawing","pattern"] },
+                    { "docid": "1_58", "label": ["abstract","art","cartoon"] },
+                    { "docid": "1_68", "label": ["design"] },
+                    { "docid": "1_69", "label": ["geometry"] },
+                    { "docid": "1_70", "label2": ["geometry", 1.2] },
+                    { "docid": "1_71", "label2": ["design", 2.2] },
+                    { "docid": "1_72", "label2": ["geometry", 1.2] }
+                ]),
+            )
+            .unwrap();
+
+        delete_documents(&mut wtxn, &index, &["1_4", "1_70", "1_72"]); // returned ids unused here
+
+        // Placeholder search filtered on `label = sign`: only the deleted "1_4" had that label.
+        let filter = Filter::from_str("label = sign").unwrap().unwrap();
+        let results = index.search(&wtxn).filter(filter).execute().unwrap();
+        assert!(results.documents_ids.is_empty());
+
+        wtxn.commit().unwrap();
+
+        db_snap!(index, word_docids);
+        db_snap!(index, facet_id_f64_docids);
+        db_snap!(index, word_pair_proximity_docids);
+        db_snap!(index, facet_id_exists_docids);
+        db_snap!(index, facet_id_string_docids);
+    }
+
+    #[test] // A placeholder search must not return a document deleted in the same transaction.
+    fn placeholder_search_should_not_return_deleted_documents() {
+        let index = TempIndex::new();
+
+        let mut wtxn = index.write_txn().unwrap();
+        index
+            .update_settings_using_wtxn(&mut wtxn, |settings| {
+                settings.set_primary_key(S("docid"));
+            })
+            .unwrap();
+
+        index
+            .add_documents_using_wtxn(
+                &mut wtxn,
+                documents!([
+                    { "docid": "1_4",  "label": ["sign"] },
+                    { "docid": "1_5",  "label": ["letter"] },
+                    { "docid": "1_7",  "label": ["abstract","cartoon","design","pattern"] },
+                    { "docid": "1_36", "label": ["drawing","painting","pattern"] },
+                    { "docid": "1_37", "label": ["art","drawing","outdoor"] },
+                    { "docid": "1_38", "label": ["aquarium","art","drawing"] },
+                    { "docid": "1_39", "label": ["abstract"] },
+                    { "docid": "1_40", "label": ["cartoon"] },
+                    { "docid": "1_41", "label": ["art","drawing"] },
+                    { "docid": "1_42", "label": ["art","pattern"] },
+                    { "docid": "1_43", "label": ["abstract","art","drawing","pattern"] },
+                    { "docid": "1_44", "label": ["drawing"] },
+                    { "docid": "1_45", "label": ["art"] },
+                    { "docid": "1_46", "label": ["abstract","colorfulness","pattern"] },
+                    { "docid": "1_47", "label": ["abstract","pattern"] },
+                    { "docid": "1_52", "label": ["abstract","cartoon"] },
+                    { "docid": "1_57", "label": ["abstract","drawing","pattern"] },
+                    { "docid": "1_58", "label": ["abstract","art","cartoon"] },
+                    { "docid": "1_68", "label": ["design"] },
+                    { "docid": "1_69", "label": ["geometry"] },
+                    { "docid": "1_70", "label2": ["geometry", 1.2] },
+                    { "docid": "1_71", "label2": ["design", 2.2] },
+                    { "docid": "1_72", "label2": ["geometry", 1.2] }
+                ]),
+            )
+            .unwrap();
+
+        let deleted_internal_ids = delete_documents(&mut wtxn, &index, &["1_4"]);
+
+        // Placeholder search (no query, no filter), run on the still-uncommitted `wtxn`.
+        let results = index.search(&wtxn).execute().unwrap();
+        assert!(!results.documents_ids.is_empty()); // guards against a vacuously passing loop
+        for id in results.documents_ids.iter() {
+            assert!(
+                !deleted_internal_ids.contains(id),
+                "The document {} was supposed to be deleted",
+                id
+            );
+        }
+
+        wtxn.commit().unwrap();
+    }
+
+    #[test] // A query search must not return documents deleted in the same transaction.
+    fn search_should_not_return_deleted_documents() {
+        let index = TempIndex::new();
+
+        let mut wtxn = index.write_txn().unwrap();
+        index
+            .update_settings_using_wtxn(&mut wtxn, |settings| {
+                settings.set_primary_key(S("docid"));
+            })
+            .unwrap();
+
+        index
+            .add_documents_using_wtxn(
+                &mut wtxn,
+                documents!([
+                    { "docid": "1_4",  "label": ["sign"] },
+                    { "docid": "1_5",  "label": ["letter"] },
+                    { "docid": "1_7",  "label": ["abstract","cartoon","design","pattern"] },
+                    { "docid": "1_36", "label": ["drawing","painting","pattern"] },
+                    { "docid": "1_37", "label": ["art","drawing","outdoor"] },
+                    { "docid": "1_38", "label": ["aquarium","art","drawing"] },
+                    { "docid": "1_39", "label": ["abstract"] },
+                    { "docid": "1_40", "label": ["cartoon"] },
+                    { "docid": "1_41", "label": ["art","drawing"] },
+                    { "docid": "1_42", "label": ["art","pattern"] },
+                    { "docid": "1_43", "label": ["abstract","art","drawing","pattern"] },
+                    { "docid": "1_44", "label": ["drawing"] },
+                    { "docid": "1_45", "label": ["art"] },
+                    { "docid": "1_46", "label": ["abstract","colorfulness","pattern"] },
+                    { "docid": "1_47", "label": ["abstract","pattern"] },
+                    { "docid": "1_52", "label": ["abstract","cartoon"] },
+                    { "docid": "1_57", "label": ["abstract","drawing","pattern"] },
+                    { "docid": "1_58", "label": ["abstract","art","cartoon"] },
+                    { "docid": "1_68", "label": ["design"] },
+                    { "docid": "1_69", "label": ["geometry"] },
+                    { "docid": "1_70", "label2": ["geometry", 1.2] },
+                    { "docid": "1_71", "label2": ["design", 2.2] },
+                    { "docid": "1_72", "label2": ["geometry", 1.2] }
+                ]),
+            )
+            .unwrap();
+
+        let deleted_internal_ids = delete_documents(&mut wtxn, &index, &["1_7", "1_52"]);
+
+        // Query "abstract": both deleted documents had that label, so neither may come back.
+        let results = index.search(&wtxn).query("abstract").execute().unwrap();
+        assert!(!results.documents_ids.is_empty()); // guards against a vacuously passing loop
+        for id in results.documents_ids.iter() {
+            assert!(
+                !deleted_internal_ids.contains(id),
+                "The document {} was supposed to be deleted",
+                id
+            );
+        }
+
+        wtxn.commit().unwrap();
+    }
+
+    #[test] // A `_geoRadius`-filtered placeholder search must not return deleted documents.
+    fn geo_filtered_placeholder_search_should_not_return_deleted_documents() {
+        let index = TempIndex::new();
+
+        let mut wtxn = index.write_txn().unwrap();
+        index
+            .update_settings_using_wtxn(&mut wtxn, |settings| {
+                settings.set_primary_key(S("id"));
+                settings.set_filterable_fields(hashset!(S("_geo")));
+                settings.set_sortable_fields(hashset!(S("_geo")));
+            })
+            .unwrap();
+
+        index.add_documents_using_wtxn(&mut wtxn, documents!([
+            { "id": "1",  "city": "Lille",             "_geo": { "lat": 50.6299, "lng": 3.0569 } },
+            { "id": "2",  "city": "Mons-en-Barœul",    "_geo": { "lat": 50.6415, "lng": 3.1106 } },
+            { "id": "3",  "city": "Hellemmes",         "_geo": { "lat": 50.6312, "lng": 3.1106 } },
+            { "id": "4",  "city": "Villeneuve-d'Ascq", "_geo": { "lat": 50.6224, "lng": 3.1476 } },
+            { "id": "5",  "city": "Hem",               "_geo": { "lat": 50.6552, "lng": 3.1897 } },
+            { "id": "6",  "city": "Roubaix",           "_geo": { "lat": 50.6924, "lng": 3.1763 } },
+            { "id": "7",  "city": "Tourcoing",         "_geo": { "lat": 50.7263, "lng": 3.1541 } },
+            { "id": "8",  "city": "Mouscron",          "_geo": { "lat": 50.7453, "lng": 3.2206 } },
+            { "id": "9",  "city": "Tournai",           "_geo": { "lat": 50.6053, "lng": 3.3758 } },
+            { "id": "10", "city": "Ghent",             "_geo": { "lat": 51.0537, "lng": 3.6957 } },
+            { "id": "11", "city": "Brussels",          "_geo": { "lat": 50.8466, "lng": 4.3370 } },
+            { "id": "12", "city": "Charleroi",         "_geo": { "lat": 50.4095, "lng": 4.4347 } },
+            { "id": "13", "city": "Mons",              "_geo": { "lat": 50.4502, "lng": 3.9623 } },
+            { "id": "14", "city": "Valenciennes",      "_geo": { "lat": 50.3518, "lng": 3.5326 } },
+            { "id": "15", "city": "Arras",             "_geo": { "lat": 50.2844, "lng": 2.7637 } },
+            { "id": "16", "city": "Cambrai",           "_geo": { "lat": 50.1793, "lng": 3.2189 } },
+            { "id": "17", "city": "Bapaume",           "_geo": { "lat": 50.1112, "lng": 2.8547 } },
+            { "id": "18", "city": "Amiens",            "_geo": { "lat": 49.9314, "lng": 2.2710 } },
+            { "id": "19", "city": "Compiègne",         "_geo": { "lat": 49.4449, "lng": 2.7913 } },
+            { "id": "20", "city": "Paris",             "_geo": { "lat": 48.9021, "lng": 2.3708 } }
+        ])).unwrap();
+
+        let external_ids_to_delete = ["5", "6", "7", "12", "17", "19"];
+        let deleted_internal_ids = delete_documents(&mut wtxn, &index, &external_ids_to_delete);
+
+        // Placeholder search with a geo filter centred on the coordinates of deleted document "6".
+        let filter = Filter::from_str("_geoRadius(50.6924, 3.1763, 20000)").unwrap().unwrap();
+        let results = index.search(&wtxn).filter(filter).execute().unwrap();
+        assert!(!results.documents_ids.is_empty()); // guards against a vacuously passing loop
+        for id in results.documents_ids.iter() {
+            assert!(
+                !deleted_internal_ids.contains(id),
+                "The document {} was supposed to be deleted",
+                id
+            );
+        }
+
+        wtxn.commit().unwrap();
+
+        db_snap!(index, facet_id_f64_docids);
+        db_snap!(index, facet_id_string_docids);
+    }
+
+    #[test] // Document listings and external-id lookups must not expose deleted documents.
+    fn get_documents_should_not_return_deleted_documents() {
+        let index = TempIndex::new();
+
+        let mut wtxn = index.write_txn().unwrap();
+        index
+            .update_settings_using_wtxn(&mut wtxn, |settings| {
+                settings.set_primary_key(S("docid"));
+            })
+            .unwrap();
+
+        index
+            .add_documents_using_wtxn(
+                &mut wtxn,
+                documents!([
+                    { "docid": "1_4",  "label": ["sign"] },
+                    { "docid": "1_5",  "label": ["letter"] },
+                    { "docid": "1_7",  "label": ["abstract","cartoon","design","pattern"] },
+                    { "docid": "1_36", "label": ["drawing","painting","pattern"] },
+                    { "docid": "1_37", "label": ["art","drawing","outdoor"] },
+                    { "docid": "1_38", "label": ["aquarium","art","drawing"] },
+                    { "docid": "1_39", "label": ["abstract"] },
+                    { "docid": "1_40", "label": ["cartoon"] },
+                    { "docid": "1_41", "label": ["art","drawing"] },
+                    { "docid": "1_42", "label": ["art","pattern"] },
+                    { "docid": "1_43", "label": ["abstract","art","drawing","pattern"] },
+                    { "docid": "1_44", "label": ["drawing"] },
+                    { "docid": "1_45", "label": ["art"] },
+                    { "docid": "1_46", "label": ["abstract","colorfulness","pattern"] },
+                    { "docid": "1_47", "label": ["abstract","pattern"] },
+                    { "docid": "1_52", "label": ["abstract","cartoon"] },
+                    { "docid": "1_57", "label": ["abstract","drawing","pattern"] },
+                    { "docid": "1_58", "label": ["abstract","art","cartoon"] },
+                    { "docid": "1_68", "label": ["design"] },
+                    { "docid": "1_69", "label": ["geometry"] },
+                    { "docid": "1_70", "label2": ["geometry", 1.2] },
+                    { "docid": "1_71", "label2": ["design", 2.2] },
+                    { "docid": "1_72", "label2": ["geometry", 1.2] }
+                ]),
+            )
+            .unwrap();
+
+        let deleted_external_ids = ["1_7", "1_52"];
+        let deleted_internal_ids = delete_documents(&mut wtxn, &index, &deleted_external_ids);
+
+        // Iterate over every stored document: none may carry a deleted internal id.
+        let results = index.all_documents(&wtxn).unwrap();
+        for result in results {
+            let (id, _) = result.unwrap();
+            assert!(
+                !deleted_internal_ids.contains(&id),
+                "The document {} was supposed to be deleted",
+                id
+            );
+        }
+
+        // Same check against the collection returned by `documents_ids`.
+        let results = index.documents_ids(&wtxn).unwrap();
+        for id in results {
+            assert!(
+                !deleted_internal_ids.contains(&id),
+                "The document {} was supposed to be deleted",
+                id
+            );
+        }
+        wtxn.commit().unwrap();
+
+        let rtxn = index.read_txn().unwrap(); // fresh read txn: checks the committed state
+
+        // After the commit, the deleted external ids must no longer resolve to an internal id.
+        let results = index.external_documents_ids();
+        for id in deleted_external_ids {
+            assert!(
+                results.get(&rtxn, id).unwrap().is_none(),
+                "The document {} was supposed to be deleted",
+                id
+            );
+        }
+        drop(rtxn);
+    }
+
+    #[test] // Document count and field distribution must not include deleted documents.
+    fn stats_should_not_return_deleted_documents() {
+        let index = TempIndex::new();
+
+        let mut wtxn = index.write_txn().unwrap();
+
+        index
+            .update_settings_using_wtxn(&mut wtxn, |settings| {
+                settings.set_primary_key(S("docid"));
+            })
+            .unwrap();
+
+        index.add_documents_using_wtxn(&mut wtxn, documents!([
+            { "docid": "1_4",  "label": ["sign"]},
+            { "docid": "1_5",  "label": ["letter"]},
+            { "docid": "1_7",  "label": ["abstract","cartoon","design","pattern"], "title": "Mickey Mouse"},
+            { "docid": "1_36", "label": ["drawing","painting","pattern"]},
+            { "docid": "1_37", "label": ["art","drawing","outdoor"]},
+            { "docid": "1_38", "label": ["aquarium","art","drawing"], "title": "Nemo"},
+            { "docid": "1_39", "label": ["abstract"]},
+            { "docid": "1_40", "label": ["cartoon"]},
+            { "docid": "1_41", "label": ["art","drawing"]},
+            { "docid": "1_42", "label": ["art","pattern"]},
+            { "docid": "1_43", "label": ["abstract","art","drawing","pattern"], "number": 32i32},
+            { "docid": "1_44", "label": ["drawing"], "number": 44i32},
+            { "docid": "1_45", "label": ["art"]},
+            { "docid": "1_46", "label": ["abstract","colorfulness","pattern"]},
+            { "docid": "1_47", "label": ["abstract","pattern"]},
+            { "docid": "1_52", "label": ["abstract","cartoon"]},
+            { "docid": "1_57", "label": ["abstract","drawing","pattern"]},
+            { "docid": "1_58", "label": ["abstract","art","cartoon"]},
+            { "docid": "1_68", "label": ["design"]},
+            { "docid": "1_69", "label": ["geometry"]}
+        ])).unwrap();
+
+        delete_documents(&mut wtxn, &index, &["1_7", "1_52"]); // returned ids unused here
+
+        // 20 documents were added and 2 deleted, so 18 must remain.
+        let results = index.number_of_documents(&wtxn).unwrap();
+        assert_eq!(18, results);
+
+        // Field distribution: 18 `label`s, 1 `title` ("1_7"'s was deleted), 2 untouched `number`s.
+        let results = index.field_distribution(&wtxn).unwrap();
+        assert_eq!(Some(&18), results.get("label"));
+        assert_eq!(Some(&1), results.get("title"));
+        assert_eq!(Some(&2), results.get("number"));
+
+        wtxn.commit().unwrap();
+    }
+
+    #[test] // Deleting a document must remove it from the stored script/language docids.
+    fn stored_detected_script_and_language_should_not_return_deleted_documents() {
+        use charabia::{Language, Script};
+        let index = TempIndex::new();
+        let mut wtxn = index.write_txn().unwrap();
+        index
+            .add_documents_using_wtxn(
+                &mut wtxn,
+                documents!([
+                { "id": "0", "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
+                { "id": "1", "title": "人人生而自由﹐在尊嚴和權利上一律平等。他們賦有理性和良心﹐並應以兄弟關係的精神互相對待。" },
+                { "id": "2", "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" },
+                { "id": "3", "title": "関西国際空港限定トートバッグ すもももももももものうち" },
+                { "id": "4", "title": "ภาษาไทยง่ายนิดเดียว" },
+                { "id": "5", "title": "The quick 在尊嚴和權利上一律平等。" },
+            ]))
+            .unwrap();
+
+        let key_cmn = (Script::Cj, Language::Cmn); // the (script, language) pair under test
+        let cj_cmn_docs =
+            index.script_language_documents_ids(&wtxn, &key_cmn).unwrap().unwrap_or_default();
+        let mut expected_cj_cmn_docids = RoaringBitmap::new();
+        expected_cj_cmn_docids.push(1);
+        expected_cj_cmn_docids.push(5);
+        assert_eq!(cj_cmn_docs, expected_cj_cmn_docids); // before deletion: docids 1 and 5
+
+        delete_documents(&mut wtxn, &index, &["1"]);
+        wtxn.commit().unwrap();
+
+        let rtxn = index.read_txn().unwrap(); // re-read from the committed state
+        let cj_cmn_docs =
+            index.script_language_documents_ids(&rtxn, &key_cmn).unwrap().unwrap_or_default();
+        let mut expected_cj_cmn_docids = RoaringBitmap::new();
+        expected_cj_cmn_docids.push(5);
+        assert_eq!(cj_cmn_docs, expected_cj_cmn_docids); // after deleting "1": only docid 5 is left
+    }
+
+    #[test] // Deleting the only doc with an exact-attribute word empties `exact_word_docids`.
+    fn delete_words_exact_attributes() {
+        let index = TempIndex::new();
+
+        index
+            .update_settings(|settings| {
+                settings.set_primary_key(S("id"));
+                settings.set_searchable_fields(vec![S("text"), S("exact")]);
+                settings.set_exact_attributes(vec![S("exact")].into_iter().collect());
+            })
+            .unwrap();
+
+        index
+            .add_documents(documents!([
+                { "id": 0, "text": "hello" },
+                { "id": 1, "exact": "hello"}
+            ]))
+            .unwrap();
+        db_snap!(index, word_docids, 1, @r###"
+        hello            [0, ]
+        "###);
+        db_snap!(index, exact_word_docids, 1, @r###"
+        hello            [1, ]
+        "###);
+        db_snap!(index, words_fst, 1, @"300000000000000001084cfcfc2ce1000000016000000090ea47f");
+
+        let mut wtxn = index.write_txn().unwrap();
+        let deleted_internal_ids = delete_documents(&mut wtxn, &index, &["1"]);
+        wtxn.commit().unwrap();
+
+        db_snap!(index, word_docids, 2, @r###"
+        hello            [0, ]
+        "###);
+        db_snap!(index, exact_word_docids, 2, @""); // the only exact-attribute entry is gone
+        db_snap!(index, words_fst, 2, @"300000000000000001084cfcfc2ce1000000016000000090ea47f"); // same hash as snapshot 1
+
+        insta::assert_snapshot!(format!("{deleted_internal_ids:?}"), @"[1]");
+        let txn = index.read_txn().unwrap();
+        let words = index.words_fst(&txn).unwrap().into_stream().into_strs().unwrap();
+        insta::assert_snapshot!(format!("{words:?}"), @r###"["hello"]"###); // "hello" still in doc 0
+
+        let mut s = Search::new(&txn, &index);
+        s.query("hello");
+        let crate::SearchResult { documents_ids, .. } = s.execute().unwrap();
+        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]"); // only doc 0 matches now
+    }
 }
--- a/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/documents_ids.snap
+++ b/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/documents_ids.snap
@ -0,0 +1,4 @@
+---
+source: milli/src/update/index_documents/mod.rs
+---
+[]
--- a/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/facet_id_exists_docids.snap
+++ b/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/facet_id_exists_docids.snap
@ -0,0 +1,4 @@
+---
+source: milli/src/update/index_documents/mod.rs
+---
+
--- a/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/word_docids.snap
+++ b/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/word_docids.snap
@ -0,0 +1,4 @@
+---
+source: milli/src/update/index_documents/mod.rs
+---
+
--- a/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/word_pair_proximity_docids.snap
+++ b/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_numbers_as_primary_key/word_pair_proximity_docids.snap
@ -0,0 +1,4 @@
+---
+source: milli/src/update/index_documents/mod.rs
+---
+
--- a/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_strange_primary_key/documents_ids.snap
+++ b/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_strange_primary_key/documents_ids.snap
@ -0,0 +1,4 @@
+---
+source: milli/src/update/index_documents/mod.rs
+---
+[2, ]
--- a/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_strange_primary_key/word_docids.snap
+++ b/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_strange_primary_key/word_docids.snap
@ -0,0 +1,5 @@
+---
+source: milli/src/update/index_documents/mod.rs
+---
+benoit           [2, ]
+
--- a/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_strange_primary_key/word_pair_proximity_docids.snap
+++ b/milli/src/update/index_documents/snapshots/mod.rs/delete_documents_with_strange_primary_key/word_pair_proximity_docids.snap
@ -0,0 +1,4 @@
+---
+source: milli/src/update/index_documents/mod.rs
+---
+
--- a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/facet_id_exists_docids.snap
+++ b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/facet_id_exists_docids.snap
@ -1,5 +1,5 @@
 ---
-source: milli/src/update/delete_documents.rs
+source: milli/src/update/index_documents/mod.rs
 ---
 1   [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ]
 2   [21, ]
--- a/milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/facet_id_f64_docids.snap
+++ b/milli/src/update/index_documents/snapshots/mod.rs/filtered_placeholder_search_should_not_return_deleted_documents/facet_id_f64_docids.snap
@ -0,0 +1,5 @@
+---
+source: milli/src/update/index_documents/mod.rs
+---
+2   0  2.2    1  [21, ]
+
--- a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/facet_id_string_docids.snap
+++ b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/facet_id_string_docids.snap
@ -1,5 +1,5 @@
 ---
-source: milli/src/update/delete_documents.rs
+source: milli/src/update/index_documents/mod.rs
 ---
 1   0  abstract     1  [2, 6, 10, 13, 14, 15, 16, 17, ]
 1   0  aquarium     1  [5, ]
--- a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_docids.snap
+++ b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_docids.snap
@ -1,5 +1,5 @@
 ---
-source: milli/src/update/delete_documents.rs
+source: milli/src/update/index_documents/mod.rs
 ---
 1                [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, ]
 2                [21, ]
--- a/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_pair_proximity_docids.snap
+++ b/milli/src/update/snapshots/delete_documents.rs/filtered_placeholder_search_should_not_return_deleted_documents/always_hard/word_pair_proximity_docids.snap
@ -1,5 +1,5 @@
 ---
-source: milli/src/update/delete_documents.rs
+source: milli/src/update/index_documents/mod.rs
 ---
 1  1                36               [3, ]
 1  1                37               [4, ]
--- a/milli/src/update/snapshots/delete_documents.rs/geo_filtered_placeholder_search_should_not_return_deleted_documents/always_hard/facet_id_f64_docids.snap
+++ b/milli/src/update/snapshots/delete_documents.rs/geo_filtered_placeholder_search_should_not_return_deleted_documents/always_hard/facet_id_f64_docids.snap
@ -1,5 +1,5 @@
 ---
-source: milli/src/update/delete_documents.rs
+source: milli/src/update/index_documents/mod.rs
 ---
 3   0  48.9021 1  [19, ]
 3   0  49.9314 1  [17, ]
--- a/milli/src/update/index_documents/snapshots/mod.rs/geo_filtered_placeholder_search_should_not_return_deleted_documents/facet_id_string_docids.snap
+++ b/milli/src/update/index_documents/snapshots/mod.rs/geo_filtered_placeholder_search_should_not_return_deleted_documents/facet_id_string_docids.snap
@ -0,0 +1,4 @@
+---
+source: milli/src/update/index_documents/mod.rs
+---
+
--- a/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/updated/word_docids.snap
+++ b/milli/src/update/index_documents/snapshots/mod.rs/simple_documents_replace/updated/word_docids.snap
@ -1,60 +1,56 @@
 ---
 source: milli/src/update/index_documents/mod.rs
 ---
-0                [1, 7, ]
+0                [1, ]
 1                [2, ]
-10               [1, 7, ]
-12               [0, 8, ]
+10               [1, ]
+12               [0, ]
 1344             [3, ]
-1813             [8, ]
-2                [0, 8, ]
+1813             [0, ]
+2                [0, ]
 23               [5, ]
 25               [2, ]
-3                [0, 8, ]
+3                [0, ]
 35               [5, ]
-4                [4, 6, ]
-42               [0, 5, 8, ]
-456              [1, 7, ]
-5                [0, 8, ]
+4                [4, ]
+42               [0, 5, ]
+456              [1, ]
+5                [0, ]
 99               [2, ]
 adams            [5, ]
-adventure        [1, 7, ]
+adventure        [1, ]
 alice            [2, ]
-and              [0, 4, 6, 8, ]
-antoine          [1, 7, ]
-austen           [8, ]
-austin           [0, ]
-blood            [4, 6, ]
+and              [0, 4, ]
+antoine          [1, ]
+austen           [0, ]
+blood            [4, ]
 carroll          [2, ]
-de               [1, 7, ]
+de               [1, ]
 douglas          [5, ]
-exupery          [1, 7, ]
-fantasy          [2, 3, 4, 6, ]
+exupery          [1, ]
+fantasy          [2, 3, 4, ]
 galaxy           [5, ]
 guide            [5, ]
-half             [4, 6, ]
-harry            [4, 6, ]
+half             [4, ]
+harry            [4, ]
 hitchhiker       [5, ]
 hobbit           [3, ]
 in               [2, ]
-j                [3, 4, 6, 8, ]
-jane             [0, ]
-k                [4, 6, ]
-le               [1, ]
+j                [0, 3, 4, ]
+k                [4, ]
 lewis            [2, ]
-little           [7, ]
-petit            [1, ]
-potter           [4, 6, ]
-prejudice        [0, 8, ]
-pride            [0, 8, ]
-prince           [1, 4, 7, ]
-princess         [6, ]
+little           [1, ]
+potter           [4, ]
+prejudice        [0, ]
+pride            [0, ]
+prince           [1, ]
+princess         [4, ]
 r                [3, ]
-romance          [0, 8, ]
-rowling          [4, 6, ]
+romance          [0, ]
+rowling          [4, ]
 s                [5, ]
-saint            [1, 7, ]
-the              [3, 4, 5, 6, 7, ]
+saint            [1, ]
+the              [1, 3, 4, 5, ]
 to               [5, ]
 tolkien          [3, ]
 wonderland       [2, ]
--- a/milli/src/update/index_documents/transform.rs
+++ b/milli/src/update/index_documents/transform.rs
@ -1,5 +1,6 @@
 use std::borrow::Cow;
-use std::collections::hash_map::Entry;
+use std::collections::btree_map::Entry as BEntry;
+use std::collections::hash_map::Entry as HEntry;
 use std::collections::{HashMap, HashSet};
 use std::fs::File;
 use std::io::{Read, Seek};
@ -7,18 +8,21 @@ use std::io::{Read, Seek};
 use fxhash::FxHashMap;
 use heed::RoTxn;
 use itertools::Itertools;
-use obkv::{KvReader, KvWriter};
+use obkv::{KvReader, KvReaderU16, KvWriter};
 use roaring::RoaringBitmap;
 use serde_json::Value;
 use smartstring::SmartString;

 use super::helpers::{
-    create_sorter, create_writer, keep_latest_obkv, merge_obkvs_and_operations, MergeFn,
+    create_sorter, create_writer, keep_first, obkvs_keep_last_addition_merge_deletions,
+    obkvs_merge_additions_and_deletions, sorter_into_reader, MergeFn,
 };
 use super::{IndexDocumentsMethod, IndexerConfig};
 use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
 use crate::error::{Error, InternalError, UserError};
 use crate::index::{db_name, main_key};
+use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd};
+use crate::update::index_documents::GrenadParameters;
 use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep};
 use crate::{
    FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, BEU32,
@ -28,9 +32,6 @@ pub struct TransformOutput {
    pub primary_key: String,
    pub fields_ids_map: FieldsIdsMap,
    pub field_distribution: FieldDistribution,
-    pub new_external_documents_ids: fst::Map<Cow<'static, [u8]>>,
-    pub new_documents_ids: RoaringBitmap,
-    pub replaced_documents_ids: RoaringBitmap,
    pub documents_count: usize,
    pub original_documents: File,
    pub flattened_documents: File,
@ -106,8 +107,8 @@ impl<'a, 'i> Transform<'a, 'i> {
        // We must choose the appropriate merge function for when two or more documents
        // with the same user id must be merged or fully replaced in the same batch.
        let merge_function = match index_documents_method {
-            IndexDocumentsMethod::ReplaceDocuments => keep_latest_obkv,
-            IndexDocumentsMethod::UpdateDocuments => merge_obkvs_and_operations,
+            IndexDocumentsMethod::ReplaceDocuments => obkvs_keep_last_addition_merge_deletions,
+            IndexDocumentsMethod::UpdateDocuments => obkvs_merge_additions_and_deletions,
        };

        // We initialize the sorter with the user indexing settings.
@ -130,17 +131,13 @@ impl<'a, 'i> Transform<'a, 'i> {
            indexer_settings.max_memory.map(|mem| mem / 2),
        );
        let documents_ids = index.documents_ids(wtxn)?;
-        let soft_deleted_documents_ids = index.soft_deleted_documents_ids(wtxn)?;

        Ok(Transform {
            index,
            fields_ids_map: index.fields_ids_map(wtxn)?,
            indexer_settings,
            autogenerate_docids,
-            available_documents_ids: AvailableDocumentsIds::from_documents_ids(
-                &documents_ids,
-                &soft_deleted_documents_ids,
-            ),
+            available_documents_ids: AvailableDocumentsIds::from_documents_ids(&documents_ids),
            original_sorter,
            flattened_sorter,
            index_documents_method,
@ -151,6 +148,7 @@ impl<'a, 'i> Transform<'a, 'i> {
        })
    }

+    #[logging_timer::time]
    pub fn read_documents<R, FP, FA>(
        &mut self,
        reader: EnrichedDocumentsBatchReader<R>,
@ -163,8 +161,10 @@ impl<'a, 'i> Transform<'a, 'i> {
        FP: Fn(UpdateIndexingStep) + Sync,
        FA: Fn() -> bool + Sync,
    {
+        puffin::profile_function!();
+
        let (mut cursor, fields_index) = reader.into_cursor_and_fields_index();
-        let external_documents_ids = self.index.external_documents_ids(wtxn)?;
+        let external_documents_ids = self.index.external_documents_ids();
        let mapping = create_fields_mapping(&mut self.fields_ids_map, &fields_index)?;

        let primary_key = cursor.primary_key().to_string();
@ -172,7 +172,8 @@ impl<'a, 'i> Transform<'a, 'i> {
            self.fields_ids_map.insert(&primary_key).ok_or(UserError::AttributeLimitReached)?;

        let mut obkv_buffer = Vec::new();
-        let mut document_sorter_buffer = Vec::new();
+        let mut document_sorter_value_buffer = Vec::new();
+        let mut document_sorter_key_buffer = Vec::new();
        let mut documents_count = 0;
        let mut docid_buffer: Vec<u8> = Vec::new();
        let mut field_buffer: Vec<(u16, Cow<[u8]>)> = Vec::new();
@ -213,29 +214,30 @@ impl<'a, 'i> Transform<'a, 'i> {
            field_buffer_cache.sort_unstable_by(|(f1, _), (f2, _)| f1.cmp(f2));

            // Build the new obkv document.
-            let mut writer = obkv::KvWriter::new(&mut obkv_buffer);
+            let mut writer = KvWriter::new(&mut obkv_buffer);
            for (k, v) in field_buffer_cache.iter() {
                writer.insert(*k, v)?;
            }

            let mut original_docid = None;
-
            let docid = match self.new_external_documents_ids_builder.entry((*external_id).into()) {
-                Entry::Occupied(entry) => *entry.get() as u32,
-                Entry::Vacant(entry) => {
-                    // If the document was already in the db we mark it as a replaced document.
-                    // It'll be deleted later.
-                    if let Some(docid) = external_documents_ids.get(entry.key()) {
-                        // If it was already in the list of replaced documents it means it was deleted
-                        // by the remove_document method. We should starts as if it never existed.
-                        if self.replaced_documents_ids.insert(docid) {
-                            original_docid = Some(docid);
+                HEntry::Occupied(entry) => *entry.get() as u32,
+                HEntry::Vacant(entry) => {
+                    let docid = match external_documents_ids.get(wtxn, entry.key())? {
+                        Some(docid) => {
+                            // If it was already in the list of replaced documents it means it was deleted
+                            // by the remove_document method. We should start as if it never existed.
+                            if self.replaced_documents_ids.insert(docid) {
+                                original_docid = Some(docid);
+                            }
+
+                            docid
                        }
-                    }
-                    let docid = self
-                        .available_documents_ids
-                        .next()
-                        .ok_or(UserError::DocumentLimitReached)?;
+                        None => self
+                            .available_documents_ids
+                            .next()
+                            .ok_or(UserError::DocumentLimitReached)?,
+                    };
                    entry.insert(docid as u64);
                    docid
                }
@ -263,47 +265,68 @@ impl<'a, 'i> Transform<'a, 'i> {
                    skip_insertion = true;
                } else {
                    // we associate the base document with the new key, everything will get merged later.
-                    document_sorter_buffer.clear();
-                    document_sorter_buffer.push(Operation::Addition as u8);
-                    document_sorter_buffer.extend_from_slice(base_obkv);
-                    self.original_sorter.insert(docid.to_be_bytes(), &document_sorter_buffer)?;
-                    match self.flatten_from_fields_ids_map(KvReader::new(base_obkv))? {
-                        Some(flattened_obkv) => {
-                            // we recreate our buffer with the flattened documents
-                            document_sorter_buffer.clear();
-                            document_sorter_buffer.push(Operation::Addition as u8);
-                            document_sorter_buffer.extend_from_slice(&flattened_obkv);
-                            self.flattened_sorter
-                                .insert(docid.to_be_bytes(), &document_sorter_buffer)?
+                    let deladd_operation = match self.index_documents_method {
+                        IndexDocumentsMethod::UpdateDocuments => {
+                            DelAddOperation::DeletionAndAddition
                        }
-                        None => self
-                            .flattened_sorter
-                            .insert(docid.to_be_bytes(), &document_sorter_buffer)?,
+                        IndexDocumentsMethod::ReplaceDocuments => DelAddOperation::Deletion,
+                    };
+                    document_sorter_key_buffer.clear();
+                    document_sorter_key_buffer.extend_from_slice(&docid.to_be_bytes());
+                    document_sorter_key_buffer.extend_from_slice(external_id.as_bytes());
+                    document_sorter_value_buffer.clear();
+                    document_sorter_value_buffer.push(Operation::Addition as u8);
+                    into_del_add_obkv(
+                        KvReaderU16::new(base_obkv),
+                        deladd_operation,
+                        &mut document_sorter_value_buffer,
+                    )?;
+                    self.original_sorter
+                        .insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
+                    let base_obkv = KvReader::new(base_obkv);
+                    if let Some(flattened_obkv) = self.flatten_from_fields_ids_map(base_obkv)? {
+                        // we recreate our buffer with the flattened documents
+                        document_sorter_value_buffer.clear();
+                        document_sorter_value_buffer.push(Operation::Addition as u8);
+                        into_del_add_obkv(
+                            KvReaderU16::new(&flattened_obkv),
+                            deladd_operation,
+                            &mut document_sorter_value_buffer,
+                        )?;
                    }
+                    self.flattened_sorter
+                        .insert(docid.to_be_bytes(), &document_sorter_value_buffer)?;
                }
            }

            if !skip_insertion {
                self.new_documents_ids.insert(docid);

-                document_sorter_buffer.clear();
-                document_sorter_buffer.push(Operation::Addition as u8);
-                document_sorter_buffer.extend_from_slice(&obkv_buffer);
+                document_sorter_key_buffer.clear();
+                document_sorter_key_buffer.extend_from_slice(&docid.to_be_bytes());
+                document_sorter_key_buffer.extend_from_slice(external_id.as_bytes());
+                document_sorter_value_buffer.clear();
+                document_sorter_value_buffer.push(Operation::Addition as u8);
+                into_del_add_obkv(
+                    KvReaderU16::new(&obkv_buffer),
+                    DelAddOperation::Addition,
+                    &mut document_sorter_value_buffer,
+                )?;
                // We use the extracted/generated user id as the key for this document.
-                self.original_sorter.insert(docid.to_be_bytes(), &document_sorter_buffer)?;
+                self.original_sorter
+                    .insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;

-                match self.flatten_from_fields_ids_map(KvReader::new(&obkv_buffer))? {
-                    Some(flattened_obkv) => {
-                        document_sorter_buffer.clear();
-                        document_sorter_buffer.push(Operation::Addition as u8);
-                        document_sorter_buffer.extend_from_slice(&flattened_obkv);
-                        self.flattened_sorter
-                            .insert(docid.to_be_bytes(), &document_sorter_buffer)?
-                    }
-                    None => self
-                        .flattened_sorter
-                        .insert(docid.to_be_bytes(), &document_sorter_buffer)?,
+                let flattened_obkv = KvReader::new(&obkv_buffer);
+                if let Some(obkv) = self.flatten_from_fields_ids_map(flattened_obkv)? {
+                    document_sorter_value_buffer.clear();
+                    document_sorter_value_buffer.push(Operation::Addition as u8);
+                    into_del_add_obkv(
+                        KvReaderU16::new(&obkv),
+                        DelAddOperation::Addition,
+                        &mut document_sorter_value_buffer,
+                    )?
                }
+                self.flattened_sorter.insert(docid.to_be_bytes(), &document_sorter_value_buffer)?;
            }
            documents_count += 1;

@ -338,6 +361,7 @@ impl<'a, 'i> Transform<'a, 'i> {
    /// - If the document to remove was inserted by the `read_documents` method before but was NOT present in the db,
    ///   it's added into the grenad to ensure we don't insert it + removed from the list of new documents ids.
    /// - If the document to remove was not present in either the db or the transform we do nothing.
+    #[logging_timer::time]
    pub fn remove_documents<FA>(
        &mut self,
        mut to_remove: Vec<String>,
@ -347,54 +371,176 @@ impl<'a, 'i> Transform<'a, 'i> {
    where
        FA: Fn() -> bool + Sync,
    {
+        puffin::profile_function!();
+
        // there may be duplicates in the documents to remove.
        to_remove.sort_unstable();
        to_remove.dedup();

-        let external_documents_ids = self.index.external_documents_ids(wtxn)?;
+        let external_documents_ids = self.index.external_documents_ids();

        let mut documents_deleted = 0;
+        let mut document_sorter_value_buffer = Vec::new();
+        let mut document_sorter_key_buffer = Vec::new();
        for to_remove in to_remove {
            if should_abort() {
                return Err(Error::InternalError(InternalError::AbortedIndexation));
            }

-            match self.new_external_documents_ids_builder.entry((*to_remove).into()) {
-                // if the document was added in a previous iteration of the transform we make it as deleted in the sorters.
-                Entry::Occupied(entry) => {
-                    let doc_id = *entry.get() as u32;
-                    self.original_sorter
-                        .insert(doc_id.to_be_bytes(), [Operation::Deletion as u8])?;
-                    self.flattened_sorter
-                        .insert(doc_id.to_be_bytes(), [Operation::Deletion as u8])?;
+            // Check if the document has been added in the current indexing process.
+            let deleted_from_current =
+                match self.new_external_documents_ids_builder.entry((*to_remove).into()) {
+                    // if the document was added in a previous iteration of the transform we mark it as deleted in the sorters.
+                    HEntry::Occupied(entry) => {
+                        let docid = *entry.get() as u32;
+                        // Key is the concatenation of the internal docid and the external one.
+                        document_sorter_key_buffer.clear();
+                        document_sorter_key_buffer.extend_from_slice(&docid.to_be_bytes());
+                        document_sorter_key_buffer.extend_from_slice(to_remove.as_bytes());
+                        document_sorter_value_buffer.clear();
+                        document_sorter_value_buffer.push(Operation::Deletion as u8);
+                        obkv::KvWriterU16::new(&mut document_sorter_value_buffer).finish().unwrap();
+                        self.original_sorter
+                            .insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
+                        self.flattened_sorter
+                            .insert(docid.to_be_bytes(), &document_sorter_value_buffer)?;

-                    // we must NOT update the list of replaced_documents_ids
-                    // Either:
-                    // 1. It's already in it and there is nothing to do
-                    // 2. It wasn't in it because the document was created by a previous batch and since
-                    //    we're removing it there is nothing to do.
-                    self.new_documents_ids.remove(doc_id);
-                    entry.remove_entry();
-                }
-                Entry::Vacant(entry) => {
-                    // If the document was already in the db we mark it as a `to_delete` document.
-                    // It'll be deleted later. We don't need to push anything to the sorters.
-                    if let Some(docid) = external_documents_ids.get(entry.key()) {
-                        self.replaced_documents_ids.insert(docid);
-                    } else {
-                        // if the document is nowehere to be found, there is nothing to do and we must NOT
-                        // increment the count of documents_deleted
-                        continue;
+                        // we must NOT update the list of replaced_documents_ids
+                        // Either:
+                        // 1. It's already in it and there is nothing to do
+                        // 2. It wasn't in it because the document was created by a previous batch and since
+                        //    we're removing it there is nothing to do.
+                        self.new_documents_ids.remove(docid);
+                        entry.remove_entry();
+                        true
                    }
+                    HEntry::Vacant(_) => false,
+                };
+
+            // If the document was already in the db we mark it as a `to_delete` document.
+            // Then we push the document in sorters in deletion mode.
+            let deleted_from_db = match external_documents_ids.get(wtxn, &to_remove)? {
+                Some(docid) => {
+                    self.remove_document_from_db(
+                        docid,
+                        to_remove,
+                        wtxn,
+                        &mut document_sorter_key_buffer,
+                        &mut document_sorter_value_buffer,
+                    )?;
+                    true
                }
+                None => false,
            };

+            // increase counter only if the document existed somewhere before.
+            if deleted_from_current || deleted_from_db {
+                documents_deleted += 1;
+            }
+        }
+
+        Ok(documents_deleted)
+    }
+
+    /// Removes documents from the db using their internal document ids and returns how many were removed.
+    ///
+    /// # Warning
+    ///
+    /// This function is dangerous and will only work correctly if:
+    ///
+    /// - All the passed ids currently exist in the database
+    /// - No batching using the standard `remove_documents` and `add_documents` took place
+    ///
+    /// TODO: make it impossible to call `remove_documents` or `add_documents` on an instance that calls this function.
+    #[logging_timer::time]
+    pub fn remove_documents_from_db_no_batch<FA>(
+        &mut self,
+        to_remove: &RoaringBitmap,
+        wtxn: &mut heed::RwTxn,
+        should_abort: FA,
+    ) -> Result<usize>
+    where
+        FA: Fn() -> bool + Sync,
+    {
+        puffin::profile_function!();
+
+        let mut documents_deleted = 0;
+        let mut document_sorter_value_buffer = Vec::new();
+        let mut document_sorter_key_buffer = Vec::new();
+        let external_ids = self.index.external_id_of(wtxn, to_remove.iter())?;
+
+        for (internal_docid, external_docid) in to_remove.iter().zip(external_ids) {
+            let external_docid = external_docid?;
+            if should_abort() {
+                return Err(Error::InternalError(InternalError::AbortedIndexation));
+            }
+            self.remove_document_from_db(
+                internal_docid,
+                external_docid,
+                wtxn,
+                &mut document_sorter_key_buffer,
+                &mut document_sorter_value_buffer,
+            )?;
+
             documents_deleted += 1;
         }

         Ok(documents_deleted)
     }

+    fn remove_document_from_db(
+        &mut self,
+        internal_docid: u32,
+        external_docid: String,
+        txn: &heed::RoTxn,
+        document_sorter_key_buffer: &mut Vec<u8>,
+        document_sorter_value_buffer: &mut Vec<u8>,
+    ) -> Result<()> {
+        self.replaced_documents_ids.insert(internal_docid);
+
+        // Fetch the stored obkv document as raw bytes; a missing entry is a `DatabaseMissingEntry` error.
+        let original_key = BEU32::new(internal_docid);
+        let base_obkv = self
+            .index
+            .documents
+            .remap_data_type::<heed::types::ByteSlice>()
+            .get(txn, &original_key)?
+            .ok_or(InternalError::DatabaseMissingEntry {
+                db_name: db_name::DOCUMENTS,
+                key: None,
+            })?;
+
+        // Key is the concatenation of the internal docid and the external one.
+        document_sorter_key_buffer.clear();
+        document_sorter_key_buffer.extend_from_slice(&internal_docid.to_be_bytes());
+        document_sorter_key_buffer.extend_from_slice(external_docid.as_bytes());
+        // Push the document as a deletion in the original_sorter.
+        document_sorter_value_buffer.clear();
+        document_sorter_value_buffer.push(Operation::Deletion as u8);
+        into_del_add_obkv(
+            KvReaderU16::new(base_obkv),
+            DelAddOperation::Deletion,
+            document_sorter_value_buffer,
+        )?;
+        self.original_sorter.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;
+
+        // Flatten it and push it as a deletion in the flattened_sorter; the buffer above is reused if unflattened.
+        let flattened_obkv = KvReader::new(base_obkv);
+        if let Some(obkv) = self.flatten_from_fields_ids_map(flattened_obkv)? {
+            // Rebuild the value buffer from the flattened document.
+            document_sorter_value_buffer.clear();
+            document_sorter_value_buffer.push(Operation::Deletion as u8);
+            into_del_add_obkv(
+                KvReaderU16::new(&obkv),
+                DelAddOperation::Deletion,
+                document_sorter_value_buffer,
+            )?;
+        }
+        self.flattened_sorter
+            .insert(internal_docid.to_be_bytes(), &document_sorter_value_buffer)?;
+        Ok(())
+    }
+
    // Flatten a document from the fields ids map contained in self and insert the new
    // created fields. Returns `None` if the document doesn't need to be flattened.
    fn flatten_from_fields_ids_map(&mut self, obkv: KvReader<FieldId>) -> Result<Option<Vec<u8>>> {
@ -514,42 +660,10 @@ impl<'a, 'i> Transform<'a, 'i> {
        Ok(())
    }

-    fn remove_deleted_documents_from_field_distribution(
-        &self,
-        rtxn: &RoTxn,
-        field_distribution: &mut FieldDistribution,
-    ) -> Result<()> {
-        for deleted_docid in self.replaced_documents_ids.iter() {
-            let obkv = self.index.documents.get(rtxn, &BEU32::new(deleted_docid))?.ok_or(
-                InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None },
-            )?;
-
-            for (key, _) in obkv.iter() {
-                let name =
-                    self.fields_ids_map.name(key).ok_or(FieldIdMapMissingEntry::FieldId {
-                        field_id: key,
-                        process: "Computing field distribution in transform.",
-                    })?;
-                // We checked that the document was in the db earlier. If we can't find it it means
-                // there is an inconsistency between the field distribution and the field id map.
-                let field =
-                    field_distribution.get_mut(name).ok_or(FieldIdMapMissingEntry::FieldId {
-                        field_id: key,
-                        process: "Accessing field distribution in transform.",
-                    })?;
-                *field -= 1;
-                if *field == 0 {
-                    // since we were able to get the field right before it's safe to unwrap here
-                    field_distribution.remove(name).unwrap();
-                }
-            }
-        }
-        Ok(())
-    }
-
    /// Generate the `TransformOutput` based on the given sorter that can be generated from any
    /// format like CSV, JSON or JSON stream. This sorter must contain a key that is the document
    /// id for the user side and the value must be an obkv where keys are valid fields ids.
+    #[logging_timer::time]
    pub(crate) fn output_from_sorter<F>(
        self,
        wtxn: &mut heed::RwTxn,
@ -581,17 +695,13 @@ impl<'a, 'i> Transform<'a, 'i> {
        // 2. Add all the new documents to the field distribution
        let mut field_distribution = self.index.field_distribution(wtxn)?;

-        self.remove_deleted_documents_from_field_distribution(wtxn, &mut field_distribution)?;
-
        // Here we are going to do the document count + field distribution + `write_into_stream_writer`
        let mut iter = self.original_sorter.into_stream_merger_iter()?;
        // used only for the callback
        let mut documents_count = 0;

        while let Some((key, val)) = iter.next()? {
-            if val[0] == Operation::Deletion as u8 {
-                continue;
-            }
+            // skip first byte corresponding to the operation type (Deletion or Addition).
            let val = &val[1..];

            // send a callback to show at which step we are
@ -601,16 +711,51 @@ impl<'a, 'i> Transform<'a, 'i> {
                total_documents: self.documents_count,
            });

-            // We increment all the field of the current document in the field distribution.
-            let obkv = KvReader::new(val);
-
-            for (key, _) in obkv.iter() {
-                let name =
-                    self.fields_ids_map.name(key).ok_or(FieldIdMapMissingEntry::FieldId {
-                        field_id: key,
-                        process: "Computing field distribution in transform.",
-                    })?;
-                *field_distribution.entry(name.to_string()).or_insert(0) += 1;
+            for (key, value) in KvReader::new(val) {
+                let reader = KvReaderDelAdd::new(value);
+                match (reader.get(DelAdd::Deletion), reader.get(DelAdd::Addition)) {
+                    (None, None) => {}
+                    (None, Some(_)) => {
+                        // New field
+                        let name = self.fields_ids_map.name(key).ok_or(
+                            FieldIdMapMissingEntry::FieldId {
+                                field_id: key,
+                                process: "Computing field distribution in transform.",
+                            },
+                        )?;
+                        *field_distribution.entry(name.to_string()).or_insert(0) += 1;
+                    }
+                    (Some(_), None) => {
+                        // Field removed
+                        let name = self.fields_ids_map.name(key).ok_or(
+                            FieldIdMapMissingEntry::FieldId {
+                                field_id: key,
+                                process: "Computing field distribution in transform.",
+                            },
+                        )?;
+                        match field_distribution.entry(name.to_string()) {
+                            BEntry::Vacant(_) => { /* Bug? trying to remove a non-existing field */
+                            }
+                            BEntry::Occupied(mut entry) => {
+                                // attempt to remove one
+                                match entry.get_mut().checked_sub(1) {
+                                    Some(0) => {
+                                        entry.remove();
+                                    }
+                                    Some(new_val) => {
+                                        *entry.get_mut() = new_val;
+                                    }
+                                    None => {
+                                        unreachable!("Attempting to remove a field that wasn't in the field distribution")
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    (Some(_), Some(_)) => {
+                        // Value change, no field distribution change
+                    }
+                }
            }
            writer.insert(key, val)?;
        }
@ -631,9 +776,7 @@ impl<'a, 'i> Transform<'a, 'i> {
        // We get rids of the `Operation` byte and skip the deleted documents as well.
        let mut iter = self.flattened_sorter.into_stream_merger_iter()?;
        while let Some((key, val)) = iter.next()? {
-            if val[0] == Operation::Deletion as u8 {
-                continue;
-            }
+            // skip first byte corresponding to the operation type (Deletion or Addition).
            let val = &val[1..];
            writer.insert(key, val)?;
        }
@ -649,15 +792,11 @@ impl<'a, 'i> Transform<'a, 'i> {
        new_external_documents_ids_builder.into_iter().try_for_each(|(key, value)| {
            fst_new_external_documents_ids_builder.insert(key, value)
        })?;
-        let new_external_documents_ids = fst_new_external_documents_ids_builder.into_map();

        Ok(TransformOutput {
            primary_key,
            fields_ids_map: self.fields_ids_map,
            field_distribution,
-            new_external_documents_ids: new_external_documents_ids.map_data(Cow::Owned).unwrap(),
-            new_documents_ids: self.new_documents_ids,
-            replaced_documents_ids: self.replaced_documents_ids,
            documents_count: self.documents_count,
            original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
            flattened_documents: flattened_documents
@ -687,37 +826,41 @@ impl<'a, 'i> Transform<'a, 'i> {
            .to_string();
        let field_distribution = self.index.field_distribution(wtxn)?;

-        // Delete the soft deleted document ids from the maps inside the external_document_ids structure
-        let new_external_documents_ids = {
-            let mut external_documents_ids = self.index.external_documents_ids(wtxn)?;
-            external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?;
-            // This call should be free and can't fail since the previous method merged both fsts.
-            external_documents_ids.into_static().to_fst()?.into_owned()
-        };
-
        let documents_ids = self.index.documents_ids(wtxn)?;
        let documents_count = documents_ids.len() as usize;

-        // We create a final writer to write the new documents in order from the sorter.
-        let mut original_writer = create_writer(
+        // We initialize the sorter with the user indexing settings.
+        let mut original_sorter = create_sorter(
+            grenad::SortAlgorithm::Stable,
+            keep_first,
            self.indexer_settings.chunk_compression_type,
            self.indexer_settings.chunk_compression_level,
-            tempfile::tempfile()?,
+            self.indexer_settings.max_nb_chunks,
+            self.indexer_settings.max_memory.map(|mem| mem / 2),
        );

-        // We create a final writer to write the new documents in order from the sorter.
-        let mut flattened_writer = create_writer(
+        // We initialize the sorter with the user indexing settings.
+        let mut flattened_sorter = create_sorter(
+            grenad::SortAlgorithm::Stable,
+            keep_first,
            self.indexer_settings.chunk_compression_type,
            self.indexer_settings.chunk_compression_level,
-            tempfile::tempfile()?,
+            self.indexer_settings.max_nb_chunks,
+            self.indexer_settings.max_memory.map(|mem| mem / 2),
        );

        let mut obkv_buffer = Vec::new();
-        for result in self.index.all_documents(wtxn)? {
-            let (docid, obkv) = result?;
+        let mut document_sorter_key_buffer = Vec::new();
+        let mut document_sorter_value_buffer = Vec::new();
+        for result in self.index.external_documents_ids().iter(wtxn)? {
+            let (external_id, docid) = result?;
+            let obkv = self.index.documents.get(wtxn, &docid)?.ok_or(
+                InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None },
+            )?;
+            let docid = docid.get();

            obkv_buffer.clear();
-            let mut obkv_writer = obkv::KvWriter::<_, FieldId>::new(&mut obkv_buffer);
+            let mut obkv_writer = KvWriter::<_, FieldId>::new(&mut obkv_buffer);

            // We iterate over the new `FieldsIdsMap` ids in order and construct the new obkv.
            for (id, name) in new_fields_ids_map.iter() {
@ -727,7 +870,17 @@ impl<'a, 'i> Transform<'a, 'i> {
            }

            let buffer = obkv_writer.into_inner()?;
-            original_writer.insert(docid.to_be_bytes(), &buffer)?;
+
+            document_sorter_key_buffer.clear();
+            document_sorter_key_buffer.extend_from_slice(&docid.to_be_bytes());
+            document_sorter_key_buffer.extend_from_slice(external_id.as_bytes());
+            document_sorter_value_buffer.clear();
+            into_del_add_obkv(
+                KvReaderU16::new(buffer),
+                DelAddOperation::Addition,
+                &mut document_sorter_value_buffer,
+            )?;
+            original_sorter.insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?;

            // Once we have the document. We're going to flatten it
            // and insert it in the flattened sorter.
@ -762,29 +915,34 @@ impl<'a, 'i> Transform<'a, 'i> {
                let value = serde_json::to_vec(&value).map_err(InternalError::SerdeJson)?;
                writer.insert(fid, &value)?;
            }
-            flattened_writer.insert(docid.to_be_bytes(), &buffer)?;
+            document_sorter_value_buffer.clear();
+            into_del_add_obkv(
+                KvReaderU16::new(&buffer),
+                DelAddOperation::Addition,
+                &mut document_sorter_value_buffer,
+            )?;
+            flattened_sorter.insert(docid.to_be_bytes(), &document_sorter_value_buffer)?;
        }

-        // Once we have written all the documents, we extract
-        // the file and reset the seek to be able to read it again.
-        let mut original_documents = original_writer.into_inner()?;
-        original_documents.rewind()?;
+        let grenad_params = GrenadParameters {
+            chunk_compression_type: self.indexer_settings.chunk_compression_type,
+            chunk_compression_level: self.indexer_settings.chunk_compression_level,
+            max_memory: self.indexer_settings.max_memory,
+            max_nb_chunks: self.indexer_settings.max_nb_chunks, // default value, may be chosen.
+        };

-        let mut flattened_documents = flattened_writer.into_inner()?;
-        flattened_documents.rewind()?;
+        // Once we have written all the documents, we merge everything into a Reader.
+        let original_documents = sorter_into_reader(original_sorter, grenad_params)?;
+
+        let flattened_documents = sorter_into_reader(flattened_sorter, grenad_params)?;

        let output = TransformOutput {
            primary_key,
            fields_ids_map: new_fields_ids_map,
            field_distribution,
-            new_external_documents_ids,
-            new_documents_ids: documents_ids,
-            replaced_documents_ids: RoaringBitmap::default(),
            documents_count,
-            original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
-            flattened_documents: flattened_documents
-                .into_inner()
-                .map_err(|err| err.into_error())?,
+            original_documents: original_documents.into_inner().into_inner(),
+            flattened_documents: flattened_documents.into_inner().into_inner(),
        };

        let new_facets = output.compute_real_facets(wtxn, self.index)?;
@ -828,38 +986,111 @@ mod test {

    #[test]
    fn merge_obkvs() {
-        let mut doc_0 = Vec::new();
-        let mut kv_writer = KvWriter::new(&mut doc_0);
+        let mut additive_doc_0 = Vec::new(); // obkv {0: [0]} converted with DelAddOperation::Addition
+        let mut deletive_doc_0 = Vec::new(); // same obkv converted with DelAddOperation::Deletion
+        let mut del_add_doc_0 = Vec::new(); // same obkv converted with DelAddOperation::DeletionAndAddition
+        let mut kv_writer = KvWriter::memory();
        kv_writer.insert(0_u8, [0]).unwrap();
-        kv_writer.finish().unwrap();
-        doc_0.insert(0, Operation::Addition as u8);
-
-        let ret = merge_obkvs_and_operations(&[], &[Cow::from(doc_0.as_slice())]).unwrap();
-        assert_eq!(*ret, doc_0);
-
-        let ret = merge_obkvs_and_operations(
-            &[],
-            &[Cow::from([Operation::Deletion as u8].as_slice()), Cow::from(doc_0.as_slice())],
+        let buffer = kv_writer.into_inner().unwrap();
+        into_del_add_obkv(
+            KvReaderU16::new(&buffer),
+            DelAddOperation::Addition,
+            &mut additive_doc_0,
        )
        .unwrap();
-        assert_eq!(*ret, doc_0);
-
-        let ret = merge_obkvs_and_operations(
-            &[],
-            &[Cow::from(doc_0.as_slice()), Cow::from([Operation::Deletion as u8].as_slice())],
+        additive_doc_0.insert(0, Operation::Addition as u8); // prepend the operation byte at index 0
+        into_del_add_obkv(
+            KvReaderU16::new(&buffer),
+            DelAddOperation::Deletion,
+            &mut deletive_doc_0,
        )
        .unwrap();
-        assert_eq!(*ret, [Operation::Deletion as u8]);
+        deletive_doc_0.insert(0, Operation::Deletion as u8); // prepend the operation byte at index 0
+        into_del_add_obkv(
+            KvReaderU16::new(&buffer),
+            DelAddOperation::DeletionAndAddition,
+            &mut del_add_doc_0,
+        )
+        .unwrap();
+        del_add_doc_0.insert(0, Operation::Addition as u8); // tagged Addition although built with DeletionAndAddition

-        let ret = merge_obkvs_and_operations(
+        let mut additive_doc_1 = Vec::new(); // obkv {1: [1]} converted with DelAddOperation::Addition
+        let mut kv_writer = KvWriter::memory();
+        kv_writer.insert(1_u8, [1]).unwrap();
+        let buffer = kv_writer.into_inner().unwrap();
+        into_del_add_obkv(
+            KvReaderU16::new(&buffer),
+            DelAddOperation::Addition,
+            &mut additive_doc_1,
+        )
+        .unwrap();
+        additive_doc_1.insert(0, Operation::Addition as u8);
+
+        let mut additive_doc_0_1 = Vec::new(); // obkv {0: [0], 1: [1]} converted with DelAddOperation::Addition
+        let mut kv_writer = KvWriter::memory();
+        kv_writer.insert(0_u8, [0]).unwrap();
+        kv_writer.insert(1_u8, [1]).unwrap();
+        let buffer = kv_writer.into_inner().unwrap();
+        into_del_add_obkv(
+            KvReaderU16::new(&buffer),
+            DelAddOperation::Addition,
+            &mut additive_doc_0_1,
+        )
+        .unwrap();
+        additive_doc_0_1.insert(0, Operation::Addition as u8);
+
+        let ret = obkvs_merge_additions_and_deletions(&[], &[Cow::from(additive_doc_0.as_slice())])
+            .unwrap();
+        assert_eq!(*ret, additive_doc_0); // a single addition comes back unchanged
+
+        let ret = obkvs_merge_additions_and_deletions(
+            &[],
+            &[Cow::from(deletive_doc_0.as_slice()), Cow::from(additive_doc_0.as_slice())],
+        )
+        .unwrap();
+        assert_eq!(*ret, del_add_doc_0); // deletion then addition => the DeletionAndAddition form
+
+        let ret = obkvs_merge_additions_and_deletions(
+            &[],
+            &[Cow::from(additive_doc_0.as_slice()), Cow::from(deletive_doc_0.as_slice())],
+        )
+        .unwrap();
+        assert_eq!(*ret, deletive_doc_0); // addition then deletion => the deletion form
+
+        let ret = obkvs_merge_additions_and_deletions(
            &[],
            &[
-                Cow::from([Operation::Addition as u8, 1].as_slice()),
-                Cow::from([Operation::Deletion as u8].as_slice()),
-                Cow::from(doc_0.as_slice()),
+                Cow::from(additive_doc_1.as_slice()),
+                Cow::from(deletive_doc_0.as_slice()),
+                Cow::from(additive_doc_0.as_slice()),
            ],
        )
        .unwrap();
-        assert_eq!(*ret, doc_0);
+        assert_eq!(*ret, del_add_doc_0); // the addition before the deletion does not appear in the result
+
+        let ret = obkvs_merge_additions_and_deletions(
+            &[],
+            &[Cow::from(additive_doc_1.as_slice()), Cow::from(additive_doc_0.as_slice())],
+        )
+        .unwrap();
+        assert_eq!(*ret, additive_doc_0_1); // two additions => the fields of both obkvs
+
+        let ret = obkvs_keep_last_addition_merge_deletions(
+            &[],
+            &[Cow::from(additive_doc_1.as_slice()), Cow::from(additive_doc_0.as_slice())],
+        )
+        .unwrap();
+        assert_eq!(*ret, additive_doc_0); // keep-last variant: only the last addition is kept
+
+        let ret = obkvs_keep_last_addition_merge_deletions(
+            &[],
+            &[
+                Cow::from(deletive_doc_0.as_slice()),
+                Cow::from(additive_doc_1.as_slice()),
+                Cow::from(additive_doc_0.as_slice()),
+            ],
+        )
+        .unwrap();
+        assert_eq!(*ret, del_add_doc_0); // keep-last variant: the deletion is merged with the last addition
+    }
 }
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@ -1,5 +1,4 @@
-use std::borrow::Cow;
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 use std::convert::TryInto;
 use std::fs::File;
 use std::io::{self, BufReader};
@ -9,32 +8,40 @@ use charabia::{Language, Script};
 use grenad::MergerBuilder;
 use heed::types::ByteSlice;
 use heed::RwTxn;
+use log::error;
+use obkv::{KvReader, KvWriter};
+use ordered_float::OrderedFloat;
 use roaring::RoaringBitmap;

 use super::helpers::{
-    self, merge_ignore_values, serialize_roaring_bitmap, valid_lmdb_key, CursorClonableMmap,
+    self, merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap, merge_ignore_values,
+    valid_lmdb_key, CursorClonableMmap,
 };
 use super::{ClonableMmap, MergeFn};
 use crate::distance::NDotProductPoint;
 use crate::error::UserError;
+use crate::external_documents_ids::{DocumentOperation, DocumentOperationKind};
 use crate::facet::FacetType;
+use crate::index::db_name::DOCUMENTS;
 use crate::index::Hnsw;
+use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd};
 use crate::update::facet::FacetsUpdate;
 use crate::update::index_documents::helpers::{as_cloneable_grenad, try_split_array_at};
-use crate::{lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result, BEU32};
+use crate::{
+    lat_lng_to_xyz, DocumentId, FieldId, GeoPoint, Index, Result, SerializationError, BEU32,
+};

 pub(crate) enum TypedChunk {
    FieldIdDocidFacetStrings(grenad::Reader<CursorClonableMmap>),
    FieldIdDocidFacetNumbers(grenad::Reader<CursorClonableMmap>),
    Documents(grenad::Reader<CursorClonableMmap>),
-    FieldIdWordcountDocids(grenad::Reader<BufReader<File>>),
-    NewDocumentsIds(RoaringBitmap),
+    FieldIdWordCountDocids(grenad::Reader<BufReader<File>>),
    WordDocids {
        word_docids_reader: grenad::Reader<BufReader<File>>,
        exact_word_docids_reader: grenad::Reader<BufReader<File>>,
+        word_fid_docids_reader: grenad::Reader<BufReader<File>>,
    },
    WordPositionDocids(grenad::Reader<BufReader<File>>),
-    WordFidDocids(grenad::Reader<BufReader<File>>),
    WordPairProximityDocids(grenad::Reader<BufReader<File>>),
    FieldIdFacetStringDocids(grenad::Reader<BufReader<File>>),
    FieldIdFacetNumberDocids(grenad::Reader<BufReader<File>>),
@ -43,7 +50,7 @@ pub(crate) enum TypedChunk {
    FieldIdFacetIsEmptyDocids(grenad::Reader<BufReader<File>>),
    GeoPoints(grenad::Reader<BufReader<File>>),
    VectorPoints(grenad::Reader<BufReader<File>>),
-    ScriptLanguageDocids(HashMap<(Script, Language), RoaringBitmap>),
+    ScriptLanguageDocids(HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>),
 }

 impl TypedChunk {
@ -58,23 +65,22 @@ impl TypedChunk {
            TypedChunk::Documents(grenad) => {
                format!("Documents {{ number_of_entries: {} }}", grenad.len())
            }
-            TypedChunk::FieldIdWordcountDocids(grenad) => {
+            TypedChunk::FieldIdWordCountDocids(grenad) => {
                format!("FieldIdWordcountDocids {{ number_of_entries: {} }}", grenad.len())
            }
-            TypedChunk::NewDocumentsIds(grenad) => {
-                format!("NewDocumentsIds {{ number_of_entries: {} }}", grenad.len())
-            }
-            TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => format!(
-                "WordDocids {{ word_docids_reader: {}, exact_word_docids_reader: {} }}",
+            TypedChunk::WordDocids {
+                word_docids_reader,
+                exact_word_docids_reader,
+                word_fid_docids_reader,
+            } => format!(
+                "WordDocids {{ word_docids_reader: {}, exact_word_docids_reader: {}, word_fid_docids_reader: {} }}",
                word_docids_reader.len(),
-                exact_word_docids_reader.len()
+                exact_word_docids_reader.len(),
+                word_fid_docids_reader.len()
            ),
            TypedChunk::WordPositionDocids(grenad) => {
                format!("WordPositionDocids {{ number_of_entries: {} }}", grenad.len())
            }
-            TypedChunk::WordFidDocids(grenad) => {
-                format!("WordFidDocids {{ number_of_entries: {} }}", grenad.len())
-            }
            TypedChunk::WordPairProximityDocids(grenad) => {
                format!("WordPairProximityDocids {{ number_of_entries: {} }}", grenad.len())
            }
@ -99,8 +105,8 @@ impl TypedChunk {
            TypedChunk::VectorPoints(grenad) => {
                format!("VectorPoints {{ number_of_entries: {} }}", grenad.len())
            }
-            TypedChunk::ScriptLanguageDocids(grenad) => {
-                format!("ScriptLanguageDocids {{ number_of_entries: {} }}", grenad.len())
+            TypedChunk::ScriptLanguageDocids(sl_map) => {
+                format!("ScriptLanguageDocids {{ number_of_entries: {} }}", sl_map.len())
            }
        }
    }
@ -119,34 +125,75 @@ pub(crate) fn write_typed_chunk_into_index(
    let mut is_merged_database = false;
    match typed_chunk {
        TypedChunk::Documents(obkv_documents_iter) => {
+            let mut operations: Vec<DocumentOperation> = Default::default();
+
+            let mut docids = index.documents_ids(wtxn)?;
            let mut cursor = obkv_documents_iter.into_cursor()?;
-            while let Some((key, value)) = cursor.move_on_next()? {
-                index.documents.remap_types::<ByteSlice, ByteSlice>().put(wtxn, key, value)?;
+            while let Some((key, reader)) = cursor.move_on_next()? {
+                let mut writer: KvWriter<_, FieldId> = KvWriter::memory();
+                let reader: KvReader<FieldId> = KvReader::new(reader);
+
+                let (document_id_bytes, external_id_bytes) = try_split_array_at(key)
+                    .ok_or(SerializationError::Decoding { db_name: Some(DOCUMENTS) })?;
+                let docid = DocumentId::from_be_bytes(document_id_bytes);
+                let external_id = std::str::from_utf8(external_id_bytes)?;
+
+                for (field_id, value) in reader.iter() {
+                    let del_add_reader = KvReaderDelAdd::new(value);
+
+                    if let Some(addition) = del_add_reader.get(DelAdd::Addition) {
+                        writer.insert(field_id, addition)?;
+                    }
+                }
+
+                let db = index.documents.remap_data_type::<ByteSlice>();
+
+                if !writer.is_empty() {
+                    db.put(wtxn, &BEU32::new(docid), &writer.into_inner().unwrap())?;
+                    operations.push(DocumentOperation {
+                        external_id: external_id.to_string(),
+                        internal_id: docid,
+                        kind: DocumentOperationKind::Create,
+                    });
+                    docids.insert(docid);
+                } else {
+                    db.delete(wtxn, &BEU32::new(docid))?;
+                    operations.push(DocumentOperation {
+                        external_id: external_id.to_string(),
+                        internal_id: docid,
+                        kind: DocumentOperationKind::Delete,
+                    });
+                    docids.remove(docid);
+                }
            }
+            let external_documents_docids = index.external_documents_ids();
+            external_documents_docids.apply(wtxn, operations)?;
+            index.put_documents_ids(wtxn, &docids)?;
        }
-        TypedChunk::FieldIdWordcountDocids(fid_word_count_docids_iter) => {
+        TypedChunk::FieldIdWordCountDocids(fid_word_count_docids_iter) => {
            append_entries_into_database(
                fid_word_count_docids_iter,
                &index.field_id_word_count_docids,
                wtxn,
                index_is_empty,
-                |value, _buffer| Ok(value),
-                merge_cbo_roaring_bitmaps,
+                deladd_serialize_add_side,
+                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
            )?;
            is_merged_database = true;
        }
-        TypedChunk::NewDocumentsIds(documents_ids) => {
-            return Ok((documents_ids, is_merged_database))
-        }
-        TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } => {
+        TypedChunk::WordDocids {
+            word_docids_reader,
+            exact_word_docids_reader,
+            word_fid_docids_reader,
+        } => {
            let word_docids_iter = unsafe { as_cloneable_grenad(&word_docids_reader) }?;
            append_entries_into_database(
                word_docids_iter.clone(),
                &index.word_docids,
                wtxn,
                index_is_empty,
-                |value, _buffer| Ok(value),
-                merge_roaring_bitmaps,
+                deladd_serialize_add_side,
+                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
            )?;

            let exact_word_docids_iter = unsafe { as_cloneable_grenad(&exact_word_docids_reader) }?;
@ -155,8 +202,18 @@ pub(crate) fn write_typed_chunk_into_index(
                &index.exact_word_docids,
                wtxn,
                index_is_empty,
-                |value, _buffer| Ok(value),
-                merge_roaring_bitmaps,
+                deladd_serialize_add_side,
+                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
+            )?;
+
+            let word_fid_docids_iter = unsafe { as_cloneable_grenad(&word_fid_docids_reader) }?;
+            append_entries_into_database(
+                word_fid_docids_iter,
+                &index.word_fid_docids,
+                wtxn,
+                index_is_empty,
+                deladd_serialize_add_side,
+                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
            )?;

            // create fst from word docids
@ -177,19 +234,8 @@ pub(crate) fn write_typed_chunk_into_index(
                &index.word_position_docids,
                wtxn,
                index_is_empty,
-                |value, _buffer| Ok(value),
-                merge_cbo_roaring_bitmaps,
-            )?;
-            is_merged_database = true;
-        }
-        TypedChunk::WordFidDocids(word_fid_docids_iter) => {
-            append_entries_into_database(
-                word_fid_docids_iter,
-                &index.word_fid_docids,
-                wtxn,
-                index_is_empty,
-                |value, _buffer| Ok(value),
-                merge_cbo_roaring_bitmaps,
+                deladd_serialize_add_side,
+                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
            )?;
            is_merged_database = true;
        }
@ -209,8 +255,8 @@ pub(crate) fn write_typed_chunk_into_index(
                &index.facet_id_exists_docids,
                wtxn,
                index_is_empty,
-                |value, _buffer| Ok(value),
-                merge_cbo_roaring_bitmaps,
+                deladd_serialize_add_side,
+                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
            )?;
            is_merged_database = true;
        }
@ -220,8 +266,8 @@ pub(crate) fn write_typed_chunk_into_index(
                &index.facet_id_is_null_docids,
                wtxn,
                index_is_empty,
-                |value, _buffer| Ok(value),
-                merge_cbo_roaring_bitmaps,
+                deladd_serialize_add_side,
+                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
            )?;
            is_merged_database = true;
        }
@ -231,8 +277,8 @@ pub(crate) fn write_typed_chunk_into_index(
                &index.facet_id_is_empty_docids,
                wtxn,
                index_is_empty,
-                |value, _buffer| Ok(value),
-                merge_cbo_roaring_bitmaps,
+                deladd_serialize_add_side,
+                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
            )?;
            is_merged_database = true;
        }
@ -242,8 +288,8 @@ pub(crate) fn write_typed_chunk_into_index(
                &index.word_pair_proximity_docids,
                wtxn,
                index_is_empty,
-                |value, _buffer| Ok(value),
-                merge_cbo_roaring_bitmaps,
+                deladd_serialize_add_side,
+                merge_deladd_cbo_roaring_bitmaps_into_cbo_roaring_bitmap,
            )?;
            is_merged_database = true;
        }
@ -252,8 +298,18 @@ pub(crate) fn write_typed_chunk_into_index(
                index.field_id_docid_facet_f64s.remap_types::<ByteSlice, ByteSlice>();
            let mut cursor = fid_docid_facet_number.into_cursor()?;
            while let Some((key, value)) = cursor.move_on_next()? {
+                let reader = KvReaderDelAdd::new(value);
                if valid_lmdb_key(key) {
-                    index_fid_docid_facet_numbers.put(wtxn, key, value)?;
+                    match (reader.get(DelAdd::Deletion), reader.get(DelAdd::Addition)) {
+                        (None, None) => {}
+                        (None, Some(new)) => index_fid_docid_facet_numbers.put(wtxn, key, new)?,
+                        (Some(_), None) => {
+                            index_fid_docid_facet_numbers.delete(wtxn, key)?;
+                        }
+                        (Some(_), Some(new)) => {
+                            index_fid_docid_facet_numbers.put(wtxn, key, new)?
+                        }
+                    }
                }
            }
        }
@ -262,8 +318,18 @@ pub(crate) fn write_typed_chunk_into_index(
                index.field_id_docid_facet_strings.remap_types::<ByteSlice, ByteSlice>();
            let mut cursor = fid_docid_facet_string.into_cursor()?;
            while let Some((key, value)) = cursor.move_on_next()? {
+                let reader = KvReaderDelAdd::new(value);
                if valid_lmdb_key(key) {
-                    index_fid_docid_facet_strings.put(wtxn, key, value)?;
+                    match (reader.get(DelAdd::Deletion), reader.get(DelAdd::Addition)) {
+                        (None, None) => {}
+                        (None, Some(new)) => index_fid_docid_facet_strings.put(wtxn, key, new)?,
+                        (Some(_), None) => {
+                            index_fid_docid_facet_strings.delete(wtxn, key)?;
+                        }
+                        (Some(_), Some(new)) => {
+                            index_fid_docid_facet_strings.put(wtxn, key, new)?
+                        }
+                    }
                }
            }
        }
@ -276,57 +342,86 @@ pub(crate) fn write_typed_chunk_into_index(
                // convert the key back to a u32 (4 bytes)
                let docid = key.try_into().map(DocumentId::from_be_bytes).unwrap();

-                // convert the latitude and longitude back to a f64 (8 bytes)
-                let (lat, tail) = helpers::try_split_array_at::<u8, 8>(value).unwrap();
-                let (lng, _) = helpers::try_split_array_at::<u8, 8>(tail).unwrap();
-                let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)];
-                let xyz_point = lat_lng_to_xyz(&point);
-
-                rtree.insert(GeoPoint::new(xyz_point, (docid, point)));
-                geo_faceted_docids.insert(docid);
+                let deladd_obkv = KvReaderDelAdd::new(value);
+                if let Some(value) = deladd_obkv.get(DelAdd::Deletion) {
+                    let geopoint = extract_geo_point(value, docid);
+                    rtree.remove(&geopoint);
+                    geo_faceted_docids.remove(docid);
+                }
+                if let Some(value) = deladd_obkv.get(DelAdd::Addition) {
+                    let geopoint = extract_geo_point(value, docid);
+                    rtree.insert(geopoint);
+                    geo_faceted_docids.insert(docid);
+                }
            }
            index.put_geo_rtree(wtxn, &rtree)?;
            index.put_geo_faceted_documents_ids(wtxn, &geo_faceted_docids)?;
        }
        TypedChunk::VectorPoints(vector_points) => {
-            let (pids, mut points): (Vec<_>, Vec<_>) = match index.vector_hnsw(wtxn)? {
-                Some(hnsw) => hnsw.iter().map(|(pid, point)| (pid, point.clone())).unzip(),
-                None => Default::default(),
-            };
-
-            // Convert the PointIds into DocumentIds
-            let mut docids = Vec::new();
-            for pid in pids {
-                let docid =
-                    index.vector_id_docid.get(wtxn, &BEU32::new(pid.into_inner()))?.unwrap();
-                docids.push(docid.get());
+            let mut vectors_set = HashSet::new();
+            // We extract and store the previous vectors
+            if let Some(hnsw) = index.vector_hnsw(wtxn)? {
+                for (pid, point) in hnsw.iter() {
+                    let pid_key = BEU32::new(pid.into_inner());
+                    let docid = index.vector_id_docid.get(wtxn, &pid_key)?.unwrap().get();
+                    let vector: Vec<_> = point.iter().copied().map(OrderedFloat).collect();
+                    vectors_set.insert((docid, vector));
+                }
            }

-            let mut expected_dimensions = points.get(0).map(|p| p.len());
            let mut cursor = vector_points.into_cursor()?;
            while let Some((key, value)) = cursor.move_on_next()? {
                // convert the key back to a u32 (4 bytes)
                let (left, _index) = try_split_array_at(key).unwrap();
                let docid = DocumentId::from_be_bytes(left);
-                // convert the vector back to a Vec<f32>
-                let vector: Vec<f32> = pod_collect_to_vec(value);

-                // TODO Inform the user about the document that has a wrong `_vectors`
-                let found = vector.len();
-                let expected = *expected_dimensions.get_or_insert(found);
-                if expected != found {
-                    return Err(UserError::InvalidVectorDimensions { expected, found })?;
+                let vector_deladd_obkv = KvReaderDelAdd::new(value);
+                if let Some(value) = vector_deladd_obkv.get(DelAdd::Deletion) {
+                    // convert the vector back to a Vec<f32>
+                    let vector = pod_collect_to_vec(value).into_iter().map(OrderedFloat).collect();
+                    let key = (docid, vector);
+                    if !vectors_set.remove(&key) {
+                        error!("Unable to delete the vector: {:?}", key.1);
+                    }
+                }
+                if let Some(value) = vector_deladd_obkv.get(DelAdd::Addition) {
+                    // convert the vector back to a Vec<f32>
+                    let vector = pod_collect_to_vec(value).into_iter().map(OrderedFloat).collect();
+                    vectors_set.insert((docid, vector));
                }
-
-                points.push(NDotProductPoint::new(vector));
-                docids.push(docid);
            }

-            assert_eq!(docids.len(), points.len());
+            // Extract the most common vector dimension
+            let expected_dimension_size = {
+                let mut dims = HashMap::new();
+                vectors_set.iter().for_each(|(_, v)| *dims.entry(v.len()).or_insert(0) += 1);
+                dims.into_iter().max_by_key(|(_, count)| *count).map(|(len, _)| len)
+            };
+
+            // Ensure that the vector lengths are correct and
+            // prepare the vectors before inserting them in the HNSW.
+            let mut points = Vec::new();
+            let mut docids = Vec::new();
+            for (docid, vector) in vectors_set {
+                if expected_dimension_size.map_or(false, |expected| expected != vector.len()) {
+                    return Err(UserError::InvalidVectorDimensions {
+                        expected: expected_dimension_size.unwrap_or(vector.len()),
+                        found: vector.len(),
+                    }
+                    .into());
+                } else {
+                    let vector = vector.into_iter().map(OrderedFloat::into_inner).collect();
+                    points.push(NDotProductPoint::new(vector));
+                    docids.push(docid);
+                }
+            }

            let hnsw_length = points.len();
            let (new_hnsw, pids) = Hnsw::builder().build_hnsw(points);

+            assert_eq!(docids.len(), pids.len());
+
+            // Store the vectors in the point-docid relation database
            index.vector_id_docid.clear(wtxn)?;
            for (docid, pid) in docids.into_iter().zip(pids) {
                index.vector_id_docid.put(
@ -339,22 +434,25 @@ pub(crate) fn write_typed_chunk_into_index(
            log::debug!("There are {} entries in the HNSW so far", hnsw_length);
            index.put_vector_hnsw(wtxn, &new_hnsw)?;
        }
-        TypedChunk::ScriptLanguageDocids(hash_pair) => {
-            let mut buffer = Vec::new();
-            for (key, value) in hash_pair {
-                buffer.clear();
+        TypedChunk::ScriptLanguageDocids(sl_map) => {
+            for (key, (deletion, addition)) in sl_map {
+                let mut db_key_exists = false;
                let final_value = match index.script_language_docids.get(wtxn, &key)? {
                    Some(db_values) => {
-                        let mut db_value_buffer = Vec::new();
-                        serialize_roaring_bitmap(&db_values, &mut db_value_buffer)?;
-                        let mut new_value_buffer = Vec::new();
-                        serialize_roaring_bitmap(&value, &mut new_value_buffer)?;
-                        merge_roaring_bitmaps(&new_value_buffer, &db_value_buffer, &mut buffer)?;
-                        RoaringBitmap::deserialize_from(&buffer[..])?
+                        db_key_exists = true;
+                        (db_values - deletion) | addition
                    }
-                    None => value,
+                    None => addition,
                };
-                index.script_language_docids.put(wtxn, &key, &final_value)?;
+
+                if final_value.is_empty() {
+                    // If the database entry exists, delete it.
+                    if db_key_exists {
+                        index.script_language_docids.delete(wtxn, &key)?;
+                    }
+                } else {
+                    index.script_language_docids.put(wtxn, &key, &final_value)?;
+                }
            }
        }
    }
@ -362,6 +460,15 @@ pub(crate) fn write_typed_chunk_into_index(
    Ok((RoaringBitmap::new(), is_merged_database))
 }

+/// Converts the latitude and longitude (two native-endian `f64`s read from `value`) back to an xyz GeoPoint for `docid`.
+fn extract_geo_point(value: &[u8], docid: DocumentId) -> GeoPoint {
+    let (lat, tail) = helpers::try_split_array_at::<u8, 8>(value).unwrap(); // first 8 bytes: latitude; panics if the split returns None
+    let (lng, _) = helpers::try_split_array_at::<u8, 8>(tail).unwrap(); // next 8 bytes: longitude; any remaining bytes are ignored
+    let point = [f64::from_ne_bytes(lat), f64::from_ne_bytes(lng)]; // [lat, lng], decoded with native endianness
+    let xyz_point = lat_lng_to_xyz(&point);
+    GeoPoint::new(xyz_point, (docid, point)) // built from the xyz coordinates plus the (docid, [lat, lng]) pair
+}
+
 fn merge_word_docids_reader_into_fst(
    word_docids_iter: grenad::Reader<io::Cursor<ClonableMmap>>,
    exact_word_docids_iter: grenad::Reader<io::Cursor<ClonableMmap>>,
@ -379,24 +486,6 @@ fn merge_word_docids_reader_into_fst(
    Ok(builder.into_set())
 }

-fn merge_roaring_bitmaps(new_value: &[u8], db_value: &[u8], buffer: &mut Vec<u8>) -> Result<()> {
-    let new_value = RoaringBitmap::deserialize_from(new_value)?;
-    let db_value = RoaringBitmap::deserialize_from(db_value)?;
-    let value = new_value | db_value;
-    Ok(serialize_roaring_bitmap(&value, buffer)?)
-}
-
-fn merge_cbo_roaring_bitmaps(
-    new_value: &[u8],
-    db_value: &[u8],
-    buffer: &mut Vec<u8>,
-) -> Result<()> {
-    Ok(CboRoaringBitmapCodec::merge_into(
-        &[Cow::Borrowed(db_value), Cow::Borrowed(new_value)],
-        buffer,
-    )?)
-}
-
 /// Write provided entries in database using serialize_value function.
 /// merge_values function is used if an entry already exist in the database.
 fn write_entries_into_database<R, K, V, FS, FM>(
@ -410,7 +499,7 @@ fn write_entries_into_database<R, K, V, FS, FM>(
 where
    R: io::Read + io::Seek,
    FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
-    FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>,
+    FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec<u8>) -> Result<Option<&'a [u8]>>,
 {
    puffin::profile_function!(format!("number of entries: {}", data.len()));

@ -422,17 +511,19 @@ where
        if valid_lmdb_key(key) {
            buffer.clear();
            let value = if index_is_empty {
-                serialize_value(value, &mut buffer)?
+                Some(serialize_value(value, &mut buffer)?)
            } else {
                match database.get(wtxn, key)? {
-                    Some(prev_value) => {
-                        merge_values(value, prev_value, &mut buffer)?;
-                        &buffer[..]
-                    }
-                    None => serialize_value(value, &mut buffer)?,
+                    Some(prev_value) => merge_values(value, prev_value, &mut buffer)?,
+                    None => Some(serialize_value(value, &mut buffer)?),
                }
            };
-            database.put(wtxn, key, value)?;
+            match value {
+                Some(value) => database.put(wtxn, key, value)?,
+                None => {
+                    database.delete(wtxn, key)?;
+                }
+            }
        }
    }

@ -454,7 +545,8 @@ fn append_entries_into_database<R, K, V, FS, FM>(
 where
    R: io::Read + io::Seek,
    FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
-    FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>,
+    FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec<u8>) -> Result<Option<&'a [u8]>>,
+    K: for<'a> heed::BytesDecode<'a>,
 {
    puffin::profile_function!(format!("number of entries: {}", data.len()));

@ -475,6 +567,12 @@ where
    let mut cursor = data.into_cursor()?;
    while let Some((key, value)) = cursor.move_on_next()? {
        if valid_lmdb_key(key) {
+            debug_assert!(
+                K::bytes_decode(key).is_some(),
+                "Couldn't decode key with the database decoder, key length: {} - key bytes: {:x?}",
+                key.len(),
+                &key
+            );
            buffer.clear();
            let value = serialize_value(value, &mut buffer)?;
            unsafe { database.append(key, value)? };
--- a/milli/src/update/mod.rs
+++ b/milli/src/update/mod.rs
@ -1,6 +1,5 @@
 pub use self::available_documents_ids::AvailableDocumentsIds;
 pub use self::clear_documents::ClearDocuments;
-pub use self::delete_documents::{DeleteDocuments, DeletionStrategy, DocumentDeletionResult};
 pub use self::facet::bulk::FacetsUpdateBulk;
 pub use self::facet::incremental::FacetsUpdateIncrementalInner;
 pub use self::index_documents::{
@ -9,10 +8,6 @@ pub use self::index_documents::{
    MergeFn,
 };
 pub use self::indexer_config::IndexerConfig;
-pub use self::prefix_word_pairs::{
-    PrefixWordPairsProximityDocids, MAX_LENGTH_FOR_PREFIX_PROXIMITY_DB,
-    MAX_PROXIMITY_FOR_PREFIX_PROXIMITY_DB,
-};
 pub use self::settings::{Setting, Settings};
 pub use self::update_step::UpdateIndexingStep;
 pub use self::word_prefix_docids::WordPrefixDocids;
@ -21,11 +16,10 @@ pub use self::words_prefixes_fst::WordsPrefixesFst;

 mod available_documents_ids;
 mod clear_documents;
-mod delete_documents;
+pub(crate) mod del_add;
 pub(crate) mod facet;
 mod index_documents;
 mod indexer_config;
-mod prefix_word_pairs;
 mod settings;
 mod update_step;
 mod word_prefix_docids;
--- a/milli/src/update/prefix_word_pairs/mod.rs
+++ b/milli/src/update/prefix_word_pairs/mod.rs
@ -1,579 +0,0 @@
-use std::borrow::Cow;
-use std::collections::HashSet;
-use std::io::{BufReader, BufWriter};
-
-use grenad::CompressionType;
-use heed::types::ByteSlice;
-
-use super::index_documents::{merge_cbo_roaring_bitmaps, CursorClonableMmap};
-use crate::{Index, Result};
-
-mod prefix_word;
-mod word_prefix;
-
-pub use prefix_word::index_prefix_word_database;
-pub use word_prefix::index_word_prefix_database;
-
-pub const MAX_PROXIMITY_FOR_PREFIX_PROXIMITY_DB: u8 = 4;
-pub const MAX_LENGTH_FOR_PREFIX_PROXIMITY_DB: usize = 2;
-
-pub struct PrefixWordPairsProximityDocids<'t, 'u, 'i> {
-    wtxn: &'t mut heed::RwTxn<'i, 'u>,
-    index: &'i Index,
-    max_proximity: u8,
-    max_prefix_length: usize,
-    chunk_compression_type: CompressionType,
-    chunk_compression_level: Option<u32>,
-}
-impl<'t, 'u, 'i> PrefixWordPairsProximityDocids<'t, 'u, 'i> {
-    pub fn new(
-        wtxn: &'t mut heed::RwTxn<'i, 'u>,
-        index: &'i Index,
-        chunk_compression_type: CompressionType,
-        chunk_compression_level: Option<u32>,
-    ) -> Self {
-        Self {
-            wtxn,
-            index,
-            max_proximity: MAX_PROXIMITY_FOR_PREFIX_PROXIMITY_DB,
-            max_prefix_length: MAX_LENGTH_FOR_PREFIX_PROXIMITY_DB,
-            chunk_compression_type,
-            chunk_compression_level,
-        }
-    }
-
-    #[logging_timer::time("WordPrefixPairProximityDocids::{}")]
-    pub fn execute<'a>(
-        self,
-        new_word_pair_proximity_docids: grenad::Reader<CursorClonableMmap>,
-        new_prefix_fst_words: &'a [String],
-        common_prefix_fst_words: &[&'a [String]],
-        del_prefix_fst_words: &HashSet<Vec<u8>>,
-    ) -> Result<()> {
-        puffin::profile_function!();
-
-        index_word_prefix_database(
-            self.wtxn,
-            self.index.word_pair_proximity_docids,
-            self.index.word_prefix_pair_proximity_docids,
-            self.max_proximity,
-            self.max_prefix_length,
-            new_word_pair_proximity_docids.clone(),
-            new_prefix_fst_words,
-            common_prefix_fst_words,
-            del_prefix_fst_words,
-            self.chunk_compression_type,
-            self.chunk_compression_level,
-        )?;
-
-        index_prefix_word_database(
-            self.wtxn,
-            self.index.word_pair_proximity_docids,
-            self.index.prefix_word_pair_proximity_docids,
-            self.max_proximity,
-            self.max_prefix_length,
-            new_word_pair_proximity_docids,
-            new_prefix_fst_words,
-            common_prefix_fst_words,
-            del_prefix_fst_words,
-            self.chunk_compression_type,
-            self.chunk_compression_level,
-        )?;
-
-        Ok(())
-    }
-}
-
-// This is adapted from `sorter_into_lmdb_database`
-pub fn insert_into_database(
-    wtxn: &mut heed::RwTxn,
-    database: heed::PolyDatabase,
-    new_key: &[u8],
-    new_value: &[u8],
-) -> Result<()> {
-    let mut iter = database.prefix_iter_mut::<_, ByteSlice, ByteSlice>(wtxn, new_key)?;
-    match iter.next().transpose()? {
-        Some((key, old_val)) if new_key == key => {
-            let val =
-                merge_cbo_roaring_bitmaps(key, &[Cow::Borrowed(old_val), Cow::Borrowed(new_value)])
-                    .map_err(|_| {
-                        // TODO just wrap this error?
-                        crate::error::InternalError::IndexingMergingKeys {
-                            process: "get-put-merge",
-                        }
-                    })?;
-            // safety: we use the new_key, not the one from the database iterator, to avoid undefined behaviour
-            unsafe { iter.put_current(new_key, &val)? };
-        }
-        _ => {
-            drop(iter);
-            database.put::<_, ByteSlice, ByteSlice>(wtxn, new_key, new_value)?;
-        }
-    }
-    Ok(())
-}
-
-// This is adapted from `sorter_into_lmdb_database` and `write_into_lmdb_database`,
-// but it uses `append` if the database is empty, and it assumes that the values in the
-// writer don't conflict with values in the database.
-pub fn write_into_lmdb_database_without_merging(
-    wtxn: &mut heed::RwTxn,
-    database: heed::PolyDatabase,
-    writer: grenad::Writer<BufWriter<std::fs::File>>,
-) -> Result<()> {
-    let file = writer.into_inner()?.into_inner().map_err(|err| err.into_error())?;
-    let reader = grenad::Reader::new(BufReader::new(file))?;
-    if database.is_empty(wtxn)? {
-        let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
-        let mut cursor = reader.into_cursor()?;
-        while let Some((k, v)) = cursor.move_on_next()? {
-            // safety: the key comes from the grenad reader, not the database
-            unsafe { out_iter.append(k, v)? };
-        }
-    } else {
-        let mut cursor = reader.into_cursor()?;
-        while let Some((k, v)) = cursor.move_on_next()? {
-            database.put::<_, ByteSlice, ByteSlice>(wtxn, k, v)?;
-        }
-    }
-    Ok(())
-}
-
-#[cfg(test)]
-mod tests {
-    use std::io::Cursor;
-    use std::iter::FromIterator;
-
-    use roaring::RoaringBitmap;
-
-    use crate::db_snap;
-    use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
-    use crate::index::tests::TempIndex;
-    use crate::update::{DeleteDocuments, DeletionStrategy, IndexDocumentsMethod};
-
-    fn documents_with_enough_different_words_for_prefixes(
-        prefixes: &[&str],
-        start_id: usize,
-    ) -> Vec<crate::Object> {
-        let mut documents = Vec::new();
-        let mut id = start_id;
-        for prefix in prefixes {
-            for i in 0..50 {
-                documents.push(
-                    serde_json::json!({
-                        "id": id,
-                        "text": format!("{prefix}{i:x}"),
-                    })
-                    .as_object()
-                    .unwrap()
-                    .clone(),
-                );
-                id += 1;
-            }
-        }
-        documents
-    }
-
-    #[test]
-    fn add_new_documents() {
-        let mut index = TempIndex::new();
-        index.index_documents_config.words_prefix_threshold = Some(50);
-        index.index_documents_config.autogenerate_docids = true;
-
-        index
-            .update_settings(|settings| {
-                settings.set_searchable_fields(vec!["text".to_owned()]);
-            })
-            .unwrap();
-
-        let batch_reader_from_documents = |documents| {
-            let mut builder = DocumentsBatchBuilder::new(Vec::new());
-            for object in documents {
-                builder.append_json_object(&object).unwrap();
-            }
-            DocumentsBatchReader::from_reader(Cursor::new(builder.into_inner().unwrap())).unwrap()
-        };
-
-        let mut documents = documents_with_enough_different_words_for_prefixes(&["a", "be"], 0);
-        // now we add some documents where the text should populate the word_prefix_pair_proximity_docids database
-        documents.push(
-            serde_json::json!({
-                "id": "9000",
-                "text": "At an amazing and beautiful house"
-            })
-            .as_object()
-            .unwrap()
-            .clone(),
-        );
-        documents.push(
-            serde_json::json!({
-                "id": "9001",
-                "text": "The bell rings at 5 am"
-            })
-            .as_object()
-            .unwrap()
-            .clone(),
-        );
-
-        let documents = batch_reader_from_documents(documents);
-        index.add_documents(documents).unwrap();
-
-        db_snap!(index, word_prefix_pair_proximity_docids, "initial");
-        db_snap!(index, prefix_word_pair_proximity_docids, "initial");
-
-        let mut documents = documents_with_enough_different_words_for_prefixes(&["am", "an"], 100);
-        documents.push(
-            serde_json::json!({
-                "id": "9002",
-                "text": "At an extraordinary house"
-            })
-            .as_object()
-            .unwrap()
-            .clone(),
-        );
-        let documents = batch_reader_from_documents(documents);
-        index.add_documents(documents).unwrap();
-
-        db_snap!(index, word_pair_proximity_docids, "update");
-        db_snap!(index, word_prefix_pair_proximity_docids, "update");
-        db_snap!(index, prefix_word_pair_proximity_docids, "update");
-    }
-    #[test]
-    fn batch_bug_3043() {
-        // https://github.com/meilisearch/meilisearch/issues/3043
-        let mut index = TempIndex::new();
-        index.index_documents_config.words_prefix_threshold = Some(50);
-        index.index_documents_config.autogenerate_docids = true;
-
-        index
-            .update_settings(|settings| {
-                settings.set_searchable_fields(vec!["text".to_owned()]);
-            })
-            .unwrap();
-
-        let batch_reader_from_documents = |documents| {
-            let mut builder = DocumentsBatchBuilder::new(Vec::new());
-            for object in documents {
-                builder.append_json_object(&object).unwrap();
-            }
-            DocumentsBatchReader::from_reader(Cursor::new(builder.into_inner().unwrap())).unwrap()
-        };
-
-        let mut documents = documents_with_enough_different_words_for_prefixes(&["y"], 0);
-        // now we add some documents where the text should populate the word_prefix_pair_proximity_docids database
-        documents.push(
-            serde_json::json!({
-                "text": "x y"
-            })
-            .as_object()
-            .unwrap()
-            .clone(),
-        );
-        documents.push(
-            serde_json::json!({
-                "text": "x a y"
-            })
-            .as_object()
-            .unwrap()
-            .clone(),
-        );
-
-        let documents = batch_reader_from_documents(documents);
-        index.add_documents(documents).unwrap();
-
-        db_snap!(index, word_pair_proximity_docids);
-        db_snap!(index, word_prefix_pair_proximity_docids);
-        db_snap!(index, prefix_word_pair_proximity_docids);
-    }
-
-    #[test]
-    fn hard_delete_and_reupdate() {
-        let mut index = TempIndex::new();
-        index.index_documents_config.words_prefix_threshold = Some(50);
-
-        index
-            .update_settings(|settings| {
-                settings.set_primary_key("id".to_owned());
-                settings.set_searchable_fields(vec!["text".to_owned()]);
-            })
-            .unwrap();
-
-        let batch_reader_from_documents = |documents| {
-            let mut builder = DocumentsBatchBuilder::new(Vec::new());
-            for object in documents {
-                builder.append_json_object(&object).unwrap();
-            }
-            DocumentsBatchReader::from_reader(Cursor::new(builder.into_inner().unwrap())).unwrap()
-        };
-
-        let mut documents = documents_with_enough_different_words_for_prefixes(&["a"], 0);
-        // now we add some documents where the text should populate the word_prefix_pair_proximity_docids database
-        documents.push(
-            serde_json::json!({
-                "id": 9000,
-                "text": "At an amazing and beautiful house"
-            })
-            .as_object()
-            .unwrap()
-            .clone(),
-        );
-        documents.push(
-            serde_json::json!({
-                "id": 9001,
-                "text": "The bell rings at 5 am"
-            })
-            .as_object()
-            .unwrap()
-            .clone(),
-        );
-
-        let documents = batch_reader_from_documents(documents);
-        index.add_documents(documents).unwrap();
-
-        db_snap!(index, documents_ids, "initial");
-        db_snap!(index, word_docids, "initial");
-        db_snap!(index, word_prefix_pair_proximity_docids, "initial");
-        db_snap!(index, prefix_word_pair_proximity_docids, "initial");
-
-        let mut wtxn = index.write_txn().unwrap();
-        let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-        delete.strategy(DeletionStrategy::AlwaysHard);
-        delete.delete_documents(&RoaringBitmap::from_iter([50]));
-        delete.execute().unwrap();
-        wtxn.commit().unwrap();
-
-        db_snap!(index, documents_ids, "first_delete");
-        db_snap!(index, word_docids, "first_delete");
-        db_snap!(index, word_prefix_pair_proximity_docids, "first_delete");
-        db_snap!(index, prefix_word_pair_proximity_docids, "first_delete");
-
-        let mut wtxn = index.write_txn().unwrap();
-        let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-        delete.strategy(DeletionStrategy::AlwaysHard);
-        delete.delete_documents(&RoaringBitmap::from_iter(0..50));
-        delete.execute().unwrap();
-        wtxn.commit().unwrap();
-
-        db_snap!(index, documents_ids, "second_delete");
-        db_snap!(index, word_docids, "second_delete");
-        db_snap!(index, word_prefix_pair_proximity_docids, "second_delete");
-        db_snap!(index, prefix_word_pair_proximity_docids, "second_delete");
-
-        let documents = documents_with_enough_different_words_for_prefixes(&["b"], 1000);
-        // now we add some documents where the text should populate the word_prefix_pair_proximity_docids database
-
-        index.add_documents(batch_reader_from_documents(documents)).unwrap();
-
-        db_snap!(index, documents_ids, "reupdate");
-        db_snap!(index, word_docids, "reupdate");
-        db_snap!(index, word_prefix_pair_proximity_docids, "reupdate");
-        db_snap!(index, prefix_word_pair_proximity_docids, "reupdate");
-    }
-
-    #[test]
-    fn soft_delete_and_reupdate() {
-        let mut index = TempIndex::new();
-        index.index_documents_config.words_prefix_threshold = Some(50);
-
-        index
-            .update_settings(|settings| {
-                settings.set_primary_key("id".to_owned());
-                settings.set_searchable_fields(vec!["text".to_owned()]);
-            })
-            .unwrap();
-
-        let batch_reader_from_documents = |documents| {
-            let mut builder = DocumentsBatchBuilder::new(Vec::new());
-            for object in documents {
-                builder.append_json_object(&object).unwrap();
-            }
-            DocumentsBatchReader::from_reader(Cursor::new(builder.into_inner().unwrap())).unwrap()
-        };
-
-        let mut documents = documents_with_enough_different_words_for_prefixes(&["a"], 0);
-        // now we add some documents where the text should populate the word_prefix_pair_proximity_docids database
-        documents.push(
-            serde_json::json!({
-                "id": 9000,
-                "text": "At an amazing and beautiful house"
-            })
-            .as_object()
-            .unwrap()
-            .clone(),
-        );
-        documents.push(
-            serde_json::json!({
-                "id": 9001,
-                "text": "The bell rings at 5 am"
-            })
-            .as_object()
-            .unwrap()
-            .clone(),
-        );
-
-        let documents = batch_reader_from_documents(documents);
-        index.add_documents(documents).unwrap();
-
-        db_snap!(index, documents_ids, "initial");
-        db_snap!(index, word_docids, "initial");
-        db_snap!(index, word_prefix_pair_proximity_docids, "initial");
-        db_snap!(index, prefix_word_pair_proximity_docids, "initial");
-
-        let mut wtxn = index.write_txn().unwrap();
-        let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-        delete.strategy(DeletionStrategy::AlwaysSoft);
-        delete.delete_documents(&RoaringBitmap::from_iter([50]));
-        delete.execute().unwrap();
-        wtxn.commit().unwrap();
-
-        db_snap!(index, documents_ids, "first_delete");
-        db_snap!(index, word_docids, "first_delete");
-        db_snap!(index, word_prefix_pair_proximity_docids, "first_delete");
-        db_snap!(index, prefix_word_pair_proximity_docids, "first_delete");
-
-        let mut wtxn = index.write_txn().unwrap();
-        let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
-        delete.strategy(DeletionStrategy::AlwaysSoft);
-
-        delete.delete_documents(&RoaringBitmap::from_iter(0..50));
-        delete.execute().unwrap();
-        wtxn.commit().unwrap();
-
-        db_snap!(index, documents_ids, "second_delete");
-        db_snap!(index, word_docids, "second_delete");
-        db_snap!(index, word_prefix_pair_proximity_docids, "second_delete");
-        db_snap!(index, prefix_word_pair_proximity_docids, "second_delete");
-
-        let documents = documents_with_enough_different_words_for_prefixes(&["b"], 1000);
-        // now we add some documents where the text should populate the word_prefix_pair_proximity_docids database
-
-        index.add_documents(batch_reader_from_documents(documents)).unwrap();
-
-        db_snap!(index, documents_ids, "reupdate");
-        db_snap!(index, word_docids, "reupdate");
-        db_snap!(index, word_prefix_pair_proximity_docids, "reupdate");
-        db_snap!(index, prefix_word_pair_proximity_docids, "reupdate");
-    }
-
-    #[test]
-    fn replace_soft_deletion() {
-        let mut index = TempIndex::new();
-        index.index_documents_config.words_prefix_threshold = Some(50);
-        index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments;
-        index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
-
-        index
-            .update_settings(|settings| {
-                settings.set_primary_key("id".to_owned());
-                settings.set_searchable_fields(vec!["text".to_owned()]);
-            })
-            .unwrap();
-
-        let batch_reader_from_documents = |documents| {
-            let mut builder = DocumentsBatchBuilder::new(Vec::new());
-            for object in documents {
-                builder.append_json_object(&object).unwrap();
-            }
-            DocumentsBatchReader::from_reader(Cursor::new(builder.into_inner().unwrap())).unwrap()
-        };
-
-        let mut documents = documents_with_enough_different_words_for_prefixes(&["a"], 0);
-        // now we add some documents where the text should populate the word_prefix_pair_proximity_docids database
-        documents.push(
-            serde_json::json!({
-                "id": 9000,
-                "text": "At an amazing house"
-            })
-            .as_object()
-            .unwrap()
-            .clone(),
-        );
-        documents.push(
-            serde_json::json!({
-                "id": 9001,
-                "text": "The bell rings"
-            })
-            .as_object()
-            .unwrap()
-            .clone(),
-        );
-
-        let documents = batch_reader_from_documents(documents);
-        index.add_documents(documents).unwrap();
-
-        db_snap!(index, documents_ids, "initial");
-        db_snap!(index, word_docids, "initial");
-        db_snap!(index, word_prefix_pair_proximity_docids, "initial");
-        db_snap!(index, prefix_word_pair_proximity_docids, "initial");
-
-        let documents = documents_with_enough_different_words_for_prefixes(&["b"], 0);
-        index.add_documents(batch_reader_from_documents(documents)).unwrap();
-
-        db_snap!(index, documents_ids, "replaced");
-        db_snap!(index, word_docids, "replaced");
-        db_snap!(index, word_prefix_pair_proximity_docids, "replaced");
-        db_snap!(index, prefix_word_pair_proximity_docids, "replaced");
-        db_snap!(index, soft_deleted_documents_ids, "replaced", @"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, ]");
-    }
-
-    #[test]
-    fn replace_hard_deletion() {
-        let mut index = TempIndex::new();
-        index.index_documents_config.words_prefix_threshold = Some(50);
-        index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard;
-        index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments;
-
-        index
-            .update_settings(|settings| {
-                settings.set_primary_key("id".to_owned());
-                settings.set_searchable_fields(vec!["text".to_owned()]);
-            })
-            .unwrap();
-
-        let batch_reader_from_documents = |documents| {
-            let mut builder = DocumentsBatchBuilder::new(Vec::new());
-            for object in documents {
-                builder.append_json_object(&object).unwrap();
-            }
-            DocumentsBatchReader::from_reader(Cursor::new(builder.into_inner().unwrap())).unwrap()
-        };
-
-        let mut documents = documents_with_enough_different_words_for_prefixes(&["a"], 0);
-        // now we add some documents where the text should populate the word_prefix_pair_proximity_docids database
-        documents.push(
-            serde_json::json!({
-                "id": 9000,
-                "text": "At an amazing house"
-            })
-            .as_object()
-            .unwrap()
-            .clone(),
-        );
-        documents.push(
-            serde_json::json!({
-                "id": 9001,
-                "text": "The bell rings"
-            })
-            .as_object()
-            .unwrap()
-            .clone(),
-        );
-
-        let documents = batch_reader_from_documents(documents);
-        index.add_documents(documents).unwrap();
-
-        db_snap!(index, documents_ids, "initial");
-        db_snap!(index, word_docids, "initial");
-        db_snap!(index, word_prefix_pair_proximity_docids, "initial");
-        db_snap!(index, prefix_word_pair_proximity_docids, "initial");
-
-        let documents = documents_with_enough_different_words_for_prefixes(&["b"], 0);
-        index.add_documents(batch_reader_from_documents(documents)).unwrap();
-
-        db_snap!(index, documents_ids, "replaced");
-        db_snap!(index, word_docids, "replaced");
-        db_snap!(index, word_prefix_pair_proximity_docids, "replaced");
-        db_snap!(index, prefix_word_pair_proximity_docids, "replaced");
-        db_snap!(index, soft_deleted_documents_ids, "replaced", @"[]");
-    }
-}
--- a/milli/src/update/prefix_word_pairs/prefix_word.rs
+++ b/milli/src/update/prefix_word_pairs/prefix_word.rs
@ -1,182 +0,0 @@
-use std::borrow::Cow;
-use std::collections::{BTreeMap, HashSet};
-
-use grenad::CompressionType;
-use heed::types::ByteSlice;
-use heed::BytesDecode;
-use log::debug;
-
-use crate::update::index_documents::{create_writer, CursorClonableMmap};
-use crate::update::prefix_word_pairs::{
-    insert_into_database, write_into_lmdb_database_without_merging,
-};
-use crate::{CboRoaringBitmapCodec, Result, U8StrStrCodec, UncheckedU8StrStrCodec};
-
-#[allow(clippy::too_many_arguments)]
-#[logging_timer::time]
-pub fn index_prefix_word_database(
-    wtxn: &mut heed::RwTxn,
-    word_pair_proximity_docids: heed::Database<U8StrStrCodec, CboRoaringBitmapCodec>,
-    prefix_word_pair_proximity_docids: heed::Database<U8StrStrCodec, CboRoaringBitmapCodec>,
-    max_proximity: u8,
-    max_prefix_length: usize,
-    new_word_pair_proximity_docids: grenad::Reader<CursorClonableMmap>,
-    new_prefix_fst_words: &[String],
-    common_prefix_fst_words: &[&[String]],
-    del_prefix_fst_words: &HashSet<Vec<u8>>,
-    chunk_compression_type: CompressionType,
-    chunk_compression_level: Option<u32>,
-) -> Result<()> {
-    puffin::profile_function!();
-
-    let max_proximity = max_proximity - 1;
-    debug!("Computing and writing the word prefix pair proximity docids into LMDB on disk...");
-
-    let common_prefixes: Vec<_> = common_prefix_fst_words
-        .iter()
-        .flat_map(|s| s.iter())
-        .map(|s| s.as_str())
-        .filter(|s| s.len() <= max_prefix_length)
-        .collect();
-
-    for proximity in 1..max_proximity {
-        for prefix in common_prefixes.iter() {
-            let mut prefix_key = vec![proximity];
-            prefix_key.extend_from_slice(prefix.as_bytes());
-            let mut cursor = new_word_pair_proximity_docids.clone().into_prefix_iter(prefix_key)?;
-            // This is the core of the algorithm
-            execute_on_word_pairs_and_prefixes(
-                proximity,
-                prefix.as_bytes(),
-                // the next two arguments tell how to iterate over the new word pairs
-                &mut cursor,
-                |cursor| {
-                    if let Some((key, value)) = cursor.next()? {
-                        let (_, _, word2) = UncheckedU8StrStrCodec::bytes_decode(key)
-                            .ok_or(heed::Error::Decoding)?;
-                        Ok(Some((word2, value)))
-                    } else {
-                        Ok(None)
-                    }
-                },
-                // and this argument tells what to do with each new key (proximity, prefix, word2) and value (roaring bitmap)
-                |key, value| {
-                    insert_into_database(
-                        wtxn,
-                        *prefix_word_pair_proximity_docids.as_polymorph(),
-                        key,
-                        value,
-                    )
-                },
-            )?;
-        }
-    }
-
-    // Now we do the same thing with the new prefixes and all word pairs in the DB
-    let new_prefixes: Vec<_> = new_prefix_fst_words
-        .iter()
-        .map(|s| s.as_str())
-        .filter(|s| s.len() <= max_prefix_length)
-        .collect();
-
-    // Since we read the DB, we can't write to it directly, so we add each new (word1, prefix, proximity)
-    // element in an intermediary grenad
-    let mut writer =
-        create_writer(chunk_compression_type, chunk_compression_level, tempfile::tempfile()?);
-
-    for proximity in 1..max_proximity {
-        for prefix in new_prefixes.iter() {
-            let mut prefix_key = vec![proximity];
-            prefix_key.extend_from_slice(prefix.as_bytes());
-            let mut db_iter = word_pair_proximity_docids
-                .as_polymorph()
-                .prefix_iter::<_, ByteSlice, ByteSlice>(wtxn, prefix_key.as_slice())?
-                .remap_key_type::<UncheckedU8StrStrCodec>();
-            execute_on_word_pairs_and_prefixes(
-                proximity,
-                prefix.as_bytes(),
-                &mut db_iter,
-                |db_iter| {
-                    db_iter
-                        .next()
-                        .transpose()
-                        .map(|x| x.map(|((_, _, word2), value)| (word2, value)))
-                        .map_err(|e| e.into())
-                },
-                |key, value| writer.insert(key, value).map_err(|e| e.into()),
-            )?;
-            drop(db_iter);
-        }
-    }
-
-    // and then we write the grenad into the DB
-    // Since the grenad contains only new prefixes, we know in advance that none
-    // of its elements already exist in the DB, thus there is no need to specify
-    // how to merge conflicting elements
-    write_into_lmdb_database_without_merging(
-        wtxn,
-        *prefix_word_pair_proximity_docids.as_polymorph(),
-        writer,
-    )?;
-
-    // All of the word prefix pairs in the database that have a w2
-    // that is contained in the `suppr_pw` set must be removed as well.
-    if !del_prefix_fst_words.is_empty() {
-        let mut iter =
-            prefix_word_pair_proximity_docids.remap_data_type::<ByteSlice>().iter_mut(wtxn)?;
-        while let Some(((_, prefix, _), _)) = iter.next().transpose()? {
-            if del_prefix_fst_words.contains(prefix.as_bytes()) {
-                // Delete this entry as the w2 prefix is no more in the words prefix fst.
-                unsafe { iter.del_current()? };
-            }
-        }
-    }
-
-    Ok(())
-}
-
-/// This is the core of the algorithm to initialise the Prefix Word Pair Proximity Docids database.
-///
-/// Its arguments are:
-/// - an iterator over the words following the given `prefix` with the given `proximity`
-/// - a closure to describe how to handle the new computed (proximity, prefix, word2) elements
-fn execute_on_word_pairs_and_prefixes<I>(
-    proximity: u8,
-    prefix: &[u8],
-    iter: &mut I,
-    mut next_word2_and_docids: impl for<'a> FnMut(&'a mut I) -> Result<Option<(&'a [u8], &'a [u8])>>,
-    mut insert: impl for<'a> FnMut(&'a [u8], &'a [u8]) -> Result<()>,
-) -> Result<()> {
-    let mut batch: BTreeMap<Vec<u8>, Vec<Cow<'static, [u8]>>> = BTreeMap::default();
-
-    // Memory usage check:
-    // The content of the loop will be called for each `word2` that follows a word beginning
-    // with `prefix` with the given proximity.
-    // In practice, I don't think the batch can ever get too big.
-    while let Some((word2, docids)) = next_word2_and_docids(iter)? {
-        let entry = batch.entry(word2.to_owned()).or_default();
-        entry.push(Cow::Owned(docids.to_owned()));
-    }
-
-    let mut key_buffer = Vec::with_capacity(512);
-    key_buffer.push(proximity);
-    key_buffer.extend_from_slice(prefix);
-    key_buffer.push(0);
-
-    let mut value_buffer = Vec::with_capacity(65_536);
-
-    for (word2, docids) in batch {
-        key_buffer.truncate(prefix.len() + 2);
-        value_buffer.clear();
-
-        key_buffer.extend_from_slice(&word2);
-        let data = if docids.len() > 1 {
-            CboRoaringBitmapCodec::merge_into(&docids, &mut value_buffer)?;
-            value_buffer.as_slice()
-        } else {
-            &docids[0]
-        };
-        insert(key_buffer.as_slice(), data)?;
-    }
-    Ok(())
-}
--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/add_new_documents/initial/prefix_word_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/add_new_documents/initial/prefix_word_pair_proximity_docids.snap
@ -1,20 +0,0 @@
---
-source: milli/src/update/prefix_word_pairs/mod.rs
---
-1  a    5                [101, ]
-1  a    amazing          [100, ]
-1  a    an               [100, ]
-1  a    and              [100, ]
-1  a    beautiful        [100, ]
-1  b    house            [100, ]
-1  b    rings            [101, ]
-1  be   house            [100, ]
-1  be   rings            [101, ]
-2  a    am               [101, ]
-2  a    amazing          [100, ]
-2  a    and              [100, ]
-2  a    beautiful        [100, ]
-2  a    house            [100, ]
-2  b    at               [101, ]
-2  be   at               [101, ]
-
--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/add_new_documents/initial/word_prefix_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/add_new_documents/initial/word_prefix_pair_proximity_docids.snap
@ -1,23 +0,0 @@
---
-source: milli/src/update/prefix_word_pairs/mod.rs
---
-1  5                a    [101, ]
-1  amazing          a    [100, ]
-1  an               a    [100, ]
-1  and              b    [100, ]
-1  and              be   [100, ]
-1  at               a    [100, ]
-1  rings            a    [101, ]
-1  the              b    [101, ]
-1  the              be   [101, ]
-2  amazing          b    [100, ]
-2  amazing          be   [100, ]
-2  an               a    [100, ]
-2  at               a    [100, 101, ]
-2  bell             a    [101, ]
-3  an               b    [100, ]
-3  an               be   [100, ]
-3  at               a    [100, ]
-3  rings            a    [101, ]
-3  the              a    [101, ]
-
--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/add_new_documents/update/prefix_word_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/add_new_documents/update/prefix_word_pair_proximity_docids.snap
@ -1,29 +0,0 @@
---
-source: milli/src/update/prefix_word_pairs/mod.rs
---
-1  a    5                [101, ]
-1  a    amazing          [100, ]
-1  a    an               [100, 202, ]
-1  a    and              [100, ]
-1  a    beautiful        [100, ]
-1  a    extraordinary    [202, ]
-1  am   and              [100, ]
-1  an   amazing          [100, ]
-1  an   beautiful        [100, ]
-1  an   extraordinary    [202, ]
-1  b    house            [100, ]
-1  b    rings            [101, ]
-1  be   house            [100, ]
-1  be   rings            [101, ]
-2  a    am               [101, ]
-2  a    amazing          [100, ]
-2  a    and              [100, ]
-2  a    beautiful        [100, ]
-2  a    extraordinary    [202, ]
-2  a    house            [100, 202, ]
-2  am   beautiful        [100, ]
-2  an   and              [100, ]
-2  an   house            [100, 202, ]
-2  b    at               [101, ]
-2  be   at               [101, ]
-
--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/add_new_documents/update/word_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/add_new_documents/update/word_pair_proximity_docids.snap
@ -1,33 +0,0 @@
---
-source: milli/src/update/prefix_word_pairs/mod.rs
---
-1  5                am               [101, ]
-1  amazing          and              [100, ]
-1  an               amazing          [100, ]
-1  an               extraordinary    [202, ]
-1  and              beautiful        [100, ]
-1  at               5                [101, ]
-1  at               an               [100, 202, ]
-1  beautiful        house            [100, ]
-1  bell             rings            [101, ]
-1  extraordinary    house            [202, ]
-1  rings            at               [101, ]
-1  the              bell             [101, ]
-2  amazing          beautiful        [100, ]
-2  an               and              [100, ]
-2  an               house            [202, ]
-2  and              house            [100, ]
-2  at               am               [101, ]
-2  at               amazing          [100, ]
-2  at               extraordinary    [202, ]
-2  bell             at               [101, ]
-2  rings            5                [101, ]
-2  the              rings            [101, ]
-3  amazing          house            [100, ]
-3  an               beautiful        [100, ]
-3  at               and              [100, ]
-3  at               house            [202, ]
-3  bell             5                [101, ]
-3  rings            am               [101, ]
-3  the              at               [101, ]
-
--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/add_new_documents/update/word_prefix_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/add_new_documents/update/word_prefix_pair_proximity_docids.snap
@ -1,31 +0,0 @@
---
-source: milli/src/update/prefix_word_pairs/mod.rs
---
-1  5                a    [101, ]
-1  5                am   [101, ]
-1  amazing          a    [100, ]
-1  amazing          an   [100, ]
-1  an               a    [100, ]
-1  an               am   [100, ]
-1  and              b    [100, ]
-1  and              be   [100, ]
-1  at               a    [100, 202, ]
-1  at               an   [100, 202, ]
-1  rings            a    [101, ]
-1  the              b    [101, ]
-1  the              be   [101, ]
-2  amazing          b    [100, ]
-2  amazing          be   [100, ]
-2  an               a    [100, ]
-2  an               an   [100, ]
-2  at               a    [100, 101, ]
-2  at               am   [100, 101, ]
-2  bell             a    [101, ]
-3  an               b    [100, ]
-3  an               be   [100, ]
-3  at               a    [100, ]
-3  at               an   [100, ]
-3  rings            a    [101, ]
-3  rings            am   [101, ]
-3  the              a    [101, ]
-
--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/batch_bug_3043/prefix_word_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/batch_bug_3043/prefix_word_pair_proximity_docids.snap
@ -1,4 +0,0 @@
---
-source: milli/src/update/prefix_word_pairs/mod.rs
---
-
--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/batch_bug_3043/word_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/batch_bug_3043/word_pair_proximity_docids.snap
@ -1,8 +0,0 @@
---
-source: milli/src/update/prefix_word_pairs/mod.rs
---
-1  a                y                [51, ]
-1  x                a                [51, ]
-1  x                y                [50, ]
-2  x                y                [51, ]
-
--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/batch_bug_3043/word_prefix_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/batch_bug_3043/word_prefix_pair_proximity_docids.snap
@ -1,7 +0,0 @@
---
-source: milli/src/update/prefix_word_pairs/mod.rs
---
-1  a                y    [51, ]
-1  x                y    [50, ]
-2  x                y    [51, ]
-
--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/hard_delete_and_reupdate/first_delete/documents_ids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/hard_delete_and_reupdate/first_delete/documents_ids.snap
@ -1,4 +0,0 @@
---
-source: milli/src/update/prefix_word_pairs/mod.rs
---
-[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 51, ]
--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/hard_delete_and_reupdate/first_delete/prefix_word_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/hard_delete_and_reupdate/first_delete/prefix_word_pair_proximity_docids.snap
@ -1,6 +0,0 @@
---
-source: milli/src/update/prefix_word_pairs/mod.rs
---
-1  a    5                [51, ]
-2  a    am               [51, ]
-
--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/hard_delete_and_reupdate/first_delete/word_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/hard_delete_and_reupdate/first_delete/word_docids.snap
@ -1,60 +0,0 @@
---
-source: milli/src/update/prefix_word_pairs/mod.rs
---
-5                [51, ]
-a0               [0, ]
-a1               [1, ]
-a10              [16, ]
-a11              [17, ]
-a12              [18, ]
-a13              [19, ]
-a14              [20, ]
-a15              [21, ]
-a16              [22, ]
-a17              [23, ]
-a18              [24, ]
-a19              [25, ]
-a1a              [26, ]
-a1b              [27, ]
-a1c              [28, ]
-a1d              [29, ]
-a1e              [30, ]
-a1f              [31, ]
-a2               [2, ]
-a20              [32, ]
-a21              [33, ]
-a22              [34, ]
-a23              [35, ]
-a24              [36, ]
-a25              [37, ]
-a26              [38, ]
-a27              [39, ]
-a28              [40, ]
-a29              [41, ]
-a2a              [42, ]
-a2b              [43, ]
-a2c              [44, ]
-a2d              [45, ]
-a2e              [46, ]
-a2f              [47, ]
-a3               [3, ]
-a30              [48, ]
-a31              [49, ]
-a4               [4, ]
-a5               [5, ]
-a6               [6, ]
-a7               [7, ]
-a8               [8, ]
-a9               [9, ]
-aa               [10, ]
-ab               [11, ]
-ac               [12, ]
-ad               [13, ]
-ae               [14, ]
-af               [15, ]
-am               [51, ]
-at               [51, ]
-bell             [51, ]
-rings            [51, ]
-the              [51, ]
-
--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/hard_delete_and_reupdate/first_delete/word_prefix_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/hard_delete_and_reupdate/first_delete/word_prefix_pair_proximity_docids.snap
@ -1,10 +0,0 @@
---
-source: milli/src/update/prefix_word_pairs/mod.rs
---
-1  5                a    [51, ]
-1  rings            a    [51, ]
-2  at               a    [51, ]
-2  bell             a    [51, ]
-3  rings            a    [51, ]
-3  the              a    [51, ]
-
--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/hard_delete_and_reupdate/initial/documents_ids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/hard_delete_and_reupdate/initial/documents_ids.snap
@ -1,4 +0,0 @@
---
-source: milli/src/update/prefix_word_pairs/mod.rs
---
-[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, ]
--- a/milli/src/update/prefix_word_pairs/snapshots/mod.rs/hard_delete_and_reupdate/initial/prefix_word_pair_proximity_docids.snap
+++ b/milli/src/update/prefix_word_pairs/snapshots/mod.rs/hard_delete_and_reupdate/initial/prefix_word_pair_proximity_docids.snap
@ -1,14 +0,0 @@
---
-source: milli/src/update/prefix_word_pairs/mod.rs
---
-1  a    5                [51, ]
-1  a    amazing          [50, ]
-1  a    an               [50, ]
-1  a    and              [50, ]
-1  a    beautiful        [50, ]
-2  a    am               [51, ]
-2  a    amazing          [50, ]
-2  a    and              [50, ]
-2  a    beautiful        [50, ]
-2  a    house            [50, ]
-
--- a/Show More
+++ b/Show More