Fix the geosearch indexing bug on datasets that mix documents with and without a `_geo` field

This commit is contained in:
Tamo 2022-05-16 15:55:18 +02:00
parent 65e6aa0de2
commit 0af399a6d7
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
2 changed files with 56 additions and 17 deletions

View File

@ -35,26 +35,28 @@ pub fn extract_geo_points<R: io::Read + io::Seek>(
};
// first we get the two fields
let lat = obkv
.get(lat_fid)
.ok_or_else(|| GeoError::MissingLatitude { document_id: primary_key() })?;
let lng = obkv
.get(lng_fid)
.ok_or_else(|| GeoError::MissingLongitude { document_id: primary_key() })?;
let lat = obkv.get(lat_fid);
let lng = obkv.get(lng_fid);
// then we extract the values
let lat = extract_float_from_value(
serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
)
.map_err(|lat| GeoError::BadLatitude { document_id: primary_key(), value: lat })?;
if let Some((lat, lng)) = lat.zip(lng) {
// then we extract the values
let lat = extract_float_from_value(
serde_json::from_slice(lat).map_err(InternalError::SerdeJson)?,
)
.map_err(|lat| GeoError::BadLatitude { document_id: primary_key(), value: lat })?;
let lng = extract_float_from_value(
serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
)
.map_err(|lng| GeoError::BadLongitude { document_id: primary_key(), value: lng })?;
let lng = extract_float_from_value(
serde_json::from_slice(lng).map_err(InternalError::SerdeJson)?,
)
.map_err(|lng| GeoError::BadLongitude { document_id: primary_key(), value: lng })?;
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
writer.insert(docid_bytes, bytes)?;
let bytes: [u8; 16] = concat_arrays![lat.to_ne_bytes(), lng.to_ne_bytes()];
writer.insert(docid_bytes, bytes)?;
} else if lat.is_none() && lng.is_some() {
return Err(GeoError::MissingLatitude { document_id: primary_key() })?;
} else if lat.is_some() && lng.is_none() {
return Err(GeoError::MissingLongitude { document_id: primary_key() })?;
}
}
Ok(writer_into_reader(writer)?)

View File

@ -1006,6 +1006,43 @@ mod tests {
wtxn.commit().unwrap();
}
/// Regression test for batches in which only some documents carry a `_geo` field.
///
/// The documents are indexed first, and `_geo` is declared filterable afterwards in a
/// separate transaction. The test passes as long as neither step returns an error.
#[test]
fn mixed_geo_documents() {
    let path = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(options, &path).unwrap();

    // Step 1: index six documents, only the first and the last of which have `_geo`.
    let mut indexing_txn = index.write_txn().unwrap();
    let mixed_batch = documents!([
        { "id": 2, "price": 3.5, "_geo": { "lat": 12, "lng": 42 } },
        { "id": 456 },
        { "id": 1 },
        { "id": 1344 },
        { "id": 4 },
        { "id": 42, "_geo": { "lat": 35, "lng": 23 } }
    ]);
    let config = IndexerConfig::default();
    let indexing_config = IndexDocumentsConfig {
        update_method: IndexDocumentsMethod::ReplaceDocuments,
        ..Default::default()
    };
    let mut indexer =
        IndexDocuments::new(&mut indexing_txn, &index, &config, indexing_config, |_| ()).unwrap();
    indexer.add_documents(mixed_batch).unwrap();
    indexer.execute().unwrap();
    indexing_txn.commit().unwrap();

    // Step 2: in a fresh transaction, make `_geo` a filterable field.
    let mut settings_txn = index.write_txn().unwrap();
    let mut settings = update::Settings::new(&mut settings_txn, &index, &config);
    settings.set_filterable_fields(hashset!(S("_geo")));
    settings.execute(|_| ()).unwrap();
    settings_txn.commit().unwrap();
}
#[test]
fn index_all_flavour_of_geo() {
let path = tempfile::tempdir().unwrap();