Compare commits

...

5 Commits

Author SHA1 Message Date
Clément Renault
00315a1989
Merge 94a1f5a8ea into 057fcb3993 2024-11-20 09:11:38 +03:00
Lukas Kalbertodt
057fcb3993
Add indices field to _matchesPosition to specify where in an array a match comes from (#5005)
Some checks are pending
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Successful in 1h4m31s
* Remove unreachable code

* Add `indices` field to `MatchBounds`

For matches inside arrays, this field holds the indices of the array
elements that matched. For example, searching for `cat` inside
`{ "a": ["dog", "cat", "fox"] }` would return `indices: [1]`. For nested
arrays, this contains multiple indices, starting with the one for the
top-most array. For matches in fields without arrays, `indices` is not
serialized (does not exist) to save space.
2024-11-20 01:00:43 +01:00
meili-bors[bot]
c1d8ee2a8d
Merge #5048
Some checks failed
Test suite / Tests almost all features (push) Has been skipped
Test suite / Test disabled tokenization (push) Has been skipped
Test suite / Tests on ubuntu-20.04 (push) Failing after 14s
Test suite / Run tests in debug (push) Failing after 24s
Test suite / Tests on ${{ matrix.os }} (windows-2022) (push) Failing after 57s
Test suite / Run Rustfmt (push) Successful in 1m36s
Test suite / Run Clippy (push) Successful in 6m8s
Indexing bench (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of indexing (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for geo (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for songs (push) / Run and upload benchmarks (push) Waiting to run
Benchmarks of search for Wikipedia articles (push) / Run and upload benchmarks (push) Waiting to run
Run the indexing fuzzer / Setup the action (push) Successful in 1h4m23s
Test suite / Tests on ${{ matrix.os }} (macos-13) (push) Has been cancelled
5048: Reverse the order of the task queue r=Kerollmops a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5047

## What does this PR do?
- Provide a new parameter to reverse the order of the task queue
- Add tests
- Remove some unrelated tests that were duplicated in tests/tasks/mod.rs and tests/tasks/error.rs


Co-authored-by: Tamo <tamo@meilisearch.com>
2024-11-18 16:24:12 +00:00
Tamo
2eb1801e85 reverse the order of the task queue 2024-11-07 19:19:44 +01:00
Clément Renault
94a1f5a8ea
First draft just for the commands 2024-10-31 16:30:05 +01:00
10 changed files with 275 additions and 230 deletions

View File

@ -48,6 +48,27 @@ cargo xtask bench --no-dashboard -- workloads/my_workload_1.json workloads/my_wo
For processing the results, look at [Looking at benchmark results/Without dashboard](#without-dashboard).
#### Sending a workload by hand
Sometimes you want to visualize the metrics of a worlkoad that comes from a custom report.
It is not quite easy to trick the benchboard in thinking that your report is legitimate but here are the commands you can run to upload your firefox report on a running benchboard.
```bash
# Name this hostname whatever you want
echo '{ "hostname": "the-best-place" }' | xh PUT 'http://127.0.0.1:9001/api/v1/machine'
# You'll receive an UUID from this command that we will call $invocation_uuid
echo '{ "commit": { "sha1": "1234567", "commit_date": "2024-09-05 12:00:12.0 +00:00:00", "message": "A cool message" }, "machine_hostname": "the-best-place", "max_workloads": 1 }' | xh PUT 'http://127.0.0.1:9001/api/v1/invocation'
# Just use UUID from the previous command
# and you'll receive another UUID that we will call $workload_uuid
echo '{ "invocation_uuid": "$invocation_uuid", "name": "toto", "max_runs": 1 }' | xh PUT 'http://127.0.0.1:9001/api/v1/workload'
# And now use your $workload_uuid and the content of your firefox report
# but don't forget to convert your firefox report from JSONLines into an object
echo '{ "workload_uuid": "$workload_uuid", "data": $REPORT_JSON_DATA }' | xh PUT 'http://127.0.0.1:9001/api/v1/run'
```
### In CI
We have dedicated runners to run workloads on CI. Currently, there are three ways of running the CI:

View File

@ -84,6 +84,8 @@ pub struct Query {
pub limit: Option<u32>,
/// The minimum [task id](`meilisearch_types::tasks::Task::uid`) to be matched
pub from: Option<u32>,
/// The order used to return the tasks. By default the newest tasks are returned first and the boolean is `false`.
pub reverse: Option<bool>,
/// The allowed [statuses](`meilisearch_types::tasks::Task::status`) of the matched tasls
pub statuses: Option<Vec<Status>>,
/// The allowed [kinds](meilisearch_types::tasks::Kind) of the matched tasks.
@ -126,6 +128,7 @@ impl Query {
Query {
limit: None,
from: None,
reverse: None,
statuses: None,
types: None,
index_uids: None,
@ -706,7 +709,12 @@ impl IndexScheduler {
let mut tasks = self.all_task_ids(rtxn)?;
if let Some(from) = &query.from {
tasks.remove_range(from.saturating_add(1)..);
let range = if query.reverse.unwrap_or_default() {
u32::MIN..*from
} else {
from.saturating_add(1)..u32::MAX
};
tasks.remove_range(range);
}
if let Some(status) = &query.statuses {
@ -826,7 +834,11 @@ impl IndexScheduler {
)?;
if let Some(limit) = query.limit {
tasks = tasks.into_iter().rev().take(limit as usize).collect();
tasks = if query.reverse.unwrap_or_default() {
tasks.into_iter().take(limit as usize).collect()
} else {
tasks.into_iter().rev().take(limit as usize).collect()
};
}
Ok(tasks)
@ -951,10 +963,13 @@ impl IndexScheduler {
let rtxn = self.env.read_txn()?;
let (tasks, total) = self.get_task_ids_from_authorized_indexes(&rtxn, &query, filters)?;
let tasks = self.get_existing_tasks(
&rtxn,
tasks.into_iter().rev().take(query.limit.unwrap_or(u32::MAX) as usize),
)?;
let tasks = if query.reverse.unwrap_or_default() {
Box::new(tasks.into_iter()) as Box<dyn Iterator<Item = u32>>
} else {
Box::new(tasks.into_iter().rev()) as Box<dyn Iterator<Item = u32>>
};
let tasks =
self.get_existing_tasks(&rtxn, tasks.take(query.limit.unwrap_or(u32::MAX) as usize))?;
let ProcessingTasks { started_at, processing, .. } =
self.processing_tasks.read().map_err(|_| Error::CorruptedTaskQueue)?.clone();

View File

@ -318,6 +318,7 @@ InvalidTaskBeforeStartedAt , InvalidRequest , BAD_REQUEST ;
InvalidTaskCanceledBy , InvalidRequest , BAD_REQUEST ;
InvalidTaskFrom , InvalidRequest , BAD_REQUEST ;
InvalidTaskLimit , InvalidRequest , BAD_REQUEST ;
InvalidTaskReverse , InvalidRequest , BAD_REQUEST ;
InvalidTaskStatuses , InvalidRequest , BAD_REQUEST ;
InvalidTaskTypes , InvalidRequest , BAD_REQUEST ;
InvalidTaskUids , InvalidRequest , BAD_REQUEST ;

View File

@ -42,6 +42,8 @@ pub struct TasksFilterQuery {
pub limit: Param<u32>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskFrom>)]
pub from: Option<Param<TaskId>>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskReverse>)]
pub reverse: Option<Param<bool>>,
#[deserr(default, error = DeserrQueryParamError<InvalidTaskUids>)]
pub uids: OptionStarOrList<u32>,
@ -73,6 +75,7 @@ impl TasksFilterQuery {
Query {
limit: Some(self.limit.0),
from: self.from.as_deref().copied(),
reverse: self.reverse.as_deref().copied(),
statuses: self.statuses.merge_star_and_none(),
types: self.types.merge_star_and_none(),
index_uids: self.index_uids.map(|x| x.to_string()).merge_star_and_none(),
@ -142,6 +145,7 @@ impl TaskDeletionOrCancelationQuery {
Query {
limit: None,
from: None,
reverse: None,
statuses: self.statuses.merge_star_and_none(),
types: self.types.merge_star_and_none(),
index_uids: self.index_uids.map(|x| x.to_string()).merge_star_and_none(),
@ -701,14 +705,14 @@ mod tests {
{
let params = "from=12&limit=15&indexUids=toto,tata-78&statuses=succeeded,enqueued&afterEnqueuedAt=2012-04-23&uids=1,2,3";
let query = deserr_query_params::<TasksFilterQuery>(params).unwrap();
snapshot!(format!("{:?}", query), @r###"TasksFilterQuery { limit: Param(15), from: Some(Param(12)), uids: List([1, 2, 3]), canceled_by: None, types: None, statuses: List([Succeeded, Enqueued]), index_uids: List([IndexUid("toto"), IndexUid("tata-78")]), after_enqueued_at: Other(2012-04-24 0:00:00.0 +00:00:00), before_enqueued_at: None, after_started_at: None, before_started_at: None, after_finished_at: None, before_finished_at: None }"###);
snapshot!(format!("{:?}", query), @r###"TasksFilterQuery { limit: Param(15), from: Some(Param(12)), reverse: None, uids: List([1, 2, 3]), canceled_by: None, types: None, statuses: List([Succeeded, Enqueued]), index_uids: List([IndexUid("toto"), IndexUid("tata-78")]), after_enqueued_at: Other(2012-04-24 0:00:00.0 +00:00:00), before_enqueued_at: None, after_started_at: None, before_started_at: None, after_finished_at: None, before_finished_at: None }"###);
}
{
// Stars should translate to `None` in the query
// Verify value of the default limit
let params = "indexUids=*&statuses=succeeded,*&afterEnqueuedAt=2012-04-23&uids=1,2,3";
let query = deserr_query_params::<TasksFilterQuery>(params).unwrap();
snapshot!(format!("{:?}", query), @"TasksFilterQuery { limit: Param(20), from: None, uids: List([1, 2, 3]), canceled_by: None, types: None, statuses: Star, index_uids: Star, after_enqueued_at: Other(2012-04-24 0:00:00.0 +00:00:00), before_enqueued_at: None, after_started_at: None, before_started_at: None, after_finished_at: None, before_finished_at: None }");
snapshot!(format!("{:?}", query), @"TasksFilterQuery { limit: Param(20), from: None, reverse: None, uids: List([1, 2, 3]), canceled_by: None, types: None, statuses: Star, index_uids: Star, after_enqueued_at: Other(2012-04-24 0:00:00.0 +00:00:00), before_enqueued_at: None, after_started_at: None, before_started_at: None, after_finished_at: None, before_finished_at: None }");
}
{
// Stars should also translate to `None` in task deletion/cancelation queries

View File

@ -1733,7 +1733,10 @@ fn format_fields(
// select the attributes to retrieve
let displayable_names =
displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
permissive_json_pointer::map_leaf_values(&mut document, displayable_names, |key, value| {
permissive_json_pointer::map_leaf_values(
&mut document,
displayable_names,
|key, array_indices, value| {
// To get the formatting option of each key we need to see all the rules that applies
// to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
// and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
@ -1764,6 +1767,7 @@ fn format_fields(
format,
&mut infos,
compute_matches,
array_indices,
locales,
);
@ -1772,7 +1776,8 @@ fn format_fields(
matches.insert(key.to_owned(), infos);
}
}
});
},
);
let selectors = formatted_options
.keys()
@ -1790,13 +1795,14 @@ fn format_value(
format_options: Option<FormatOptions>,
infos: &mut Vec<MatchBounds>,
compute_matches: bool,
array_indices: &[usize],
locales: Option<&[Language]>,
) -> Value {
match value {
Value::String(old_string) => {
let mut matcher = builder.build(&old_string, locales);
if compute_matches {
let matches = matcher.matches();
let matches = matcher.matches(array_indices);
infos.extend_from_slice(&matches[..]);
}
@ -1808,51 +1814,15 @@ fn format_value(
None => Value::String(old_string),
}
}
Value::Array(values) => Value::Array(
values
.into_iter()
.map(|v| {
format_value(
v,
builder,
format_options.map(|format_options| FormatOptions {
highlight: format_options.highlight,
crop: None,
}),
infos,
compute_matches,
locales,
)
})
.collect(),
),
Value::Object(object) => Value::Object(
object
.into_iter()
.map(|(k, v)| {
(
k,
format_value(
v,
builder,
format_options.map(|format_options| FormatOptions {
highlight: format_options.highlight,
crop: None,
}),
infos,
compute_matches,
locales,
),
)
})
.collect(),
),
// `map_leaf_values` makes sure this is only called for leaf fields
Value::Array(_) => unreachable!(),
Value::Object(_) => unreachable!(),
Value::Number(number) => {
let s = number.to_string();
let mut matcher = builder.build(&s, locales);
if compute_matches {
let matches = matcher.matches();
let matches = matcher.matches(array_indices);
infos.extend_from_slice(&matches[..]);
}

View File

@ -208,7 +208,10 @@ async fn format_nested() {
"doggos.name": [
{
"start": 0,
"length": 5
"length": 5,
"indices": [
0
]
}
]
}

View File

@ -279,6 +279,55 @@ async fn task_bad_from() {
"###);
}
#[actix_rt::test]
async fn task_bad_reverse() {
let server = Server::new_shared();
let (response, code) = server.tasks_filter("reverse=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value in parameter `reverse`: could not parse `doggo` as a boolean, expected either `true` or `false`",
"code": "invalid_task_reverse",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_reverse"
}
"###);
let (response, code) = server.tasks_filter("reverse=*").await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid value in parameter `reverse`: could not parse `*` as a boolean, expected either `true` or `false`",
"code": "invalid_task_reverse",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_reverse"
}
"###);
let (response, code) = server.cancel_tasks("reverse=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Unknown parameter `reverse`: expected one of `uids`, `canceledBy`, `types`, `statuses`, `indexUids`, `afterEnqueuedAt`, `beforeEnqueuedAt`, `afterStartedAt`, `beforeStartedAt`, `afterFinishedAt`, `beforeFinishedAt`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
let (response, code) = server.delete_tasks("reverse=doggo").await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Unknown parameter `reverse`: expected one of `uids`, `canceledBy`, `types`, `statuses`, `indexUids`, `afterEnqueuedAt`, `beforeEnqueuedAt`, `afterStartedAt`, `beforeStartedAt`, `afterFinishedAt`, `beforeFinishedAt`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
}
#[actix_rt::test]
async fn task_bad_after_enqueued_at() {
let server = Server::new_shared();

View File

@ -62,6 +62,44 @@ async fn list_tasks() {
assert_eq!(response["results"].as_array().unwrap().len(), 2);
}
#[actix_rt::test]
async fn list_tasks_pagination_and_reverse() {
let server = Server::new().await;
// First of all we want to create a lot of tasks very quickly. The fastest way is to delete a lot of unexisting indexes
let mut last_task = None;
for i in 0..10 {
let index = server.index(format!("test-{i}"));
last_task = Some(index.create(None).await.0.uid());
}
server.wait_task(last_task.unwrap()).await;
let (response, code) = server.tasks_filter("limit=3").await;
assert_eq!(code, 200);
let results = response["results"].as_array().unwrap();
let task_ids: Vec<_> = results.iter().map(|ret| ret["uid"].as_u64().unwrap()).collect();
snapshot!(format!("{task_ids:?}"), @"[9, 8, 7]");
let (response, code) = server.tasks_filter("limit=3&from=1").await;
assert_eq!(code, 200);
let results = response["results"].as_array().unwrap();
let task_ids: Vec<_> = results.iter().map(|ret| ret["uid"].as_u64().unwrap()).collect();
snapshot!(format!("{task_ids:?}"), @"[1, 0]");
// In reversed order
let (response, code) = server.tasks_filter("limit=3&reverse=true").await;
assert_eq!(code, 200);
let results = response["results"].as_array().unwrap();
let task_ids: Vec<_> = results.iter().map(|ret| ret["uid"].as_u64().unwrap()).collect();
snapshot!(format!("{task_ids:?}"), @"[0, 1, 2]");
let (response, code) = server.tasks_filter("limit=3&from=8&reverse=true").await;
assert_eq!(code, 200);
let results = response["results"].as_array().unwrap();
let task_ids: Vec<_> = results.iter().map(|ret| ret["uid"].as_u64().unwrap()).collect();
snapshot!(format!("{task_ids:?}"), @"[8, 9]");
}
#[actix_rt::test]
async fn list_tasks_with_star_filters() {
let server = Server::new().await;
@ -193,131 +231,6 @@ async fn list_tasks_status_and_type_filtered() {
assert_eq!(response["results"].as_array().unwrap().len(), 2);
}
#[actix_rt::test]
async fn get_task_filter_error() {
let server = Server::new().await;
let (response, code) = server.tasks_filter("lol=pied").await;
assert_eq!(code, 400, "{}", response);
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown parameter `lol`: expected one of `limit`, `from`, `uids`, `canceledBy`, `types`, `statuses`, `indexUids`, `afterEnqueuedAt`, `beforeEnqueuedAt`, `afterStartedAt`, `beforeStartedAt`, `afterFinishedAt`, `beforeFinishedAt`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
let (response, code) = server.tasks_filter("uids=pied").await;
assert_eq!(code, 400, "{}", response);
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Invalid value in parameter `uids`: could not parse `pied` as a positive integer",
"code": "invalid_task_uids",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_uids"
}
"###);
let (response, code) = server.tasks_filter("from=pied").await;
assert_eq!(code, 400, "{}", response);
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Invalid value in parameter `from`: could not parse `pied` as a positive integer",
"code": "invalid_task_from",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_from"
}
"###);
let (response, code) = server.tasks_filter("beforeStartedAt=pied").await;
assert_eq!(code, 400, "{}", response);
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Invalid value in parameter `beforeStartedAt`: `pied` is an invalid date-time. It should follow the YYYY-MM-DD or RFC 3339 date-time format.",
"code": "invalid_task_before_started_at",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_before_started_at"
}
"###);
}
#[actix_rt::test]
async fn delete_task_filter_error() {
let server = Server::new().await;
let (response, code) = server.delete_tasks("").await;
assert_eq!(code, 400, "{}", response);
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Query parameters to filter the tasks to delete are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.",
"code": "missing_task_filters",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_task_filters"
}
"###);
let (response, code) = server.delete_tasks("lol=pied").await;
assert_eq!(code, 400, "{}", response);
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown parameter `lol`: expected one of `uids`, `canceledBy`, `types`, `statuses`, `indexUids`, `afterEnqueuedAt`, `beforeEnqueuedAt`, `afterStartedAt`, `beforeStartedAt`, `afterFinishedAt`, `beforeFinishedAt`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
let (response, code) = server.delete_tasks("uids=pied").await;
assert_eq!(code, 400, "{}", response);
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Invalid value in parameter `uids`: could not parse `pied` as a positive integer",
"code": "invalid_task_uids",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_uids"
}
"###);
}
#[actix_rt::test]
async fn cancel_task_filter_error() {
let server = Server::new().await;
let (response, code) = server.cancel_tasks("").await;
assert_eq!(code, 400, "{}", response);
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Query parameters to filter the tasks to cancel are missing. Available query parameters are: `uids`, `indexUids`, `statuses`, `types`, `canceledBy`, `beforeEnqueuedAt`, `afterEnqueuedAt`, `beforeStartedAt`, `afterStartedAt`, `beforeFinishedAt`, `afterFinishedAt`.",
"code": "missing_task_filters",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_task_filters"
}
"###);
let (response, code) = server.cancel_tasks("lol=pied").await;
assert_eq!(code, 400, "{}", response);
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown parameter `lol`: expected one of `uids`, `canceledBy`, `types`, `statuses`, `indexUids`, `afterEnqueuedAt`, `beforeEnqueuedAt`, `afterStartedAt`, `beforeStartedAt`, `afterFinishedAt`, `beforeFinishedAt`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"
}
"###);
let (response, code) = server.cancel_tasks("uids=pied").await;
assert_eq!(code, 400, "{}", response);
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Invalid value in parameter `uids`: could not parse `pied` as a positive integer",
"code": "invalid_task_uids",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_task_uids"
}
"###);
}
macro_rules! assert_valid_summarized_task {
($response:expr, $task_type:literal, $index:literal) => {{
assert_eq!($response.as_object().unwrap().len(), 5);

View File

@ -105,6 +105,8 @@ impl FormatOptions {
pub struct MatchBounds {
pub start: usize,
pub length: usize,
#[serde(skip_serializing_if = "Option::is_none")]
pub indices: Option<Vec<usize>>,
}
/// Structure used to analyze a string, compute words that match,
@ -220,15 +222,20 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
}
/// Returns boundaries of the words that match the query.
pub fn matches(&mut self) -> Vec<MatchBounds> {
pub fn matches(&mut self, array_indices: &[usize]) -> Vec<MatchBounds> {
match &self.matches {
None => self.compute_matches().matches(),
None => self.compute_matches().matches(array_indices),
Some((tokens, matches)) => matches
.iter()
.map(|m| MatchBounds {
start: tokens[m.get_first_token_pos()].byte_start,
// TODO: Why is this in chars, while start is in bytes?
length: m.char_count,
indices: if array_indices.is_empty() {
None
} else {
Some(array_indices.to_owned())
},
})
.collect(),
}

View File

@ -45,7 +45,7 @@ fn contained_in(selector: &str, key: &str) -> bool {
/// map_leaf_values(
/// value.as_object_mut().unwrap(),
/// ["jean.race.name"],
/// |key, value| match (value, key) {
/// |key, _array_indices, value| match (value, key) {
/// (Value::String(name), "jean.race.name") => *name = "patou".to_string(),
/// _ => unreachable!(),
/// },
@ -66,17 +66,18 @@ fn contained_in(selector: &str, key: &str) -> bool {
pub fn map_leaf_values<'a>(
value: &mut Map<String, Value>,
selectors: impl IntoIterator<Item = &'a str>,
mut mapper: impl FnMut(&str, &mut Value),
mut mapper: impl FnMut(&str, &[usize], &mut Value),
) {
let selectors: Vec<_> = selectors.into_iter().collect();
map_leaf_values_in_object(value, &selectors, "", &mut mapper);
map_leaf_values_in_object(value, &selectors, "", &[], &mut mapper);
}
pub fn map_leaf_values_in_object(
value: &mut Map<String, Value>,
selectors: &[&str],
base_key: &str,
mapper: &mut impl FnMut(&str, &mut Value),
array_indices: &[usize],
mapper: &mut impl FnMut(&str, &[usize], &mut Value),
) {
for (key, value) in value.iter_mut() {
let base_key = if base_key.is_empty() {
@ -94,12 +95,12 @@ pub fn map_leaf_values_in_object(
if should_continue {
match value {
Value::Object(object) => {
map_leaf_values_in_object(object, selectors, &base_key, mapper)
map_leaf_values_in_object(object, selectors, &base_key, array_indices, mapper)
}
Value::Array(array) => {
map_leaf_values_in_array(array, selectors, &base_key, mapper)
map_leaf_values_in_array(array, selectors, &base_key, array_indices, mapper)
}
value => mapper(&base_key, value),
value => mapper(&base_key, array_indices, value),
}
}
}
@ -109,13 +110,24 @@ pub fn map_leaf_values_in_array(
values: &mut [Value],
selectors: &[&str],
base_key: &str,
mapper: &mut impl FnMut(&str, &mut Value),
base_array_indices: &[usize],
mapper: &mut impl FnMut(&str, &[usize], &mut Value),
) {
for value in values.iter_mut() {
// This avoids allocating twice
let mut array_indices = Vec::with_capacity(base_array_indices.len() + 1);
array_indices.extend_from_slice(base_array_indices);
array_indices.push(0);
for (i, value) in values.iter_mut().enumerate() {
*array_indices.last_mut().unwrap() = i;
match value {
Value::Object(object) => map_leaf_values_in_object(object, selectors, base_key, mapper),
Value::Array(array) => map_leaf_values_in_array(array, selectors, base_key, mapper),
value => mapper(base_key, value),
Value::Object(object) => {
map_leaf_values_in_object(object, selectors, base_key, &array_indices, mapper)
}
Value::Array(array) => {
map_leaf_values_in_array(array, selectors, base_key, &array_indices, mapper)
}
value => mapper(base_key, &array_indices, value),
}
}
}
@ -743,12 +755,14 @@ mod tests {
}
});
map_leaf_values(value.as_object_mut().unwrap(), ["jean.race.name"], |key, value| {
match (value, key) {
map_leaf_values(
value.as_object_mut().unwrap(),
["jean.race.name"],
|key, _, value| match (value, key) {
(Value::String(name), "jean.race.name") => *name = S("patou"),
_ => unreachable!(),
}
});
},
);
assert_eq!(
value,
@ -775,7 +789,7 @@ mod tests {
});
let mut calls = 0;
map_leaf_values(value.as_object_mut().unwrap(), ["jean"], |key, value| {
map_leaf_values(value.as_object_mut().unwrap(), ["jean"], |key, _, value| {
calls += 1;
match (value, key) {
(Value::String(name), "jean.race.name") => *name = S("patou"),
@ -798,4 +812,52 @@ mod tests {
})
);
}
#[test]
fn map_array() {
let mut value: Value = json!({
"no_array": "peter",
"simple": ["foo", "bar"],
"nested": [
{
"a": [
["cat", "dog"],
["fox", "bear"],
],
"b": "hi",
},
{
"a": ["green", "blue"],
},
],
});
map_leaf_values(
value.as_object_mut().unwrap(),
["no_array", "simple", "nested"],
|_key, array_indices, value| {
*value = format!("{array_indices:?}").into();
},
);
assert_eq!(
value,
json!({
"no_array": "[]",
"simple": ["[0]", "[1]"],
"nested": [
{
"a": [
["[0, 0, 0]", "[0, 0, 1]"],
["[0, 1, 0]", "[0, 1, 1]"],
],
"b": "[0]",
},
{
"a": ["[1, 0]", "[1, 1]"],
},
],
})
);
}
}