2020-06-09 00:05:14 +08:00
|
|
|
use std::cmp;
|
2020-06-11 03:35:01 +08:00
|
|
|
use std::time::Instant;
|
|
|
|
|
2020-06-14 18:51:54 +08:00
|
|
|
use crate::iter_shortest_paths::astar_bag;
|
2020-06-09 00:05:14 +08:00
|
|
|
|
|
|
|
// Width of one attribute in the position encoding: `extract_position` splits a
// position into `position / ONE_ATTRIBUTE` (attribute) and
// `position % ONE_ATTRIBUTE` (index inside that attribute).
const ONE_ATTRIBUTE: u32 = 1000;
|
|
|
|
// Largest proximity two positions can have: `index_proximity` caps its result
// at this value and `positions_proximity` returns it whenever the two
// positions belong to different attributes.
const MAX_DISTANCE: u32 = 8;
|
|
|
|
|
2020-06-09 23:32:25 +08:00
|
|
|
fn index_proximity(lhs: u32, rhs: u32) -> u32 {
|
2020-06-10 22:27:02 +08:00
|
|
|
if lhs <= rhs {
|
2020-06-09 23:32:25 +08:00
|
|
|
cmp::min(rhs - lhs, MAX_DISTANCE)
|
|
|
|
} else {
|
2020-06-16 18:10:23 +08:00
|
|
|
cmp::min((lhs - rhs) + 1, MAX_DISTANCE)
|
2020-06-09 23:32:25 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-16 18:10:23 +08:00
|
|
|
pub fn positions_proximity(lhs: u32, rhs: u32) -> u32 {
|
2020-06-09 23:32:25 +08:00
|
|
|
let (lhs_attr, lhs_index) = extract_position(lhs);
|
|
|
|
let (rhs_attr, rhs_index) = extract_position(rhs);
|
|
|
|
if lhs_attr != rhs_attr { MAX_DISTANCE }
|
|
|
|
else { index_proximity(lhs_index, rhs_index) }
|
|
|
|
}
|
|
|
|
|
2020-06-09 00:05:14 +08:00
|
|
|
// Returns the attribute and index parts.
|
|
|
|
fn extract_position(position: u32) -> (u32, u32) {
|
|
|
|
(position / ONE_ATTRIBUTE, position % ONE_ATTRIBUTE)
|
|
|
|
}
|
|
|
|
|
2020-06-12 18:53:08 +08:00
|
|
|
/// A node of the graph that is explored to find the best proximities.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
enum Node {
    /// The starting node of the search, not attached to any layer yet.
    Uninit,
    /// A node attached to one position of one layer.
    Init {
        /// The layer in which this node is located.
        layer: usize,
        /// The position at which this node is located.
        position: u32,
        /// The total proximity accumulated up to this node,
        /// used to skip nodes.
        acc_proximity: u32,
        /// The position of the parent node, in the layer above.
        parent_position: u32,
    },
}
|
2020-06-09 00:05:14 +08:00
|
|
|
|
2020-06-12 18:53:08 +08:00
|
|
|
impl Node {
|
|
|
|
// TODO we must skip the successors that have already been seen
|
2020-06-11 23:43:06 +08:00
|
|
|
// TODO we must skip the successors that doesn't return any documents
|
|
|
|
// this way we are able to skip entire paths
|
2020-06-14 18:51:54 +08:00
|
|
|
fn successors(&self, positions: &[Vec<u32>], best_proximity: u32) -> Vec<(Node, u32)> {
|
2020-06-12 18:53:08 +08:00
|
|
|
match self {
|
|
|
|
Node::Uninit => {
|
2020-06-12 20:08:10 +08:00
|
|
|
positions[0].iter().map(|p| {
|
2020-06-14 18:51:54 +08:00
|
|
|
(Node::Init { layer: 0, position: *p, acc_proximity: 0, parent_position: 0 }, 0)
|
2020-06-12 20:08:10 +08:00
|
|
|
}).collect()
|
2020-06-12 18:53:08 +08:00
|
|
|
},
|
|
|
|
// We reached the highest layer
|
|
|
|
n @ Node::Init { .. } if n.is_complete(positions) => vec![],
|
2020-06-14 18:51:54 +08:00
|
|
|
Node::Init { layer, position, acc_proximity, .. } => {
|
2020-06-13 06:17:43 +08:00
|
|
|
positions[layer + 1].iter().filter_map(|p| {
|
2020-06-12 18:53:08 +08:00
|
|
|
let proximity = positions_proximity(*position, *p);
|
2020-06-14 18:51:54 +08:00
|
|
|
let node = Node::Init {
|
|
|
|
layer: layer + 1,
|
|
|
|
position: *p,
|
|
|
|
acc_proximity: acc_proximity + proximity,
|
|
|
|
parent_position: *position,
|
|
|
|
};
|
|
|
|
// We do not produce the nodes we have already seen in previous iterations loops.
|
2020-06-18 21:42:46 +08:00
|
|
|
if proximity > 7 || (node.is_complete(positions) && acc_proximity + proximity < best_proximity) {
|
2020-06-13 06:17:43 +08:00
|
|
|
None
|
2020-06-14 18:51:54 +08:00
|
|
|
} else {
|
|
|
|
Some((node, proximity))
|
2020-06-12 18:53:08 +08:00
|
|
|
}
|
|
|
|
}).collect()
|
|
|
|
}
|
|
|
|
}
|
2020-06-10 05:06:59 +08:00
|
|
|
}
|
2020-06-09 00:05:14 +08:00
|
|
|
|
2020-06-12 18:53:08 +08:00
|
|
|
fn is_complete(&self, positions: &[Vec<u32>]) -> bool {
|
|
|
|
match self {
|
|
|
|
Node::Uninit => false,
|
|
|
|
Node::Init { layer, .. } => *layer == positions.len() - 1,
|
|
|
|
}
|
2020-06-11 23:43:06 +08:00
|
|
|
}
|
|
|
|
|
2020-06-12 18:53:08 +08:00
|
|
|
fn position(&self) -> Option<u32> {
|
|
|
|
match self {
|
|
|
|
Node::Uninit => None,
|
|
|
|
Node::Init { position, .. } => Some(*position),
|
|
|
|
}
|
2020-06-09 00:05:14 +08:00
|
|
|
}
|
2020-06-14 18:51:54 +08:00
|
|
|
|
|
|
|
fn proximity(&self) -> u32 {
|
|
|
|
match self {
|
|
|
|
Node::Uninit => 0,
|
|
|
|
Node::Init { layer, position, acc_proximity, parent_position } => {
|
|
|
|
if layer.checked_sub(1).is_some() {
|
|
|
|
acc_proximity + positions_proximity(*position, *parent_position)
|
|
|
|
} else {
|
|
|
|
0
|
|
|
|
}
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-06-14 19:10:30 +08:00
|
|
|
fn is_reachable<F>(&self, contains_documents: &mut F) -> bool
|
2020-06-14 18:51:54 +08:00
|
|
|
where F: FnMut((usize, u32), (usize, u32)) -> bool,
|
|
|
|
{
|
|
|
|
match self {
|
|
|
|
Node::Uninit => true,
|
|
|
|
Node::Init { layer, position, parent_position, .. } => {
|
|
|
|
match layer.checked_sub(1) {
|
|
|
|
Some(parent_layer) => {
|
|
|
|
(contains_documents)((parent_layer, *parent_position), (*layer, *position))
|
|
|
|
},
|
|
|
|
None => true,
|
|
|
|
}
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
2020-06-09 00:05:14 +08:00
|
|
|
}
|
|
|
|
|
2020-06-13 06:17:43 +08:00
|
|
|
/// State of the search that yields paths of positions by proximity.
pub struct BestProximity<F> {
    /// The candidate positions, one list per layer.
    positions: Vec<Vec<u32>>,
    /// The smallest total proximity that the next search may return.
    best_proximity: u32,
    /// Callback given the `(layer, position)` of a parent node and of one of
    /// its children; a node is only explored when it returns `true`.
    contains_documents: F,
}
|
|
|
|
|
2020-06-13 06:17:43 +08:00
|
|
|
impl<F> BestProximity<F> {
|
|
|
|
pub fn new(positions: Vec<Vec<u32>>, contains_documents: F) -> BestProximity<F> {
|
2020-06-14 18:51:54 +08:00
|
|
|
let best_proximity = (positions.len() as u32).saturating_sub(1);
|
2020-06-13 17:16:02 +08:00
|
|
|
BestProximity { positions, best_proximity, contains_documents }
|
2020-06-10 05:06:59 +08:00
|
|
|
}
|
2020-06-09 00:05:14 +08:00
|
|
|
}
|
|
|
|
|
2020-06-13 06:17:43 +08:00
|
|
|
impl<F> Iterator for BestProximity<F>
|
2020-06-14 19:10:30 +08:00
|
|
|
where F: FnMut((usize, u32), (usize, u32)) -> bool,
|
2020-06-13 06:17:43 +08:00
|
|
|
{
|
2020-06-09 23:32:25 +08:00
|
|
|
type Item = (u32, Vec<Vec<u32>>);
|
2020-06-09 00:05:14 +08:00
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
2020-06-11 03:35:01 +08:00
|
|
|
let before = Instant::now();
|
|
|
|
|
2020-06-18 21:42:46 +08:00
|
|
|
if self.best_proximity == self.positions.len() as u32 * (MAX_DISTANCE - 1) {
|
2020-06-11 23:43:06 +08:00
|
|
|
return None;
|
2020-06-09 23:32:25 +08:00
|
|
|
}
|
2020-06-10 05:06:59 +08:00
|
|
|
|
2020-06-14 19:10:30 +08:00
|
|
|
let BestProximity { positions, best_proximity, contains_documents } = self;
|
|
|
|
|
2020-06-11 23:43:06 +08:00
|
|
|
let result = astar_bag(
|
2020-06-12 18:53:08 +08:00
|
|
|
&Node::Uninit, // start
|
2020-06-14 19:10:30 +08:00
|
|
|
|n| n.successors(&positions, *best_proximity),
|
2020-06-12 18:53:08 +08:00
|
|
|
|_| 0, // heuristic
|
2020-06-14 18:51:54 +08:00
|
|
|
|n| { // success
|
2020-06-14 19:10:30 +08:00
|
|
|
let c = n.is_complete(&positions) && n.proximity() >= *best_proximity;
|
|
|
|
if n.is_reachable(contains_documents) { Some(c) } else { None }
|
2020-06-14 18:51:54 +08:00
|
|
|
},
|
2020-06-11 23:43:06 +08:00
|
|
|
);
|
|
|
|
|
2020-06-11 03:35:01 +08:00
|
|
|
eprintln!("BestProximity::next() took {:.02?}", before.elapsed());
|
|
|
|
|
2020-06-11 23:43:06 +08:00
|
|
|
match result {
|
|
|
|
Some((paths, proximity)) => {
|
|
|
|
self.best_proximity = proximity + 1;
|
|
|
|
// We retrieve the last path that we convert into a Vec
|
2020-06-12 18:53:08 +08:00
|
|
|
let paths: Vec<_> = paths.map(|p| p.iter().filter_map(Node::position).collect()).collect();
|
2020-06-11 23:43:06 +08:00
|
|
|
eprintln!("result: {} {:?}", proximity, paths);
|
|
|
|
Some((proximity, paths))
|
|
|
|
},
|
|
|
|
None => {
|
|
|
|
eprintln!("result: {:?}", None as Option<()>);
|
|
|
|
self.best_proximity += 1;
|
|
|
|
None
|
|
|
|
},
|
2020-06-10 20:20:35 +08:00
|
|
|
}
|
2020-06-09 00:05:14 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// All the positions live in the same attribute: every path must be
    /// returned, ordered by increasing proximity, and then the iterator stops.
    #[test]
    fn same_attribute() {
        let layers = vec![
            vec![0, 2, 3, 4],
            vec![1],
            vec![3, 6],
        ];
        let mut iter = BestProximity::new(layers, |_, _| true);

        let expected = vec![
            (1 + 2, vec![vec![0, 1, 3]]),                // 3
            (2 + 2, vec![vec![2, 1, 3]]),                // 4
            (3 + 2, vec![vec![3, 1, 3]]),                // 5
            (1 + 5, vec![vec![0, 1, 6], vec![4, 1, 3]]), // 6
            (2 + 5, vec![vec![2, 1, 6]]),                // 7
            (3 + 5, vec![vec![3, 1, 6]]),                // 8
            (4 + 5, vec![vec![4, 1, 6]]),                // 9
        ];

        for item in expected {
            assert_eq!(iter.next(), Some(item));
        }
        assert_eq!(iter.next(), None);
    }

    /// The positions are spread over multiple attributes: only the first
    /// results are checked.
    #[test]
    fn different_attributes() {
        let layers = vec![
            vec![0, 2, 1000, 1001, 2000],
            vec![1, 1000, 2001],
            vec![3, 6, 2002, 3000],
        ];
        let mut iter = BestProximity::new(layers, |_, _| true);

        let expected = vec![
            (1 + 1, vec![vec![2000, 2001, 2002]]), // 2
            (1 + 2, vec![vec![0, 1, 3]]),          // 3
            (2 + 2, vec![vec![2, 1, 3]]),          // 4
            (1 + 5, vec![vec![0, 1, 6]]),          // 6
        ];

        for item in expected {
            assert_eq!(iter.next(), Some(item));
        }
        // We ignore others here...
    }

    /// Checks the proximity of a whole path computed by hand.
    #[test]
    fn easy_proximities() {
        // Sums the proximities of every pair of consecutive positions.
        fn slice_proximity(positions: &[u32]) -> u32 {
            positions
                .windows(2)
                .map(|pair| positions_proximity(pair[0], pair[1]))
                .sum::<u32>()
        }

        assert_eq!(slice_proximity(&[1000, 1000, 2002]), 8);
    }
}
|