Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions diskann-providers/src/index/diskann_async.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4381,4 +4381,97 @@ pub(crate) mod tests {
filter.visit_count()
);
}

#[tokio::test]
async fn vectors_with_infinity_values_should_be_inserted_and_searched_without_panic() {
    // Regression test: building an f16 index whose trailing vectors overflow to
    // +infinity, and then searching it, must neither panic nor return nothing.
    const VECTORS_DIMENSION: usize = 384;

    let l_build: usize = 20;
    // Insert more points than `l_build` so the priority queue is filled to capacity.
    let insert_count = l_build + 10;
    // Start just below `f16::MAX`: the first vector stays finite while the last
    // one overflows to infinity once converted to f16 (both asserted below).
    let vector_value_start: f32 = half::f16::MAX.to_f32() - insert_count as f32 / 3.0;

    let (config, mut parameters) = simplified_builder(
        l_build,
        32,
        Metric::L2,
        VECTORS_DIMENSION,
        insert_count,
        |_| {},
    )
    .unwrap();
    parameters.frozen_points = NonZeroUsize::new(1).unwrap();

    let index = new_index::<half::f16, _>(config, parameters, NoDeletes).unwrap();

    // Every component of vector `i` holds the same value: `vector_value_start + i`.
    let vectors: Vec<[half::f16; VECTORS_DIMENSION]> = (0..insert_count)
        .map(|i| [half::f16::from_f32(vector_value_start + i as f32); VECTORS_DIMENSION])
        .collect();

    let first_component = vectors[0][0];
    assert_ne!(
        first_component,
        half::f16::INFINITY,
        "First vector should not have infinity value"
    );
    let last_component = vectors[vectors.len() - 1][0];
    assert_eq!(
        last_component,
        half::f16::INFINITY,
        "Last vector should have infinity value"
    );

    // The position in `vectors` doubles as the external vector id.
    for (position, vector) in vectors.iter().enumerate() {
        let id = position as u32;
        index
            .insert(FullPrecision, &DefaultContext, &id, vector)
            .await
            .unwrap();
    }

    // A single all-zero query vector.
    let query_count: usize = 1;
    let mut queries = crate::common::AlignedBoxWithSlice::<half::f16>::new(
        query_count * VECTORS_DIMENSION,
        32,
    )
    .unwrap();
    queries.as_mut_slice()[..query_count * VECTORS_DIMENSION].fill(half::f16::from_f32(0f32));

    let top_k = l_build;
    let search_l = l_build;
    let mut ids = vec![0; top_k];
    let mut distances = vec![0.0; top_k];
    let ctx = DefaultContext;
    let search_params = graph::search::Knn::new_default(top_k, search_l).unwrap();

    let query_vectors = queries
        .as_slice()
        .chunks_exact(VECTORS_DIMENSION)
        .take(query_count);
    for (i, query_vector) in query_vectors.enumerate() {
        let mut result_output_buffer =
            search_output_buffer::IdDistance::new(&mut ids, &mut distances);

        // Full Precision Search.
        let search_result = index
            .search(
                search_params,
                &FullPrecision,
                &ctx,
                query_vector,
                &mut result_output_buffer,
            )
            .await
            .unwrap();

        assert!(
            search_result.result_count > 0,
            "Expected non-empty result for query {}",
            i
        );
    }
}
}
71 changes: 71 additions & 0 deletions diskann/src/neighbor/queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,11 @@ impl<I: NeighborPriorityQueueIdType> NeighborPriorityQueue<I> {
/// Because this function is performance-sensitive, we don't check the item for uniqueness.
/// Inserting the same item twice will cause undefined behavior.
pub fn insert(&mut self, nbr: Neighbor<I>) {
if nbr.distance.is_nan() {
// We don't support NaN distances. If we see one, we ignore the insert since we can't determine where it belongs in the sorted order.
return;
}

self.dbgassert_unique_insert(nbr.id);

if self.auto_resizable {
Expand Down Expand Up @@ -1264,6 +1269,72 @@ mod neighbor_priority_queue_test {
assert_eq!(queue.get(3).id, 5); // 2.0
}

#[test]
fn test_insert_neighbors_with_infinity_distance() {
    // Inserting only infinite-distance neighbors must fill the queue up to its
    // capacity and never beyond it.
    let mut queue = NeighborPriorityQueue::new(5);
    assert_eq!((queue.size(), queue.capacity()), (0, 5));

    // Below capacity: every infinite-distance neighbor is accepted.
    for nbr in (0..2).map(|id| Neighbor::new(id, f32::INFINITY)) {
        queue.insert(nbr);
    }
    assert_eq!((queue.size(), queue.capacity()), (2, 5));

    // Past capacity: the size saturates at the capacity.
    for nbr in (2..10).map(|id| Neighbor::new(id, f32::INFINITY)) {
        queue.insert(nbr);
    }
    assert_eq!((queue.size(), queue.capacity()), (5, 5));

    // NOTE(review): the comparison itself is trivially satisfied by the ids
    // inserted above; the check mainly shows that `get(0)` does not panic.
    let first = queue.get(0);
    assert!(first.id >= 0, "First element should be retrievable");
}

#[test]
fn test_normal_distances_should_push_infinity_distances_away_from_queue() {
    // A queue saturated with infinite-distance neighbors must still accept
    // finite-distance neighbors and rank them ahead of the infinite ones.
    let mut queue = NeighborPriorityQueue::new(5);
    assert_eq!((queue.size(), queue.capacity()), (0, 5));

    // Fill the queue completely with infinite-distance neighbors (ids 0 through 4).
    for nbr in (0..=4).map(|id| Neighbor::new(id, f32::INFINITY)) {
        queue.insert(nbr);
    }
    assert_eq!((queue.size(), queue.capacity()), (5, 5));

    // NOTE(review): the comparison itself is trivially satisfied by the ids
    // inserted above; the check mainly shows that `get(0)` does not panic.
    let first = queue.get(0);
    assert!(first.id >= 0, "First element should be retrievable");

    // Neighbors 5, 6 and 7 use their own id as a finite distance.
    for finite_id in 5..=7 {
        let nbr = Neighbor::new(finite_id, finite_id as f32);
        queue.insert(nbr);
    }
    assert_eq!((queue.size(), queue.capacity()), (5, 5));

    // Slots 0 through 2: the normal distance neighbors sit at the front of the queue.
    // Slots 3 and 4: the remaining infinity distance neighbors are pushed to the end.
    for (slot, &expected_id) in [5, 6, 7, 4, 3].iter().enumerate() {
        assert_eq!(queue.get(slot).id, expected_id);
    }
}

#[test]
fn test_insert_neighbor_with_nan_distance_is_ignored() {
    // A neighbor whose distance is NaN must be dropped by `insert`, leaving the
    // queue exactly as empty as it was before the call.
    let mut queue = NeighborPriorityQueue::new(5);
    let size_before = queue.size();
    assert_eq!(size_before, 0);

    let nan_neighbor = Neighbor::new(0, f32::NAN);
    queue.insert(nan_neighbor);

    let size_after = queue.size();
    assert_eq!(size_after, 0);
}

#[test]
#[cfg(feature = "experimental_diversity_search")]
fn test_retain() {
Expand Down
Loading