From f9c149223dc36b062cbe62acbcf5c901deab9232 Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Mon, 23 Feb 2026 14:31:15 -0800 Subject: [PATCH 1/3] [Bugfix] NeighborPriorityQueue panics, when it is at capacity and NaN distance is inserted --- diskann-providers/src/index/diskann_async.rs | 93 ++++++++++ diskann/src/neighbor/queue.rs | 170 +++++++++++++++++++ 2 files changed, 263 insertions(+) diff --git a/diskann-providers/src/index/diskann_async.rs b/diskann-providers/src/index/diskann_async.rs index a7472a6ce..1ede4edba 100644 --- a/diskann-providers/src/index/diskann_async.rs +++ b/diskann-providers/src/index/diskann_async.rs @@ -4381,4 +4381,97 @@ pub(crate) mod tests { filter.visit_count() ); } + + #[tokio::test] + async fn vectors_with_infinity_values_should_be_inserted_and_searched_without_panic() { + let l_build: usize = 20; + // We need to insert more points than l_build to ensure that the priority queue gets filled in. + let insert_count = l_build + 10; + const VECTORS_DIMENSION: usize = 384; + // Last third of inserted vectors will have infinity values. + let vector_value_start: f32 = half::f16::MAX.to_f32() - insert_count as f32 / 3.0; + + let (config, mut parameters) = simplified_builder( + l_build, + 32, + Metric::L2, + VECTORS_DIMENSION, + insert_count, + |_| {}, + ) + .unwrap(); + + parameters.frozen_points = NonZeroUsize::new(1).unwrap(); + let index = new_index::(config, parameters, NoDeletes).unwrap(); + + let vectors = (0..insert_count) + .map(move |i| [half::f16::from_f32(vector_value_start + i as f32); VECTORS_DIMENSION]) + .collect::>(); + + assert_ne!( + vectors[0][0], + half::f16::INFINITY, + "First vector should not have infinity value" + ); + assert_eq!( + vectors[vectors.len() - 1][0], + half::f16::INFINITY, + "Last vector should have infinity value" + ); + + for (i, vector) in vectors.iter().take(insert_count).enumerate() { + let vector_id = i as u32; + index + .insert(FullPrecision, &DefaultContext, &vector_id, vector) + .await + .unwrap(); + } + + let query_count: usize = 1; + let mut queries = crate::common::AlignedBoxWithSlice::::new( + query_count * VECTORS_DIMENSION, + 32, + ) + .unwrap(); + + for i in 0..query_count { + for val in queries.as_mut_slice()[i * VECTORS_DIMENSION..(i + 1) * VECTORS_DIMENSION] + .iter_mut() + { + *val = half::f16::from_f32(0f32); + } + } + + let top_k = l_build; + let search_l = l_build; + let mut ids = vec![0; top_k]; + let mut distances = vec![0.0; top_k]; + let ctx = DefaultContext; + let search_params = graph::search::Knn::new_default(top_k, search_l).unwrap(); + for i in 0..query_count { + let query_vector = + &queries.as_slice()[i * VECTORS_DIMENSION..(i + 1) * VECTORS_DIMENSION]; + + let mut result_output_buffer = + search_output_buffer::IdDistance::new(&mut ids, &mut distances); + + // Full Precision Search. + let search_result = index + .search( + search_params, + &FullPrecision, + &ctx, + query_vector, + &mut result_output_buffer, + ) + .await + .unwrap(); + + assert!( + search_result.result_count > 0, + "Expected non-empty result for query {}", + i + ); + } + } } diff --git a/diskann/src/neighbor/queue.rs b/diskann/src/neighbor/queue.rs index f22088c95..9c1b4d1f5 100644 --- a/diskann/src/neighbor/queue.rs +++ b/diskann/src/neighbor/queue.rs @@ -148,6 +148,14 @@ impl NeighborPriorityQueue { }; if self.size == self.capacity { + if insert_idx == self.capacity { + // When insert_idx equals capacity, the value being inserted is not comparable + // with existing values (neither larger nor smaller). This typically happens + // when inserting a NaN distance. In this case, we ignore the value since the + // queue is full and we can't determine where it belongs in the sorted order. + return; + } + self.id_visiteds.truncate(self.size - 1); self.distances.truncate(self.size - 1); self.size -= 1; @@ -1264,6 +1272,168 @@ mod neighbor_priority_queue_test { assert_eq!(queue.get(3).id, 5); // 2.0 } + #[test] + fn test_insert_with_increasing_distances() { + let mut queue = NeighborPriorityQueue::new(5); + + assert_eq!(queue.size(), 0); + assert_eq!(queue.capacity(), 5); + + for id in 0..2 { + queue.insert(Neighbor::new(id, id as f32)); + } + + assert_eq!(queue.size(), 2); + assert_eq!(queue.capacity(), 5); + + for id in 2..10 { + queue.insert(Neighbor::new(id, id as f32)); + } + + assert_eq!(queue.size(), 5); + assert_eq!(queue.capacity(), 5); + + assert_eq!( + queue.get(0).id, + 0, + "The smallest distance should be from id 0" + ); + assert_eq!( + queue.get(queue.size() - 1).id, + 4, + "The largest distance should be from id 4" + ); + } + + #[test] + fn test_insert_with_decreasing_distances() { + let mut queue = NeighborPriorityQueue::new(5); + + assert_eq!(queue.size(), 0); + assert_eq!(queue.capacity(), 5); + + for id in 0..2 { + queue.insert(Neighbor::new(id, -id as f32)); + } + + assert_eq!(queue.size(), 2); + assert_eq!(queue.capacity(), 5); + + for id in 2..10 { + queue.insert(Neighbor::new(id, -id as f32)); + } + + assert_eq!(queue.size(), 5); + assert_eq!(queue.capacity(), 5); + + assert_eq!( + queue.get(0).id, + 9, + "The smallest distance should be from id 9" + ); + assert_eq!( + queue.get(queue.size() - 1).id, + 5, + "The largest distance should be from id 5" + ); + } + + #[test] + fn test_insert_with_infinity_distance() { + let mut queue = NeighborPriorityQueue::new(5); + + assert_eq!(queue.size(), 0); + assert_eq!(queue.capacity(), 5); + + for id in 0..2 { + queue.insert(Neighbor::new(id, f32::INFINITY)); + } + + assert_eq!(queue.size(), 2); + assert_eq!(queue.capacity(), 5); + + for id in 2..10 { + queue.insert(Neighbor::new(id, f32::INFINITY)); + } + + assert_eq!(queue.size(), 5); + assert_eq!(queue.capacity(), 5); + + assert!(queue.get(0).id >= 0, "First element should be retrievable"); + } + + #[test] + fn test_insert_with_nan_distance() { + let mut queue = NeighborPriorityQueue::new(5); + + assert_eq!(queue.size(), 0); + assert_eq!(queue.capacity(), 5); + + for id in 0..2 { + queue.insert(Neighbor::new(id, f32::NAN)); + } + + assert_eq!(queue.size(), 2); + assert_eq!(queue.capacity(), 5); + + for id in 2..10 { + queue.insert(Neighbor::new(id, f32::NAN)); + } + + assert_eq!(queue.size(), 5); + assert_eq!(queue.capacity(), 5); + + assert!(queue.get(0).id >= 0, "First element should be retrievable"); + } + + #[test] + fn test_insert_with_infinity_distance_then_with_nan_distance() { + let mut queue = NeighborPriorityQueue::new(5); + + assert_eq!(queue.size(), 0); + assert_eq!(queue.capacity(), 5); + + for id in 0..2 { + queue.insert(Neighbor::new(id, f32::INFINITY)); + } + + assert_eq!(queue.size(), 2); + assert_eq!(queue.capacity(), 5); + + for id in 2..10 { + queue.insert(Neighbor::new(id, f32::NAN)); + } + + assert_eq!(queue.size(), 5); + assert_eq!(queue.capacity(), 5); + + assert!(queue.get(0).id >= 0, "First element should be retrievable"); + } + + #[test] + fn test_insert_with_nan_distance_then_with_infinity_distance() { + let mut queue = NeighborPriorityQueue::new(5); + + assert_eq!(queue.size(), 0); + assert_eq!(queue.capacity(), 5); + + for id in 0..2 { + queue.insert(Neighbor::new(id, f32::NAN)); + } + + assert_eq!(queue.size(), 2); + assert_eq!(queue.capacity(), 5); + + for id in 2..10 { + queue.insert(Neighbor::new(id, f32::INFINITY)); + } + + assert_eq!(queue.size(), 5); + assert_eq!(queue.capacity(), 5); + + assert!(queue.get(0).id >= 0, "First element should be retrievable"); + } + #[test] #[cfg(feature = "experimental_diversity_search")] fn test_retain() { From ab34711d45e8788286a063c7c5fb7ffc82ba299e Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Mon, 23 Feb 2026 17:26:34 -0800 Subject: [PATCH 2/3] Detect NaN in the queue --- diskann/src/neighbor/queue.rs | 69 ++++++++++++++++++++++++++++++++++- 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/diskann/src/neighbor/queue.rs b/diskann/src/neighbor/queue.rs index 9c1b4d1f5..c6dc877b9 100644 --- a/diskann/src/neighbor/queue.rs +++ b/diskann/src/neighbor/queue.rs @@ -303,7 +303,14 @@ impl NeighborPriorityQueue { } } - self.size + // If we reach here, none of the existing neighbors has a distance >= the target. + // This might mean that we’re among trailing NaNs as well; find the first NaN neighbor if any. + let mut index = self.size; + while index > 0 && self.get_unchecked(index - 1).distance.is_nan() { + index -= 1; + } + + index } /// Get the neighbor at index - SAFETY: index must be less than size @@ -1386,6 +1393,16 @@ mod neighbor_priority_queue_test { assert!(queue.get(0).id >= 0, "First element should be retrievable"); } + #[test] + fn test_insert_single_neighbor_with_nan_distance() { + let mut queue = NeighborPriorityQueue::new(5); + + assert_eq!(queue.size(), 0); + assert_eq!(queue.capacity(), 5); + queue.insert(Neighbor::new(999, f32::NAN)); + assert_eq!(queue.get(0).id, 999); + } + #[test] fn test_insert_with_infinity_distance_then_with_nan_distance() { let mut queue = NeighborPriorityQueue::new(5); @@ -1410,6 +1427,56 @@ mod neighbor_priority_queue_test { assert!(queue.get(0).id >= 0, "First element should be retrievable"); } + #[test] + fn test_normal_distances_should_push_nan_distances_away_from_queue() { + let mut queue = NeighborPriorityQueue::new(5); + + assert_eq!(queue.size(), 0); + assert_eq!(queue.capacity(), 5); + + for id in 0..5 { + queue.insert(Neighbor::new(id, f32::NAN)); + } + + assert_eq!(queue.size(), 5); + assert_eq!(queue.capacity(), 5); + + assert!(queue.get(0).id >= 0, "First element should be retrievable"); + + for id in 5..9 { + queue.insert(Neighbor::new(id, id as f32)); + } + + assert_eq!(queue.size(), 5); + assert_eq!(queue.capacity(), 5); + + assert_eq!( + queue.get(0).id, + 5, + "The closest element should be id 5 with distance 5.0" + ); + assert_eq!( + queue.get(1).id, + 6, + "The closest element should be id 6 with distance 6.0" + ); + assert_eq!( + queue.get(2).id, + 7, + "The closest element should be id 7 with distance 7.0" + ); + assert_eq!( + queue.get(3).id, + 8, + "The closest element should be id 8 with distance 8.0" + ); + assert_eq!( + queue.get(4).id, + 4, + "The farthest element should be id 4 with distance NAN" + ); + } + #[test] fn test_insert_with_nan_distance_then_with_infinity_distance() { let mut queue = NeighborPriorityQueue::new(5); From 9d8bbcbd0fd319b542bd673f9f19a0ddfa70511f Mon Sep 17 00:00:00 2001 From: "Alex Razumov (from Dev Box)" Date: Mon, 23 Feb 2026 18:19:34 -0800 Subject: [PATCH 3/3] Ignore NaN neighbors --- diskann/src/neighbor/queue.rs | 208 ++++------------------------------ 1 file changed, 21 insertions(+), 187 deletions(-) diff --git a/diskann/src/neighbor/queue.rs b/diskann/src/neighbor/queue.rs index c6dc877b9..47b59fe49 100644 --- a/diskann/src/neighbor/queue.rs +++ b/diskann/src/neighbor/queue.rs @@ -131,6 +131,11 @@ impl NeighborPriorityQueue { /// Due to the performance sensitiveness of this function - we don't check for uniqueness of the item. /// Inserting the same item twice will cause undefined behavior. pub fn insert(&mut self, nbr: Neighbor) { + if nbr.distance.is_nan() { + // We don't support NaN distances. If we see one, we ignore the insert since we can't determine where it belongs in the sorted order. + return; + } + self.dbgassert_unique_insert(nbr.id); if self.auto_resizable { @@ -148,14 +153,6 @@ impl NeighborPriorityQueue { }; if self.size == self.capacity { - if insert_idx == self.capacity { - // When insert_idx equals capacity, the value being inserted is not comparable - // with existing values (neither larger nor smaller). This typically happens - // when inserting a NaN distance. In this case, we ignore the value since the - // queue is full and we can't determine where it belongs in the sorted order. - return; - } - self.id_visiteds.truncate(self.size - 1); self.distances.truncate(self.size - 1); self.size -= 1; @@ -303,14 +300,7 @@ impl NeighborPriorityQueue { } } - // If we reach here, none of the existing neighbors has a distance >= the target. - // This might mean that we’re among trailing NaNs as well; find the first NaN neighbor if any. - let mut index = self.size; - while index > 0 && self.get_unchecked(index - 1).distance.is_nan() { - index -= 1; - } - - index + self.size } /// Get the neighbor at index - SAFETY: index must be less than size @@ -1280,73 +1270,7 @@ mod neighbor_priority_queue_test { } #[test] - fn test_insert_with_increasing_distances() { - let mut queue = NeighborPriorityQueue::new(5); - - assert_eq!(queue.size(), 0); - assert_eq!(queue.capacity(), 5); - - for id in 0..2 { - queue.insert(Neighbor::new(id, id as f32)); - } - - assert_eq!(queue.size(), 2); - assert_eq!(queue.capacity(), 5); - - for id in 2..10 { - queue.insert(Neighbor::new(id, id as f32)); - } - - assert_eq!(queue.size(), 5); - assert_eq!(queue.capacity(), 5); - - assert_eq!( - queue.get(0).id, - 0, - "The smallest distance should be from id 0" - ); - assert_eq!( - queue.get(queue.size() - 1).id, - 4, - "The largest distance should be from id 4" - ); - } - - #[test] - fn test_insert_with_decreasing_distances() { - let mut queue = NeighborPriorityQueue::new(5); - - assert_eq!(queue.size(), 0); - assert_eq!(queue.capacity(), 5); - - for id in 0..2 { - queue.insert(Neighbor::new(id, -id as f32)); - } - - assert_eq!(queue.size(), 2); - assert_eq!(queue.capacity(), 5); - - for id in 2..10 { - queue.insert(Neighbor::new(id, -id as f32)); - } - - assert_eq!(queue.size(), 5); - assert_eq!(queue.capacity(), 5); - - assert_eq!( - queue.get(0).id, - 9, - "The smallest distance should be from id 9" - ); - assert_eq!( - queue.get(queue.size() - 1).id, - 5, - "The largest distance should be from id 5" - ); - } - - #[test] - fn test_insert_with_infinity_distance() { + fn test_insert_neighbors_with_infinity_distance() { let mut queue = NeighborPriorityQueue::new(5); assert_eq!(queue.size(), 0); @@ -1370,135 +1294,45 @@ mod neighbor_priority_queue_test { } #[test] - fn test_insert_with_nan_distance() { + fn test_normal_distances_should_push_infinity_distances_away_from_queue() { let mut queue = NeighborPriorityQueue::new(5); assert_eq!(queue.size(), 0); assert_eq!(queue.capacity(), 5); - for id in 0..2 { - queue.insert(Neighbor::new(id, f32::NAN)); - } - - assert_eq!(queue.size(), 2); - assert_eq!(queue.capacity(), 5); - - for id in 2..10 { - queue.insert(Neighbor::new(id, f32::NAN)); - } - - assert_eq!(queue.size(), 5); - assert_eq!(queue.capacity(), 5); - - assert!(queue.get(0).id >= 0, "First element should be retrievable"); - } - - #[test] - fn test_insert_single_neighbor_with_nan_distance() { - let mut queue = NeighborPriorityQueue::new(5); - - assert_eq!(queue.size(), 0); - assert_eq!(queue.capacity(), 5); - queue.insert(Neighbor::new(999, f32::NAN)); - assert_eq!(queue.get(0).id, 999); - } - - #[test] - fn test_insert_with_infinity_distance_then_with_nan_distance() { - let mut queue = NeighborPriorityQueue::new(5); - - assert_eq!(queue.size(), 0); - assert_eq!(queue.capacity(), 5); - - for id in 0..2 { + for id in 0..=4 { queue.insert(Neighbor::new(id, f32::INFINITY)); } - assert_eq!(queue.size(), 2); - assert_eq!(queue.capacity(), 5); - - for id in 2..10 { - queue.insert(Neighbor::new(id, f32::NAN)); - } - assert_eq!(queue.size(), 5); assert_eq!(queue.capacity(), 5); assert!(queue.get(0).id >= 0, "First element should be retrievable"); - } - - #[test] - fn test_normal_distances_should_push_nan_distances_away_from_queue() { - let mut queue = NeighborPriorityQueue::new(5); - - assert_eq!(queue.size(), 0); - assert_eq!(queue.capacity(), 5); - for id in 0..5 { - queue.insert(Neighbor::new(id, f32::NAN)); - } - - assert_eq!(queue.size(), 5); - assert_eq!(queue.capacity(), 5); - - assert!(queue.get(0).id >= 0, "First element should be retrievable"); - - for id in 5..9 { + for id in 5..=7 { queue.insert(Neighbor::new(id, id as f32)); } assert_eq!(queue.size(), 5); assert_eq!(queue.capacity(), 5); - assert_eq!( - queue.get(0).id, - 5, - "The closest element should be id 5 with distance 5.0" - ); - assert_eq!( - queue.get(1).id, - 6, - "The closest element should be id 6 with distance 6.0" - ); - assert_eq!( - queue.get(2).id, - 7, - "The closest element should be id 7 with distance 7.0" - ); - assert_eq!( - queue.get(3).id, - 8, - "The closest element should be id 8 with distance 8.0" - ); - assert_eq!( - queue.get(4).id, - 4, - "The farthest element should be id 4 with distance NAN" - ); + // The normal distance neighbors should be at the front of the queue + assert_eq!(queue.get(0).id, 5); + assert_eq!(queue.get(1).id, 6); + assert_eq!(queue.get(2).id, 7); + + // The infinity distance neighbors should be pushed to the end of the queue + assert_eq!(queue.get(3).id, 4); + assert_eq!(queue.get(4).id, 3); } #[test] - fn test_insert_with_nan_distance_then_with_infinity_distance() { + fn test_insert_neighbor_with_nan_distance_is_ignored() { let mut queue = NeighborPriorityQueue::new(5); assert_eq!(queue.size(), 0); - assert_eq!(queue.capacity(), 5); - - for id in 0..2 { - queue.insert(Neighbor::new(id, f32::NAN)); - } - - assert_eq!(queue.size(), 2); - assert_eq!(queue.capacity(), 5); - - for id in 2..10 { - queue.insert(Neighbor::new(id, f32::INFINITY)); - } - - assert_eq!(queue.size(), 5); - assert_eq!(queue.capacity(), 5); - - assert!(queue.get(0).id >= 0, "First element should be retrievable"); + queue.insert(Neighbor::new(0, f32::NAN)); + assert_eq!(queue.size(), 0); } #[test]