diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 978dd04d83ca..2709a5f199c8 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -149,6 +149,8 @@ Optimizations * GITHUB#15597, GITHUB#15777: Reduce memory usage of NeighborArray (Viliam Durina) +* GITHUB#15606: Utilize bulk scoring for NeighborArray#isWorstNonDiverse (Luis Negrin) + Bug Fixes --------------------- * GITHUB#14049: Randomize KNN codec params in RandomCodec. Fixes scalar quantization div-by-zero diff --git a/lucene/core/src/java/org/apache/lucene/util/hnsw/NeighborArray.java b/lucene/core/src/java/org/apache/lucene/util/hnsw/NeighborArray.java index 3ddba160b44e..41ff20ac105f 100644 --- a/lucene/core/src/java/org/apache/lucene/util/hnsw/NeighborArray.java +++ b/lucene/core/src/java/org/apache/lucene/util/hnsw/NeighborArray.java @@ -292,13 +292,19 @@ private int findWorstNonDiverse(UpdateableRandomVectorScorer scorer) throws IOEx int[] uncheckedIndexes = sort(scorer); assert uncheckedIndexes != null : "We will always have something unchecked"; int uncheckedCursor = uncheckedIndexes.length - 1; + int[] uncheckedNodes = new int[uncheckedIndexes.length]; + for (int i = uncheckedCursor; i >= 0; i--) { + uncheckedNodes[i] = nodes[uncheckedIndexes[i]]; + } + float[] bulkScores = new float[size]; for (int i = size - 1; i > 0; i--) { if (uncheckedCursor < 0) { // no unchecked node left break; } scorer.setScoringOrdinal(nodes[i]); - if (isWorstNonDiverse(i, uncheckedIndexes, uncheckedCursor, scorer)) { + if (isWorstNonDiverse( + i, uncheckedIndexes, uncheckedCursor, scorer, uncheckedNodes, bulkScores)) { return i; } if (i == uncheckedIndexes[uncheckedCursor]) { @@ -309,31 +315,23 @@ private int findWorstNonDiverse(UpdateableRandomVectorScorer scorer) throws IOEx } private boolean isWorstNonDiverse( - int candidateIndex, int[] uncheckedIndexes, int uncheckedCursor, RandomVectorScorer scorer) + int candidateIndex, + int[] uncheckedIndexes, + int uncheckedCursor, + RandomVectorScorer scorer, + int[] uncheckedNodes, + float[] bulkScores) throws IOException { float minAcceptedSimilarity = scores[candidateIndex]; if (candidateIndex == uncheckedIndexes[uncheckedCursor]) { // the candidate itself is unchecked - for (int i = candidateIndex - 1; i >= 0; i--) { - float neighborSimilarity = scorer.score(nodes[i]); - // candidate node is too similar to node i given its score relative to the base node - if (neighborSimilarity >= minAcceptedSimilarity) { - return true; - } - } - } else { - // else we just need to make sure candidate does not violate diversity with the (newly - // inserted) unchecked nodes - assert candidateIndex > uncheckedIndexes[uncheckedCursor]; - for (int i = uncheckedCursor; i >= 0; i--) { - float neighborSimilarity = scorer.score(nodes[uncheckedIndexes[i]]); - // candidate node is too similar to node i given its score relative to the base node - if (neighborSimilarity >= minAcceptedSimilarity) { - return true; - } - } + return scorer.bulkScore(nodes, bulkScores, candidateIndex) >= minAcceptedSimilarity; } - return false; + // else we just need to make sure candidate does not violate diversity with the (newly + // inserted) unchecked nodes + assert candidateIndex > uncheckedIndexes[uncheckedCursor]; + return scorer.bulkScore(uncheckedNodes, bulkScores, uncheckedCursor + 1) + >= minAcceptedSimilarity; } public int maxSize() {