diff --git a/packages/ai/src/task/VectorQuantizeTask.ts b/packages/ai/src/task/VectorQuantizeTask.ts index 5fb5bf0c0..ca3726200 100644 --- a/packages/ai/src/task/VectorQuantizeTask.ts +++ b/packages/ai/src/task/VectorQuantizeTask.ts @@ -10,11 +10,19 @@ import { FromSchema, normalizeNumberArray, TensorType, + turboQuantizeToTypedArray, TypedArray, TypedArraySchema, TypedArraySchemaOptions, } from "@workglow/util/schema"; +export const QuantizationMethod = { + LINEAR: "linear", + TURBO: "turbo", +} as const; + +export type QuantizationMethod = (typeof QuantizationMethod)[keyof typeof QuantizationMethod]; + const inputSchema = { type: "object", properties: { @@ -48,6 +56,21 @@ const inputSchema = { description: "Normalize vector before quantization", default: true, }, + method: { + type: "string", + enum: Object.values(QuantizationMethod), + title: "Method", + description: + "Quantization method: 'linear' for simple min-max scaling, 'turbo' for TurboQuant (randomized rotation + optimal scalar quantization, better distortion than linear at the same bit width). Turbo requires an integer targetType (int8, uint8, int16, uint16).", + default: QuantizationMethod.LINEAR, + }, + turboSeed: { + type: "integer", + title: "TurboQuant Seed", + description: + "Seed for the random rotation in TurboQuant. All vectors in the same collection must use the same seed for similarity search to work.", + default: 42, + }, }, required: ["vector", "targetType"], additionalProperties: false, @@ -117,12 +140,24 @@ export class VectorQuantizeTask extends Task< } override async executeReactive(input: VectorQuantizeTaskInput): Promise { - const { vector, targetType, normalize = true } = input; + const { + vector, + targetType, + normalize = true, + method = QuantizationMethod.LINEAR, + turboSeed = 42, + } = input; const isArray = Array.isArray(vector); const vectors = isArray ? 
vector : [vector]; const originalType = this.getVectorType(vectors[0]); - const quantized = vectors.map((v) => this.vectorQuantize(v, targetType, normalize)); + let quantized: TypedArray[]; + + if (method === QuantizationMethod.TURBO) { + quantized = vectors.map((v) => turboQuantizeToTypedArray(v, targetType, turboSeed)); + } else { + quantized = vectors.map((v) => this.vectorQuantize(v, targetType, normalize)); + } return { vector: isArray ? quantized : quantized[0], diff --git a/packages/test/src/test/rag/VectorQuantizeTask.test.ts b/packages/test/src/test/rag/VectorQuantizeTask.test.ts index 7683454c3..babb6319d 100644 --- a/packages/test/src/test/rag/VectorQuantizeTask.test.ts +++ b/packages/test/src/test/rag/VectorQuantizeTask.test.ts @@ -229,4 +229,68 @@ describe("VectorQuantizeTask", () => { expect(result).toBeDefined(); expect(result.vector).toBeInstanceOf(Int8Array); }); + + describe("turbo method", () => { + test("should return target TypedArray type directly", async () => { + const vector = new Float32Array([1, 2, 3, 4, 5, 6, 7, 8]); + + const result = await vectorQuantize({ + vector, + targetType: TensorType.INT8, + method: "turbo", + turboSeed: 42, + }); + + expect(result).toBeDefined(); + expect(result.vector).toBeInstanceOf(Int8Array); + expect(result.targetType).toBe(TensorType.INT8); + expect(result.originalType).toBe(TensorType.FLOAT32); + expect((result.vector as Int8Array).length).toBe(vector.length); + }); + + test("should be deterministic for a fixed seed", async () => { + const vector = new Float32Array([1, 2, 3, 4, 5, 6, 7, 8]); + + const r1 = await vectorQuantize({ + vector, + targetType: TensorType.INT8, + method: "turbo", + turboSeed: 99, + }); + + const r2 = await vectorQuantize({ + vector, + targetType: TensorType.INT8, + method: "turbo", + turboSeed: 99, + }); + + const v1 = r1.vector as Int8Array; + const v2 = r2.vector as Int8Array; + expect(v1.length).toBe(v2.length); + for (let i = 0; i < v1.length; i++) { + 
expect(v1[i]).toBe(v2[i]); + } + }); + + test("should handle array of vectors with turbo method", async () => { + const vectors = [ + new Float32Array([1, 2, 3, 4]), + new Float32Array([5, 6, 7, 8]), + ]; + + const result = await vectorQuantize({ + vector: vectors, + targetType: TensorType.INT8, + method: "turbo", + turboSeed: 42, + }); + + expect(Array.isArray(result.vector)).toBe(true); + const out = result.vector as Int8Array[]; + expect(out.length).toBe(2); + out.forEach((v) => expect(v).toBeInstanceOf(Int8Array)); + expect(result.targetType).toBe(TensorType.INT8); + }); + }); }); diff --git a/packages/test/src/test/util/TurboQuantize.test.ts b/packages/test/src/test/util/TurboQuantize.test.ts new file mode 100644 index 000000000..483513979 --- /dev/null +++ b/packages/test/src/test/util/TurboQuantize.test.ts @@ -0,0 +1,470 @@ +/** + * @license + * Copyright 2025 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +import { setLogger } from "@workglow/util"; +import { + turboQuantize, + turboDequantize, + turboQuantizeToTypedArray, + turboQuantizedInnerProduct, + turboQuantizedCosineSimilarity, + turboQuantizeStorageBytes, + turboQuantizeCompressionRatio, + TensorType, + cosineSimilarity, + inner, + magnitude, +} from "@workglow/util/schema"; +import { describe, expect, test } from "vitest"; +import { getTestingLogger } from "../../binding/TestingLogger"; + +describe("TurboQuantize", () => { + let logger = getTestingLogger(); + setLogger(logger); + + describe("turboQuantize", () => { + test("should quantize a Float32Array vector", () => { + const vector = new Float32Array([1, 2, 3, 4, 5, 6, 7, 8]); + const result = turboQuantize(vector, { bits: 4, seed: 42 }); + + expect(result.bits).toBe(4); + expect(result.dimensions).toBe(8); + expect(result.seed).toBe(42); + expect(result.norm).toBeCloseTo(magnitude(vector), 5); + expect(result.codes).toBeInstanceOf(Uint8Array); + }); + + test("should quantize with default options", () => { + const vector = new 
Float32Array([1, 2, 3, 4]); + const result = turboQuantize(vector, undefined); + + expect(result.bits).toBe(4); + expect(result.seed).toBe(42); + expect(result.dimensions).toBe(4); + }); + + test("should produce compact storage at low bit widths", () => { + const vector = new Float32Array(768); // typical embedding dimension + for (let i = 0; i < 768; i++) vector[i] = Math.sin(i * 0.1); + + const result4bit = turboQuantize(vector, { bits: 4, seed: 42 }); + const result2bit = turboQuantize(vector, { bits: 2, seed: 42 }); + + // 768 pads to 1024 (next power of 2): + // 4-bit: 1024 * 4 / 8 = 512 bytes + expect(result4bit.codes.length).toBe(512); + expect(result4bit.paddedDimensions).toBe(1024); + // 2-bit: 1024 * 2 / 8 = 256 bytes + expect(result2bit.codes.length).toBe(256); + expect(result2bit.paddedDimensions).toBe(1024); + }); + + test("should reject invalid bit widths", () => { + const vector = new Float32Array([1, 2, 3, 4]); + expect(() => turboQuantize(vector, { bits: 0, seed: 42 })).toThrow(); + expect(() => turboQuantize(vector, { bits: 9, seed: 42 })).toThrow(); + expect(() => turboQuantize(vector, { bits: 3.5, seed: 42 })).toThrow(); + }); + + test("should reject empty vectors", () => { + const vector = new Float32Array(0); + expect(() => turboQuantize(vector, { bits: 4, seed: 42 })).toThrow(); + }); + + test("should handle zero vectors", () => { + const vector = new Float32Array([0, 0, 0, 0]); + const result = turboQuantize(vector, { bits: 4, seed: 42 }); + expect(result.norm).toBe(0); + }); + + test("should support different TypedArray inputs", () => { + const values = [1, 2, 3, 4, 5, 6, 7, 8]; + const f32 = turboQuantize(new Float32Array(values), { bits: 4, seed: 42 }); + const f64 = turboQuantize(new Float64Array(values), { bits: 4, seed: 42 }); + const i8 = turboQuantize(new Int8Array(values), { bits: 4, seed: 42 }); + + expect(f32.dimensions).toBe(8); + expect(f64.dimensions).toBe(8); + expect(i8.dimensions).toBe(8); + }); + + test("should produce 
deterministic results with same seed", () => { + const vector = new Float32Array([1, 2, 3, 4, 5, 6, 7, 8]); + const r1 = turboQuantize(vector, { bits: 4, seed: 123 }); + const r2 = turboQuantize(vector, { bits: 4, seed: 123 }); + + expect(r1.codes).toEqual(r2.codes); + expect(r1.norm).toBe(r2.norm); + }); + + test("should produce different results with different seeds", () => { + const vector = new Float32Array([1, 2, 3, 4, 5, 6, 7, 8]); + const r1 = turboQuantize(vector, { bits: 4, seed: 1 }); + const r2 = turboQuantize(vector, { bits: 4, seed: 2 }); + + // Norms should be the same (same input vector) + expect(r1.norm).toBeCloseTo(r2.norm, 5); + // But codes should differ (different rotations) + expect(r1.codes).not.toEqual(r2.codes); + }); + }); + + describe("turboDequantize", () => { + test("should reconstruct vectors with reasonable fidelity at 8 bits", () => { + const original = new Float32Array([1, 2, 3, 4, 5, 6, 7, 8]); + const quantized = turboQuantize(original, { bits: 8, seed: 42 }); + const reconstructed = turboDequantize(quantized); + + expect(reconstructed.length).toBe(original.length); + expect(reconstructed).toBeInstanceOf(Float32Array); + + // At 8 bits, reconstruction should be quite close + const sim = cosineSimilarity(original, reconstructed); + expect(sim).toBeGreaterThan(0.95); + }); + + test("should reconstruct vectors with acceptable fidelity at 4 bits", () => { + // Use a higher-dimensional vector where TurboQuant shines + const d = 128; + const original = new Float32Array(d); + for (let i = 0; i < d; i++) original[i] = Math.sin(i * 0.1) + Math.cos(i * 0.3); + + const quantized = turboQuantize(original, { bits: 4, seed: 42 }); + const reconstructed = turboDequantize(quantized); + + const sim = cosineSimilarity(original, reconstructed); + expect(sim).toBeGreaterThan(0.9); + }); + + test("should preserve vector norm approximately", () => { + const original = new Float32Array([3, 4, 5, 6, 7, 8, 9, 10]); + const origNorm = magnitude(original); + 
+ const quantized = turboQuantize(original, { bits: 8, seed: 42 }); + const reconstructed = turboDequantize(quantized); + const reconNorm = magnitude(reconstructed); + + // Norm should be approximately preserved + expect(reconNorm).toBeCloseTo(origNorm, 0); + }); + + test("should return zero vector for quantized zero vector", () => { + const original = new Float32Array([0, 0, 0, 0]); + const quantized = turboQuantize(original, { bits: 4, seed: 42 }); + const reconstructed = turboDequantize(quantized); + + for (let i = 0; i < reconstructed.length; i++) { + expect(Math.abs(reconstructed[i])).toBe(0); + } + }); + + test("should improve quality with higher dimensions", () => { + // TurboQuant relies on concentration of measure, which improves with dimension + const d64 = 64; + const d256 = 256; + + const v64 = new Float32Array(d64); + const v256 = new Float32Array(d256); + for (let i = 0; i < d64; i++) v64[i] = Math.random() - 0.5; + for (let i = 0; i < d256; i++) v256[i] = Math.random() - 0.5; + + const q64 = turboQuantize(v64, { bits: 4, seed: 42 }); + const q256 = turboQuantize(v256, { bits: 4, seed: 42 }); + + const r64 = turboDequantize(q64); + const r256 = turboDequantize(q256); + + const sim64 = cosineSimilarity(v64, r64); + const sim256 = cosineSimilarity(v256, r256); + + // Higher dimension should give better or comparable quality + // (both should be good, but 256-dim should be slightly better) + expect(sim64).toBeGreaterThan(0.8); + expect(sim256).toBeGreaterThan(0.8); + }); + }); + + describe("turboQuantizedInnerProduct", () => { + test("should estimate inner product of quantized vectors", () => { + const a = new Float32Array([1, 2, 3, 4, 5, 6, 7, 8]); + const b = new Float32Array([8, 7, 6, 5, 4, 3, 2, 1]); + + const trueIP = inner(a, b); + const qa = turboQuantize(a, { bits: 8, seed: 42 }); + const qb = turboQuantize(b, { bits: 8, seed: 42 }); + const estimatedIP = turboQuantizedInnerProduct(qa, qb); + + // At 8 bits, should be reasonably close + 
expect(estimatedIP).toBeCloseTo(trueIP, -1); // within order of magnitude + }); + + test("should reject vectors with different dimensions", () => { + const a = turboQuantize(new Float32Array([1, 2, 3, 4]), { bits: 4, seed: 42 }); + const b = turboQuantize(new Float32Array([1, 2, 3, 4, 5, 6, 7, 8]), { + bits: 4, + seed: 42, + }); + + expect(() => turboQuantizedInnerProduct(a, b)).toThrow("same dimensions"); + }); + + test("should reject vectors with different bit widths", () => { + const v = new Float32Array([1, 2, 3, 4, 5, 6, 7, 8]); + const a = turboQuantize(v, { bits: 4, seed: 42 }); + const b = turboQuantize(v, { bits: 8, seed: 42 }); + + expect(() => turboQuantizedInnerProduct(a, b)).toThrow("same bit width"); + }); + + test("should reject vectors with different seeds", () => { + const v = new Float32Array([1, 2, 3, 4, 5, 6, 7, 8]); + const a = turboQuantize(v, { bits: 4, seed: 1 }); + const b = turboQuantize(v, { bits: 4, seed: 2 }); + + expect(() => turboQuantizedInnerProduct(a, b)).toThrow("same rotation seed"); + }); + }); + + describe("turboQuantizedCosineSimilarity", () => { + test("should estimate cosine similarity between quantized vectors", () => { + const d = 64; + const a = new Float32Array(d); + const b = new Float32Array(d); + for (let i = 0; i < d; i++) { + a[i] = Math.sin(i * 0.1); + b[i] = Math.sin(i * 0.1 + 0.5); // similar but shifted + } + + const trueSim = cosineSimilarity(a, b); + const qa = turboQuantize(a, { bits: 8, seed: 42 }); + const qb = turboQuantize(b, { bits: 8, seed: 42 }); + const estimatedSim = turboQuantizedCosineSimilarity(qa, qb); + + // Should be close to true cosine similarity + expect(Math.abs(estimatedSim - trueSim)).toBeLessThan(0.15); + }); + + test("should return 0 for zero vectors", () => { + const a = turboQuantize(new Float32Array([0, 0, 0, 0]), { bits: 4, seed: 42 }); + const b = turboQuantize(new Float32Array([1, 2, 3, 4]), { bits: 4, seed: 42 }); + + expect(turboQuantizedCosineSimilarity(a, b)).toBe(0); + }); + 
+ test("should give high similarity for identical vectors", () => { + const v = new Float32Array(64); + for (let i = 0; i < 64; i++) v[i] = Math.sin(i); + + const qa = turboQuantize(v, { bits: 8, seed: 42 }); + const qb = turboQuantize(v, { bits: 8, seed: 42 }); + + expect(turboQuantizedCosineSimilarity(qa, qb)).toBeGreaterThan(0.95); + }); + }); + + describe("turboQuantizeStorageBytes", () => { + test("should calculate correct storage for common configurations", () => { + // 768-dim pads to 1024 (next power of 2): + // At 4 bits: 1024 * 4 / 8 = 512 bytes + expect(turboQuantizeStorageBytes(768, 4)).toBe(512); + + // At 2 bits: 1024 * 2 / 8 = 256 bytes + expect(turboQuantizeStorageBytes(768, 2)).toBe(256); + + // At 8 bits: 1024 * 8 / 8 = 1024 bytes + expect(turboQuantizeStorageBytes(768, 8)).toBe(1024); + + // At 1 bit: 1024 * 1 / 8 = 128 bytes + expect(turboQuantizeStorageBytes(768, 1)).toBe(128); + + // Power-of-2 dimension: no extra padding + // 512-dim at 4 bits: 512 * 4 / 8 = 256 bytes + expect(turboQuantizeStorageBytes(512, 4)).toBe(256); + }); + + test("should ceil for non-byte-aligned sizes", () => { + // 3 dimensions pads to 4 (next power of 2), 3 bits: 4 * 3 / 8 = 1.5 -> 2 bytes + expect(turboQuantizeStorageBytes(3, 3)).toBe(2); + }); + }); + + describe("turboQuantizeCompressionRatio", () => { + test("should calculate correct compression ratios", () => { + // Float32 = 4 bytes/dim. 
+ // 512-dim (already power-of-2) at 4 bits: ratio = (512 * 4) / (512 * 4 / 8) = 8 + expect(turboQuantizeCompressionRatio(512, 4)).toBe(8); + + // At 2 bits: ratio = (512 * 4) / (512 * 2 / 8) = 16 + expect(turboQuantizeCompressionRatio(512, 2)).toBe(16); + + // At 1 bit: ratio = (512 * 4) / (512 * 1 / 8) = 32 + expect(turboQuantizeCompressionRatio(512, 1)).toBe(32); + }); + }); + + describe("turboQuantizeToTypedArray", () => { + test("should produce Int8Array for INT8 target", () => { + const vector = new Float32Array([1, 2, 3, 4, 5, 6, 7, 8]); + const result = turboQuantizeToTypedArray(vector, TensorType.INT8); + expect(result).toBeInstanceOf(Int8Array); + expect(result.length).toBe(vector.length); + }); + + test("should produce Uint8Array for UINT8 target", () => { + const vector = new Float32Array([1, 2, 3, 4, 5, 6, 7, 8]); + const result = turboQuantizeToTypedArray(vector, TensorType.UINT8); + expect(result).toBeInstanceOf(Uint8Array); + expect(result.length).toBe(vector.length); + }); + + test("should produce Int16Array for INT16 target", () => { + const vector = new Float32Array([1, 2, 3, 4, 5, 6, 7, 8]); + const result = turboQuantizeToTypedArray(vector, TensorType.INT16); + expect(result).toBeInstanceOf(Int16Array); + expect(result.length).toBe(vector.length); + }); + + test("should produce Uint16Array for UINT16 target", () => { + const vector = new Float32Array([1, 2, 3, 4, 5, 6, 7, 8]); + const result = turboQuantizeToTypedArray(vector, TensorType.UINT16); + expect(result).toBeInstanceOf(Uint16Array); + expect(result.length).toBe(vector.length); + }); + + test("should reject float target types", () => { + const vector = new Float32Array([1, 2, 3, 4]); + expect(() => turboQuantizeToTypedArray(vector, TensorType.FLOAT32)).toThrow( + "integer target types" + ); + expect(() => turboQuantizeToTypedArray(vector, TensorType.FLOAT64)).toThrow( + "integer target types" + ); + }); + + test("should reject empty vectors", () => { + expect(() => 
turboQuantizeToTypedArray(new Float32Array(0), TensorType.INT8)).toThrow(); + }); + + test("should be deterministic with same seed", () => { + const vector = new Float32Array([1, 2, 3, 4, 5, 6, 7, 8]); + const r1 = turboQuantizeToTypedArray(vector, TensorType.INT8, 123); + const r2 = turboQuantizeToTypedArray(vector, TensorType.INT8, 123); + expect(r1).toEqual(r2); + }); + + test("should produce different results with different seeds", () => { + const vector = new Float32Array([1, 2, 3, 4, 5, 6, 7, 8]); + const r1 = turboQuantizeToTypedArray(vector, TensorType.INT8, 1); + const r2 = turboQuantizeToTypedArray(vector, TensorType.INT8, 2); + expect(r1).not.toEqual(r2); + }); + + test("should preserve cosine similarity between vectors (Int8)", () => { + const d = 128; + const a = new Float32Array(d); + const b = new Float32Array(d); + for (let i = 0; i < d; i++) { + a[i] = Math.sin(i * 0.1); + b[i] = Math.sin(i * 0.1 + 0.5); + } + + const trueSim = cosineSimilarity(a, b); + const qa = turboQuantizeToTypedArray(a, TensorType.INT8, 42); + const qb = turboQuantizeToTypedArray(b, TensorType.INT8, 42); + const quantSim = cosineSimilarity(qa, qb); + + // Turbo Int8 should preserve similarity well + expect(Math.abs(quantSim - trueSim)).toBeLessThan(0.15); + }); + + test("should preserve cosine similarity between vectors (Int16)", () => { + const d = 128; + const a = new Float32Array(d); + const b = new Float32Array(d); + for (let i = 0; i < d; i++) { + a[i] = Math.sin(i * 0.1); + b[i] = Math.sin(i * 0.1 + 0.5); + } + + const trueSim = cosineSimilarity(a, b); + const qa = turboQuantizeToTypedArray(a, TensorType.INT16, 42); + const qb = turboQuantizeToTypedArray(b, TensorType.INT16, 42); + const quantSim = cosineSimilarity(qa, qb); + + // Int16 should be very close + expect(Math.abs(quantSim - trueSim)).toBeLessThan(0.05); + }); + + test("should give high similarity for identical vectors", () => { + const d = 128; + const v = new Float32Array(d); + for (let i = 0; i < d; i++) 
v[i] = Math.sin(i); + + const qa = turboQuantizeToTypedArray(v, TensorType.INT8, 42); + const qb = turboQuantizeToTypedArray(v, TensorType.INT8, 42); + + // Identical input + same seed = identical output + expect(cosineSimilarity(qa, qb)).toBeCloseTo(1, 10); + }); + + test("should handle zero vectors", () => { + const vector = new Float32Array([0, 0, 0, 0]); + const result = turboQuantizeToTypedArray(vector, TensorType.INT8); + expect(result.length).toBe(4); + // All values should be 0 (or the midpoint for unsigned) + for (let i = 0; i < result.length; i++) { + expect(result[i]).toBe(0); + } + }); + + test("should produce values within type range for Int8", () => { + const d = 256; + const vector = new Float32Array(d); + for (let i = 0; i < d; i++) vector[i] = Math.random() * 10 - 5; + + const result = turboQuantizeToTypedArray(vector, TensorType.INT8); + for (let i = 0; i < result.length; i++) { + expect(result[i]).toBeGreaterThanOrEqual(-128); + expect(result[i]).toBeLessThanOrEqual(127); + } + }); + + test("should produce values within type range for Uint8", () => { + const d = 256; + const vector = new Float32Array(d); + for (let i = 0; i < d; i++) vector[i] = Math.random() * 10 - 5; + + const result = turboQuantizeToTypedArray(vector, TensorType.UINT8); + for (let i = 0; i < result.length; i++) { + expect(result[i]).toBeGreaterThanOrEqual(0); + expect(result[i]).toBeLessThanOrEqual(255); + } + }); + }); + + describe("roundtrip quality across bit widths", () => { + const d = 128; + const original = new Float32Array(d); + for (let i = 0; i < d; i++) original[i] = Math.sin(i * 0.1) * (1 + Math.cos(i * 0.05)); + + for (const bits of [2, 3, 4, 6, 8]) { + test(`should maintain reasonable quality at ${bits} bits`, () => { + const quantized = turboQuantize(original, { bits, seed: 42 }); + const reconstructed = turboDequantize(quantized); + const sim = cosineSimilarity(original, reconstructed); + + // Quality expectations scale with bits + if (bits >= 6) { + 
expect(sim).toBeGreaterThan(0.95); + } else if (bits >= 4) { + expect(sim).toBeGreaterThan(0.85); + } else { + expect(sim).toBeGreaterThan(0.5); // Even 2-bit should preserve direction + } + }); + } + }); +}); diff --git a/packages/util/src/schema-entry.ts b/packages/util/src/schema-entry.ts index a0e50f573..c00e4e4fa 100644 --- a/packages/util/src/schema-entry.ts +++ b/packages/util/src/schema-entry.ts @@ -15,3 +15,4 @@ export * from "./vector/TypedArray"; export * from "./vector/TypedArrayUtils"; export * from "./vector/VectorSimilarityUtils"; export * from "./vector/VectorUtils"; +export * from "./vector/TurboQuantize"; diff --git a/packages/util/src/vector/TurboQuantize.ts b/packages/util/src/vector/TurboQuantize.ts new file mode 100644 index 000000000..6820325b6 --- /dev/null +++ b/packages/util/src/vector/TurboQuantize.ts @@ -0,0 +1,580 @@ +/** + * @license + * Copyright 2025 Steven Roussey + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * TurboQuant: Near-optimal vector quantization using randomized rotation + * and optimal per-coordinate scalar quantization. + * + * Based on "TurboQuant: Online Vector Quantization with Near-optimal Distortion Rate" + * by Zandieh, Daliri, Hadian, and Mirrokni (2025). + * + * The key insight: applying a random orthogonal rotation to a unit vector causes its + * coordinates to concentrate around a known Beta distribution. This enables near-optimal + * scalar quantization per coordinate without needing to observe the data distribution first. + * + * Properties: + * - Data-oblivious: no training or codebook construction needed + * - Per-vector: each vector quantized independently (streaming-friendly) + * - Near-optimal: within ~2.7x of theoretical distortion limit at all bit-widths + * - Preserves inner products for accurate similarity search + */ + +import { TensorType } from "./Tensor"; +import type { TypedArray } from "./TypedArray"; + +/** + * Configuration for TurboQuant quantization. 
/**
 * Configuration for TurboQuant quantization.
 */
export interface TurboQuantizeOptions {
  /** Number of bits per dimension (1-8). Lower = more compression, higher distortion. */
  readonly bits?: number;
  /** Seed for deterministic random rotation. If omitted, uses a fixed default seed. */
  readonly seed?: number;
}

/**
 * Result of TurboQuant quantization, containing everything needed for dequantization.
 */
export interface TurboQuantizeResult {
  /** Quantized codes packed into a Uint8Array */
  readonly codes: Uint8Array;
  /** Number of bits per dimension used */
  readonly bits: number;
  /** Original vector dimensionality */
  readonly dimensions: number;
  /**
   * Padded dimensionality used during rotation (next power of 2 >= dimensions).
   * The codes array covers this many coordinates; the extra coordinates beyond
   * `dimensions` are discarded during dequantization.
   */
  readonly paddedDimensions: number;
  /** The seed used for the random rotation (needed for dequantization) */
  readonly seed: number;
  /** L2 norm of the original vector (needed to reconstruct scale) */
  readonly norm: number;
}

const DEFAULT_SEED = 42;

/**
 * Number of (sign-flip + Walsh-Hadamard) rounds applied by the rotation.
 * The forward and inverse rotations must agree on this value.
 */
const ROTATION_ROUNDS = 3;

/**
 * Simple deterministic PRNG (xorshift32) used to draw the random sign flips of
 * the rotation. The same seed always yields the same sequence.
 *
 * The seed is coerced to a 32-bit integer by the XOR below, then mixed with a
 * constant so that seed 0 does not produce the all-zero state (xorshift32
 * is stuck at zero forever once its state is zero).
 *
 * @param seed - Integer seed
 * @returns A function producing successive values in [0, 1)
 */
function createPrng(seed: number): () => number {
  // The `|| 1` guards the single seed whose XOR with the constant is zero
  // (seed === 0x9e3779b9 when read as an unsigned 32-bit value).
  let state = ((seed ^ 0x9e3779b9) >>> 0) || 1;
  return () => {
    state ^= state << 13;
    // Logical (zero-filling) shift: an arithmetic `>>` would sign-extend the
    // signed 32-bit state and make this step non-invertible.
    state ^= state >>> 17;
    state ^= state << 5;
    // Reinterpret as unsigned and scale to [0, 1)
    return (state >>> 0) / 4294967296;
  };
}

/**
 * Applies a randomized rotation to a vector using the fast Walsh-Hadamard transform
 * combined with random sign flips. This approximates a random orthogonal rotation
 * in O(d log d) time instead of O(d²).
 *
 * The input is zero-padded to the next power of 2 before the transform, and all
 * padded coordinates are returned so that the transform stays fully invertible.
 *
 * @param values - Input coordinates (not modified)
 * @param seed - Seed selecting the sign-flip pattern
 * @returns A new array of length `nextPowerOf2(values.length)`
 */
function randomRotate(values: Float64Array, seed: number): Float64Array {
  const paddedLen = nextPowerOf2(values.length);
  const result = new Float64Array(paddedLen);
  result.set(values);

  const prng = createPrng(seed);

  for (let round = 0; round < ROTATION_ROUNDS; round++) {
    // Random sign flips (diagonal Rademacher matrix)
    for (let i = 0; i < paddedLen; i++) {
      if (prng() < 0.5) {
        result[i] = -result[i];
      }
    }

    // Normalized Walsh-Hadamard transform, in place
    fastWalshHadamard(result);
  }

  return result;
}

/**
 * Inverse of randomRotate: undoes the rotation.
 * The input must be the full padded array returned by randomRotate (its length
 * is used as the padded length).
 *
 * @param values - Rotated coordinates (not modified)
 * @param seed - The seed that was passed to randomRotate
 * @returns A new array of the same length holding the un-rotated coordinates
 */
function inverseRandomRotate(values: Float64Array, seed: number): Float64Array {
  const paddedLen = values.length;
  const result = values.slice();

  // The PRNG can only run forwards, so replay every draw of the forward pass
  // first (round 0, then 1, ...) and consume the rounds in reverse afterwards.
  const prng = createPrng(seed);
  const flips = new Uint8Array(ROTATION_ROUNDS * paddedLen);
  for (let k = 0; k < flips.length; k++) {
    flips[k] = prng() < 0.5 ? 1 : 0;
  }

  for (let round = ROTATION_ROUNDS - 1; round >= 0; round--) {
    // The normalized WHT is its own inverse
    fastWalshHadamard(result);

    // Undo this round's sign flips
    const offset = round * paddedLen;
    for (let i = 0; i < paddedLen; i++) {
      if (flips[offset + i] === 1) {
        result[i] = -result[i];
      }
    }
  }

  return result;
}

/**
 * In-place Fast Walsh-Hadamard Transform, scaled by 1/sqrt(n) so that applying
 * it twice returns the input.
 *
 * @param data - Array to transform in place; its length must be a power of 2
 * @throws Error if the length is not a power of 2
 */
function fastWalshHadamard(data: Float64Array): void {
  const n = data.length;
  if ((n & (n - 1)) !== 0) {
    // The butterfly below would read past the end of the array otherwise.
    throw new Error(`fastWalshHadamard: length must be a power of 2, got ${n}`);
  }
  const norm = 1 / Math.sqrt(n);

  for (let halfSize = 1; halfSize < n; halfSize *= 2) {
    for (let i = 0; i < n; i += halfSize * 2) {
      for (let j = i; j < i + halfSize; j++) {
        const a = data[j];
        const b = data[j + halfSize];
        data[j] = a + b;
        data[j + halfSize] = a - b;
      }
    }
  }

  for (let i = 0; i < n; i++) {
    data[i] *= norm;
  }
}

/**
 * Returns the smallest power of 2 that is >= n (1 for n <= 1).
 */
function nextPowerOf2(n: number): number {
  let p = 1;
  // Multiply instead of `<<=`: a 32-bit shift wraps to a negative value and
  // then to 0 once p passes 2^30, which would loop forever.
  while (p < n) p *= 2;
  return p;
}

/**
 * Returns quantization parameters for uniform scalar quantization over the range
 * [-scale, scale].
 *
 * After random rotation in paddedLen-dimensional space, each coordinate of a
 * unit vector has variance 1/paddedLen. A fixed range of ±3 standard deviations
 * is used as the clipping boundary of a uniform quantizer with 2^bits levels.
 * No non-uniform or distribution-fitted quantization is performed.
 *
 * @param bits - Bits per coordinate
 * @param paddedLen - Padded dimensionality used by the rotation
 */
function getQuantizationParams(
  bits: number,
  paddedLen: number
): { readonly levels: number; readonly scale: number } {
  const levels = 1 << bits;
  const coverage = 3.0; // clip at ±3 standard deviations
  const scale = coverage / Math.sqrt(paddedLen);
  return { levels, scale };
}

/**
 * Quantizes a single float value to an integer code in [0, levels-1].
 * Values outside [-scale, scale] are clamped to the nearest end code.
 */
function quantizeScalar(value: number, scale: number, levels: number): number {
  // Map from [-scale, scale] to [0, 1]
  const normalized = (value + scale) / (2 * scale);
  const clamped = Math.max(0, Math.min(1, normalized));
  return Math.round(clamped * (levels - 1));
}

/**
 * Dequantizes an integer code back to a float value (reconstruction point)
 * in [-scale, scale].
 */
function dequantizeScalar(code: number, scale: number, levels: number): number {
  const normalized = code / (levels - 1);
  return normalized * 2 * scale - scale;
}

/**
 * Packs an array of codes (each in [0, 2^bits - 1]) into a compact Uint8Array.
 * Codes are written least-significant bit first; for sub-byte bit widths,
 * multiple codes share a byte and a code may straddle two bytes.
 */
function packCodes(codes: number[], bits: number): Uint8Array {
  const totalBits = codes.length * bits;
  const packed = new Uint8Array(Math.ceil(totalBits / 8));

  let bitPos = 0;
  for (let i = 0; i < codes.length; i++) {
    let value = codes[i];
    let remaining = bits;
    while (remaining > 0) {
      const byteIdx = bitPos >> 3;
      const bitOffset = bitPos & 7;
      // Write only as many bits as still fit in the current byte
      const bitsToWrite = Math.min(remaining, 8 - bitOffset);
      const mask = (1 << bitsToWrite) - 1;
      packed[byteIdx] |= (value & mask) << bitOffset;
      value >>= bitsToWrite;
      bitPos += bitsToWrite;
      remaining -= bitsToWrite;
    }
  }

  return packed;
}

/**
 * Unpacks codes from a compact Uint8Array back to an array of integers
 * (the inverse of packCodes).
 *
 * @throws Error if the buffer is too small for the requested count and bit width
 */
function unpackCodes(packed: Uint8Array, bits: number, count: number): number[] {
  const expectedBytes = Math.ceil((count * bits) / 8);
  if (packed.length < expectedBytes) {
    throw new Error(
      `unpackCodes: buffer too small - need ${expectedBytes} bytes for ${count} codes at ${bits} bits, got ${packed.length}`
    );
  }
  const codes: number[] = new Array(count);

  let bitPos = 0;
  for (let i = 0; i < count; i++) {
    let code = 0;
    let remaining = bits;
    let shift = 0;
    while (remaining > 0) {
      const byteIdx = bitPos >> 3;
      const bitOffset = bitPos & 7;
      const bitsToRead = Math.min(remaining, 8 - bitOffset);
      const mask = (1 << bitsToRead) - 1;
      code |= ((packed[byteIdx] >> bitOffset) & mask) << shift;
      shift += bitsToRead;
      bitPos += bitsToRead;
      remaining -= bitsToRead;
    }
    codes[i] = code;
  }

  return codes;
}

/**
 * Quantizes a vector using the TurboQuant-style pipeline implemented in this module.
 *
 * Steps:
 * 1. Normalize the vector and record its L2 norm
 * 2. Apply randomized rotation (sign flips + Walsh-Hadamard transform)
 * 3. Quantize each rotated coordinate with a uniform scalar quantizer
 * 4. Pack the codes into a compact bit representation
 *
 * A zero vector is accepted: its norm is recorded as 0 and it dequantizes to zeros.
 *
 * @param vector - Input vector (any TypedArray)
 * @param options - Quantization options (bits per dimension, optional seed);
 *   defaults are 4 bits and seed 42
 * @returns Compact quantized representation
 * @throws Error if `bits` is not an integer in [1, 8], if the vector is empty,
 *   or if the vector's L2 norm is not finite (NaN/Infinity components)
 */
export function turboQuantize(
  vector: TypedArray,
  options: TurboQuantizeOptions | undefined
): TurboQuantizeResult {
  const bits = options?.bits ?? 4;
  const seed = options?.seed ?? DEFAULT_SEED;

  if (bits < 1 || bits > 8 || !Number.isInteger(bits)) {
    throw new Error(`TurboQuant bits must be an integer between 1 and 8, got ${bits}`);
  }

  const d = vector.length;
  if (d === 0) {
    throw new Error("Cannot quantize an empty vector");
  }

  // Step 1: Compute norm and normalize
  let sumOfSquares = 0;
  for (let i = 0; i < d; i++) {
    const x = vector[i];
    sumOfSquares += x * x;
  }
  const norm = Math.sqrt(sumOfSquares);
  if (!Number.isFinite(norm)) {
    // Without this check a NaN norm fails the `norm > 0` test below and the
    // input would be silently encoded as a zero vector.
    throw new Error(`Cannot quantize a vector whose L2 norm is not finite (got ${norm})`);
  }

  const values = new Float64Array(d);
  if (norm > 0) {
    for (let i = 0; i < d; i++) {
      values[i] = vector[i] / norm;
    }
  }

  // Step 2: Random rotation — returns all paddedLen coordinates
  const paddedLen = nextPowerOf2(d);
  const rotated = randomRotate(values, seed);

  // Step 3: Scalar quantization per coordinate (all paddedLen)
  const { levels, scale } = getQuantizationParams(bits, paddedLen);
  const codes: number[] = new Array(paddedLen);
  for (let i = 0; i < paddedLen; i++) {
    codes[i] = quantizeScalar(rotated[i], scale, levels);
  }

  // Step 4: Pack into compact representation
  return {
    codes: packCodes(codes, bits),
    bits,
    dimensions: d,
    paddedDimensions: paddedLen,
    seed,
    norm,
  };
}

/**
 * Dequantizes a TurboQuant result back to a Float32Array.
 *
 * Steps:
 * 1. Unpack the codes from the compact representation
 * 2. Reconstruct the rotated coordinates from quantization levels
 * 3. Apply inverse rotation
 * 4. Crop to the original dimensions and scale by the original norm
 *
 * @param quantized - The TurboQuant quantization result
 * @returns Reconstructed vector as Float32Array of length `quantized.dimensions`
 * @throws Error if `quantized.codes` is too short for `paddedDimensions` codes
 */
export function turboDequantize(quantized: TurboQuantizeResult): Float32Array {
  const { codes, bits, dimensions, paddedDimensions, seed, norm } = quantized;

  // Step 1: Unpack all paddedDimensions codes
  const unpacked = unpackCodes(codes, bits, paddedDimensions);

  // Step 2: Reconstruct rotated coordinates (all paddedDimensions)
  const { levels, scale } = getQuantizationParams(bits, paddedDimensions);
  const rotated = new Float64Array(paddedDimensions);
  for (let i = 0; i < paddedDimensions; i++) {
    rotated[i] = dequantizeScalar(unpacked[i], scale, levels);
  }

  // Step 3: Inverse rotation (returns full paddedDimensions array)
  const unrotated = inverseRandomRotate(rotated, seed);

  // Step 4: Crop to original dimensions and scale by original norm
  const result = new Float32Array(dimensions);
  for (let i = 0; i < dimensions; i++) {
    result[i] = unrotated[i] * norm;
  }

  return result;
}

/**
 * Estimates the inner product between two TurboQuant-quantized vectors
 * without applying the inverse rotation. The dot product is taken directly
 * between the reconstructed rotated coordinates and scaled by both norms.
 *
 * @param a - First quantized vector
 * @param b - Second quantized vector
 * @returns Estimated inner product
 * @throws Error if the vectors differ in dimensions, bit width, or rotation seed
 */
export function turboQuantizedInnerProduct(
  a: TurboQuantizeResult,
  b: TurboQuantizeResult
): number {
  if (a.dimensions !== b.dimensions) {
    throw new Error("Vectors must have the same dimensions");
  }
  if (a.bits !== b.bits) {
    throw new Error("Vectors must use the same bit width");
  }
  if (a.seed !== b.seed) {
    throw new Error("Vectors must use the same rotation seed");
  }

  const paddedLen = a.paddedDimensions;
  const { levels, scale } = getQuantizationParams(a.bits, paddedLen);

  // Unpack both code arrays (paddedLen codes each)
  const codesA = unpackCodes(a.codes, a.bits, paddedLen);
  const codesB = unpackCodes(b.codes, b.bits, paddedLen);

  // Compute the dot product in the rotated (quantized) domain.
  // Since the rotation R is orthogonal, inner products are preserved:
  //   <Rx, Ry> = <x, y>
  let dot = 0;
  for (let i = 0; i < paddedLen; i++) {
    const va = dequantizeScalar(codesA[i], scale, levels);
    const vb = dequantizeScalar(codesB[i], scale, levels);
    dot += va * vb;
  }

  // Both inputs were normalized before rotation, so restore the scale
  return dot * a.norm * b.norm;
}
+ * + * @param a - First quantized vector + * @param b - Second quantized vector + * @returns Estimated cosine similarity in [-1, 1] + */ +export function turboQuantizedCosineSimilarity( + a: TurboQuantizeResult, + b: TurboQuantizeResult +): number { + if (a.norm === 0 || b.norm === 0) return 0; + // Inner product of unit vectors = cosine similarity + // turboQuantizedInnerProduct includes norm scaling, so divide it out + return turboQuantizedInnerProduct(a, b) / (a.norm * b.norm); +} + +/** Integer target types supported by turboQuantizeToTypedArray */ +const INTEGER_TARGET_RANGES = { + [TensorType.INT8]: { signed: true, max: 127 }, + [TensorType.UINT8]: { signed: false, max: 255 }, + [TensorType.INT16]: { signed: true, max: 32767 }, + [TensorType.UINT16]: { signed: false, max: 65535 }, +} as const; + +/** + * Quantizes a vector using TurboQuant rotation directly into a byte-aligned TypedArray. + * + * Unlike the packed `turboQuantize`, this outputs a standard TypedArray (Int8Array, + * Uint8Array, Int16Array, Uint16Array) with the **same `.length`** as the input vector. + * This means the output works transparently with existing storage backends and + * similarity search (cosineSimilarity requires matching lengths). + * + * The rotation spreads information across all coordinates and concentrates their + * distribution, yielding better distortion than naive linear quantization at the + * same byte width. + * + * Note: The vector norm is not preserved (cosine similarity is scale-invariant, + * so this is fine for similarity search). + * + * @param vector - Input vector (any TypedArray) + * @param targetType - Target integer type (INT8, UINT8, INT16, UINT16) + * @param seed - Seed for the random rotation (default: 42). All vectors in the + * same collection must use the same seed for similarity search to work. 
+ * @returns TypedArray of the target type with `.length === vector.length` + */ +export function turboQuantizeToTypedArray( + vector: TypedArray, + targetType: TensorType, + seed: number = DEFAULT_SEED +): TypedArray { + const range = INTEGER_TARGET_RANGES[targetType as keyof typeof INTEGER_TARGET_RANGES]; + if (!range) { + throw new Error( + `turboQuantizeToTypedArray only supports integer target types (int8, uint8, int16, uint16), got "${targetType}"` + ); + } + + const d = vector.length; + if (d === 0) { + throw new Error("Cannot quantize an empty vector"); + } + + // Step 1: Normalize to unit vector + let norm = 0; + for (let i = 0; i < d; i++) { + norm += vector[i] * vector[i]; + } + norm = Math.sqrt(norm); + + const values = new Float64Array(d); + if (norm > 0) { + for (let i = 0; i < d; i++) { + values[i] = vector[i] / norm; + } + } + + // Step 2: Random rotation (spreads information, concentrates distribution) + // randomRotate returns all paddedLen coordinates; we only use the first d. + const paddedLen = nextPowerOf2(d); + const rotated = randomRotate(values, seed); + + // Step 3: Map rotated coordinates to target integer range + // After rotation in paddedLen-dimensional space, coordinates have std dev ≈ 1/sqrt(paddedLen). + const coverage = 3.0; + const scale = coverage / Math.sqrt(paddedLen); + + if (range.signed) { + // Map [-scale, scale] → [-max, max] + const max = range.max; + const result = targetType === TensorType.INT8 ? new Int8Array(d) : new Int16Array(d); + for (let i = 0; i < d; i++) { + const clamped = Math.max(-scale, Math.min(scale, rotated[i])); + result[i] = Math.round((clamped / scale) * max); + } + return result; + } else { + // Map [-scale, scale] → [0, max] + const max = range.max; + const result = targetType === TensorType.UINT8 ? 
new Uint8Array(d) : new Uint16Array(d); + for (let i = 0; i < d; i++) { + const clamped = Math.max(-scale, Math.min(scale, rotated[i])); + result[i] = Math.round(((clamped + scale) / (2 * scale)) * max); + } + return result; + } +} + +/** + * Calculates the storage size in bytes for a TurboQuant-quantized vector. + * + * Because the Walsh-Hadamard transform requires a power-of-2 length, the vector + * is zero-padded to the next power of 2 before quantization. The codes buffer + * therefore covers `nextPowerOf2(dimensions)` coordinates, not `dimensions`. + * + * @param dimensions - Vector dimensionality + * @param bits - Bits per dimension + * @returns Storage size in bytes (codes only, excluding metadata) + */ +export function turboQuantizeStorageBytes(dimensions: number, bits: number): number { + return Math.ceil((nextPowerOf2(dimensions) * bits) / 8); +} + +/** + * Calculates the compression ratio compared to Float32 storage. + * + * @param dimensions - Vector dimensionality + * @param bits - Bits per dimension + * @returns Compression ratio (e.g., 8.0 means 8x smaller) + */ +export function turboQuantizeCompressionRatio(dimensions: number, bits: number): number { + const originalBytes = dimensions * 4; // Float32 = 4 bytes per dim + const quantizedBytes = turboQuantizeStorageBytes(dimensions, bits); + return originalBytes / quantizedBytes; +}