-
Notifications
You must be signed in to change notification settings - Fork 1
Add TurboQuant vector quantization algorithm #354
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
cc47910
9b209b1
051f0b7
447c7b0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -10,11 +10,19 @@ import { | |
| FromSchema, | ||
| normalizeNumberArray, | ||
| TensorType, | ||
| turboQuantizeToTypedArray, | ||
| TypedArray, | ||
| TypedArraySchema, | ||
| TypedArraySchemaOptions, | ||
| } from "@workglow/util/schema"; | ||
|
|
||
| export const QuantizationMethod = { | ||
| LINEAR: "linear", | ||
| TURBO: "turbo", | ||
| } as const; | ||
|
|
||
| export type QuantizationMethod = (typeof QuantizationMethod)[keyof typeof QuantizationMethod]; | ||
|
|
||
| const inputSchema = { | ||
| type: "object", | ||
| properties: { | ||
|
|
@@ -48,6 +56,21 @@ const inputSchema = { | |
| description: "Normalize vector before quantization", | ||
| default: true, | ||
| }, | ||
| method: { | ||
| type: "string", | ||
| enum: Object.values(QuantizationMethod), | ||
| title: "Method", | ||
| description: | ||
| "Quantization method: 'linear' for simple min-max scaling, 'turbo' for TurboQuant (randomized rotation + optimal scalar quantization, better distortion than linear at the same bit width). Turbo requires an integer targetType (int8, uint8, int16, uint16).", | ||
| default: QuantizationMethod.LINEAR, | ||
| }, | ||
| turboSeed: { | ||
| type: "integer", | ||
| title: "TurboQuant Seed", | ||
| description: | ||
| "Seed for the random rotation in TurboQuant. All vectors in the same collection must use the same seed for similarity search to work.", | ||
| default: 42, | ||
| }, | ||
| }, | ||
| required: ["vector", "targetType"], | ||
| additionalProperties: false, | ||
|
|
@@ -117,12 +140,24 @@ export class VectorQuantizeTask extends Task< | |
| } | ||
|
|
||
| override async executeReactive(input: VectorQuantizeTaskInput): Promise<VectorQuantizeTaskOutput> { | ||
| const { vector, targetType, normalize = true } = input; | ||
| const { | ||
| vector, | ||
| targetType, | ||
| normalize = true, | ||
| method = QuantizationMethod.LINEAR, | ||
| turboSeed = 42, | ||
| } = input; | ||
| const isArray = Array.isArray(vector); | ||
| const vectors = isArray ? vector : [vector]; | ||
| const originalType = this.getVectorType(vectors[0]); | ||
|
|
||
| const quantized = vectors.map((v) => this.vectorQuantize(v, targetType, normalize)); | ||
| let quantized: TypedArray[]; | ||
|
|
||
| if (method === QuantizationMethod.TURBO) { | ||
| quantized = vectors.map((v) => turboQuantizeToTypedArray(v, targetType, turboSeed)); | ||
| } else { | ||
| quantized = vectors.map((v) => this.vectorQuantize(v, targetType, normalize)); | ||
| } | ||
|
Comment on lines 142 to +160
|
||
|
|
||
| return { | ||
| vector: isArray ? quantized : quantized[0], | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the `TURBO` branch, the task returns `turboDequantize(...)` (a `Float32Array`) but still reports `targetType` as the requested type, and does not actually quantize to `targetType`. This is an observable mismatch (e.g., `targetType: INT8` can return a `Float32Array`) and defeats the task’s “reduce storage” purpose. Either (1) change the output schema to return TurboQuant’s packed codes + metadata, (2) set `targetType` to FLOAT32 for the turbo path, and/or (3) post-process the dequantized vector through `vectorQuantize(..., targetType, ...)` if you intend turbo to be a preconditioning step.