Usage Guide

Getting Started

Installation

Python (from PyPI):

pip install gigavector

Python (from source):

cd python
pip install .

C Library (from source):

# Using Make
make lib

# Using CMake
cmake -B build -DCMAKE_BUILD_TYPE=Release
cmake --build build

Quick Start Example

Python:

from gigavector import Database, DistanceType, IndexType

# Create a database
with Database.open("my_db.db", dimension=128, index=IndexType.HNSW) as db:
    # Add a vector
    db.add_vector([0.1] * 128, metadata={"id": "1", "category": "A"})

    # Search
    results = db.search([0.1] * 128, k=5, distance=DistanceType.EUCLIDEAN)
    for hit in results:
        print(f"Distance: {hit.distance}, Metadata: {hit.vector.metadata}")

C:

#include "gigavector/gigavector.h"

// Create database
GV_Database *db = gv_db_open("my_db.db", 128, GV_INDEX_TYPE_HNSW);

// Add vector
float data[128];
for (int i = 0; i < 128; i++) data[i] = 0.1f;
gv_db_add_vector_with_metadata(db, data, 128, "id", "1");

// Search
float query[128];
for (int i = 0; i < 128; i++) query[i] = 0.1f;
GV_SearchResult results[5];
int found = gv_db_search(db, query, 5, results, GV_DISTANCE_EUCLIDEAN);

// Process results
for (int i = 0; i < found; i++) {
    printf("Distance: %f\n", results[i].distance);
}

gv_db_close(db);

Choosing the Right Index

GigaVector supports multiple index types. Choose based on your requirements:

Index Type	Best For	Dataset Size	Dimensions	Search Speed	Memory
KD-Tree	Exact search, small datasets	< 1M	< 100	Fast (small)	Low
HNSW	General purpose, large datasets	1K - 1B+	Any	Very Fast	Medium
IVFPQ	Very large datasets, memory constrained	100K+	64+	Fast	Very Low
Sparse	Sparse vectors, text embeddings	Any	High (sparse)	Fast	Low

Quick Decision Guide:

Small dataset (< 100K vectors) and need exact results? → KD-Tree
Large dataset and need fast approximate search? → HNSW
Very large dataset and memory is critical? → IVFPQ
Sparse vectors (most dimensions are zero)? → Sparse Index

See the Performance Tuning Guide for detailed recommendations.

Python Usage

Creating a Database

from gigavector import Database, IndexType

# In-memory database
db = Database.open(None, dimension=128, index=IndexType.HNSW)

# Persistent database
db = Database.open("data.db", dimension=128, index=IndexType.HNSW)

# Auto-select index type
db = Database.open_auto("data.db", dimension=128, expected_count=1000000)

# Memory-mapped read-only database
db = Database.open_mmap("data.db", dimension=128)

Adding Vectors

# Simple vector
db.add_vector([0.1, 0.2, 0.3, ...])

# With metadata
db.add_vector(
    [0.1, 0.2, 0.3, ...],
    metadata={"id": "123", "category": "electronics", "price": "99.99"}
)

# Batch insertion
vectors = [[random.random() for _ in range(128)] for _ in range(1000)]
db.add_vectors(vectors)

Searching

from gigavector import DistanceType

# Basic search
results = db.search([0.1] * 128, k=10, distance=DistanceType.EUCLIDEAN)

# Filtered search
results = db.search(
    [0.1] * 128, k=10,
    filter_metadata=("category", "electronics")
)

# Advanced filter expression
results = db.search_with_filter_expr(
    [0.1] * 128, k=10,
    filter_expr='category == "electronics" AND price >= "50"'
)

# Range search (find all within radius)
results = db.range_search(
    [0.1] * 128, radius=0.5, max_results=100
)

# Batch search
queries = [[random.random() for _ in range(128)] for _ in range(10)]
all_results = db.search_batch(queries, k=5)

Managing Data

# Update a vector
db.update_vector(0, [0.2, 0.3, 0.4, ...])

# Update metadata
db.update_metadata(0, {"price": "149.99", "updated": "true"})

# Delete a vector
db.delete_vector(0)

# Save database
db.save("backup.db")

Configuration

from gigavector import HNSWConfig, IVFPQConfig

# HNSW with custom config
hnsw_config = HNSWConfig(
    M=32,
    ef_construction=200,
    ef_search=50,
    use_binary_quant=True
)
db = Database.open("db.db", 128, IndexType.HNSW, hnsw_config=hnsw_config)

# IVFPQ with custom config
ivfpq_config = IVFPQConfig(
    nlist=256,
    m=16,
    nprobe=16,
    default_rerank=32
)
db = Database.open(None, 128, IndexType.IVFPQ, ivfpq_config=ivfpq_config)
db.train_ivfpq(training_data)  # Must train before use

Resource Management

# Context manager (recommended)
with Database.open("db.db", 128) as db:
    db.add_vector([0.1] * 128)
    # Automatically closed on exit

# Manual management
db = Database.open("db.db", 128)
try:
    # Use database
    pass
finally:
    db.close()

# Resource limits
db.set_resource_limits(
    max_memory_bytes=1024 * 1024 * 1024,  # 1GB
    max_vectors=1000000,
    max_concurrent_operations=100
)

Monitoring and Statistics

# Basic statistics
stats = db.get_stats()
print(f"Total inserts: {stats.total_inserts}")
print(f"Total queries: {stats.total_queries}")

# Detailed statistics
detailed = db.get_detailed_stats()
print(f"QPS: {detailed['queries_per_second']}")
print(f"Memory: {detailed['memory']['total_bytes'] / 1024 / 1024:.2f} MB")
print(f"Recall: {detailed['recall']['avg_recall']:.2%}")

# Health check — returns 0 (healthy), -1 (degraded), or positive (unhealthy)
health = db.health_check()

C API Usage

Basic Operations

#include "gigavector/gigavector.h"
#include <stdio.h>
#include <stdlib.h>

int main() {
    // Create database
    GV_Database *db = gv_db_open("example.db", 128, GV_INDEX_TYPE_HNSW);
    if (!db) {
        fprintf(stderr, "Failed to create database\n");
        return 1;
    }

    // Add vector
    float data[128];
    for (int i = 0; i < 128; i++) {
        data[i] = (float)rand() / RAND_MAX;
    }

    int rc = gv_db_add_vector_with_metadata(
        db, data, 128, "id", "1"
    );
    if (rc != 0) {
        fprintf(stderr, "Failed to add vector\n");
        gv_db_close(db);
        return 1;
    }

    // Search
    float query[128];
    for (int i = 0; i < 128; i++) {
        query[i] = (float)rand() / RAND_MAX;
    }

    GV_SearchResult results[10];
    int found = gv_db_search(
        db, query, 10, results, GV_DISTANCE_EUCLIDEAN
    );

    if (found > 0) {
        printf("Found %d results:\n", found);
        for (int i = 0; i < found; i++) {
            printf("  Distance: %f\n", results[i].distance);
        }
    }

    // Save and close
    gv_db_save(db, "example.db");
    gv_db_close(db);
    return 0;
}

Error Handling

GV_Database *db = gv_db_open("db.db", 128, GV_INDEX_TYPE_HNSW);
if (!db) {
    // Handle error - check errno or log message
    perror("gv_db_open");
    return 1;
}

int rc = gv_db_add_vector(db, data, 128);
if (rc != 0) {
    // Handle error
    fprintf(stderr, "Failed to add vector\n");
    gv_db_close(db);
    return 1;
}

Memory Management

// Vectors are managed by the database
// You don't need to free vectors returned from search results
// The database owns all vector data

// Always close the database when done
gv_db_close(db);

Configuration

// HNSW configuration
GV_HNSWConfig hnsw_config = {
    .M = 32,
    .efConstruction = 200,
    .efSearch = 50,
    .use_binary_quant = 1,
    .quant_rerank = 20
};

GV_Database *db = gv_db_open_with_hnsw_config(
    "db.db", 128, GV_INDEX_TYPE_HNSW, &hnsw_config
);

// IVFPQ configuration
GV_IVFPQConfig ivfpq_config = {
    .nlist = 256,
    .m = 16,
    .nbits = 8,
    .nprobe = 16,
    .default_rerank = 32
};

GV_Database *db_ivfpq = gv_db_open_with_ivfpq_config(
    "ivfpq.db", 128, GV_INDEX_TYPE_IVFPQ, &ivfpq_config
);

// Train IVFPQ
float training_data[1000 * 128];
// ... populate training data ...
gv_db_ivfpq_train(db_ivfpq, training_data, 1000, 128);

HTTP REST Server

Start GigaVector as an HTTP server for language-agnostic access.

Starting the Server

C:

#include "gigavector/gv_server.h"

GV_Database *db = gv_db_open("vectors.db", 128, GV_INDEX_TYPE_HNSW);

GV_ServerConfig config;
gv_server_config_init(&config);
config.port = 6969;
config.enable_cors = 1;

GV_Server *server = gv_server_create(db, &config);
gv_server_start(server);

// Server runs until stopped
gv_server_stop(server);
gv_server_destroy(server);
gv_db_close(db);

Python (with dashboard):

from gigavector import Database, IndexType, serve_with_dashboard

db = Database.open(None, dimension=128, index=IndexType.HNSW)
server = serve_with_dashboard(db, port=6969)
# Dashboard at http://localhost:6969/dashboard
# Press Ctrl+C to stop
try:
    server.wait()
finally:
    server.stop()
    db.close()

API Examples

Health check:

curl http://localhost:6969/health

Add vector:

curl -X POST http://localhost:6969/vectors \
  -H "Content-Type: application/json" \
  -d '{"vector": [0.1, 0.2, ...], "metadata": {"id": "1"}}'

Search:

curl -X POST http://localhost:6969/search \
  -H "Content-Type: application/json" \
  -d '{"vector": [0.1, 0.2, ...], "k": 10}'

Get stats:

curl http://localhost:6969/stats

Dashboard info:

curl http://localhost:6969/api/dashboard/info

Web Dashboard

GigaVector ships a built-in web dashboard with a dark theme. It is a pure-Python feature — no libmicrohttpd or other C HTTP library is needed.

Dashboard views:

Overview -- live metrics: vector count, dimension, index type, QPS, health status (auto-refreshes every 2 s)
Vectors -- browse vectors by ID, add new vectors with metadata, delete
Search -- k-NN search form with distance metric selector and results table
Visualize -- vector space visualization
Monitoring -- real-time performance monitoring and metrics
Import -- bulk import vectors from files
SQL -- SQL query interface for structured queries
Namespaces -- manage logical namespace partitions
Graph -- graph database browser and query interface
Console -- raw REST API console with method dropdown, URL, body, and syntax-highlighted JSON response
Backups -- create and manage database backups
Cluster -- cluster topology and node management

Use serve_with_dashboard() (shown above) or the DashboardServer class directly.

CLI Tools

GigaVector includes command-line tools for database management.

gvbackup - Create backups

# Basic backup
gvbackup mydb.db backup.gvb

# Compressed backup
gvbackup --compress mydb.db backup.gvb.gz

# Include WAL
gvbackup --include-wal mydb.db backup.gvb

gvrestore - Restore from backup

# Restore to new database
gvrestore backup.gvb restored.db

# Restore with verification
gvrestore --verify backup.gvb restored.db

gvinspect - Inspect database

# Show database info
gvinspect mydb.db

# Output:
# Database: mydb.db
# Version: 0.8.1
# Vectors: 1,234,567
# Dimension: 128
# Index: HNSW (M=16, ef=200)
# Size: 156.2 MB

Graph Database and Knowledge Graph

GigaVector includes a property graph database and a knowledge graph layer that integrates vector embeddings with graph structure.

Building a Graph

from gigavector import GraphDB

g = GraphDB()
alice = g.add_node("Person")
bob = g.add_node("Person")
g.set_node_prop(alice, "name", "Alice")
g.set_node_prop(bob, "name", "Bob")

g.add_edge(alice, bob, "KNOWS", weight=1.0)

# Traverse and analyze
visited = g.bfs(alice, max_depth=3)
path = g.shortest_path(alice, bob)
pr = g.pagerank(alice)

# Persist
g.save("social.gvgr")

Knowledge Graph with Embeddings

from gigavector import KnowledgeGraph, KGConfig

kg = KnowledgeGraph(KGConfig(embedding_dimension=128))

# Add entities with embeddings
e1 = kg.add_entity("Alice", "Person", embedding=[0.1] * 128)
e2 = kg.add_entity("Anthropic", "Company", embedding=[0.2] * 128)
kg.add_relation(e1, "works_at", e2)

# SPO triple queries (None = wildcard)
triples = kg.query_triples(predicate="works_at")

# Semantic search over entity embeddings
results = kg.search_similar([0.15] * 128, k=5)

# Entity resolution and link prediction
resolved = kg.resolve_entity("Alice Smith", "Person", embedding=[0.1] * 128)
predictions = kg.predict_links(e1, k=5)

// C API equivalent
GV_GraphDB *g = gv_graph_create(NULL);
uint64_t n1 = gv_graph_add_node(g, "Person");
uint64_t n2 = gv_graph_add_node(g, "Person");
gv_graph_add_edge(g, n1, n2, "KNOWS", 1.0f);

GV_GraphPath path;
gv_graph_shortest_path(g, n1, n2, &path);
gv_graph_free_path(&path);
gv_graph_destroy(g);

For best practices on resource management, error handling, and batch operations, see the Python Bindings Guide and the C API Guide. For troubleshooting, see the Troubleshooting guide.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Usage Guide

Getting Started

Installation

Quick Start Example

Choosing the Right Index

Python Usage

Creating a Database

Adding Vectors

Searching

Managing Data

Configuration

Resource Management

Monitoring and Statistics

C API Usage

Basic Operations

Error Handling

Memory Management

Configuration

HTTP REST Server

Starting the Server

API Examples

Web Dashboard

CLI Tools

gvbackup - Create backups

gvrestore - Restore from backup

gvinspect - Inspect database

Graph Database and Knowledge Graph

Building a Graph

Knowledge Graph with Embeddings

FilesExpand file tree

usage.md

Latest commit

History

usage.md

File metadata and controls

Usage Guide

Getting Started

Installation

Quick Start Example

Choosing the Right Index

Python Usage

Creating a Database

Adding Vectors

Searching

Managing Data

Configuration

Resource Management

Monitoring and Statistics

C API Usage

Basic Operations

Error Handling

Memory Management

Configuration

HTTP REST Server

Starting the Server

API Examples

Web Dashboard

CLI Tools

gvbackup - Create backups

gvrestore - Restore from backup

gvinspect - Inspect database

Graph Database and Knowledge Graph

Building a Graph

Knowledge Graph with Embeddings