Python (from PyPI):
pip install gigavector

Python (from source):
cd python
pip install .

C Library (from source):
# Using Make
make lib
# Using CMake
cmake -B build -DCMAKE_BUILD_TYPE=Release
cmake --build build

Python:
from gigavector import Database, DistanceType, IndexType
# Create a database
with Database.open("my_db.db", dimension=128, index=IndexType.HNSW) as db:
# Add a vector
db.add_vector([0.1] * 128, metadata={"id": "1", "category": "A"})
# Search
results = db.search([0.1] * 128, k=5, distance=DistanceType.EUCLIDEAN)
for hit in results:
print(f"Distance: {hit.distance}, Metadata: {hit.vector.metadata}")

C:
#include "gigavector/gigavector.h"
// Create database
GV_Database *db = gv_db_open("my_db.db", 128, GV_INDEX_TYPE_HNSW);
// Add vector
float data[128];
for (int i = 0; i < 128; i++) data[i] = 0.1f;
gv_db_add_vector_with_metadata(db, data, 128, "id", "1");
// Search
float query[128];
for (int i = 0; i < 128; i++) query[i] = 0.1f;
GV_SearchResult results[5];
int found = gv_db_search(db, query, 5, results, GV_DISTANCE_EUCLIDEAN);
// Process results
for (int i = 0; i < found; i++) {
printf("Distance: %f\n", results[i].distance);
}
gv_db_close(db);

GigaVector supports multiple index types. Choose based on your requirements:
| Index Type | Best For | Dataset Size | Dimensions | Search Speed | Memory |
|---|---|---|---|---|---|
| KD-Tree | Exact search, small datasets | < 1M | < 100 | Fast (small) | Low |
| HNSW | General purpose, large datasets | 1K - 1B+ | Any | Very Fast | Medium |
| IVFPQ | Very large datasets, memory constrained | 100K+ | 64+ | Fast | Very Low |
| Sparse | Sparse vectors, text embeddings | Any | High (sparse) | Fast | Low |
Quick Decision Guide:
- Small dataset (< 100K vectors) and need exact results? → KD-Tree
- Large dataset and need fast approximate search? → HNSW
- Very large dataset and memory is critical? → IVFPQ
- Sparse vectors (most dimensions are zero)? → Sparse Index
See the Performance Tuning Guide for detailed recommendations.
from gigavector import Database, IndexType
# In-memory database
db = Database.open(None, dimension=128, index=IndexType.HNSW)
# Persistent database
db = Database.open("data.db", dimension=128, index=IndexType.HNSW)
# Auto-select index type
db = Database.open_auto("data.db", dimension=128, expected_count=1000000)
# Memory-mapped read-only database
db = Database.open_mmap("data.db", dimension=128)

# Simple vector
db.add_vector([0.1, 0.2, 0.3, ...])
# With metadata
db.add_vector(
[0.1, 0.2, 0.3, ...],
metadata={"id": "123", "category": "electronics", "price": "99.99"}
)
# Batch insertion
vectors = [[random.random() for _ in range(128)] for _ in range(1000)]
db.add_vectors(vectors)

from gigavector import DistanceType
# Basic search
results = db.search([0.1] * 128, k=10, distance=DistanceType.EUCLIDEAN)
# Filtered search
results = db.search(
[0.1] * 128, k=10,
filter_metadata=("category", "electronics")
)
# Advanced filter expression
results = db.search_with_filter_expr(
[0.1] * 128, k=10,
filter_expr='category == "electronics" AND price >= "50"'
)
# Range search (find all within radius)
results = db.range_search(
[0.1] * 128, radius=0.5, max_results=100
)
# Batch search
queries = [[random.random() for _ in range(128)] for _ in range(10)]
all_results = db.search_batch(queries, k=5)

# Update a vector
db.update_vector(0, [0.2, 0.3, 0.4, ...])
# Update metadata
db.update_metadata(0, {"price": "149.99", "updated": "true"})
# Delete a vector
db.delete_vector(0)
# Save database
db.save("backup.db")

from gigavector import HNSWConfig, IVFPQConfig
# HNSW with custom config
hnsw_config = HNSWConfig(
M=32,
ef_construction=200,
ef_search=50,
use_binary_quant=True
)
db = Database.open("db.db", 128, IndexType.HNSW, hnsw_config=hnsw_config)
# IVFPQ with custom config
ivfpq_config = IVFPQConfig(
nlist=256,
m=16,
nprobe=16,
default_rerank=32
)
db = Database.open(None, 128, IndexType.IVFPQ, ivfpq_config=ivfpq_config)
db.train_ivfpq(training_data) # Must train before use

# Context manager (recommended)
with Database.open("db.db", 128) as db:
db.add_vector([0.1] * 128)
# Automatically closed on exit
# Manual management
db = Database.open("db.db", 128)
try:
# Use database
pass
finally:
db.close()
# Resource limits
db.set_resource_limits(
max_memory_bytes=1024 * 1024 * 1024, # 1GB
max_vectors=1000000,
max_concurrent_operations=100
)

# Basic statistics
stats = db.get_stats()
print(f"Total inserts: {stats.total_inserts}")
print(f"Total queries: {stats.total_queries}")
# Detailed statistics
detailed = db.get_detailed_stats()
print(f"QPS: {detailed['queries_per_second']}")
print(f"Memory: {detailed['memory']['total_bytes'] / 1024 / 1024:.2f} MB")
print(f"Recall: {detailed['recall']['avg_recall']:.2%}")
# Health check — returns 0 (healthy), -1 (degraded), or positive (unhealthy)
health = db.health_check()

#include "gigavector/gigavector.h"
#include <stdio.h>
#include <stdlib.h>
int main() {
// Create database
GV_Database *db = gv_db_open("example.db", 128, GV_INDEX_TYPE_HNSW);
if (!db) {
fprintf(stderr, "Failed to create database\n");
return 1;
}
// Add vector
float data[128];
for (int i = 0; i < 128; i++) {
data[i] = (float)rand() / RAND_MAX;
}
int rc = gv_db_add_vector_with_metadata(
db, data, 128, "id", "1"
);
if (rc != 0) {
fprintf(stderr, "Failed to add vector\n");
gv_db_close(db);
return 1;
}
// Search
float query[128];
for (int i = 0; i < 128; i++) {
query[i] = (float)rand() / RAND_MAX;
}
GV_SearchResult results[10];
int found = gv_db_search(
db, query, 10, results, GV_DISTANCE_EUCLIDEAN
);
if (found > 0) {
printf("Found %d results:\n", found);
for (int i = 0; i < found; i++) {
printf(" Distance: %f\n", results[i].distance);
}
}
// Save and close
gv_db_save(db, "example.db");
gv_db_close(db);
return 0;
}

GV_Database *db = gv_db_open("db.db", 128, GV_INDEX_TYPE_HNSW);
if (!db) {
// Handle error - check errno or log message
perror("gv_db_open");
return 1;
}
int rc = gv_db_add_vector(db, data, 128);
if (rc != 0) {
// Handle error
fprintf(stderr, "Failed to add vector\n");
gv_db_close(db);
return 1;
}

// Vectors are managed by the database
// You don't need to free vectors returned from search results
// The database owns all vector data
// Always close the database when done
gv_db_close(db);

// HNSW configuration
GV_HNSWConfig hnsw_config = {
.M = 32,
.efConstruction = 200,
.efSearch = 50,
.use_binary_quant = 1,
.quant_rerank = 20
};
GV_Database *db = gv_db_open_with_hnsw_config(
"db.db", 128, GV_INDEX_TYPE_HNSW, &hnsw_config
);
// IVFPQ configuration
GV_IVFPQConfig ivfpq_config = {
.nlist = 256,
.m = 16,
.nbits = 8,
.nprobe = 16,
.default_rerank = 32
};
GV_Database *db_ivfpq = gv_db_open_with_ivfpq_config(
"ivfpq.db", 128, GV_INDEX_TYPE_IVFPQ, &ivfpq_config
);
// Train IVFPQ
float training_data[1000 * 128];
// ... populate training data ...
gv_db_ivfpq_train(db_ivfpq, training_data, 1000, 128);

Start GigaVector as an HTTP server for language-agnostic access.
C:
#include "gigavector/gv_server.h"
GV_Database *db = gv_db_open("vectors.db", 128, GV_INDEX_TYPE_HNSW);
GV_ServerConfig config;
gv_server_config_init(&config);
config.port = 6969;
config.enable_cors = 1;
GV_Server *server = gv_server_create(db, &config);
gv_server_start(server);
// Server runs until stopped
gv_server_stop(server);
gv_server_destroy(server);
gv_db_close(db);

Python (with dashboard):
from gigavector import Database, IndexType, serve_with_dashboard
db = Database.open(None, dimension=128, index=IndexType.HNSW)
server = serve_with_dashboard(db, port=6969)
# Dashboard at http://localhost:6969/dashboard
# Press Ctrl+C to stop
try:
server.wait()
finally:
server.stop()
db.close()

Health check:
curl http://localhost:6969/health

Add vector:
curl -X POST http://localhost:6969/vectors \
-H "Content-Type: application/json" \
-d '{"vector": [0.1, 0.2, ...], "metadata": {"id": "1"}}'

Search:
curl -X POST http://localhost:6969/search \
-H "Content-Type: application/json" \
-d '{"vector": [0.1, 0.2, ...], "k": 10}'

Get stats:
curl http://localhost:6969/stats

Dashboard info:
curl http://localhost:6969/api/dashboard/info

GigaVector ships a built-in web dashboard with a dark theme. It is a pure-Python feature — no libmicrohttpd or other C HTTP library is needed.
Dashboard views:
- Overview -- live metrics: vector count, dimension, index type, QPS, health status (auto-refreshes every 2 s)
- Vectors -- browse vectors by ID, add new vectors with metadata, delete
- Search -- k-NN search form with distance metric selector and results table
- Visualize -- vector space visualization
- Monitoring -- real-time performance monitoring and metrics
- Import -- bulk import vectors from files
- SQL -- SQL query interface for structured queries
- Namespaces -- manage logical namespace partitions
- Graph -- graph database browser and query interface
- Console -- raw REST API console with method dropdown, URL, body, and syntax-highlighted JSON response
- Backups -- create and manage database backups
- Cluster -- cluster topology and node management
Use serve_with_dashboard() (shown above) or the DashboardServer class directly.
GigaVector includes command-line tools for database management.
# Basic backup
gvbackup mydb.db backup.gvb
# Compressed backup
gvbackup --compress mydb.db backup.gvb.gz
# Include WAL
gvbackup --include-wal mydb.db backup.gvb

# Restore to new database
gvrestore backup.gvb restored.db
# Restore with verification
gvrestore --verify backup.gvb restored.db

# Show database info
gvinspect mydb.db
# Output:
# Database: mydb.db
# Version: 0.8.1
# Vectors: 1,234,567
# Dimension: 128
# Index: HNSW (M=16, ef=200)
# Size: 156.2 MB

GigaVector includes a property graph database and a knowledge graph layer that integrates vector embeddings with graph structure.
from gigavector import GraphDB
g = GraphDB()
alice = g.add_node("Person")
bob = g.add_node("Person")
g.set_node_prop(alice, "name", "Alice")
g.set_node_prop(bob, "name", "Bob")
g.add_edge(alice, bob, "KNOWS", weight=1.0)
# Traverse and analyze
visited = g.bfs(alice, max_depth=3)
path = g.shortest_path(alice, bob)
pr = g.pagerank(alice)
# Persist
g.save("social.gvgr")

from gigavector import KnowledgeGraph, KGConfig
kg = KnowledgeGraph(KGConfig(embedding_dimension=128))
# Add entities with embeddings
e1 = kg.add_entity("Alice", "Person", embedding=[0.1] * 128)
e2 = kg.add_entity("Anthropic", "Company", embedding=[0.2] * 128)
kg.add_relation(e1, "works_at", e2)
# SPO triple queries (None = wildcard)
triples = kg.query_triples(predicate="works_at")
# Semantic search over entity embeddings
results = kg.search_similar([0.15] * 128, k=5)
# Entity resolution and link prediction
resolved = kg.resolve_entity("Alice Smith", "Person", embedding=[0.1] * 128)
predictions = kg.predict_links(e1, k=5)

// C API equivalent
GV_GraphDB *g = gv_graph_create(NULL);
uint64_t n1 = gv_graph_add_node(g, "Person");
uint64_t n2 = gv_graph_add_node(g, "Person");
gv_graph_add_edge(g, n1, n2, "KNOWS", 1.0f);
GV_GraphPath path;
gv_graph_shortest_path(g, n1, n2, &path);
gv_graph_free_path(&path);
gv_graph_destroy(g);

For best practices on resource management, error handling, and batch operations, see the Python Bindings Guide and C API Guide. For troubleshooting, see Troubleshooting.