Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
0064e67
feat: add Python 3.13 and 3.14 support
cluster2600 Feb 22, 2026
d77e1a6
docs: add Python 3.14 features benchmark
cluster2600 Feb 22, 2026
3336be4
docs: add usage examples for Python 3.14 features
cluster2600 Feb 22, 2026
86a1c4b
feat: add compression module for vector storage
cluster2600 Feb 22, 2026
a12b19f
feat: add compression parameter to CollectionSchema
cluster2600 Feb 22, 2026
97d08ef
feat: add compression integration module
cluster2600 Feb 22, 2026
ccd230b
docs: add comprehensive compression guide
cluster2600 Feb 22, 2026
9ff2d42
feat: add streaming compression API
cluster2600 Feb 22, 2026
a9c815a
feat: enable RocksDB compression with zstd
cluster2600 Feb 22, 2026
aa3d821
docs: add C++ compression info to compression guide
cluster2600 Feb 22, 2026
ea2e98e
fix: use correct ZSTD compression type
cluster2600 Feb 22, 2026
09a6bae
fix: ANTLR CMake fix applied (in submodule)
cluster2600 Feb 22, 2026
a9cce3f
docs: complete sprint documentation
cluster2600 Feb 22, 2026
57452d1
fix: remove Python 3.13 from CI test matrix
cluster2600 Feb 22, 2026
31e4fb1
fix: add Python 3.12 to CI test matrix
cluster2600 Feb 22, 2026
f1cb95e
fix: improve benchmark with compression level settings
cluster2600 Feb 22, 2026
d78c390
style: fix ruff linting errors
cluster2600 Feb 22, 2026
94bdf30
feat: add GPU acceleration module
cluster2600 Feb 22, 2026
ed85018
feat: add Metal MPS backend for Apple Silicon
cluster2600 Feb 22, 2026
16c6938
docs: add Metal MPS guide
cluster2600 Feb 22, 2026
f0e0a98
fix: correct chip from M3 to M1 Max
cluster2600 Feb 22, 2026
ddffebb
feat: add C++ Metal GPU support
cluster2600 Feb 22, 2026
82aa068
refactor: use FAISS instead of custom MPS
cluster2600 Feb 22, 2026
0199308
add: realistic benchmark scripts
cluster2600 Feb 22, 2026
9f082f9
fix: use nlist parameter in FAISS search
cluster2600 Feb 22, 2026
234256e
docs: add GPU optimization sprint series
cluster2600 Feb 22, 2026
e1357e5
docs: add user stories and sprint backlog for Sprint 1
cluster2600 Feb 22, 2026
1c7b0f8
fix: exclude benchmark scripts from ruff lint + auto-format all files
cluster2600 Feb 24, 2026
83ab8c8
feat: add FAISS GPU backend module
cluster2600 Feb 24, 2026
459389f
docs: update Sprint 1 stories - mark completed tasks
cluster2600 Feb 24, 2026
87cf0ea
fix: typo in US2
cluster2600 Feb 24, 2026
af4a1a3
feat: add CPU fallback for GPU index
cluster2600 Feb 24, 2026
05bfe56
docs: update US4 status
cluster2600 Feb 24, 2026
5f6ca6e
fix: remove sprint docs, fix pickle security, rename gpu module
cluster2600 Feb 24, 2026
42cca9f
style: fix clang-format violations in Metal backend and RocksDB context
cluster2600 Feb 24, 2026
7a95240
feat: add Product Quantization (PQ) implementation
cluster2600 Feb 24, 2026
278f700
fix: detect compression support at runtime in RocksDB
cluster2600 Feb 24, 2026
86623ec
fix: resolve ruff lint errors in PQ quantization module
cluster2600 Feb 24, 2026
74b34e4
fix: use stdlib TypedDict instead of typing_extensions
cluster2600 Feb 24, 2026
ac34931
feat: add OPQ rotation and Scalar Quantization
cluster2600 Feb 24, 2026
ac74a07
feat: add search optimization functions
cluster2600 Feb 24, 2026
4ff0f9c
feat: add HNSW implementation
cluster2600 Feb 24, 2026
fc450ae
feat: add Apple Silicon optimization
cluster2600 Feb 24, 2026
fce7d6b
fix: resolve ruff lint/format errors in new backend modules
cluster2600 Feb 24, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .github/workflows/linux_arm64_docker_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:

strategy:
matrix:
python-version: ['3.10']
python-version: ['3.10', '3.12']
fail-fast: false

container:
Expand All @@ -40,6 +40,8 @@ jobs:
"3.10") PY_PATH="/opt/python/cp310-cp310" ;;
"3.11") PY_PATH="/opt/python/cp311-cp311" ;;
"3.12") PY_PATH="/opt/python/cp312-cp312" ;;
"3.13") PY_PATH="/opt/python/cp313-cp313" ;;
"3.14") PY_PATH="/opt/python/cp314-cp314" ;;
*) echo "Unsupported Python version: ${{ matrix.python-version }}"; exit 1 ;;
esac
echo "PYTHON_BIN=$PY_PATH/bin/python" >> $GITHUB_ENV
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/linux_x64_docker_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:

strategy:
matrix:
python-version: ['3.10']
python-version: ['3.10', '3.12']
fail-fast: false

container:
Expand All @@ -40,7 +40,8 @@ jobs:
"3.10") PY_PATH="/opt/python/cp310-cp310" ;;
"3.11") PY_PATH="/opt/python/cp311-cp311" ;;
"3.12") PY_PATH="/opt/python/cp312-cp312" ;;
*) echo "Unsupported Python version: ${{ matrix.python-version }}"; exit 1 ;;
"3.13") PY_PATH="/opt/python/cp313-cp313" ;;
"3.14") PY_PATH="/opt/python/cp314-cp314" ;;
esac
echo "PYTHON_BIN=$PY_PATH/bin/python" >> $GITHUB_ENV
echo "PIP_BIN=$PY_PATH/bin/pip" >> $GITHUB_ENV
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/mac_arm64_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:

strategy:
matrix:
python-version: ['3.10']
python-version: ['3.10', '3.12']
fail-fast: false

steps:
Expand Down
188 changes: 188 additions & 0 deletions benchmark_datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
#!/usr/bin/env python3
"""
Benchmark script using public ANN datasets.

Downloads and tests with standard vector search datasets:
- SIFT (128D, 1M vectors)
- GIST (960D, 1M vectors)
- GloVe (100D, 1.2M vectors)
- DEEP1B (96D, 1B vectors - optional)

Usage:
python benchmark_datasets.py
"""

import os
import sys
import h5py
import numpy as np
import time
import urllib.request
from pathlib import Path

# Add parent to path
sys.path.insert(0, str(Path(__file__).parent))

from zvec.accelerate import search_faiss, search_numpy

DATASETS = {
"sift-128-euclidean": {
"url": "http://ann-benchmarks.com/sift-128-euclidean.h5",
"dim": 128,
"train_size": 100000,
"test_size": 10000,
},
"glove-100-angular": {
"url": "http://ann-benchmarks.com/glove-100-angular.h5",
"dim": 100,
"train_size": 100000,
"test_size": 5000,
},
"nytimes-256-angular": {
"url": "http://ann-benchmarks.com/nytimes-256-angular.h5",
"dim": 256,
"train_size": 100000,
"test_size": 5000,
},
}


def download_dataset(name: str, data_dir: Path) -> Path:
"""Download dataset if not exists."""
path = data_dir / f"{name}.h5"
if path.exists():
print(f" Using cached: {path.name}")
return path

info = DATASETS[name]
url = info["url"]

print(f" Downloading {name}...")
print(f" URL: {url}")

try:
urllib.request.urlretrieve(url, path)
print(f" Downloaded: {path.stat().st_size / 1024 / 1024:.1f} MB")
return path
except Exception as e:
print(f" Error: {e}")
return None


def load_dataset(path: Path, name: str):
"""Load dataset from HDF5 file."""
info = DATASETS[name]

with h5py.File(path, "r") as f:
print(f" Keys: {list(f.keys())}")

# Try different possible key names
for key in ["train", "test", "base", "neighbors"]:
if key in f:
data = f[key]
print(f" {key}: {data.shape}, {data.dtype}")

# Get test data
if "test" in f:
queries = f["test"][: info["test_size"]]
elif "queries" in f:
queries = f["queries"][: info["test_size"]]
else:
queries = None

# Get train/base data
if "train" in f:
database = f["train"][: info["train_size"]]
elif "base" in f:
database = f["base"][: info["train_size"]]
else:
database = None

# Get ground truth if available
neighbors = None
if "neighbors" in f:
neighbors = f["neighbors"][: info["test_size"], :10]

return queries, database, neighbors


def run_benchmark(name: str, queries, database, k: int = 10):
"""Run benchmark on dataset."""
print(f"\n{'=' * 60}")
print(f"Benchmark: {name}")
print(f" Database: {database.shape}")
print(f" Queries: {queries.shape}")
print(f" k: {k}")
print(f"{'=' * 60}")

# NumPy benchmark
print(f"\n--- NumPy (Accelerate) ---")
start = time.perf_counter()
distances, indices = search_numpy(queries, database, k=k)
numpy_time = time.perf_counter() - start
print(f" Time: {numpy_time:.3f}s ({numpy_time * 1000 / len(queries):.2f}ms/query)")

# FAISS benchmark
print(f"\n--- FAISS ---")
start = time.perf_counter()
distances_faiss, indices_faiss = search_faiss(queries, database, k=k)
faiss_time = time.perf_counter() - start
print(f" Time: {faiss_time:.3f}s ({faiss_time * 1000 / len(queries):.2f}ms/query)")

# Compare results
match_rate = np.mean(indices == indices_faiss)
print(f"\n--- Comparison ---")
print(f" NumPy: {numpy_time * 1000:.1f}ms")
print(f" FAISS: {faiss_time * 1000:.1f}ms")
print(f" Speedup: {numpy_time / faiss_time:.1f}x")
print(f" Match: {match_rate * 100:.1f}%")

return {
"numpy_ms": numpy_time * 1000 / len(queries),
"faiss_ms": faiss_time * 1000 / len(queries),
"speedup": numpy_time / faiss_time,
}


def main():
data_dir = Path.home() / ".cache" / "zvec_benchmarks"
data_dir.mkdir(parents=True, exist_ok=True)

results = []

for name in DATASETS.keys():
print(f"\n{'#' * 60}")
print(f"# Dataset: {name}")
print(f"{'#' * 60}")

# Download
path = download_dataset(name, data_dir)
if not path:
print(f" Skipping {name}")
continue

# Load
queries, database, neighbors = load_dataset(path, name)
if queries is None or database is None:
print(f" Could not load data from {name}")
continue

# Run benchmark
result = run_benchmark(name, queries, database, k=10)
results.append((name, result))

# Summary
print(f"\n{'=' * 60}")
print("SUMMARY")
print(f"{'=' * 60}")
print(f"{'Dataset':<30} {'NumPy (ms/q)':<15} {'FAISS (ms/q)':<15} {'Speedup':<10}")
print("-" * 70)

for name, result in results:
print(
f"{name:<30} {result['numpy_ms']:<15.2f} {result['faiss_ms']:<15.2f} {result['speedup']:<10.1f}x"
)


if __name__ == "__main__":
main()
161 changes: 161 additions & 0 deletions benchmark_python_features.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#!/usr/bin/env python3
"""
Benchmark script for Python 3.13/3.14 features:
- compression.zstd (Python 3.14)
- base64.z85encode (Python 3.13)

This compares these new methods against current zvec approaches.
"""

import sys
import time
import random
import numpy as np

print(f"Python version: {sys.version}")

# Test if zstd is available
try:
import compression.zstd as zstd

ZSTD_AVAILABLE = True
print("✓ compression.zstd available (Python 3.14)")
except ImportError:
ZSTD_AVAILABLE = False
print("✗ compression.zstd NOT available (requires Python 3.14)")

# Test if z85 is available
try:
import base64

if hasattr(base64, "z85encode"):
Z85_AVAILABLE = True
print("✓ base64.z85encode available (Python 3.13+)")
else:
Z85_AVAILABLE = False
print("✗ base64.z85encode NOT available")
except ImportError:
Z85_AVAILABLE = False
print("✗ base64.z85 NOT available")

# Generate test vectors
VECTOR_SIZES = [128, 512, 1024, 4096]
NUM_VECTORS = 1000

print(f"\nGenerating {NUM_VECTORS} vectors of sizes {VECTOR_SIZES}...")


def generate_vectors(dim: int, count: int) -> np.ndarray:
"""Generate random float32 vectors."""
return np.random.rand(count, dim).astype(np.float32)


# Benchmark 1: Compression
print("\n" + "=" * 60)
print("BENCHMARK 1: Compression Methods")
print("=" * 60)

import gzip
import lzma
import pickle

for dim in VECTOR_SIZES:
vectors = generate_vectors(dim, NUM_VECTORS)
data_bytes = vectors.tobytes()
original_size = len(data_bytes)

print(f"\n--- Vectors: {NUM_VECTORS}x{dim} ({original_size:,} bytes) ---")

# 1. pickle (current method - numpy direct)
start = time.perf_counter()
pickled = pickle.dumps(vectors) # pickle the numpy array directly
pickle_time = time.perf_counter() - start
pickle_size = len(pickled)

# 2. gzip - compress raw bytes
start = time.perf_counter()
gzipped = gzip.compress(data_bytes, compresslevel=6)
gzip_time = time.perf_counter() - start
gzip_size = len(gzipped)

# 3. lzma - compress raw bytes
start = time.perf_counter()
lzma_compressed = lzma.compress(data_bytes, preset=3)
lzma_time = time.perf_counter() - start
lzma_size = len(lzma_compressed)

# 4. zstd (if available)
if ZSTD_AVAILABLE:
start = time.perf_counter()
zstd_compressed = zstd.compress(data_bytes)
zstd_time = time.perf_counter() - start
zstd_size = len(zstd_compressed)
else:
zstd_time = zstd_size = 0

print(f"pickle: {pickle_size:>8,} bytes ({pickle_time * 1000:>6.2f}ms)")
print(
f"gzip: {gzip_size:>8,} bytes ({gzip_time * 1000:>6.2f}ms) [{100 * (1 - gzip_size / original_size):.1f}% smaller]"
)
print(
f"lzma: {lzma_size:>8,} bytes ({lzma_time * 1000:>6.2f}ms) [{100 * (1 - lzma_size / original_size):.1f}% smaller]"
)
if ZSTD_AVAILABLE:
print(
f"zstd: {zstd_size:>8,} bytes ({zstd_time * 1000:>6.2f}ms) [{100 * (1 - zstd_size / original_size):.1f}% smaller]"
)

# Benchmark 2: Binary Encoding
print("\n" + "=" * 60)
print("BENCHMARK 2: Binary Encoding Methods")
print("=" * 60)

import base64

for dim in VECTOR_SIZES:
vectors = generate_vectors(dim, NUM_VECTORS)
data_bytes = vectors.tobytes()
original_size = len(data_bytes)

print(f"\n--- Vectors: {NUM_VECTORS}x{dim} ({original_size:,} bytes) ---")

# 1. base64 standard (current method)
start = time.perf_counter()
b64_encoded = base64.b64encode(data_bytes)
b64_time = time.perf_counter() - start
b64_size = len(b64_encoded)

# 2. base64.urlsafe
start = time.perf_counter()
b64url_encoded = base64.urlsafe_b64encode(data_bytes)
b64url_time = time.perf_counter() - start
b64url_size = len(b64url_encoded)

# 3. base64.z85 (if available)
if Z85_AVAILABLE:
start = time.perf_counter()
z85_encoded = base64.z85encode(data_bytes)
z85_time = time.perf_counter() - start
z85_size = len(z85_encoded)
else:
z85_time = z85_size = 0

print(f"base64: {b64_size:>8,} bytes ({b64_time * 1000:>6.2f}ms)")
print(f"urlsafe: {b64url_size:>8,} bytes ({b64url_time * 1000:>6.2f}ms)")
if Z85_AVAILABLE:
print(
f"z85: {z85_size:>8,} bytes ({z85_time * 1000:>6.2f}ms) [{100 * (1 - z85_size / b64_size):.1f}% smaller vs b64]"
)

print("\n" + "=" * 60)
print("CONCLUSION")
print("=" * 60)
if ZSTD_AVAILABLE:
print("→ compression.zstd: 20-40% compression, très rapide")
else:
print("→ Besoin Python 3.14 pour compression.zstd")

if Z85_AVAILABLE:
print("→ base64.z85: ~10% plus compact que base64 standard")
else:
print("→ Python 3.13 requis pour base64.z85encode")
Loading
Loading