From ca75c1863c80cc9934b59d4514ad231afd91d90e Mon Sep 17 00:00:00 2001 From: Maxime Grenu Date: Thu, 19 Feb 2026 10:31:12 +0100 Subject: [PATCH 1/6] feat(examples): add custom HTTP embedding example for LM Studio / Ollama --- examples/custom_http_embedding.py | 354 ++++++++++++++++++++++++++++++ 1 file changed, 354 insertions(+) create mode 100644 examples/custom_http_embedding.py diff --git a/examples/custom_http_embedding.py b/examples/custom_http_embedding.py new file mode 100644 index 00000000..1bedfca3 --- /dev/null +++ b/examples/custom_http_embedding.py @@ -0,0 +1,354 @@ +""" +Custom HTTP Embedding Example for zvec +====================================== + +Demonstrates how to use any OpenAI-compatible embedding endpoint +(LM Studio, Ollama, vLLM, LocalAI, …) as an embedding source in zvec. + +Usage +----- +1. Start your local inference server: + + **LM Studio** (https://lmstudio.ai/): + - Open LM Studio → go to "Local Server" tab + - Load an embedding model (e.g. nomic-embed-text, all-minilm-l6-v2) + - Click "Start Server" (default: http://localhost:1234) + - Enable "Allow external connections" if accessing from another machine + + **Ollama** (https://ollama.com/): + $ ollama serve # starts on http://localhost:11434 + $ ollama pull nomic-embed-text # pull the model first + +2. Install zvec: + $ pip install zvec + +3. Run the example: + # LM Studio (default) + $ python examples/custom_http_embedding.py + + # Ollama + $ python examples/custom_http_embedding.py \\ + --base-url http://localhost:11434 \\ + --model nomic-embed-text + + # Remote / custom server + $ python examples/custom_http_embedding.py \\ + --base-url http://192.168.1.10:1234 \\ + --model text-embedding-nomic-embed-text-v1.5@f16 + +Notes +----- +- The embedding dimension is detected automatically on the first call. +- No API key is required for local servers; pass ``--api-key`` if yours needs one. 
+- The collection is stored under ``/tmp/zvec_http_embedding_example`` and is + destroyed at the end of the script. Remove the ``collection.destroy()`` call + at the bottom to keep the data across runs. +""" + +from __future__ import annotations + +import argparse +import json +import os +import shutil +import urllib.request +from functools import lru_cache +from typing import List, Optional + + +# --------------------------------------------------------------------------- +# HTTPEmbeddingFunction +# --------------------------------------------------------------------------- + +class HTTPEmbeddingFunction: + """Dense embedding function that calls an OpenAI-compatible /v1/embeddings + endpoint. + + This class satisfies the :class:`zvec.DenseEmbeddingFunction` protocol and + can therefore be used anywhere zvec accepts a custom embedding function. + + Parameters + ---------- + base_url: + Base URL of the OpenAI-compatible inference server. + Examples: + - ``http://localhost:1234`` (LM Studio default) + - ``http://localhost:11434`` (Ollama default) + model: + Name / identifier of the embedding model as expected by the server. + api_key: + Optional API key sent as ``Authorization: Bearer ``. + Leave as ``None`` for servers that do not require authentication. + timeout: + HTTP request timeout in seconds (default: 30). 
+ """ + + ENDPOINT = "/v1/embeddings" + + def __init__( + self, + base_url: str = "http://localhost:1234", + model: str = "text-embedding-nomic-embed-text-v1.5@f16", + api_key: Optional[str] = None, + timeout: int = 30, + ) -> None: + self.base_url = base_url.rstrip("/") + self.model = model + self.api_key = api_key or os.environ.get("OPENAI_API_KEY", "") + self.timeout = timeout + + # Detect dimension on first use (lazy) + self._dimension: Optional[int] = None + + # ------------------------------------------------------------------ + # DenseEmbeddingFunction protocol + # ------------------------------------------------------------------ + + @property + def dimension(self) -> int: + """Return the embedding dimension (detected lazily).""" + if self._dimension is None: + # Trigger a probe call to learn the dimension + self._dimension = len(self.embed("dimension probe")) + return self._dimension + + def __call__(self, text: str) -> List[float]: + return self.embed(text) + + @lru_cache(maxsize=256) + def embed(self, text: str) -> List[float]: + """Embed *text* and return a ``list[float]``. + + Results are cached (LRU, up to 256 entries) to avoid redundant + network calls when the same string is encountered more than once. + + Parameters + ---------- + text: + The input string to embed. Must be non-empty. + + Returns + ------- + list[float] + The dense embedding vector produced by the server. + + Raises + ------ + ValueError + If *text* is empty or the server returns an unexpected response. + RuntimeError + If the HTTP request fails. 
+ """ + if not isinstance(text, str): + raise TypeError(f"Expected str, got {type(text).__name__}") + text = text.strip() + if not text: + raise ValueError("Input text must not be empty or whitespace only.") + + url = self.base_url + self.ENDPOINT + payload = json.dumps({"model": self.model, "input": text}).encode() + + headers = {"Content-Type": "application/json"} + if self.api_key: + headers["Authorization"] = f"Bearer {self.api_key}" + + req = urllib.request.Request(url, data=payload, headers=headers, method="POST") + try: + with urllib.request.urlopen(req, timeout=self.timeout) as resp: + body = json.loads(resp.read()) + except urllib.error.HTTPError as exc: + raise RuntimeError( + f"Embedding server returned HTTP {exc.code}: {exc.read().decode()}" + ) from exc + except OSError as exc: + raise RuntimeError( + f"Could not reach embedding server at {url}: {exc}" + ) from exc + + try: + vector: List[float] = body["data"][0]["embedding"] + except (KeyError, IndexError) as exc: + raise ValueError( + f"Unexpected response format from embedding server: {body}" + ) from exc + + return vector + + +# --------------------------------------------------------------------------- +# Demo +# --------------------------------------------------------------------------- + +SAMPLE_DOCUMENTS = [ + { + "id": "doc_1", + "text": "LM Studio lets you run large language models locally on your computer.", + "topic": "local AI", + }, + { + "id": "doc_2", + "text": "Ollama is an open-source tool for running language models on-device.", + "topic": "local AI", + }, + { + "id": "doc_3", + "text": "zvec is a lightweight, in-process vector database built on Proxima.", + "topic": "vector database", + }, + { + "id": "doc_4", + "text": "HNSW is a graph-based algorithm for approximate nearest-neighbor search.", + "topic": "ANN algorithms", + }, + { + "id": "doc_5", + "text": "Cosine similarity measures the angle between two vectors, ignoring magnitude.", + "topic": "math", + }, +] + +QUERY = "How do 
I run an embedding model on my laptop?" + + +def run_demo( + base_url: str, + model: str, + api_key: Optional[str], + collection_path: str, +) -> None: + import zvec + from zvec import ( + CollectionSchema, + DataType, + Doc, + HnswIndexParam, + MetricType, + VectorQuery, + VectorSchema, + create_and_open, + ) + + # ------------------------------------------------------------------ # + # 1. Embedding function # + # ------------------------------------------------------------------ # + print(f"[1/4] Connecting to embedding server at {base_url} …") + emb = HTTPEmbeddingFunction(base_url=base_url, model=model, api_key=api_key) + + # Probe dimension + dim = emb.dimension + print(f" Model: {model!r} | Dimension: {dim}") + + # ------------------------------------------------------------------ # + # 2. Create collection with HNSW + cosine # + # ------------------------------------------------------------------ # + print("[2/4] Creating zvec collection (HNSW / cosine) …") + if os.path.exists(collection_path): + shutil.rmtree(collection_path) + + schema = CollectionSchema( + name="http_embedding_demo", + vectors=VectorSchema( + name="embedding", + data_type=DataType.VECTOR_FP32, + dimension=dim, + index_param=HnswIndexParam( + metric_type=MetricType.COSINE, + m=16, + ef_construction=200, + ), + ), + ) + collection = create_and_open(path=collection_path, schema=schema) + + # ------------------------------------------------------------------ # + # 3. 
Insert documents # + # ------------------------------------------------------------------ # + print(f"[3/4] Embedding and inserting {len(SAMPLE_DOCUMENTS)} documents …") + docs = [] + for item in SAMPLE_DOCUMENTS: + vector = emb.embed(item["text"]) + doc = Doc( + id=item["id"], + vectors={"embedding": vector}, + fields={ + "text": item["text"], + "topic": item["topic"], + }, + ) + docs.append(doc) + + collection.insert(docs) + collection.flush() + print(f" Inserted {collection.stats.total_doc_count} documents.") + + # ------------------------------------------------------------------ # + # 4. Search # + # ------------------------------------------------------------------ # + print(f"[4/4] Searching for: {QUERY!r}\n") + query_vector = emb.embed(QUERY) + + results = collection.query( + VectorQuery("embedding", vector=query_vector), + topk=3, + ) + + print("Top-3 results:") + print("-" * 60) + for rank, result in enumerate(results, start=1): + # Retrieve stored fields if available + doc_id = result.id + score = result.score + # Find original text for display + original = next((d for d in SAMPLE_DOCUMENTS if d["id"] == doc_id), {}) + print(f" #{rank} id={doc_id} score={score:.4f}") + print(f" {original.get('text', '(text not stored)')}") + print("-" * 60) + + # ------------------------------------------------------------------ # + # Cleanup # + # ------------------------------------------------------------------ # + collection.destroy() + print("\nCollection destroyed. 
Done!") + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + +def main() -> None: + parser = argparse.ArgumentParser( + description="zvec custom HTTP embedding demo (LM Studio / Ollama)", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--base-url", + default="http://localhost:1234", + help="Base URL of the OpenAI-compatible embedding server.", + ) + parser.add_argument( + "--model", + default="text-embedding-nomic-embed-text-v1.5@f16", + help="Embedding model name as expected by the server.", + ) + parser.add_argument( + "--api-key", + default=None, + help="Optional API key (leave blank for local servers).", + ) + parser.add_argument( + "--collection-path", + default="/tmp/zvec_http_embedding_example", + help="Filesystem path for the zvec collection.", + ) + args = parser.parse_args() + + run_demo( + base_url=args.base_url, + model=args.model, + api_key=args.api_key, + collection_path=args.collection_path, + ) + + +if __name__ == "__main__": + main() From 9a81b28e33fa195bcc5e56b8493e3fd7b8851838 Mon Sep 17 00:00:00 2001 From: Maxime Grenu Date: Thu, 26 Feb 2026 10:06:39 +0100 Subject: [PATCH 2/6] feat(extension): promote HTTPDenseEmbedding to first-class extension Move the HTTP embedding implementation from the example script into python/zvec/extension/ as HTTPDenseEmbedding, inheriting from DenseEmbeddingFunction. The example now imports from zvec.extension instead of defining the class inline. 
Signed-off-by: Maxime Signed-off-by: Maxime Grenu --- examples/custom_http_embedding.py | 126 +------------ python/zvec/extension/__init__.py | 2 + .../zvec/extension/http_embedding_function.py | 165 ++++++++++++++++++ 3 files changed, 170 insertions(+), 123 deletions(-) create mode 100644 python/zvec/extension/http_embedding_function.py diff --git a/examples/custom_http_embedding.py b/examples/custom_http_embedding.py index 1bedfca3..0ae17dc2 100644 --- a/examples/custom_http_embedding.py +++ b/examples/custom_http_embedding.py @@ -48,131 +48,11 @@ from __future__ import annotations import argparse -import json import os import shutil -import urllib.request -from functools import lru_cache -from typing import List, Optional +from typing import Optional - -# --------------------------------------------------------------------------- -# HTTPEmbeddingFunction -# --------------------------------------------------------------------------- - -class HTTPEmbeddingFunction: - """Dense embedding function that calls an OpenAI-compatible /v1/embeddings - endpoint. - - This class satisfies the :class:`zvec.DenseEmbeddingFunction` protocol and - can therefore be used anywhere zvec accepts a custom embedding function. - - Parameters - ---------- - base_url: - Base URL of the OpenAI-compatible inference server. - Examples: - - ``http://localhost:1234`` (LM Studio default) - - ``http://localhost:11434`` (Ollama default) - model: - Name / identifier of the embedding model as expected by the server. - api_key: - Optional API key sent as ``Authorization: Bearer ``. - Leave as ``None`` for servers that do not require authentication. - timeout: - HTTP request timeout in seconds (default: 30). 
- """ - - ENDPOINT = "/v1/embeddings" - - def __init__( - self, - base_url: str = "http://localhost:1234", - model: str = "text-embedding-nomic-embed-text-v1.5@f16", - api_key: Optional[str] = None, - timeout: int = 30, - ) -> None: - self.base_url = base_url.rstrip("/") - self.model = model - self.api_key = api_key or os.environ.get("OPENAI_API_KEY", "") - self.timeout = timeout - - # Detect dimension on first use (lazy) - self._dimension: Optional[int] = None - - # ------------------------------------------------------------------ - # DenseEmbeddingFunction protocol - # ------------------------------------------------------------------ - - @property - def dimension(self) -> int: - """Return the embedding dimension (detected lazily).""" - if self._dimension is None: - # Trigger a probe call to learn the dimension - self._dimension = len(self.embed("dimension probe")) - return self._dimension - - def __call__(self, text: str) -> List[float]: - return self.embed(text) - - @lru_cache(maxsize=256) - def embed(self, text: str) -> List[float]: - """Embed *text* and return a ``list[float]``. - - Results are cached (LRU, up to 256 entries) to avoid redundant - network calls when the same string is encountered more than once. - - Parameters - ---------- - text: - The input string to embed. Must be non-empty. - - Returns - ------- - list[float] - The dense embedding vector produced by the server. - - Raises - ------ - ValueError - If *text* is empty or the server returns an unexpected response. - RuntimeError - If the HTTP request fails. 
- """ - if not isinstance(text, str): - raise TypeError(f"Expected str, got {type(text).__name__}") - text = text.strip() - if not text: - raise ValueError("Input text must not be empty or whitespace only.") - - url = self.base_url + self.ENDPOINT - payload = json.dumps({"model": self.model, "input": text}).encode() - - headers = {"Content-Type": "application/json"} - if self.api_key: - headers["Authorization"] = f"Bearer {self.api_key}" - - req = urllib.request.Request(url, data=payload, headers=headers, method="POST") - try: - with urllib.request.urlopen(req, timeout=self.timeout) as resp: - body = json.loads(resp.read()) - except urllib.error.HTTPError as exc: - raise RuntimeError( - f"Embedding server returned HTTP {exc.code}: {exc.read().decode()}" - ) from exc - except OSError as exc: - raise RuntimeError( - f"Could not reach embedding server at {url}: {exc}" - ) from exc - - try: - vector: List[float] = body["data"][0]["embedding"] - except (KeyError, IndexError) as exc: - raise ValueError( - f"Unexpected response format from embedding server: {body}" - ) from exc - - return vector +from zvec.extension import HTTPDenseEmbedding # --------------------------------------------------------------------------- @@ -232,7 +112,7 @@ def run_demo( # 1. 
Embedding function # # ------------------------------------------------------------------ # print(f"[1/4] Connecting to embedding server at {base_url} …") - emb = HTTPEmbeddingFunction(base_url=base_url, model=model, api_key=api_key) + emb = HTTPDenseEmbedding(base_url=base_url, model=model, api_key=api_key) # Probe dimension dim = emb.dimension diff --git a/python/zvec/extension/__init__.py b/python/zvec/extension/__init__.py index cc9401f8..9ff94af2 100644 --- a/python/zvec/extension/__init__.py +++ b/python/zvec/extension/__init__.py @@ -15,6 +15,7 @@ from .bm25_embedding_function import BM25EmbeddingFunction from .embedding_function import DenseEmbeddingFunction, SparseEmbeddingFunction +from .http_embedding_function import HTTPDenseEmbedding from .jina_embedding_function import JinaDenseEmbedding from .jina_function import JinaFunctionBase from .multi_vector_reranker import RrfReRanker, WeightedReRanker @@ -37,6 +38,7 @@ "DefaultLocalReRanker", "DefaultLocalSparseEmbedding", "DenseEmbeddingFunction", + "HTTPDenseEmbedding", "JinaDenseEmbedding", "JinaFunctionBase", "OpenAIDenseEmbedding", diff --git a/python/zvec/extension/http_embedding_function.py b/python/zvec/extension/http_embedding_function.py new file mode 100644 index 00000000..7a80c0f9 --- /dev/null +++ b/python/zvec/extension/http_embedding_function.py @@ -0,0 +1,165 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import annotations + +import json +import os +import urllib.request +from functools import lru_cache +from typing import Optional + +from ..common.constants import TEXT, DenseVectorType +from .embedding_function import DenseEmbeddingFunction + + +class HTTPDenseEmbedding(DenseEmbeddingFunction[TEXT]): + """Dense text embedding function using any OpenAI-compatible HTTP endpoint. + + This class calls any server that implements the ``/v1/embeddings`` API + (LM Studio, Ollama, vLLM, LocalAI, etc.) using only the Python standard + library — no extra dependencies are required. + + The embedding dimension is detected automatically from the first server + response. + + Args: + base_url (str, optional): Base URL of the embedding server. + Defaults to ``"http://localhost:1234"`` (LM Studio). + Common values: + + - ``"http://localhost:1234"`` — LM Studio + - ``"http://localhost:11434"`` — Ollama + model (str, optional): Model identifier as expected by the server. + Defaults to ``"text-embedding-nomic-embed-text-v1.5@f16"``. + api_key (Optional[str], optional): Bearer token for authenticated + endpoints. Falls back to the ``OPENAI_API_KEY`` environment + variable. Leave as ``None`` for local servers that do not + require authentication. + timeout (int, optional): HTTP request timeout in seconds. + Defaults to 30. + + Attributes: + dimension (int): Embedding vector dimensionality (auto-detected). + + Raises: + TypeError: If ``embed()`` receives a non-string input. + ValueError: If input is empty/whitespace-only or the server returns + an unexpected response format. + RuntimeError: If the HTTP request fails or the server is unreachable. + + Examples: + >>> from zvec.extension import HTTPDenseEmbedding + >>> + >>> # LM Studio (default) + >>> emb = HTTPDenseEmbedding() + >>> vector = emb.embed("Hello, world!") + >>> len(vector) + 768 + >>> + >>> # Ollama + >>> emb = HTTPDenseEmbedding( + ... base_url="http://localhost:11434", + ... model="nomic-embed-text", + ... 
) + >>> vector = emb.embed("Semantic search with local models") + + See Also: + - ``DenseEmbeddingFunction``: Protocol for dense embeddings. + - ``OpenAIDenseEmbedding``: Cloud embedding via the OpenAI API. + """ + + ENDPOINT = "/v1/embeddings" + + def __init__( + self, + base_url: str = "http://localhost:1234", + model: str = "text-embedding-nomic-embed-text-v1.5@f16", + api_key: Optional[str] = None, + timeout: int = 30, + ) -> None: + self._base_url = base_url.rstrip("/") + self._model = model + self._api_key = api_key or os.environ.get("OPENAI_API_KEY", "") + self._timeout = timeout + self._dimension: Optional[int] = None + + @property + def dimension(self) -> int: + """int: Embedding vector dimensionality (auto-detected on first call).""" + if self._dimension is None: + self._dimension = len(self.embed("dimension probe")) + return self._dimension + + def __call__(self, input: TEXT) -> DenseVectorType: + """Make the embedding function callable.""" + return self.embed(input) + + @lru_cache(maxsize=256) + def embed(self, input: TEXT) -> DenseVectorType: + """Generate a dense embedding vector for the input text. + + Results are cached (LRU, up to 256 entries) so repeated strings + do not trigger extra HTTP requests. + + Args: + input (TEXT): Input text string to embed. Must be non-empty + after stripping whitespace. + + Returns: + DenseVectorType: A list of floats representing the embedding. + + Raises: + TypeError: If *input* is not a string. + ValueError: If *input* is empty/whitespace-only or the server + returns an unexpected response format. + RuntimeError: If the HTTP request fails. 
+ """ + if not isinstance(input, TEXT): + raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}") + + input = input.strip() + if not input: + raise ValueError("Input text cannot be empty or whitespace only") + + url = self._base_url + self.ENDPOINT + payload = json.dumps({"model": self._model, "input": input}).encode() + + headers: dict[str, str] = {"Content-Type": "application/json"} + if self._api_key: + headers["Authorization"] = f"Bearer {self._api_key}" + + req = urllib.request.Request( + url, data=payload, headers=headers, method="POST" + ) + try: + with urllib.request.urlopen(req, timeout=self._timeout) as resp: + body = json.loads(resp.read()) + except urllib.error.HTTPError as exc: + raise RuntimeError( + f"Embedding server returned HTTP {exc.code}: " + f"{exc.read().decode()}" + ) from exc + except OSError as exc: + raise RuntimeError( + f"Could not reach embedding server at {url}: {exc}" + ) from exc + + try: + vector: list[float] = body["data"][0]["embedding"] + except (KeyError, IndexError) as exc: + raise ValueError( + f"Unexpected response format from embedding server: {body}" + ) from exc + + return vector From e5dee48a23e79fb9da8d88a344218d4381464894 Mon Sep 17 00:00:00 2001 From: Maxime Grenu Date: Thu, 26 Feb 2026 10:14:01 +0100 Subject: [PATCH 3/6] fix(examples): resolve ruff lint errors in HTTP embedding example Move zvec imports to top-level, add noqa for print statements, replace os.path.exists with pathlib, fix import sorting. 
Signed-off-by: Maxime Signed-off-by: Maxime Grenu --- examples/custom_http_embedding.py | 51 +++++++++++++++---------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/examples/custom_http_embedding.py b/examples/custom_http_embedding.py index 0ae17dc2..e41bc936 100644 --- a/examples/custom_http_embedding.py +++ b/examples/custom_http_embedding.py @@ -48,13 +48,22 @@ from __future__ import annotations import argparse -import os import shutil +from pathlib import Path from typing import Optional +from zvec import ( + CollectionSchema, + DataType, + Doc, + HnswIndexParam, + MetricType, + VectorQuery, + VectorSchema, + create_and_open, +) from zvec.extension import HTTPDenseEmbedding - # --------------------------------------------------------------------------- # Demo # --------------------------------------------------------------------------- @@ -96,33 +105,21 @@ def run_demo( api_key: Optional[str], collection_path: str, ) -> None: - import zvec - from zvec import ( - CollectionSchema, - DataType, - Doc, - HnswIndexParam, - MetricType, - VectorQuery, - VectorSchema, - create_and_open, - ) - # ------------------------------------------------------------------ # # 1. Embedding function # # ------------------------------------------------------------------ # - print(f"[1/4] Connecting to embedding server at {base_url} …") + print(f"[1/4] Connecting to embedding server at {base_url} …") # noqa: T201 emb = HTTPDenseEmbedding(base_url=base_url, model=model, api_key=api_key) # Probe dimension dim = emb.dimension - print(f" Model: {model!r} | Dimension: {dim}") + print(f" Model: {model!r} | Dimension: {dim}") # noqa: T201 # ------------------------------------------------------------------ # # 2. 
Create collection with HNSW + cosine # # ------------------------------------------------------------------ # - print("[2/4] Creating zvec collection (HNSW / cosine) …") - if os.path.exists(collection_path): + print("[2/4] Creating zvec collection (HNSW / cosine) …") # noqa: T201 + if Path(collection_path).exists(): shutil.rmtree(collection_path) schema = CollectionSchema( @@ -143,7 +140,7 @@ def run_demo( # ------------------------------------------------------------------ # # 3. Insert documents # # ------------------------------------------------------------------ # - print(f"[3/4] Embedding and inserting {len(SAMPLE_DOCUMENTS)} documents …") + print(f"[3/4] Embedding and inserting {len(SAMPLE_DOCUMENTS)} documents …") # noqa: T201 docs = [] for item in SAMPLE_DOCUMENTS: vector = emb.embed(item["text"]) @@ -159,12 +156,12 @@ def run_demo( collection.insert(docs) collection.flush() - print(f" Inserted {collection.stats.total_doc_count} documents.") + print(f" Inserted {collection.stats.total_doc_count} documents.") # noqa: T201 # ------------------------------------------------------------------ # # 4. 
Search # # ------------------------------------------------------------------ # - print(f"[4/4] Searching for: {QUERY!r}\n") + print(f"[4/4] Searching for: {QUERY!r}\n") # noqa: T201 query_vector = emb.embed(QUERY) results = collection.query( @@ -172,23 +169,23 @@ def run_demo( topk=3, ) - print("Top-3 results:") - print("-" * 60) + print("Top-3 results:") # noqa: T201 + print("-" * 60) # noqa: T201 for rank, result in enumerate(results, start=1): # Retrieve stored fields if available doc_id = result.id score = result.score # Find original text for display original = next((d for d in SAMPLE_DOCUMENTS if d["id"] == doc_id), {}) - print(f" #{rank} id={doc_id} score={score:.4f}") - print(f" {original.get('text', '(text not stored)')}") - print("-" * 60) + print(f" #{rank} id={doc_id} score={score:.4f}") # noqa: T201 + print(f" {original.get('text', '(text not stored)')}") # noqa: T201 + print("-" * 60) # noqa: T201 # ------------------------------------------------------------------ # # Cleanup # # ------------------------------------------------------------------ # collection.destroy() - print("\nCollection destroyed. Done!") + print("\nCollection destroyed. 
Done!") # noqa: T201 # --------------------------------------------------------------------------- From 400dacf55884978dbdbac886923aed31cf4e615f Mon Sep 17 00:00:00 2001 From: Maxime Grenu Date: Thu, 26 Feb 2026 10:16:49 +0100 Subject: [PATCH 4/6] style: apply ruff formatter Signed-off-by: Maxime Signed-off-by: Maxime Grenu --- examples/custom_http_embedding.py | 1 + python/zvec/extension/http_embedding_function.py | 7 ++----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/examples/custom_http_embedding.py b/examples/custom_http_embedding.py index e41bc936..8aa994be 100644 --- a/examples/custom_http_embedding.py +++ b/examples/custom_http_embedding.py @@ -192,6 +192,7 @@ def run_demo( # Entry point # --------------------------------------------------------------------------- + def main() -> None: parser = argparse.ArgumentParser( description="zvec custom HTTP embedding demo (LM Studio / Ollama)", diff --git a/python/zvec/extension/http_embedding_function.py b/python/zvec/extension/http_embedding_function.py index 7a80c0f9..3386c652 100644 --- a/python/zvec/extension/http_embedding_function.py +++ b/python/zvec/extension/http_embedding_function.py @@ -139,16 +139,13 @@ def embed(self, input: TEXT) -> DenseVectorType: if self._api_key: headers["Authorization"] = f"Bearer {self._api_key}" - req = urllib.request.Request( - url, data=payload, headers=headers, method="POST" - ) + req = urllib.request.Request(url, data=payload, headers=headers, method="POST") try: with urllib.request.urlopen(req, timeout=self._timeout) as resp: body = json.loads(resp.read()) except urllib.error.HTTPError as exc: raise RuntimeError( - f"Embedding server returned HTTP {exc.code}: " - f"{exc.read().decode()}" + f"Embedding server returned HTTP {exc.code}: {exc.read().decode()}" ) from exc except OSError as exc: raise RuntimeError( From eb3960ee7abc6878d76ac45607c1c4facf4dbd71 Mon Sep 17 00:00:00 2001 From: Maxime Grenu Date: Thu, 26 Feb 2026 11:09:53 +0100 Subject: [PATCH 5/6] 
ci: retrigger CI (flaky macOS C++ test) The vector_column_indexer_test failure is a known flaky assertion in hnsw_streamer_entity.h, unrelated to Python-only changes in this PR. Signed-off-by: Maxime Signed-off-by: Maxime Grenu From 327d718c09b62d685bfe5886a70869f615edcb16 Mon Sep 17 00:00:00 2001 From: Maxime Grenu Date: Fri, 27 Feb 2026 10:24:03 +0100 Subject: [PATCH 6/6] chore: remove custom HTTP embedding example Per maintainer feedback, examples requiring an external LLM server belong in the zvec-web project rather than in this repository. Signed-off-by: Maxime Grenu --- examples/custom_http_embedding.py | 232 ------------------------------ 1 file changed, 232 deletions(-) delete mode 100644 examples/custom_http_embedding.py diff --git a/examples/custom_http_embedding.py b/examples/custom_http_embedding.py deleted file mode 100644 index 8aa994be..00000000 --- a/examples/custom_http_embedding.py +++ /dev/null @@ -1,232 +0,0 @@ -""" -Custom HTTP Embedding Example for zvec -====================================== - -Demonstrates how to use any OpenAI-compatible embedding endpoint -(LM Studio, Ollama, vLLM, LocalAI, …) as an embedding source in zvec. - -Usage ------ -1. Start your local inference server: - - **LM Studio** (https://lmstudio.ai/): - - Open LM Studio → go to "Local Server" tab - - Load an embedding model (e.g. nomic-embed-text, all-minilm-l6-v2) - - Click "Start Server" (default: http://localhost:1234) - - Enable "Allow external connections" if accessing from another machine - - **Ollama** (https://ollama.com/): - $ ollama serve # starts on http://localhost:11434 - $ ollama pull nomic-embed-text # pull the model first - -2. Install zvec: - $ pip install zvec - -3. 
Run the example:
    # LM Studio (default)
    $ python examples/custom_http_embedding.py

    # Ollama
    $ python examples/custom_http_embedding.py \\
        --base-url http://localhost:11434 \\
        --model nomic-embed-text

    # Remote / custom server
    $ python examples/custom_http_embedding.py \\
        --base-url http://192.168.1.10:1234 \\
        --model text-embedding-nomic-embed-text-v1.5@f16

Notes
-----
- The embedding dimension is detected automatically on the first call.
- No API key is required for local servers; pass ``--api-key`` if yours needs one.
- The collection is stored under ``/tmp/zvec_http_embedding_example`` and is
  destroyed at the end of the script. Remove the ``collection.destroy()`` call
  at the bottom to keep the data across runs.
"""

from __future__ import annotations

import argparse
import shutil
from pathlib import Path
from typing import Optional

from zvec import (
    CollectionSchema,
    DataType,
    Doc,
    HnswIndexParam,
    MetricType,
    VectorQuery,
    VectorSchema,
    create_and_open,
)
from zvec.extension import HTTPDenseEmbedding

# ---------------------------------------------------------------------------
# Demo
# ---------------------------------------------------------------------------

# Tiny in-memory corpus for the demo. Each entry becomes one zvec Doc:
# "id" is the document id, "text" is embedded, and "text"/"topic" are also
# stored as plain fields on the Doc.
SAMPLE_DOCUMENTS = [
    {
        "id": "doc_1",
        "text": "LM Studio lets you run large language models locally on your computer.",
        "topic": "local AI",
    },
    {
        "id": "doc_2",
        "text": "Ollama is an open-source tool for running language models on-device.",
        "topic": "local AI",
    },
    {
        "id": "doc_3",
        "text": "zvec is a lightweight, in-process vector database built on Proxima.",
        "topic": "vector database",
    },
    {
        "id": "doc_4",
        "text": "HNSW is a graph-based algorithm for approximate nearest-neighbor search.",
        "topic": "ANN algorithms",
    },
    {
        "id": "doc_5",
        "text": "Cosine similarity measures the angle between two vectors, ignoring magnitude.",
        "topic": "math",
    },
]

# Natural-language query used for the search step at the end of the demo.
QUERY = "How do I run an embedding model on my laptop?"


def run_demo(
    base_url: str,
    model: str,
    api_key: Optional[str],
    collection_path: str,
) -> None:
    """Run the end-to-end demo: embed, index, and search the sample corpus.

    Parameters
    ----------
    base_url:
        Base URL of the OpenAI-compatible embedding server
        (e.g. ``http://localhost:1234`` for LM Studio).
    model:
        Embedding model identifier as expected by that server.
    api_key:
        Optional bearer token; ``None`` for unauthenticated local servers.
    collection_path:
        Filesystem path for the zvec collection. Any existing data at this
        path is wiped before the run, and the collection is destroyed at the
        end of the run.
    """
    # ------------------------------------------------------------------ #
    # 1. Embedding function                                              #
    # ------------------------------------------------------------------ #
    print(f"[1/4] Connecting to embedding server at {base_url} …")  # noqa: T201
    emb = HTTPDenseEmbedding(base_url=base_url, model=model, api_key=api_key)

    # Probe dimension.
    # NOTE(review): presumably reading ``dimension`` issues the first HTTP
    # request to the server to discover the vector size — confirm against
    # the zvec.extension.HTTPDenseEmbedding documentation.
    dim = emb.dimension
    print(f" Model: {model!r} | Dimension: {dim}")  # noqa: T201

    # ------------------------------------------------------------------ #
    # 2. Create collection with HNSW + cosine                            #
    # ------------------------------------------------------------------ #
    print("[2/4] Creating zvec collection (HNSW / cosine) …")  # noqa: T201
    # Wipe any leftover data from a previous run so create_and_open starts
    # fresh. NOTE(review): rmtree assumes the path is a directory; it raises
    # if a plain file exists at collection_path.
    if Path(collection_path).exists():
        shutil.rmtree(collection_path)

    # Single dense FP32 vector field indexed with HNSW under cosine distance;
    # the dimension comes from the probe above, so it always matches the model.
    schema = CollectionSchema(
        name="http_embedding_demo",
        vectors=VectorSchema(
            name="embedding",
            data_type=DataType.VECTOR_FP32,
            dimension=dim,
            index_param=HnswIndexParam(
                metric_type=MetricType.COSINE,
                m=16,
                ef_construction=200,
            ),
        ),
    )
    collection = create_and_open(path=collection_path, schema=schema)

    # ------------------------------------------------------------------ #
    # 3. Insert documents                                                #
    # ------------------------------------------------------------------ #
    print(f"[3/4] Embedding and inserting {len(SAMPLE_DOCUMENTS)} documents …")  # noqa: T201
    # One embed() call per document — one HTTP round-trip each, which is
    # fine for a 5-document demo.
    docs: list[Doc] = []
    for item in SAMPLE_DOCUMENTS:
        vector = emb.embed(item["text"])
        doc = Doc(
            id=item["id"],
            vectors={"embedding": vector},
            fields={
                "text": item["text"],
                "topic": item["topic"],
            },
        )
        docs.append(doc)

    collection.insert(docs)
    # NOTE(review): flush() appears to persist the inserted docs so the
    # subsequent query can see them — confirm against the zvec docs.
    collection.flush()
    print(f" Inserted {collection.stats.total_doc_count} documents.")  # noqa: T201

    # ------------------------------------------------------------------ #
    # 4. Search                                                          #
    # ------------------------------------------------------------------ #
    print(f"[4/4] Searching for: {QUERY!r}\n")  # noqa: T201
    # Embed the query with the same model used for the corpus, then run a
    # top-3 nearest-neighbor search on the "embedding" vector field.
    query_vector = emb.embed(QUERY)

    results = collection.query(
        VectorQuery("embedding", vector=query_vector),
        topk=3,
    )

    print("Top-3 results:")  # noqa: T201
    print("-" * 60)  # noqa: T201
    for rank, result in enumerate(results, start=1):
        # Retrieve stored fields if available
        doc_id = result.id
        score = result.score
        # Find original text for display — looked up from SAMPLE_DOCUMENTS
        # rather than from the stored Doc fields.
        original = next((d for d in SAMPLE_DOCUMENTS if d["id"] == doc_id), {})
        print(f" #{rank} id={doc_id} score={score:.4f}")  # noqa: T201
        print(f" {original.get('text', '(text not stored)')}")  # noqa: T201
    print("-" * 60)  # noqa: T201

    # ------------------------------------------------------------------ #
    # Cleanup                                                            #
    # ------------------------------------------------------------------ #
    # Remove this call to keep the collection on disk across runs.
    collection.destroy()
    print("\nCollection destroyed. Done!")  # noqa: T201


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------


def main() -> None:
    """Parse command-line arguments and launch :func:`run_demo`."""
    parser = argparse.ArgumentParser(
        description="zvec custom HTTP embedding demo (LM Studio / Ollama)",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--base-url",
        default="http://localhost:1234",
        help="Base URL of the OpenAI-compatible embedding server.",
    )
    parser.add_argument(
        "--model",
        default="text-embedding-nomic-embed-text-v1.5@f16",
        help="Embedding model name as expected by the server.",
    )
    parser.add_argument(
        "--api-key",
        default=None,
        help="Optional API key (leave blank for local servers).",
    )
    parser.add_argument(
        "--collection-path",
        default="/tmp/zvec_http_embedding_example",
        help="Filesystem path for the zvec collection.",
    )
    args = parser.parse_args()

    run_demo(
        base_url=args.base_url,
        model=args.model,
        api_key=args.api_key,
        collection_path=args.collection_path,
    )


if __name__ == "__main__":
    main()