From ca75c1863c80cc9934b59d4514ad231afd91d90e Mon Sep 17 00:00:00 2001 From: Maxime Grenu Date: Thu, 19 Feb 2026 10:31:12 +0100 Subject: [PATCH 1/6] feat(examples): add custom HTTP embedding example for LM Studio / Ollama --- examples/custom_http_embedding.py | 354 ++++++++++++++++++++++++++++++ 1 file changed, 354 insertions(+) create mode 100644 examples/custom_http_embedding.py diff --git a/examples/custom_http_embedding.py b/examples/custom_http_embedding.py new file mode 100644 index 00000000..1bedfca3 --- /dev/null +++ b/examples/custom_http_embedding.py @@ -0,0 +1,354 @@ +""" +Custom HTTP Embedding Example for zvec +====================================== + +Demonstrates how to use any OpenAI-compatible embedding endpoint +(LM Studio, Ollama, vLLM, LocalAI, …) as an embedding source in zvec. + +Usage +----- +1. Start your local inference server: + + **LM Studio** (https://lmstudio.ai/): + - Open LM Studio → go to "Local Server" tab + - Load an embedding model (e.g. nomic-embed-text, all-minilm-l6-v2) + - Click "Start Server" (default: http://localhost:1234) + - Enable "Allow external connections" if accessing from another machine + + **Ollama** (https://ollama.com/): + $ ollama serve # starts on http://localhost:11434 + $ ollama pull nomic-embed-text # pull the model first + +2. Install zvec: + $ pip install zvec + +3. Run the example: + # LM Studio (default) + $ python examples/custom_http_embedding.py + + # Ollama + $ python examples/custom_http_embedding.py \\ + --base-url http://localhost:11434 \\ + --model nomic-embed-text + + # Remote / custom server + $ python examples/custom_http_embedding.py \\ + --base-url http://192.168.1.10:1234 \\ + --model text-embedding-nomic-embed-text-v1.5@f16 + +Notes +----- +- The embedding dimension is detected automatically on the first call. +- No API key is required for local servers; pass ``--api-key`` if yours needs one. 
+- The collection is stored under ``/tmp/zvec_http_embedding_example`` and is + destroyed at the end of the script. Remove the ``collection.destroy()`` call + at the bottom to keep the data across runs. +""" + +from __future__ import annotations + +import argparse +import json +import os +import shutil +import urllib.request +from functools import lru_cache +from typing import List, Optional + + +# --------------------------------------------------------------------------- +# HTTPEmbeddingFunction +# --------------------------------------------------------------------------- + +class HTTPEmbeddingFunction: + """Dense embedding function that calls an OpenAI-compatible /v1/embeddings + endpoint. + + This class satisfies the :class:`zvec.DenseEmbeddingFunction` protocol and + can therefore be used anywhere zvec accepts a custom embedding function. + + Parameters + ---------- + base_url: + Base URL of the OpenAI-compatible inference server. + Examples: + - ``http://localhost:1234`` (LM Studio default) + - ``http://localhost:11434`` (Ollama default) + model: + Name / identifier of the embedding model as expected by the server. + api_key: + Optional API key sent as ``Authorization: Bearer ``. + Leave as ``None`` for servers that do not require authentication. + timeout: + HTTP request timeout in seconds (default: 30). 
+ """ + + ENDPOINT = "/v1/embeddings" + + def __init__( + self, + base_url: str = "http://localhost:1234", + model: str = "text-embedding-nomic-embed-text-v1.5@f16", + api_key: Optional[str] = None, + timeout: int = 30, + ) -> None: + self.base_url = base_url.rstrip("/") + self.model = model + self.api_key = api_key or os.environ.get("OPENAI_API_KEY", "") + self.timeout = timeout + + # Detect dimension on first use (lazy) + self._dimension: Optional[int] = None + + # ------------------------------------------------------------------ + # DenseEmbeddingFunction protocol + # ------------------------------------------------------------------ + + @property + def dimension(self) -> int: + """Return the embedding dimension (detected lazily).""" + if self._dimension is None: + # Trigger a probe call to learn the dimension + self._dimension = len(self.embed("dimension probe")) + return self._dimension + + def __call__(self, text: str) -> List[float]: + return self.embed(text) + + @lru_cache(maxsize=256) + def embed(self, text: str) -> List[float]: + """Embed *text* and return a ``list[float]``. + + Results are cached (LRU, up to 256 entries) to avoid redundant + network calls when the same string is encountered more than once. + + Parameters + ---------- + text: + The input string to embed. Must be non-empty. + + Returns + ------- + list[float] + The dense embedding vector produced by the server. + + Raises + ------ + ValueError + If *text* is empty or the server returns an unexpected response. + RuntimeError + If the HTTP request fails. 
+ """ + if not isinstance(text, str): + raise TypeError(f"Expected str, got {type(text).__name__}") + text = text.strip() + if not text: + raise ValueError("Input text must not be empty or whitespace only.") + + url = self.base_url + self.ENDPOINT + payload = json.dumps({"model": self.model, "input": text}).encode() + + headers = {"Content-Type": "application/json"} + if self.api_key: + headers["Authorization"] = f"Bearer {self.api_key}" + + req = urllib.request.Request(url, data=payload, headers=headers, method="POST") + try: + with urllib.request.urlopen(req, timeout=self.timeout) as resp: + body = json.loads(resp.read()) + except urllib.error.HTTPError as exc: + raise RuntimeError( + f"Embedding server returned HTTP {exc.code}: {exc.read().decode()}" + ) from exc + except OSError as exc: + raise RuntimeError( + f"Could not reach embedding server at {url}: {exc}" + ) from exc + + try: + vector: List[float] = body["data"][0]["embedding"] + except (KeyError, IndexError) as exc: + raise ValueError( + f"Unexpected response format from embedding server: {body}" + ) from exc + + return vector + + +# --------------------------------------------------------------------------- +# Demo +# --------------------------------------------------------------------------- + +SAMPLE_DOCUMENTS = [ + { + "id": "doc_1", + "text": "LM Studio lets you run large language models locally on your computer.", + "topic": "local AI", + }, + { + "id": "doc_2", + "text": "Ollama is an open-source tool for running language models on-device.", + "topic": "local AI", + }, + { + "id": "doc_3", + "text": "zvec is a lightweight, in-process vector database built on Proxima.", + "topic": "vector database", + }, + { + "id": "doc_4", + "text": "HNSW is a graph-based algorithm for approximate nearest-neighbor search.", + "topic": "ANN algorithms", + }, + { + "id": "doc_5", + "text": "Cosine similarity measures the angle between two vectors, ignoring magnitude.", + "topic": "math", + }, +] + +QUERY = "How do 
I run an embedding model on my laptop?" + + +def run_demo( + base_url: str, + model: str, + api_key: Optional[str], + collection_path: str, +) -> None: + import zvec + from zvec import ( + CollectionSchema, + DataType, + Doc, + HnswIndexParam, + MetricType, + VectorQuery, + VectorSchema, + create_and_open, + ) + + # ------------------------------------------------------------------ # + # 1. Embedding function # + # ------------------------------------------------------------------ # + print(f"[1/4] Connecting to embedding server at {base_url} …") + emb = HTTPEmbeddingFunction(base_url=base_url, model=model, api_key=api_key) + + # Probe dimension + dim = emb.dimension + print(f" Model: {model!r} | Dimension: {dim}") + + # ------------------------------------------------------------------ # + # 2. Create collection with HNSW + cosine # + # ------------------------------------------------------------------ # + print("[2/4] Creating zvec collection (HNSW / cosine) …") + if os.path.exists(collection_path): + shutil.rmtree(collection_path) + + schema = CollectionSchema( + name="http_embedding_demo", + vectors=VectorSchema( + name="embedding", + data_type=DataType.VECTOR_FP32, + dimension=dim, + index_param=HnswIndexParam( + metric_type=MetricType.COSINE, + m=16, + ef_construction=200, + ), + ), + ) + collection = create_and_open(path=collection_path, schema=schema) + + # ------------------------------------------------------------------ # + # 3. 
Insert documents # + # ------------------------------------------------------------------ # + print(f"[3/4] Embedding and inserting {len(SAMPLE_DOCUMENTS)} documents …") + docs = [] + for item in SAMPLE_DOCUMENTS: + vector = emb.embed(item["text"]) + doc = Doc( + id=item["id"], + vectors={"embedding": vector}, + fields={ + "text": item["text"], + "topic": item["topic"], + }, + ) + docs.append(doc) + + collection.insert(docs) + collection.flush() + print(f" Inserted {collection.stats.total_doc_count} documents.") + + # ------------------------------------------------------------------ # + # 4. Search # + # ------------------------------------------------------------------ # + print(f"[4/4] Searching for: {QUERY!r}\n") + query_vector = emb.embed(QUERY) + + results = collection.query( + VectorQuery("embedding", vector=query_vector), + topk=3, + ) + + print("Top-3 results:") + print("-" * 60) + for rank, result in enumerate(results, start=1): + # Retrieve stored fields if available + doc_id = result.id + score = result.score + # Find original text for display + original = next((d for d in SAMPLE_DOCUMENTS if d["id"] == doc_id), {}) + print(f" #{rank} id={doc_id} score={score:.4f}") + print(f" {original.get('text', '(text not stored)')}") + print("-" * 60) + + # ------------------------------------------------------------------ # + # Cleanup # + # ------------------------------------------------------------------ # + collection.destroy() + print("\nCollection destroyed. 
Done!") + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + +def main() -> None: + parser = argparse.ArgumentParser( + description="zvec custom HTTP embedding demo (LM Studio / Ollama)", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--base-url", + default="http://localhost:1234", + help="Base URL of the OpenAI-compatible embedding server.", + ) + parser.add_argument( + "--model", + default="text-embedding-nomic-embed-text-v1.5@f16", + help="Embedding model name as expected by the server.", + ) + parser.add_argument( + "--api-key", + default=None, + help="Optional API key (leave blank for local servers).", + ) + parser.add_argument( + "--collection-path", + default="/tmp/zvec_http_embedding_example", + help="Filesystem path for the zvec collection.", + ) + args = parser.parse_args() + + run_demo( + base_url=args.base_url, + model=args.model, + api_key=args.api_key, + collection_path=args.collection_path, + ) + + +if __name__ == "__main__": + main() From 9a81b28e33fa195bcc5e56b8493e3fd7b8851838 Mon Sep 17 00:00:00 2001 From: Maxime Grenu Date: Thu, 26 Feb 2026 10:06:39 +0100 Subject: [PATCH 2/6] feat(extension): promote HTTPDenseEmbedding to first-class extension Move the HTTP embedding implementation from the example script into python/zvec/extension/ as HTTPDenseEmbedding, inheriting from DenseEmbeddingFunction. The example now imports from zvec.extension instead of defining the class inline. 
Signed-off-by: Maxime Signed-off-by: Maxime Grenu --- examples/custom_http_embedding.py | 126 +------------ python/zvec/extension/__init__.py | 2 + .../zvec/extension/http_embedding_function.py | 165 ++++++++++++++++++ 3 files changed, 170 insertions(+), 123 deletions(-) create mode 100644 python/zvec/extension/http_embedding_function.py diff --git a/examples/custom_http_embedding.py b/examples/custom_http_embedding.py index 1bedfca3..0ae17dc2 100644 --- a/examples/custom_http_embedding.py +++ b/examples/custom_http_embedding.py @@ -48,131 +48,11 @@ from __future__ import annotations import argparse -import json import os import shutil -import urllib.request -from functools import lru_cache -from typing import List, Optional +from typing import Optional - -# --------------------------------------------------------------------------- -# HTTPEmbeddingFunction -# --------------------------------------------------------------------------- - -class HTTPEmbeddingFunction: - """Dense embedding function that calls an OpenAI-compatible /v1/embeddings - endpoint. - - This class satisfies the :class:`zvec.DenseEmbeddingFunction` protocol and - can therefore be used anywhere zvec accepts a custom embedding function. - - Parameters - ---------- - base_url: - Base URL of the OpenAI-compatible inference server. - Examples: - - ``http://localhost:1234`` (LM Studio default) - - ``http://localhost:11434`` (Ollama default) - model: - Name / identifier of the embedding model as expected by the server. - api_key: - Optional API key sent as ``Authorization: Bearer ``. - Leave as ``None`` for servers that do not require authentication. - timeout: - HTTP request timeout in seconds (default: 30). 
- """ - - ENDPOINT = "/v1/embeddings" - - def __init__( - self, - base_url: str = "http://localhost:1234", - model: str = "text-embedding-nomic-embed-text-v1.5@f16", - api_key: Optional[str] = None, - timeout: int = 30, - ) -> None: - self.base_url = base_url.rstrip("/") - self.model = model - self.api_key = api_key or os.environ.get("OPENAI_API_KEY", "") - self.timeout = timeout - - # Detect dimension on first use (lazy) - self._dimension: Optional[int] = None - - # ------------------------------------------------------------------ - # DenseEmbeddingFunction protocol - # ------------------------------------------------------------------ - - @property - def dimension(self) -> int: - """Return the embedding dimension (detected lazily).""" - if self._dimension is None: - # Trigger a probe call to learn the dimension - self._dimension = len(self.embed("dimension probe")) - return self._dimension - - def __call__(self, text: str) -> List[float]: - return self.embed(text) - - @lru_cache(maxsize=256) - def embed(self, text: str) -> List[float]: - """Embed *text* and return a ``list[float]``. - - Results are cached (LRU, up to 256 entries) to avoid redundant - network calls when the same string is encountered more than once. - - Parameters - ---------- - text: - The input string to embed. Must be non-empty. - - Returns - ------- - list[float] - The dense embedding vector produced by the server. - - Raises - ------ - ValueError - If *text* is empty or the server returns an unexpected response. - RuntimeError - If the HTTP request fails. 
- """ - if not isinstance(text, str): - raise TypeError(f"Expected str, got {type(text).__name__}") - text = text.strip() - if not text: - raise ValueError("Input text must not be empty or whitespace only.") - - url = self.base_url + self.ENDPOINT - payload = json.dumps({"model": self.model, "input": text}).encode() - - headers = {"Content-Type": "application/json"} - if self.api_key: - headers["Authorization"] = f"Bearer {self.api_key}" - - req = urllib.request.Request(url, data=payload, headers=headers, method="POST") - try: - with urllib.request.urlopen(req, timeout=self.timeout) as resp: - body = json.loads(resp.read()) - except urllib.error.HTTPError as exc: - raise RuntimeError( - f"Embedding server returned HTTP {exc.code}: {exc.read().decode()}" - ) from exc - except OSError as exc: - raise RuntimeError( - f"Could not reach embedding server at {url}: {exc}" - ) from exc - - try: - vector: List[float] = body["data"][0]["embedding"] - except (KeyError, IndexError) as exc: - raise ValueError( - f"Unexpected response format from embedding server: {body}" - ) from exc - - return vector +from zvec.extension import HTTPDenseEmbedding # --------------------------------------------------------------------------- @@ -232,7 +112,7 @@ def run_demo( # 1. 
Embedding function # # ------------------------------------------------------------------ # print(f"[1/4] Connecting to embedding server at {base_url} …") - emb = HTTPEmbeddingFunction(base_url=base_url, model=model, api_key=api_key) + emb = HTTPDenseEmbedding(base_url=base_url, model=model, api_key=api_key) # Probe dimension dim = emb.dimension diff --git a/python/zvec/extension/__init__.py b/python/zvec/extension/__init__.py index cc9401f8..9ff94af2 100644 --- a/python/zvec/extension/__init__.py +++ b/python/zvec/extension/__init__.py @@ -15,6 +15,7 @@ from .bm25_embedding_function import BM25EmbeddingFunction from .embedding_function import DenseEmbeddingFunction, SparseEmbeddingFunction +from .http_embedding_function import HTTPDenseEmbedding from .jina_embedding_function import JinaDenseEmbedding from .jina_function import JinaFunctionBase from .multi_vector_reranker import RrfReRanker, WeightedReRanker @@ -37,6 +38,7 @@ "DefaultLocalReRanker", "DefaultLocalSparseEmbedding", "DenseEmbeddingFunction", + "HTTPDenseEmbedding", "JinaDenseEmbedding", "JinaFunctionBase", "OpenAIDenseEmbedding", diff --git a/python/zvec/extension/http_embedding_function.py b/python/zvec/extension/http_embedding_function.py new file mode 100644 index 00000000..7a80c0f9 --- /dev/null +++ b/python/zvec/extension/http_embedding_function.py @@ -0,0 +1,165 @@ +# Copyright 2025-present the zvec project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from __future__ import annotations + +import json +import os +import urllib.request +from functools import lru_cache +from typing import Optional + +from ..common.constants import TEXT, DenseVectorType +from .embedding_function import DenseEmbeddingFunction + + +class HTTPDenseEmbedding(DenseEmbeddingFunction[TEXT]): + """Dense text embedding function using any OpenAI-compatible HTTP endpoint. + + This class calls any server that implements the ``/v1/embeddings`` API + (LM Studio, Ollama, vLLM, LocalAI, etc.) using only the Python standard + library — no extra dependencies are required. + + The embedding dimension is detected automatically from the first server + response. + + Args: + base_url (str, optional): Base URL of the embedding server. + Defaults to ``"http://localhost:1234"`` (LM Studio). + Common values: + + - ``"http://localhost:1234"`` — LM Studio + - ``"http://localhost:11434"`` — Ollama + model (str, optional): Model identifier as expected by the server. + Defaults to ``"text-embedding-nomic-embed-text-v1.5@f16"``. + api_key (Optional[str], optional): Bearer token for authenticated + endpoints. Falls back to the ``OPENAI_API_KEY`` environment + variable. Leave as ``None`` for local servers that do not + require authentication. + timeout (int, optional): HTTP request timeout in seconds. + Defaults to 30. + + Attributes: + dimension (int): Embedding vector dimensionality (auto-detected). + + Raises: + TypeError: If ``embed()`` receives a non-string input. + ValueError: If input is empty/whitespace-only or the server returns + an unexpected response format. + RuntimeError: If the HTTP request fails or the server is unreachable. + + Examples: + >>> from zvec.extension import HTTPDenseEmbedding + >>> + >>> # LM Studio (default) + >>> emb = HTTPDenseEmbedding() + >>> vector = emb.embed("Hello, world!") + >>> len(vector) + 768 + >>> + >>> # Ollama + >>> emb = HTTPDenseEmbedding( + ... base_url="http://localhost:11434", + ... model="nomic-embed-text", + ... 
) + >>> vector = emb.embed("Semantic search with local models") + + See Also: + - ``DenseEmbeddingFunction``: Protocol for dense embeddings. + - ``OpenAIDenseEmbedding``: Cloud embedding via the OpenAI API. + """ + + ENDPOINT = "/v1/embeddings" + + def __init__( + self, + base_url: str = "http://localhost:1234", + model: str = "text-embedding-nomic-embed-text-v1.5@f16", + api_key: Optional[str] = None, + timeout: int = 30, + ) -> None: + self._base_url = base_url.rstrip("/") + self._model = model + self._api_key = api_key or os.environ.get("OPENAI_API_KEY", "") + self._timeout = timeout + self._dimension: Optional[int] = None + + @property + def dimension(self) -> int: + """int: Embedding vector dimensionality (auto-detected on first call).""" + if self._dimension is None: + self._dimension = len(self.embed("dimension probe")) + return self._dimension + + def __call__(self, input: TEXT) -> DenseVectorType: + """Make the embedding function callable.""" + return self.embed(input) + + @lru_cache(maxsize=256) + def embed(self, input: TEXT) -> DenseVectorType: + """Generate a dense embedding vector for the input text. + + Results are cached (LRU, up to 256 entries) so repeated strings + do not trigger extra HTTP requests. + + Args: + input (TEXT): Input text string to embed. Must be non-empty + after stripping whitespace. + + Returns: + DenseVectorType: A list of floats representing the embedding. + + Raises: + TypeError: If *input* is not a string. + ValueError: If *input* is empty/whitespace-only or the server + returns an unexpected response format. + RuntimeError: If the HTTP request fails. 
+ """ + if not isinstance(input, TEXT): + raise TypeError(f"Expected 'input' to be str, got {type(input).__name__}") + + input = input.strip() + if not input: + raise ValueError("Input text cannot be empty or whitespace only") + + url = self._base_url + self.ENDPOINT + payload = json.dumps({"model": self._model, "input": input}).encode() + + headers: dict[str, str] = {"Content-Type": "application/json"} + if self._api_key: + headers["Authorization"] = f"Bearer {self._api_key}" + + req = urllib.request.Request( + url, data=payload, headers=headers, method="POST" + ) + try: + with urllib.request.urlopen(req, timeout=self._timeout) as resp: + body = json.loads(resp.read()) + except urllib.error.HTTPError as exc: + raise RuntimeError( + f"Embedding server returned HTTP {exc.code}: " + f"{exc.read().decode()}" + ) from exc + except OSError as exc: + raise RuntimeError( + f"Could not reach embedding server at {url}: {exc}" + ) from exc + + try: + vector: list[float] = body["data"][0]["embedding"] + except (KeyError, IndexError) as exc: + raise ValueError( + f"Unexpected response format from embedding server: {body}" + ) from exc + + return vector From e5dee48a23e79fb9da8d88a344218d4381464894 Mon Sep 17 00:00:00 2001 From: Maxime Grenu Date: Thu, 26 Feb 2026 10:14:01 +0100 Subject: [PATCH 3/6] fix(examples): resolve ruff lint errors in HTTP embedding example Move zvec imports to top-level, add noqa for print statements, replace os.path.exists with pathlib, fix import sorting. 
Signed-off-by: Maxime Signed-off-by: Maxime Grenu --- examples/custom_http_embedding.py | 51 +++++++++++++++---------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/examples/custom_http_embedding.py b/examples/custom_http_embedding.py index 0ae17dc2..e41bc936 100644 --- a/examples/custom_http_embedding.py +++ b/examples/custom_http_embedding.py @@ -48,13 +48,22 @@ from __future__ import annotations import argparse -import os import shutil +from pathlib import Path from typing import Optional +from zvec import ( + CollectionSchema, + DataType, + Doc, + HnswIndexParam, + MetricType, + VectorQuery, + VectorSchema, + create_and_open, +) from zvec.extension import HTTPDenseEmbedding - # --------------------------------------------------------------------------- # Demo # --------------------------------------------------------------------------- @@ -96,33 +105,21 @@ def run_demo( api_key: Optional[str], collection_path: str, ) -> None: - import zvec - from zvec import ( - CollectionSchema, - DataType, - Doc, - HnswIndexParam, - MetricType, - VectorQuery, - VectorSchema, - create_and_open, - ) - # ------------------------------------------------------------------ # # 1. Embedding function # # ------------------------------------------------------------------ # - print(f"[1/4] Connecting to embedding server at {base_url} …") + print(f"[1/4] Connecting to embedding server at {base_url} …") # noqa: T201 emb = HTTPDenseEmbedding(base_url=base_url, model=model, api_key=api_key) # Probe dimension dim = emb.dimension - print(f" Model: {model!r} | Dimension: {dim}") + print(f" Model: {model!r} | Dimension: {dim}") # noqa: T201 # ------------------------------------------------------------------ # # 2. 
Create collection with HNSW + cosine # # ------------------------------------------------------------------ # - print("[2/4] Creating zvec collection (HNSW / cosine) …") - if os.path.exists(collection_path): + print("[2/4] Creating zvec collection (HNSW / cosine) …") # noqa: T201 + if Path(collection_path).exists(): shutil.rmtree(collection_path) schema = CollectionSchema( @@ -143,7 +140,7 @@ def run_demo( # ------------------------------------------------------------------ # # 3. Insert documents # # ------------------------------------------------------------------ # - print(f"[3/4] Embedding and inserting {len(SAMPLE_DOCUMENTS)} documents …") + print(f"[3/4] Embedding and inserting {len(SAMPLE_DOCUMENTS)} documents …") # noqa: T201 docs = [] for item in SAMPLE_DOCUMENTS: vector = emb.embed(item["text"]) @@ -159,12 +156,12 @@ def run_demo( collection.insert(docs) collection.flush() - print(f" Inserted {collection.stats.total_doc_count} documents.") + print(f" Inserted {collection.stats.total_doc_count} documents.") # noqa: T201 # ------------------------------------------------------------------ # # 4. 
Search # # ------------------------------------------------------------------ # - print(f"[4/4] Searching for: {QUERY!r}\n") + print(f"[4/4] Searching for: {QUERY!r}\n") # noqa: T201 query_vector = emb.embed(QUERY) results = collection.query( @@ -172,23 +169,23 @@ def run_demo( topk=3, ) - print("Top-3 results:") - print("-" * 60) + print("Top-3 results:") # noqa: T201 + print("-" * 60) # noqa: T201 for rank, result in enumerate(results, start=1): # Retrieve stored fields if available doc_id = result.id score = result.score # Find original text for display original = next((d for d in SAMPLE_DOCUMENTS if d["id"] == doc_id), {}) - print(f" #{rank} id={doc_id} score={score:.4f}") - print(f" {original.get('text', '(text not stored)')}") - print("-" * 60) + print(f" #{rank} id={doc_id} score={score:.4f}") # noqa: T201 + print(f" {original.get('text', '(text not stored)')}") # noqa: T201 + print("-" * 60) # noqa: T201 # ------------------------------------------------------------------ # # Cleanup # # ------------------------------------------------------------------ # collection.destroy() - print("\nCollection destroyed. Done!") + print("\nCollection destroyed. 
Done!") # noqa: T201 # --------------------------------------------------------------------------- From 400dacf55884978dbdbac886923aed31cf4e615f Mon Sep 17 00:00:00 2001 From: Maxime Grenu Date: Thu, 26 Feb 2026 10:16:49 +0100 Subject: [PATCH 4/6] style: apply ruff formatter Signed-off-by: Maxime Signed-off-by: Maxime Grenu --- examples/custom_http_embedding.py | 1 + python/zvec/extension/http_embedding_function.py | 7 ++----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/examples/custom_http_embedding.py b/examples/custom_http_embedding.py index e41bc936..8aa994be 100644 --- a/examples/custom_http_embedding.py +++ b/examples/custom_http_embedding.py @@ -192,6 +192,7 @@ def run_demo( # Entry point # --------------------------------------------------------------------------- + def main() -> None: parser = argparse.ArgumentParser( description="zvec custom HTTP embedding demo (LM Studio / Ollama)", diff --git a/python/zvec/extension/http_embedding_function.py b/python/zvec/extension/http_embedding_function.py index 7a80c0f9..3386c652 100644 --- a/python/zvec/extension/http_embedding_function.py +++ b/python/zvec/extension/http_embedding_function.py @@ -139,16 +139,13 @@ def embed(self, input: TEXT) -> DenseVectorType: if self._api_key: headers["Authorization"] = f"Bearer {self._api_key}" - req = urllib.request.Request( - url, data=payload, headers=headers, method="POST" - ) + req = urllib.request.Request(url, data=payload, headers=headers, method="POST") try: with urllib.request.urlopen(req, timeout=self._timeout) as resp: body = json.loads(resp.read()) except urllib.error.HTTPError as exc: raise RuntimeError( - f"Embedding server returned HTTP {exc.code}: " - f"{exc.read().decode()}" + f"Embedding server returned HTTP {exc.code}: {exc.read().decode()}" ) from exc except OSError as exc: raise RuntimeError( From eb3960ee7abc6878d76ac45607c1c4facf4dbd71 Mon Sep 17 00:00:00 2001 From: Maxime Grenu Date: Thu, 26 Feb 2026 11:09:53 +0100 Subject: [PATCH 5/6] 
ci: retrigger CI (flaky macOS C++ test) The vector_column_indexer_test failure is a known flaky assertion in hnsw_streamer_entity.h, unrelated to Python-only changes in this PR. Signed-off-by: Maxime Signed-off-by: Maxime Grenu From 327d718c09b62d685bfe5886a70869f615edcb16 Mon Sep 17 00:00:00 2001 From: Maxime Grenu Date: Fri, 27 Feb 2026 10:24:03 +0100 Subject: [PATCH 6/6] chore: remove custom HTTP embedding example Per maintainer feedback, examples requiring an external LLM server belong in the zvec-web project rather than in this repository. Signed-off-by: Maxime Grenu --- examples/custom_http_embedding.py | 232 ------------------------------ 1 file changed, 232 deletions(-) delete mode 100644 examples/custom_http_embedding.py diff --git a/examples/custom_http_embedding.py b/examples/custom_http_embedding.py deleted file mode 100644 index 8aa994be..00000000 --- a/examples/custom_http_embedding.py +++ /dev/null @@ -1,232 +0,0 @@ -""" -Custom HTTP Embedding Example for zvec -====================================== - -Demonstrates how to use any OpenAI-compatible embedding endpoint -(LM Studio, Ollama, vLLM, LocalAI, …) as an embedding source in zvec. - -Usage ------ -1. Start your local inference server: - - **LM Studio** (https://lmstudio.ai/): - - Open LM Studio → go to "Local Server" tab - - Load an embedding model (e.g. nomic-embed-text, all-minilm-l6-v2) - - Click "Start Server" (default: http://localhost:1234) - - Enable "Allow external connections" if accessing from another machine - - **Ollama** (https://ollama.com/): - $ ollama serve # starts on http://localhost:11434 - $ ollama pull nomic-embed-text # pull the model first - -2. Install zvec: - $ pip install zvec - -3. 
Run the example:
    # LM Studio (default)
    $ python examples/custom_http_embedding.py

    # Ollama
    $ python examples/custom_http_embedding.py \\
        --base-url http://localhost:11434 \\
        --model nomic-embed-text

    # Remote / custom server
    $ python examples/custom_http_embedding.py \\
        --base-url http://192.168.1.10:1234 \\
        --model text-embedding-nomic-embed-text-v1.5@f16

Notes
-----
- The embedding dimension is detected automatically on the first call.
- No API key is required for local servers; pass ``--api-key`` if yours needs one.
- The collection is stored under ``/tmp/zvec_http_embedding_example`` and is
  destroyed at the end of the script. Remove the ``collection.destroy()`` call
  at the bottom to keep the data across runs.
"""

from __future__ import annotations

import argparse
import shutil
from pathlib import Path
from typing import Optional

from zvec import (
    CollectionSchema,
    DataType,
    Doc,
    HnswIndexParam,
    MetricType,
    VectorQuery,
    VectorSchema,
    create_and_open,
)
from zvec.extension import HTTPDenseEmbedding

# ---------------------------------------------------------------------------
# Demo
# ---------------------------------------------------------------------------

# Tiny in-memory corpus for the demo. Each entry becomes one zvec Doc:
# "id" is the document id, "text" is embedded, and "text"/"topic" are also
# stored as plain fields on the Doc.
SAMPLE_DOCUMENTS = [
    {
        "id": "doc_1",
        "text": "LM Studio lets you run large language models locally on your computer.",
        "topic": "local AI",
    },
    {
        "id": "doc_2",
        "text": "Ollama is an open-source tool for running language models on-device.",
        "topic": "local AI",
    },
    {
        "id": "doc_3",
        "text": "zvec is a lightweight, in-process vector database built on Proxima.",
        "topic": "vector database",
    },
    {
        "id": "doc_4",
        "text": "HNSW is a graph-based algorithm for approximate nearest-neighbor search.",
        "topic": "ANN algorithms",
    },
    {
        "id": "doc_5",
        "text": "Cosine similarity measures the angle between two vectors, ignoring magnitude.",
        "topic": "math",
    },
]

# Natural-language query used for the search step at the end of the demo.
QUERY = "How do I run an embedding model on my laptop?"


def run_demo(
    base_url: str,
    model: str,
    api_key: Optional[str],
    collection_path: str,
) -> None:
    """Run the end-to-end demo: embed, index, and search the sample corpus.

    Parameters
    ----------
    base_url:
        Base URL of the OpenAI-compatible embedding server
        (e.g. ``http://localhost:1234`` for LM Studio).
    model:
        Embedding model identifier as expected by that server.
    api_key:
        Optional bearer token; ``None`` for unauthenticated local servers.
    collection_path:
        Filesystem path for the zvec collection. Any existing data at this
        path is wiped before the run, and the collection is destroyed at the
        end of the run.
    """
    # ------------------------------------------------------------------ #
    # 1. Embedding function                                              #
    # ------------------------------------------------------------------ #
    print(f"[1/4] Connecting to embedding server at {base_url} …")  # noqa: T201
    emb = HTTPDenseEmbedding(base_url=base_url, model=model, api_key=api_key)

    # Probe dimension.
    # NOTE(review): presumably reading ``dimension`` issues the first HTTP
    # request to the server to discover the vector size — confirm against
    # the zvec.extension.HTTPDenseEmbedding documentation.
    dim = emb.dimension
    print(f" Model: {model!r} | Dimension: {dim}")  # noqa: T201

    # ------------------------------------------------------------------ #
    # 2. Create collection with HNSW + cosine                            #
    # ------------------------------------------------------------------ #
    print("[2/4] Creating zvec collection (HNSW / cosine) …")  # noqa: T201
    # Wipe any leftover data from a previous run so create_and_open starts
    # fresh. NOTE(review): rmtree assumes the path is a directory; it raises
    # if a plain file exists at collection_path.
    if Path(collection_path).exists():
        shutil.rmtree(collection_path)

    # Single dense FP32 vector field indexed with HNSW under cosine distance;
    # the dimension comes from the probe above, so it always matches the model.
    schema = CollectionSchema(
        name="http_embedding_demo",
        vectors=VectorSchema(
            name="embedding",
            data_type=DataType.VECTOR_FP32,
            dimension=dim,
            index_param=HnswIndexParam(
                metric_type=MetricType.COSINE,
                m=16,
                ef_construction=200,
            ),
        ),
    )
    collection = create_and_open(path=collection_path, schema=schema)

    # ------------------------------------------------------------------ #
    # 3. Insert documents                                                #
    # ------------------------------------------------------------------ #
    print(f"[3/4] Embedding and inserting {len(SAMPLE_DOCUMENTS)} documents …")  # noqa: T201
    # One embed() call per document — one HTTP round-trip each, which is
    # fine for a 5-document demo.
    docs: list[Doc] = []
    for item in SAMPLE_DOCUMENTS:
        vector = emb.embed(item["text"])
        doc = Doc(
            id=item["id"],
            vectors={"embedding": vector},
            fields={
                "text": item["text"],
                "topic": item["topic"],
            },
        )
        docs.append(doc)

    collection.insert(docs)
    # NOTE(review): flush() appears to persist the inserted docs so the
    # subsequent query can see them — confirm against the zvec docs.
    collection.flush()
    print(f" Inserted {collection.stats.total_doc_count} documents.")  # noqa: T201

    # ------------------------------------------------------------------ #
    # 4. Search                                                          #
    # ------------------------------------------------------------------ #
    print(f"[4/4] Searching for: {QUERY!r}\n")  # noqa: T201
    # Embed the query with the same model used for the corpus, then run a
    # top-3 nearest-neighbor search on the "embedding" vector field.
    query_vector = emb.embed(QUERY)

    results = collection.query(
        VectorQuery("embedding", vector=query_vector),
        topk=3,
    )

    print("Top-3 results:")  # noqa: T201
    print("-" * 60)  # noqa: T201
    for rank, result in enumerate(results, start=1):
        # Retrieve stored fields if available
        doc_id = result.id
        score = result.score
        # Find original text for display — looked up from SAMPLE_DOCUMENTS
        # rather than from the stored Doc fields.
        original = next((d for d in SAMPLE_DOCUMENTS if d["id"] == doc_id), {})
        print(f" #{rank} id={doc_id} score={score:.4f}")  # noqa: T201
        print(f" {original.get('text', '(text not stored)')}")  # noqa: T201
    print("-" * 60)  # noqa: T201

    # ------------------------------------------------------------------ #
    # Cleanup                                                            #
    # ------------------------------------------------------------------ #
    # Remove this call to keep the collection on disk across runs.
    collection.destroy()
    print("\nCollection destroyed. Done!")  # noqa: T201


# ---------------------------------------------------------------------------
# Entry point
# ---------------------------------------------------------------------------


def main() -> None:
    """Parse command-line arguments and launch :func:`run_demo`."""
    parser = argparse.ArgumentParser(
        description="zvec custom HTTP embedding demo (LM Studio / Ollama)",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--base-url",
        default="http://localhost:1234",
        help="Base URL of the OpenAI-compatible embedding server.",
    )
    parser.add_argument(
        "--model",
        default="text-embedding-nomic-embed-text-v1.5@f16",
        help="Embedding model name as expected by the server.",
    )
    parser.add_argument(
        "--api-key",
        default=None,
        help="Optional API key (leave blank for local servers).",
    )
    parser.add_argument(
        "--collection-path",
        default="/tmp/zvec_http_embedding_example",
        help="Filesystem path for the zvec collection.",
    )
    args = parser.parse_args()

    run_demo(
        base_url=args.base_url,
        model=args.model,
        api_key=args.api_key,
        collection_path=args.collection_path,
    )


if __name__ == "__main__":
    main()