From 03f173c32522f6c8e02f62d3b31d013ce73ec7e8 Mon Sep 17 00:00:00 2001 From: Dj Isaac Date: Thu, 20 Nov 2025 17:34:41 -0600 Subject: [PATCH] fix: switch from semicolon to comma seperation maintains backwards compat --- .runpod/hub.json | 8 ++++---- .runpod/tests.json | 2 +- README.md | 22 +++++++++++----------- requirements.txt | 4 ++-- src/config.py | 11 +++++++---- 5 files changed, 25 insertions(+), 22 deletions(-) diff --git a/.runpod/hub.json b/.runpod/hub.json index d9e716f..6c1553a 100644 --- a/.runpod/hub.json +++ b/.runpod/hub.json @@ -16,7 +16,7 @@ "input": { "name": "Model Names", "type": "string", - "description": "One or more Hugging-Face model IDs. Separate multiple IDs with a semicolon.", + "description": "One or more Hugging-Face model IDs. Separate multiple IDs with a comma.", "default": "BAAI/bge-small-en-v1.5" } }, @@ -25,7 +25,7 @@ "input": { "name": "Batch Sizes", "type": "string", - "description": "Per-model batch size; semicolon-separated list matching MODEL_NAMES.", + "description": "Per-model batch size; comma-separated list matching MODEL_NAMES.", "default": "32" } }, @@ -43,7 +43,7 @@ "input": { "name": "Data Types", "type": "string", - "description": "Precision per model (auto, fp16, fp8). Semicolon-separated, must match MODEL_NAMES.", + "description": "Precision per model (auto, fp16, fp8). 
comma-separated, must match MODEL_NAMES.", "default": "auto" } }, @@ -61,7 +61,7 @@ "input": { "name": "Max Concurrency", "type": "string", - "description": "Max concurrent requests the RunPod wrapper will accept.", + "description": "Max concurrent requests the Runpod wrapper will accept.", "default": "300" } } diff --git a/.runpod/tests.json b/.runpod/tests.json index 02b8a4f..2619d63 100644 --- a/.runpod/tests.json +++ b/.runpod/tests.json @@ -12,7 +12,7 @@ "config": { "gpuTypeId": "NVIDIA RTX A5000", "gpuCount": 1, - "allowedCudaVersions": ["12.7", "12.6", "12.5", "12.4"], + "allowedCudaVersions": ["12.9", "12.8", "12.7", "12.6", "12.5", "12.4"], "env": [ { "key": "MODEL_NAMES", diff --git a/README.md b/README.md index b22672b..6e956e7 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ High-throughput, OpenAI-compatible text embedding & reranker powered by [Infinit --- -[![RunPod](https://api.runpod.io/badge/runpod-workers/worker-infinity-embedding)](https://www.runpod.io/console/hub/runpod-workers/worker-infinity-embedding) +[![Runpod](https://api.runpod.io/badge/runpod-workers/worker-infinity-embedding)](https://www.runpod.io/console/hub/runpod-workers/worker-infinity-embedding) --- @@ -26,7 +26,7 @@ High-throughput, OpenAI-compatible text embedding & reranker powered by [Infinit 1. ๐Ÿณ **Pull an image** โ€“ use the tag shown on the latest [GitHub release page](https://github.com/runpod-workers/worker-infinity-embedding/releases) (e.g. `runpod/worker-infinity-embedding:`) 2. ๐Ÿ”ง **Configure** โ€“ set at least `MODEL_NAMES` (see [Endpoint Configuration](#endpoint-configuration)) -3. ๐Ÿš€ **Deploy** โ€“ create a [RunPod Serverless endpoint](https://docs.runpod.io/serverless/endpoints/manage-endpoints) +3. ๐Ÿš€ **Deploy** โ€“ create a [Runpod Serverless endpoint](https://docs.runpod.io/serverless/endpoints/manage-endpoints) 4. 
๐Ÿงช **Call the API** โ€“ follow the example in the [Usage](#usage) section --- @@ -37,12 +37,12 @@ All behaviour is controlled through environment variables: | Variable | Required | Default | Description | | ------------------------ | -------- | ------- | ---------------------------------------------------------------------------------------------------------------- | -| `MODEL_NAMES` | **Yes** | โ€” | One or more Hugging-Face model IDs. Separate multiple IDs with a semicolon.
Example: `BAAI/bge-small-en-v1.5` | -| `BATCH_SIZES` | No | `32` | Per-model batch size; semicolon-separated list matching `MODEL_NAMES`. | +| `MODEL_NAMES` | **Yes** | โ€” | One or more Hugging-Face model IDs. Separate multiple IDs with a comma.
Example: `BAAI/bge-small-en-v1.5` | +| `BATCH_SIZES` | No | `32` | Per-model batch size; comma-separated list matching `MODEL_NAMES`. | | `BACKEND` | No | `torch` | Inference engine for _all_ models: `torch`, `optimum`, or `ctranslate2`. | -| `DTYPES` | No | `auto` | Precision per model (`auto`, `fp16`, `fp8`). Semicolon-separated, must match `MODEL_NAMES`. | +| `DTYPES` | No | `auto` | Precision per model (`auto`, `fp16`, `fp8`). Comma-separated, must match `MODEL_NAMES`. | | `INFINITY_QUEUE_SIZE` | No | `48000` | Max items queueable inside the Infinity engine. | -| `RUNPOD_MAX_CONCURRENCY` | No | `300` | Max concurrent requests the RunPod wrapper will accept. | +| `RUNPOD_MAX_CONCURRENCY` | No | `300` | Max concurrent requests the Runpod wrapper will accept. | --- @@ -50,8 +50,8 @@ All behaviour is controlled through environment variables: Two flavours, one schema. -- **OpenAI-compatible** โ€“ drop-in replacement for `/v1/models`, `/v1/embeddings`, so you can use this endpoint instead of the API from OpenAI by replacing the base url with the URL of your endpoint: `https://api.runpod.ai/v2//openai/v1` and use your [API key from RunPod](https://docs.runpod.io/get-started/api-keys) instead of the one from OpenAI -- **Standard RunPod** โ€“ call `/run` or `/runsync` with a JSON body under the `input` key. +- **OpenAI-compatible** โ€“ drop-in replacement for `/v1/models`, `/v1/embeddings`, so you can use this endpoint instead of the API from OpenAI by replacing the base url with the URL of your endpoint: `https://api.runpod.ai/v2//openai/v1` and use your [API key from Runpod](https://docs.runpod.io/get-started/api-keys) instead of the one from OpenAI +- **Standard Runpod** โ€“ call `/run` or `/runsync` with a JSON body under the `input` key. Base URL: `https://api.runpod.ai/v2/` Except for transport (path + wrapper object) the JSON you send/receive is identical. The tables below describe the shared payload. 
@@ -144,7 +144,7 @@ Response contains either `scores` or the full `docs` list, depending on `return_ Below are minimal `curl` snippets so you can copy-paste from any machine. -> Replace `` with your endpoint ID and `` with a [RunPod API key](https://docs.runpod.io/get-started/api-keys). +> Replace `` with your endpoint ID and `` with a [Runpod API key](https://docs.runpod.io/get-started/api-keys). ### OpenAI-Compatible Calls @@ -161,7 +161,7 @@ curl -X POST \ https://api.runpod.ai/v2//openai/v1/embeddings ``` -### Standard RunPod Calls +### Standard Runpod Calls ```bash # Create embeddings (wait for result) @@ -182,7 +182,7 @@ curl -X POST \ ## Further Documentation - **[Infinity Engine](https://github.com/michaelfeil/infinity)** โ€“ how the ultra-fast backend works. -- **[RunPod Docs](https://docs.runpod.io/)** โ€“ serverless concepts, limits, and API reference. +- **[Runpod Docs](https://docs.runpod.io/)** โ€“ serverless concepts, limits, and API reference. --- diff --git a/requirements.txt b/requirements.txt index 9812a81..dc583f9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -runpod~=1.7.0 -infinity-emb[all]==0.0.76 +runpod~=1.8.1 +infinity-emb[all]==0.0.77 einops # deployment of custom code with nomic git+https://github.com/pytorch-labs/float8_experimental.git diff --git a/src/config.py b/src/config.py index 6266ed5..8923665 100644 --- a/src/config.py +++ b/src/config.py @@ -15,7 +15,9 @@ def __init__(self): load_dotenv() def _get_no_required_multi(self, name, default=None): - out = os.getenv(name, f"{default};" * len(self.model_names)).split(";") + out = os.getenv(name, f"{default};" * len(self.model_names)) + out = out.replace(";", ",") # We previously split by semicolon, and want to maintain this behavior for a while. 
+ out = out.split(",") out = [o for o in out if o] if len(out) != len(self.model_names): raise ValueError( @@ -33,12 +35,13 @@ def model_names(self) -> list[str]: if not model_names: raise ValueError( "Missing required environment variable 'MODEL_NAMES'.\n" - "Please provide at least one HuggingFace model ID, or multiple IDs separated by a semicolon.\n" + "Please provide at least one HuggingFace model ID, or multiple IDs separated by a comma.\n" "Examples:\n" " MODEL_NAMES=BAAI/bge-small-en-v1.5\n" - " MODEL_NAMES=BAAI/bge-small-en-v1.5;intfloat/e5-large-v2\n" + " MODEL_NAMES=BAAI/bge-small-en-v1.5,intfloat/e5-large-v2\n" ) - model_names = model_names.split(";") + model_names = model_names.replace(";", ",") # see above, keeping support for semicolon-separation + model_names = model_names.split(",") model_names = [model_name for model_name in model_names if model_name] return model_names