From 5bf976a9f8db19e776573bd0e75fa4bc680bef25 Mon Sep 17 00:00:00 2001
From: Aiun
Date: Thu, 25 Dec 2025 21:54:08 -0500
Subject: [PATCH] Add OpenRouter support

---
 .env.example                              |   4 +
 Cargo.lock                                |   2 +-
 crates/pattern_cli/src/agent_ops.rs       |  21 +-
 crates/pattern_core/src/model.rs          |  31 +-
 crates/pattern_core/src/model/defaults.rs | 351 ++++++++++++++++++++++
 docs/config-examples.md                   |  36 ++-
 pattern.example.toml                      |   6 +-
 7 files changed, 432 insertions(+), 19 deletions(-)

diff --git a/.env.example b/.env.example
index b83c42a..20cd00d 100644
--- a/.env.example
+++ b/.env.example
@@ -12,6 +12,10 @@
 
 # Models
 #GEMINI_API_KEY=
+# OpenRouter example with optional app URL and title headers
+#OPENROUTER_API_KEY=
+#OPENROUTER_APP_URL=https://github.com/orual/pattern/
+#OPENROUTER_APP_TITLE=Pattern
 
 # Database
 SURREAL_SYNC_DATA=true
diff --git a/Cargo.lock b/Cargo.lock
index 2ffada5..fbe5d82 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2863,7 +2863,7 @@ dependencies = [
 [[package]]
 name = "genai"
 version = "0.4.0-alpha.8-WIP"
-source = "git+https://github.com/orual/rust-genai#ffb904c285b91c45a3c83913b26cf513608e05b6"
+source = "git+https://github.com/orual/rust-genai#0e81a6c8b27e2d31cc3c27fae237a3f4b3dec3ad"
 dependencies = [
  "bytes",
  "derive_more 2.0.1",
diff --git a/crates/pattern_cli/src/agent_ops.rs b/crates/pattern_cli/src/agent_ops.rs
index 67bf00c..0467810 100644
--- a/crates/pattern_cli/src/agent_ops.rs
+++ b/crates/pattern_cli/src/agent_ops.rs
@@ -232,16 +232,17 @@ pub async fn load_model_embedding_providers(
         let oauth_client =
             OAuthClientBuilder::new(Arc::new(DB.clone()), config.user.id.clone()).build()?;
         // Wrap in GenAiClient with all endpoints available
-        let genai_client = GenAiClient::with_endpoints(
-            oauth_client,
-            vec![
-                genai::adapter::AdapterKind::Anthropic,
-                genai::adapter::AdapterKind::Gemini,
-                genai::adapter::AdapterKind::OpenAI,
-                genai::adapter::AdapterKind::Groq,
-                genai::adapter::AdapterKind::Cohere,
-            ],
-        );
+        let mut endpoints = vec![
+            genai::adapter::AdapterKind::Anthropic,
+            genai::adapter::AdapterKind::Gemini,
+            genai::adapter::AdapterKind::OpenAI,
+            genai::adapter::AdapterKind::Groq,
+            genai::adapter::AdapterKind::Cohere,
+        ];
+        if std::env::var("OPENROUTER_API_KEY").is_ok() {
+            endpoints.push(genai::adapter::AdapterKind::OpenRouter);
+        }
+        let genai_client = GenAiClient::with_endpoints(oauth_client, endpoints);
         Arc::new(RwLock::new(genai_client))
     }
     #[cfg(not(feature = "oauth"))]
diff --git a/crates/pattern_core/src/model.rs b/crates/pattern_core/src/model.rs
index acd5743..0add56f 100644
--- a/crates/pattern_core/src/model.rs
+++ b/crates/pattern_core/src/model.rs
@@ -165,7 +165,8 @@ impl ResponseOptions {
 pub enum ModelVendor {
     Anthropic,
     OpenAI,
-    Gemini, // Google's Gemini models
+    OpenRouter, // OpenRouter - routes to multiple providers via OpenAI-compatible API
+    Gemini,     // Google's Gemini models
     Cohere,
     Groq,
     Ollama,
@@ -176,7 +177,12 @@ impl ModelVendor {
     /// Check if this vendor uses OpenAI-compatible API
     pub fn is_openai_compatible(&self) -> bool {
         match self {
-            Self::OpenAI | Self::Cohere | Self::Groq | Self::Ollama | Self::Other => true,
+            Self::OpenAI
+            | Self::OpenRouter
+            | Self::Cohere
+            | Self::Groq
+            | Self::Ollama
+            | Self::Other => true,
             Self::Anthropic | Self::Gemini => false,
         }
     }
@@ -186,6 +192,7 @@ impl ModelVendor {
         match provider.to_lowercase().as_str() {
             "anthropic" => Self::Anthropic,
             "openai" => Self::OpenAI,
+            "openrouter" => Self::OpenRouter,
             "gemini" | "google" => Self::Gemini,
             "cohere" => Self::Cohere,
             "groq" => Self::Groq,
@@ -282,6 +289,9 @@ impl GenAiClient {
         if std::env::var("COHERE_API_KEY").is_ok() {
             available_endpoints.push(AdapterKind::Cohere);
         }
+        if std::env::var("OPENROUTER_API_KEY").is_ok() {
+            available_endpoints.push(AdapterKind::OpenRouter);
+        }
 
         Ok(Self {
             client,
@@ -317,14 +327,23 @@ impl ModelProvider for GenAiClient {
         };
 
         for model in models {
+            // For OpenRouter, we need to prefix model IDs with "openrouter::" so genai
+            // can resolve them to the correct adapter. OpenRouter models use "/" as separator
+            // (e.g., "anthropic/claude-opus-4.5") but genai uses "::" for namespacing.
+            let model_id = if *endpoint == AdapterKind::OpenRouter {
+                format!("openrouter::{}", model)
+            } else {
+                model.clone()
+            };
+
             // Try to resolve the service target - this validates authentication
-            match self.client.resolve_service_target(&model).await {
+            match self.client.resolve_service_target(&model_id).await {
                 Ok(_) => {
                     // Model is accessible, continue
                 }
                 Err(e) => {
                     // Authentication failed for this model, skip it
-                    tracing::debug!("Skipping model {} due to auth error: {}", model, e);
+                    tracing::debug!("Skipping model {} due to auth error: {}", model_id, e);
                     continue;
                 }
             }
@@ -332,8 +351,8 @@ impl ModelProvider for GenAiClient {
             // Create basic ModelInfo from provider
             let model_info = ModelInfo {
                 provider: endpoint.to_string(),
-                id: model.clone(),
-                name: model,
+                id: model_id.clone(),
+                name: model, // Keep original name for display
                 capabilities: vec![],
                 max_output_tokens: None,
                 cost_per_1k_completion_tokens: None,
diff --git a/crates/pattern_core/src/model/defaults.rs b/crates/pattern_core/src/model/defaults.rs
index 28b4c56..58690af 100644
--- a/crates/pattern_core/src/model/defaults.rs
+++ b/crates/pattern_core/src/model/defaults.rs
@@ -611,6 +611,357 @@ fn apply_provider_defaults(model_info: &mut ModelInfo) {
     let provider_lower = model_info.provider.to_lowercase();
 
     match provider_lower.as_str() {
+        "openrouter" => {
+            // OpenRouter models use provider/model format (e.g., "anthropic/claude-3-opus")
+            // Try to extract the underlying provider and model for better defaults
+            // Data sourced from OpenRouter API: https://openrouter.ai/api/v1/models
+            if let Some(slash_idx) = model_info.id.find('/') {
+                let underlying_provider = &model_info.id[..slash_idx];
+                let underlying_model = &model_info.id[slash_idx + 1..];
+
+                // Apply defaults based on underlying provider
+                match underlying_provider.to_lowercase().as_str() {
+                    "anthropic" => {
+                        // Base Claude defaults (claude-3-opus, claude-3-haiku)
+                        model_info.context_window = 200_000;
+                        model_info.max_output_tokens = Some(4_096);
+                        model_info.capabilities = vec![
+                            ModelCapability::TextGeneration,
+                            ModelCapability::FunctionCalling,
+                            ModelCapability::SystemPrompt,
+                            ModelCapability::VisionInput,
+                            ModelCapability::LongContext,
+                        ];
+
+                        // Claude 4.x series - sonnet/opus variants have different contexts
+                        if underlying_model.contains("sonnet-4.5")
+                            || underlying_model.contains("sonnet-4")
+                        {
+                            // claude-sonnet-4.5 and claude-sonnet-4 have 1M context
+                            model_info.context_window = 1_000_000;
+                            model_info.max_output_tokens = Some(64_000);
+                            model_info
+                                .capabilities
+                                .push(ModelCapability::ExtendedThinking);
+                            model_info.capabilities.push(ModelCapability::ComputerUse);
+                            model_info.capabilities.push(ModelCapability::TextEdit);
+                            model_info.capabilities.push(ModelCapability::CodeExecution);
+                        } else if underlying_model.contains("opus-4.5")
+                            || underlying_model.contains("opus-4")
+                        {
+                            // claude-opus-4.5 and claude-opus-4 have 200k context, 32k output
+                            model_info.context_window = 200_000;
+                            model_info.max_output_tokens = Some(32_000);
+                            model_info
+                                .capabilities
+                                .push(ModelCapability::ExtendedThinking);
+                            model_info.capabilities.push(ModelCapability::ComputerUse);
+                            model_info.capabilities.push(ModelCapability::TextEdit);
+                            model_info.capabilities.push(ModelCapability::CodeExecution);
+                        } else if underlying_model.contains("haiku-4.5") {
+                            // claude-haiku-4.5 has 200k context, 64k output
+                            model_info.context_window = 200_000;
+                            model_info.max_output_tokens = Some(64_000);
+                            model_info
+                                .capabilities
+                                .push(ModelCapability::ExtendedThinking);
+                            model_info.capabilities.push(ModelCapability::ComputerUse);
+                            model_info.capabilities.push(ModelCapability::TextEdit);
+                            model_info.capabilities.push(ModelCapability::CodeExecution);
+                        } else if underlying_model.contains("claude-3.7-sonnet")
+                            || underlying_model.contains("3.7-sonnet")
+                        {
+                            // claude-3.7-sonnet has 200k context, 64k output
+                            model_info.context_window = 200_000;
+                            model_info.max_output_tokens = Some(64_000);
+                            model_info
+                                .capabilities
+                                .push(ModelCapability::ExtendedThinking);
+                            model_info.capabilities.push(ModelCapability::ComputerUse);
+                            model_info.capabilities.push(ModelCapability::TextEdit);
+                        } else if underlying_model.contains("claude-3.5-sonnet")
+                            || underlying_model.contains("3.5-sonnet")
+                        {
+                            // claude-3.5-sonnet has 200k context, 8192 output
+                            model_info.context_window = 200_000;
+                            model_info.max_output_tokens = Some(8_192);
+                        } else if underlying_model.contains("claude-3.5-haiku")
+                            || underlying_model.contains("3.5-haiku")
+                        {
+                            // claude-3.5-haiku has 200k context, 8192 output
+                            model_info.context_window = 200_000;
+                            model_info.max_output_tokens = Some(8_192);
+                        }
+                        // claude-3-opus, claude-3-sonnet, claude-3-haiku keep base defaults (200k/4096)
+                    }
+                    "openai" => {
+                        // Base OpenAI defaults
+                        model_info.context_window = 128_000;
+                        model_info.max_output_tokens = Some(4_096);
+                        model_info.capabilities = vec![
+                            ModelCapability::TextGeneration,
+                            ModelCapability::FunctionCalling,
+                            ModelCapability::SystemPrompt,
+                            ModelCapability::VisionInput,
+                            ModelCapability::LongContext,
+                            ModelCapability::JsonMode,
+                        ];
+
+                        if underlying_model.starts_with("o1")
+                            || underlying_model.starts_with("o3")
+                            || underlying_model.starts_with("o4")
+                        {
+                            // o1/o3/o4 reasoning models: 200k context, 100k output
+                            model_info.context_window = 200_000;
+                            model_info.max_output_tokens = Some(100_000);
+                            model_info
+                                .capabilities
+                                .push(ModelCapability::ExtendedThinking);
+                        } else if underlying_model.contains("gpt-4o") {
+                            // gpt-4o variants: 128k context, 16384 output
+                            model_info.context_window = 128_000;
+                            model_info.max_output_tokens = Some(16_384);
+                            if underlying_model.contains(":extended") {
+                                model_info.max_output_tokens = Some(64_000);
+                            }
+                        } else if underlying_model.contains("gpt-4-turbo") {
+                            // gpt-4-turbo: 128k context, 4096 output
+                            model_info.context_window = 128_000;
+                            model_info.max_output_tokens = Some(4_096);
+                        } else if underlying_model == "gpt-4" {
+                            // gpt-4 base: 8191 context, 4096 output, no vision
+                            model_info.context_window = 8_191;
+                            model_info.max_output_tokens = Some(4_096);
+                            model_info.capabilities = vec![
+                                ModelCapability::TextGeneration,
+                                ModelCapability::FunctionCalling,
+                                ModelCapability::SystemPrompt,
+                                ModelCapability::JsonMode,
+                            ];
+                        } else if underlying_model.contains("gpt-5") {
+                            // gpt-5 variants: 400k context (chat variants 128k), 128k output
+                            if underlying_model.contains("-chat") {
+                                model_info.context_window = 128_000;
+                                model_info.max_output_tokens = Some(16_384);
+                            } else {
+                                model_info.context_window = 400_000;
+                                model_info.max_output_tokens = Some(128_000);
+                            }
+                        }
+                    }
+                    "google" => {
+                        // Gemini models default: 1M context, 8192 output
+                        model_info.context_window = 1_048_576;
+                        model_info.max_output_tokens = Some(8_192);
+                        model_info.capabilities = vec![
+                            ModelCapability::TextGeneration,
+                            ModelCapability::FunctionCalling,
+                            ModelCapability::SystemPrompt,
+                            ModelCapability::VisionInput,
+                            ModelCapability::LongContext,
+                            ModelCapability::JsonMode,
+                        ];
+
+                        // Gemini 2.5+ models have 65536 output
+                        if underlying_model.contains("gemini-2.5")
+                            || underlying_model.contains("gemini-3")
+                        {
+                            model_info.max_output_tokens = Some(65_536);
+                            model_info
+                                .capabilities
+                                .push(ModelCapability::ExtendedThinking);
+                        }
+                    }
+                    "meta-llama" => {
+                        // Llama 3.x defaults: 131072 context (from API)
+                        model_info.context_window = 131_072;
+                        model_info.max_output_tokens = Some(16_384);
+                        model_info.capabilities = vec![
+                            ModelCapability::TextGeneration,
+                            ModelCapability::FunctionCalling,
+                            ModelCapability::SystemPrompt,
+                        ];
+
+                        // Llama 3.1-405b has reduced context on OpenRouter
+                        if underlying_model.contains("405b") && !underlying_model.contains(":free")
+                        {
+                            model_info.context_window = 10_000;
+                            model_info.max_output_tokens = None; // varies
+                        }
+                        // Vision models
+                        if underlying_model.contains("vision") {
+                            model_info.capabilities.push(ModelCapability::VisionInput);
+                        }
+                    }
+                    "mistralai" => {
+                        // Mistral defaults: varies significantly by model
+                        model_info.context_window = 131_072;
+                        model_info.max_output_tokens = Some(16_384);
+                        model_info.capabilities = vec![
+                            ModelCapability::TextGeneration,
+                            ModelCapability::FunctionCalling,
+                            ModelCapability::SystemPrompt,
+                        ];
+
+                        if underlying_model.contains("mistral-large") {
+                            // mistral-large: 128k-262k context
+                            model_info.context_window = 128_000;
+                            model_info.max_output_tokens = None; // varies
+                        } else if underlying_model.contains("mixtral-8x22b") {
+                            // mixtral-8x22b: 65536 context
+                            model_info.context_window = 65_536;
+                            model_info.max_output_tokens = None;
+                        } else if underlying_model.contains("mixtral-8x7b") {
+                            // mixtral-8x7b: 32768 context, 16384 output
+                            model_info.context_window = 32_768;
+                            model_info.max_output_tokens = Some(16_384);
+                        } else if underlying_model.contains("devstral") {
+                            // devstral models: up to 262k context
+                            model_info.context_window = 262_144;
+                            model_info.max_output_tokens = Some(65_536);
+                        } else if underlying_model.contains("mistral-medium") {
+                            // mistral-medium-3.x: 131k context
+                            model_info.context_window = 131_072;
+                            model_info.max_output_tokens = None;
+                        }
+                        // pixtral and ministral models support vision
+                        if underlying_model.contains("pixtral")
+                            || underlying_model.contains("ministral")
+                        {
+                            model_info.capabilities.push(ModelCapability::VisionInput);
+                        }
+                    }
+                    "deepseek" => {
+                        // DeepSeek defaults: 163840 context, 65536 output
+                        model_info.context_window = 163_840;
+                        model_info.max_output_tokens = Some(65_536);
+                        model_info.capabilities = vec![
+                            ModelCapability::TextGeneration,
+                            ModelCapability::FunctionCalling,
+                            ModelCapability::SystemPrompt,
+                        ];
+
+                        if underlying_model.contains("deepseek-r1") {
+                            // R1 reasoning models
+                            model_info
+                                .capabilities
+                                .push(ModelCapability::ExtendedThinking);
+                        }
+                        if underlying_model.contains("deepseek-chat") {
+                            // deepseek-chat can output up to full context
+                            model_info.max_output_tokens = Some(163_840);
+                        }
+                    }
+                    "moonshotai" => {
+                        // Moonshot Kimi models: 262144 context
+                        model_info.context_window = 262_144;
+                        model_info.max_output_tokens = Some(65_535);
+                        model_info.capabilities = vec![
+                            ModelCapability::TextGeneration,
+                            ModelCapability::FunctionCalling,
+                            ModelCapability::SystemPrompt,
+                            ModelCapability::LongContext,
+                        ];
+
+                        if underlying_model.contains("thinking") {
+                            model_info
+                                .capabilities
+                                .push(ModelCapability::ExtendedThinking);
+                        }
+                        if underlying_model.contains("kimi-k2-0905") {
+                            // kimi-k2-0905 can output up to full context
+                            model_info.max_output_tokens = Some(262_144);
+                        }
+                    }
+                    "z-ai" => {
+                        // GLM models: ~200k context, 65536 output
+                        model_info.context_window = 202_752;
+                        model_info.max_output_tokens = Some(65_536);
+                        model_info.capabilities = vec![
+                            ModelCapability::TextGeneration,
+                            ModelCapability::FunctionCalling,
+                            ModelCapability::SystemPrompt,
+                            ModelCapability::LongContext,
+                        ];
+
+                        if underlying_model.contains("glm-4.5") {
+                            // glm-4.5: 131k context
+                            model_info.context_window = 131_072;
+                        }
+                        if underlying_model.contains("glm-4.6v")
+                            || underlying_model.contains("glm-4.5v")
+                        {
+                            model_info.capabilities.push(ModelCapability::VisionInput);
+                        }
+                    }
+                    "qwen" => {
+                        // Qwen defaults: varies significantly
+                        model_info.context_window = 32_768;
+                        model_info.max_output_tokens = Some(16_384);
+                        model_info.capabilities = vec![
+                            ModelCapability::TextGeneration,
+                            ModelCapability::FunctionCalling,
+                            ModelCapability::SystemPrompt,
+                        ];
+
+                        if underlying_model.contains("qwen3")
+                            || underlying_model.contains("qwen-plus")
+                            || underlying_model.contains("qwen-turbo")
+                        {
+                            // Qwen3 and newer models have larger contexts
+                            model_info.context_window = 262_144;
+                            model_info.max_output_tokens = Some(32_768);
+                        }
+                        if underlying_model.contains("-vl-")
+                            || underlying_model.contains("vl-max")
+                            || underlying_model.contains("vl-plus")
+                        {
+                            model_info.capabilities.push(ModelCapability::VisionInput);
+                        }
+                        if underlying_model.contains("thinking") {
+                            model_info
+                                .capabilities
+                                .push(ModelCapability::ExtendedThinking);
+                        }
+                    }
+                    "cohere" => {
+                        // Cohere Command models: 128k context, 4000 output
+                        model_info.context_window = 128_000;
+                        model_info.max_output_tokens = Some(4_000);
+                        model_info.capabilities = vec![
+                            ModelCapability::TextGeneration,
+                            ModelCapability::FunctionCalling,
+                            ModelCapability::SystemPrompt,
+                            ModelCapability::LongContext,
+                            ModelCapability::WebSearch,
+                        ];
+
+                        if underlying_model.contains("command-a") {
+                            // command-a: 256k context, 8192 output
+                            model_info.context_window = 256_000;
+                            model_info.max_output_tokens = Some(8_192);
+                        }
+                    }
+                    _ => {
+                        // Generic OpenRouter defaults for unknown providers
+                        model_info.context_window = 32_768;
+                        model_info.max_output_tokens = Some(4_096);
+                        model_info.capabilities = vec![
+                            ModelCapability::TextGeneration,
+                            ModelCapability::SystemPrompt,
+                        ];
+                    }
+                }
+            } else {
+                // No slash in model ID, use generic defaults
+                model_info.context_window = 32_768;
+                model_info.max_output_tokens = Some(4_096);
+                model_info.capabilities = vec![
+                    ModelCapability::TextGeneration,
+                    ModelCapability::SystemPrompt,
+                ];
+            }
+        }
         "anthropic" => {
             model_info.context_window = 200_000;
             model_info.max_output_tokens = Some(4_096);
diff --git a/docs/config-examples.md b/docs/config-examples.md
index 915f218..fa90677 100644
--- a/docs/config-examples.md
+++ b/docs/config-examples.md
@@ -27,9 +27,43 @@ This directory contains example configuration files for Pattern:
   - `OPENAI_API_KEY` for OpenAI
   - `ANTHROPIC_API_KEY` for Anthropic
   - `GEMINI_API_KEY` for Google Gemini
+  - `OPENROUTER_API_KEY` for OpenRouter
   - `GROQ_API_KEY` for Groq
+  - `COHERE_API_KEY` for Cohere
   - etc.
 
+## OpenRouter Setup
+
+OpenRouter provides access to multiple AI providers through a single API. It's especially useful for:
+- Accessing models from multiple providers without managing separate API keys
+- Trying models you don't otherwise have direct API access to
+- Optimizing cost, since requests can be routed to the cheapest available host for a model
+
+### Configuration
+
+1. Get your API key from [OpenRouter](https://openrouter.ai/keys)
+2. Set the environment variable:
+   ```bash
+   export OPENROUTER_API_KEY=sk-or-v1-your-key-here
+   ```
+3. Configure in `pattern.toml`:
+   ```toml
+   [model]
+   provider = "OpenRouter"
+   model = "anthropic/claude-3-opus" # Use provider/model format
+   ```
+
+### Model Naming Convention
+
+OpenRouter uses `provider/model-name` format for model IDs:
+- `anthropic/claude-3-opus` - Claude 3 Opus via OpenRouter
+- `openai/gpt-4o` - GPT-4o via OpenRouter
+- `google/gemini-pro` - Gemini Pro via OpenRouter
+- `meta-llama/llama-3.1-70b-instruct` - Llama 3.1 70B via OpenRouter
+- `mistralai/mistral-large` - Mistral Large via OpenRouter
+
+See [OpenRouter Models](https://openrouter.ai/models) for the full list of available models.
+
 ## Group Member Configuration
 
 Groups support three ways to configure members:
@@ -38,4 +72,4 @@ Groups support three ways to configure members:
 2. **External config file**: Use `config_path` to load agent configuration from a separate file
 3. **Inline configuration**: Define the agent configuration directly in the group member section
 
-See `pattern.example.toml` for examples of all three methods.
\ No newline at end of file
+See `pattern.example.toml` for examples of all three methods.
diff --git a/pattern.example.toml b/pattern.example.toml
index 904cf38..752a211 100644
--- a/pattern.example.toml
+++ b/pattern.example.toml
@@ -63,11 +63,15 @@ memory_type = "Archival"
 description = "Domain-specific knowledge"
 
 [model]
-provider = "Gemini" # Options: Anthropic, OpenAI, Gemini, Groq, Cohere, Xai, Ollama, DeepSeek
+provider = "Gemini" # Options: Anthropic, OpenAI, OpenRouter, Gemini, Groq, Cohere, Xai, Ollama, DeepSeek
 # Optional: Specify a particular model
 # model = "gemini-2.5-flash"
 # temperature = 0.7
 
+# OpenRouter example
+# provider = "OpenRouter"
+# model = "anthropic/claude-3-opus" # Use provider/model format for OpenRouter
+
 [database]
 # Uses embedded SurrealDB by default
 type = "embedded"
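
The patch reconciles two ID conventions: OpenRouter separates the underlying provider from the model with "/", while genai namespaces adapters with "::". As a quick reference, here is a minimal, self-contained sketch of that logic. It is illustrative only, not code from the patch: `AdapterKind` below is a local stand-in for the forked genai crate's enum (the real one lives in `genai::adapter` and gains an `OpenRouter` variant at the revision pinned in Cargo.lock), and `namespaced_model_id`/`split_openrouter_id` are hypothetical helper names mirroring the logic added in `crates/pattern_core/src/model.rs` and `crates/pattern_core/src/model/defaults.rs`.

```rust
// Illustrative sketch only; `AdapterKind` is a stand-in for the forked
// genai crate's enum, and both helpers are hypothetical names.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum AdapterKind {
    Anthropic,
    OpenRouter,
}

/// Mirrors the namespacing in model.rs: OpenRouter IDs get an
/// "openrouter::" prefix so genai can resolve the correct adapter.
fn namespaced_model_id(endpoint: AdapterKind, model: &str) -> String {
    if endpoint == AdapterKind::OpenRouter {
        format!("openrouter::{model}")
    } else {
        model.to_string()
    }
}

/// Mirrors the slash-splitting in defaults.rs: an OpenRouter ID like
/// "anthropic/claude-3-opus" yields the underlying (provider, model) pair
/// that drives the context-window and output-token defaults.
fn split_openrouter_id(id: &str) -> Option<(&str, &str)> {
    id.split_once('/')
}

fn main() {
    let id = "anthropic/claude-3-opus";
    assert_eq!(
        namespaced_model_id(AdapterKind::OpenRouter, id),
        "openrouter::anthropic/claude-3-opus"
    );
    assert_eq!(split_openrouter_id(id), Some(("anthropic", "claude-3-opus")));
    // Non-OpenRouter endpoints pass model IDs through untouched.
    assert_eq!(
        namespaced_model_id(AdapterKind::Anthropic, "claude-3-opus"),
        "claude-3-opus"
    );
}
```

Note how the patch applies the prefix only to the resolver-facing `id` while `name` keeps the raw `provider/model` string, so the `openrouter::` namespace never leaks into user-facing display output.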