From d8c6a6f7b8c8e573c90305836abc4dafc2b1a62b Mon Sep 17 00:00:00 2001 From: deloreyj Date: Wed, 30 Apr 2025 18:17:38 -0500 Subject: [PATCH 1/2] feat: vectorize tools --- packages/mcp-common/src/tools/vectorize.ts | 484 +++++++++++++++++++++ packages/mcp-common/src/types/vectorize.ts | 171 ++++++++ 2 files changed, 655 insertions(+) create mode 100644 packages/mcp-common/src/tools/vectorize.ts create mode 100644 packages/mcp-common/src/types/vectorize.ts diff --git a/packages/mcp-common/src/tools/vectorize.ts b/packages/mcp-common/src/tools/vectorize.ts new file mode 100644 index 00000000..7e2b0eaf --- /dev/null +++ b/packages/mcp-common/src/tools/vectorize.ts @@ -0,0 +1,484 @@ +import { getCloudflareClient } from '../cloudflare-api' +import { MISSING_ACCOUNT_ID_RESPONSE } from '../constants' +import { + VectorizeIndexConfigSchema, + VectorizeIndexDescriptionSchema, + VectorizeIndexNameSchema, + VectorizeListDirectionParam, + VectorizeListOrderParam, + VectorizeListPageParam, + VectorizeListPerPageParam, + VectorizeNdjsonBodySchema, + VectorizeQueryFilterSchema, + VectorizeQueryReturnMetadataSchema, + VectorizeQueryReturnValuesSchema, + VectorizeQueryTopKSchema, + VectorizeQueryVectorSchema, + VectorizeUnparsableBehaviorSchema, + VectorizeVectorIdListSchema, +} from '../types/vectorize' + +import type { CloudflareMcpAgent } from '../types/cloudflare-mcp-agent' + +/** + * Registers Vectorize Index management tools with the MCP agent. + * @param agent - The Cloudflare MCP agent instance. + */ +export function registerVectorizeTools(agent: CloudflareMcpAgent) { + // --- vectorize_index_create --- + agent.server.tool( + 'vectorize_index_create', + 'Creates a new Vectorize Index. Use this when a user wants to set up a new vector database.', + { + name: VectorizeIndexNameSchema, + config: VectorizeIndexConfigSchema, + description: VectorizeIndexDescriptionSchema, + }, + async (params) => { + try { + const account_id = await agent.getActiveAccountId() + if (!account_id) { + return MISSING_ACCOUNT_ID_RESPONSE + } + const client = getCloudflareClient(agent.props.accessToken) + + const result = await client.vectorize.indexes.create({ + account_id, + ...params, + }) + + return { + content: [ + { + type: 'text', + text: JSON.stringify(result ?? 'Index created successfully (no detailed response).'), + }, + ], + } + } catch (error) { + return { + content: [ + { + type: 'text', + text: `Error creating Vectorize Index: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + } + } + } + ) + + // --- vectorize_index_list --- + agent.server.tool( + 'vectorize_index_list', + 'Lists Vectorize Indexes in the current account, with optional pagination. Use this when a user asks to see their indexes.', + { + page: VectorizeListPageParam, + per_page: VectorizeListPerPageParam, + order: VectorizeListOrderParam, + direction: VectorizeListDirectionParam, + }, + async ({ page, per_page, order, direction }) => { + try { + const account_id = await agent.getActiveAccountId() + if (!account_id) { + return MISSING_ACCOUNT_ID_RESPONSE + } + const client = getCloudflareClient(agent.props.accessToken) + + const response = await client.vectorize.indexes.list( + { account_id }, + { + query: { + page: page ?? undefined, + per_page: per_page ?? undefined, + order: order ?? undefined, + direction: direction ?? undefined, + }, + } + ) + + return { + content: [ + { + type: 'text', + text: JSON.stringify(response), + }, + ], + } + } catch (error) { + return { + content: [ + { + type: 'text', + text: `Error listing Vectorize Indexes: ${error instanceof Error ? error.message : String(error)}`, + }, + ], + } + } + } + ) + + // --- vectorize_index_get --- + agent.server.tool( + 'vectorize_index_get', + 'Retrieves the details and configuration of a specific Vectorize Index by its name.', + { + name: VectorizeIndexNameSchema, + }, + async ({ name }) => { + try { + const account_id = await agent.getActiveAccountId() + if (!account_id) { + return MISSING_ACCOUNT_ID_RESPONSE + } + const client = getCloudflareClient(agent.props.accessToken) + + const result = await client.vectorize.indexes.get(name, { account_id }) + + if (!result) { + return { + content: [ + { + type: 'text', + text: `Error: Vectorize Index "${name}" not found.`, + }, + ], + } + } + return { + content: [ + { + type: 'text', + text: JSON.stringify(result), + }, + ], + } + } catch (error) { + return { + content: [ + { + type: 'text', + text: `Error getting Vectorize Index "${name}": ${error instanceof Error ? error.message : String(error)}`, + }, + ], + } + } + } + ) + + // --- vectorize_index_delete --- + agent.server.tool( + 'vectorize_index_delete', + 'Deletes a specific Vectorize Index by its name. This action is permanent.', + { + name: VectorizeIndexNameSchema, + }, + async ({ name }) => { + try { + const account_id = await agent.getActiveAccountId() + if (!account_id) { + return MISSING_ACCOUNT_ID_RESPONSE + } + const client = getCloudflareClient(agent.props.accessToken) + + await client.vectorize.indexes.delete(name, { account_id }) + + return { + content: [ + { + type: 'text', + text: JSON.stringify({ + success: true, + message: `Vectorize Index "${name}" deleted successfully.`, + }), + }, + ], + } + } catch (error) { + return { + content: [ + { + type: 'text', + text: `Error deleting Vectorize Index "${name}": ${error instanceof Error ? error.message : String(error)}`, + }, + ], + } + } + } + ) + + // --- vectorize_index_info --- + agent.server.tool( + 'vectorize_index_info', + 'Gets operational information about a Vectorize Index, such as the number of vectors it contains.', + { + name: VectorizeIndexNameSchema, + }, + async ({ name }) => { + try { + const account_id = await agent.getActiveAccountId() + if (!account_id) { + return MISSING_ACCOUNT_ID_RESPONSE + } + const client = getCloudflareClient(agent.props.accessToken) + + const result = await client.vectorize.indexes.info(name, { account_id }) + + return { + content: [ + { + type: 'text', + text: JSON.stringify(result ?? `Could not retrieve info for index "${name}".`), + }, + ], + } + } catch (error) { + return { + content: [ + { + type: 'text', + text: `Error getting info for Vectorize Index "${name}": ${error instanceof Error ? error.message : String(error)}`, + }, + ], + } + } + } + ) + + // --- vectorize_index_insert --- + agent.server.tool( + 'vectorize_index_insert', + 'Inserts vectors into a specified Vectorize Index using NDJSON format. Returns a mutation ID.', + { + name: VectorizeIndexNameSchema, + vectors_ndjson: VectorizeNdjsonBodySchema, + unparsable_behavior: VectorizeUnparsableBehaviorSchema, + }, + async ({ name, vectors_ndjson, unparsable_behavior }) => { + try { + const account_id = await agent.getActiveAccountId() + if (!account_id) { + return MISSING_ACCOUNT_ID_RESPONSE + } + const client = getCloudflareClient(agent.props.accessToken) + + const result = await client.vectorize.indexes.insert(name, { + account_id, + body: vectors_ndjson, + 'unparsable-behavior': unparsable_behavior, + }) + + return { + content: [ + { + type: 'text', + text: JSON.stringify(result ?? 'Insert operation initiated (no detailed response).'), + }, + ], + } + } catch (error) { + return { + content: [ + { + type: 'text', + text: `Error inserting vectors into Vectorize Index "${name}": ${error instanceof Error ? error.message : String(error)}`, + }, + ], + } + } + } + ) + + // --- vectorize_index_upsert --- + agent.server.tool( + 'vectorize_index_upsert', + 'Upserts vectors into a specified Vectorize Index using NDJSON format (inserts new, updates existing). Returns a mutation ID.', + { + name: VectorizeIndexNameSchema, + vectors_ndjson: VectorizeNdjsonBodySchema, + unparsable_behavior: VectorizeUnparsableBehaviorSchema, + }, + async ({ name, vectors_ndjson, unparsable_behavior }) => { + try { + const account_id = await agent.getActiveAccountId() + if (!account_id) { + return MISSING_ACCOUNT_ID_RESPONSE + } + const client = getCloudflareClient(agent.props.accessToken) + + const result = await client.vectorize.indexes.upsert(name, { + account_id, + body: vectors_ndjson, + 'unparsable-behavior': unparsable_behavior, + }) + + return { + content: [ + { + type: 'text', + text: JSON.stringify(result ?? 'Upsert operation initiated (no detailed response).'), + }, + ], + } + } catch (error) { + return { + content: [ + { + type: 'text', + text: `Error upserting vectors into Vectorize Index "${name}": ${error instanceof Error ? error.message : String(error)}`, + }, + ], + } + } + } + ) + + // --- vectorize_index_query --- + agent.server.tool( + 'vectorize_index_query', + 'Finds vectors in an index that are closest (nearest neighbors) to a given query vector. Can optionally filter by metadata.', + { + name: VectorizeIndexNameSchema, + vector: VectorizeQueryVectorSchema, + filter: VectorizeQueryFilterSchema, + return_metadata: VectorizeQueryReturnMetadataSchema, + return_values: VectorizeQueryReturnValuesSchema, + top_k: VectorizeQueryTopKSchema, + }, + async ({ name, vector, filter, return_metadata, return_values, top_k }) => { + try { + const account_id = await agent.getActiveAccountId() + if (!account_id) { + return MISSING_ACCOUNT_ID_RESPONSE + } + const client = getCloudflareClient(agent.props.accessToken) + + const result = await client.vectorize.indexes.query(name, { + account_id, + vector, + filter, // Pass filter directly as SDK expects 'unknown' + returnMetadata: return_metadata, + returnValues: return_values, + topK: top_k, + }) + + return { + content: [ + { + type: 'text', + text: JSON.stringify(result ?? 'Query executed, but no results returned.'), + }, + ], + } + } catch (error) { + return { + content: [ + { + type: 'text', + text: `Error querying Vectorize Index "${name}": ${error instanceof Error ? error.message : String(error)}`, + }, + ], + } + } + } + ) + + // --- vectorize_index_get_by_ids --- + agent.server.tool( + 'vectorize_index_get_by_ids', + 'Retrieves specific vectors from an index by their unique identifiers.', + { + name: VectorizeIndexNameSchema, + ids: VectorizeVectorIdListSchema, + }, + async ({ name, ids }) => { + try { + const account_id = await agent.getActiveAccountId() + if (!account_id) { + return MISSING_ACCOUNT_ID_RESPONSE + } + const client = getCloudflareClient(agent.props.accessToken) + + const result = await client.vectorize.indexes.getByIds(name, { + account_id, + ids, + }) + + // The SDK types this response as 'unknown', needs careful handling + if (result === null || result === undefined) { + return { + content: [ + { + type: 'text', + text: `Error: No vectors found for the provided IDs in index "${name}".`, + }, + ], + } + } + // Format success response + return { + content: [ + { + type: 'text', + text: JSON.stringify(result), + }, + ], + } + } catch (error) { + return { + content: [ + { + type: 'text', + text: `Error getting vectors by ID from Vectorize Index "${name}": ${error instanceof Error ? error.message : String(error)}`, + }, + ], + } + } + } + ) + + // --- vectorize_index_delete_by_ids --- + agent.server.tool( + 'vectorize_index_delete_by_ids', + 'Deletes specific vectors from an index by their unique identifiers. Returns a mutation ID.', + { + name: VectorizeIndexNameSchema, + ids: VectorizeVectorIdListSchema, + }, + async ({ name, ids }) => { + try { + const account_id = await agent.getActiveAccountId() + if (!account_id) { + return MISSING_ACCOUNT_ID_RESPONSE + } + const client = getCloudflareClient(agent.props.accessToken) + + const result = await client.vectorize.indexes.deleteByIds(name, { + account_id, + ids, + }) + + return { + content: [ + { + type: 'text', + text: JSON.stringify( + result ?? 'Delete by IDs operation initiated (no detailed response).' + ), + }, + ], + } + } catch (error) { + return { + content: [ + { + type: 'text', + text: `Error deleting vectors by ID from Vectorize Index "${name}": ${error instanceof Error ? error.message : String(error)}`, + }, + ], + } + } + } + ) +} diff --git a/packages/mcp-common/src/types/vectorize.ts b/packages/mcp-common/src/types/vectorize.ts new file mode 100644 index 00000000..9ea5d562 --- /dev/null +++ b/packages/mcp-common/src/types/vectorize.ts @@ -0,0 +1,171 @@ +import { z } from 'zod' + +import type { + IndexCreateParams, + IndexDeleteByIDsParams, + IndexDimensionConfigurationParam, + IndexGetByIDsParams, + IndexInsertParams, + IndexQueryParams, + IndexUpsertParams, +} from 'cloudflare/resources/vectorize/indexes/indexes' + +/** Zod schema for a Vectorize Index name. */ +export const VectorizeIndexNameSchema = z + .string() + .min(1, 'Index name cannot be empty.') + .max(64, 'Index name cannot exceed 64 characters.') + .regex( + /^[a-zA-Z0-9_-]+$/, + 'Index name can only contain alphanumeric characters, underscores, and hyphens.' + ) + .describe('The unique name of the Vectorize Index.') + +/** Zod schema for a Vectorize Index description. */ +export const VectorizeIndexDescriptionSchema: z.ZodType = z + .string() + .max(1024, 'Description cannot exceed 1024 characters.') + .optional() + .describe('An optional description for the Vectorize Index.') + +/** Zod schema for Vectorize Index dimensions. */ +export const VectorizeIndexDimensionSchema: z.ZodType< + IndexDimensionConfigurationParam['dimensions'] +> = z + .number() + .int() + .positive('Dimensions must be a positive integer.') + .describe('The number of dimensions for the vectors in the index.') + +/** Zod schema for Vectorize Index distance metric. */ +export const VectorizeIndexMetricSchema: z.ZodType = z + .enum(['cosine', 'euclidean', 'dot-product']) + .describe('The distance metric to use for similarity calculations.') + +/** Zod schema for explicit dimension/metric configuration. */ +export const VectorizeIndexDimensionConfigSchema: z.ZodType = z + .object({ + dimensions: VectorizeIndexDimensionSchema, + metric: VectorizeIndexMetricSchema, + }) + .describe('Configuration specifying the dimensions and distance metric.') + +/** Zod schema for Vectorize Index preset models. */ +export const VectorizeIndexPresetSchema: z.ZodType< + IndexCreateParams.VectorizeIndexPresetConfiguration['preset'] +> = z.enum([ + '@cf/baai/bge-small-en-v1.5', + '@cf/baai/bge-base-en-v1.5', + '@cf/baai/bge-large-en-v1.5', + 'openai/text-embedding-ada-002', + 'cohere/embed-multilingual-v2.0', +]) + +/** Zod schema for preset-based configuration. */ +export const VectorizeIndexPresetConfigSchema: z.ZodType = + z + .object({ + preset: VectorizeIndexPresetSchema, + }) + .describe('Configuration specifying a pre-defined embedding model preset.') + +/** Zod schema for Vectorize Index configuration (either dimensions/metric or preset). */ +export const VectorizeIndexConfigSchema: z.ZodType = z + .union([VectorizeIndexDimensionConfigSchema, VectorizeIndexPresetConfigSchema]) + .describe( + 'The configuration for the Vectorize Index, specifying either dimensions/metric or a preset model.' + ) + +/** Zod schema for a list of vector IDs. */ +export const VectorizeVectorIdListSchema = z + .array(z.string().min(1)) + .min(1, 'At least one vector ID must be provided.') + .describe('A list of vector identifiers.') + +/** Zod schema for the NDJSON body used in insert/upsert operations. */ +export const VectorizeNdjsonBodySchema: z.ZodType< + IndexInsertParams['body'] | IndexUpsertParams['body'] +> = z + .string() + .min(1, 'NDJSON body cannot be empty.') + .describe( + 'A string containing newline-delimited JSON objects representing vectors to insert or upsert.' + ) + +/** Zod schema for handling unparsable lines in NDJSON. */ +export const VectorizeUnparsableBehaviorSchema: z.ZodType< + IndexInsertParams['unparsable-behavior'] | IndexUpsertParams['unparsable-behavior'] +> = z + .enum(['error', 'discard']) + .optional() + .describe('Behavior for handling unparsable lines in NDJSON input.') + +/** Zod schema for the query vector. */ +export const VectorizeQueryVectorSchema: z.ZodType = z + .array(z.number()) + .min(1, 'Query vector cannot be empty.') + .describe('The vector used to find nearest neighbors.') + +/** Zod schema for the query metadata filter. */ +export const VectorizeQueryFilterSchema: z.ZodType = z + .record(z.unknown()) // Using z.record(z.unknown()) to represent a generic JSON object + .optional() + .describe('A metadata filter expression (JSON object) used to limit search results.') + +/** Zod schema for controlling metadata return in queries. */ +export const VectorizeQueryReturnMetadataSchema: z.ZodType = z + .enum(['none', 'indexed', 'all']) + .optional() + .describe('Specifies whether to return no metadata, only indexed metadata, or all metadata.') + +/** Zod schema for controlling value return in queries. */ +export const VectorizeQueryReturnValuesSchema: z.ZodType = z + .boolean() + .optional() + .describe('Specifies whether to return the vector values themselves in the results.') + +/** Zod schema for the number of nearest neighbors to return in queries. */ +export const VectorizeQueryTopKSchema: z.ZodType = z + .number() + .int() + .positive('topK must be a positive integer.') + .optional() + .describe('The number of nearest neighbors to retrieve.') + +/** Zod schema for the page number for pagination. */ +export const VectorizeListPageParam = z // Corresponds roughly to PaginationPageParam in shared + .number() + .int() + .positive() + .optional() + .describe('Page number for pagination.') + +/** Zod schema for the number of items per page for pagination. */ +export const VectorizeListPerPageParam = z // Corresponds roughly to PaginationPerPageParam in shared + .number() + .int() + .positive() + .max(100) // Assuming a max page size, adjust if needed + .optional() + .describe('Number of indexes to return per page (max 100).') + +/** Zod schema for the order field for pagination. */ +export const VectorizeListOrderParam = z // Corresponds roughly to PaginationOrderParam in shared + .string() // Usually specific fields like 'name', 'created_on' - let LLM decide or refine later + .optional() + .describe('Field to order results by (e.g., "name", "created_on").') + +/** Zod schema for the direction for pagination. */ +export const VectorizeListDirectionParam = z // Corresponds roughly to PaginationDirectionParam in shared + .enum(['asc', 'desc']) + .optional() + .describe('Direction to order results (ascending or descending).') + +// Combine into a single schema for the list tool parameters (optional) +// Although the tool registration takes individual params, this can be useful internally +export const VectorizeIndexListParamsSchema = z.object({ + page: VectorizeListPageParam, + per_page: VectorizeListPerPageParam, + order: VectorizeListOrderParam, + direction: VectorizeListDirectionParam, +}) // Note: SDK IndexListParams only has account_id, these go in options.query From 0cfa4cad8b3d1cc2aa5d882ddd22ad319b3a0358 Mon Sep 17 00:00:00 2001 From: deloreyj Date: Wed, 30 Apr 2025 18:17:56 -0500 Subject: [PATCH 2/2] feat: vectorize tools --- .../evals/kv_namespaces.eval.ts | 10 +- apps/workers-bindings/evals/vectorize.eval.ts | 197 +++++++++++++ apps/workers-bindings/src/index.ts | 2 + packages/mcp-common/src/tools/vectorize.ts | 271 ++---------------- packages/mcp-common/src/types/vectorize.ts | 39 ++- 5 files changed, 238 insertions(+), 281 deletions(-) create mode 100644 apps/workers-bindings/evals/vectorize.eval.ts diff --git a/apps/workers-bindings/evals/kv_namespaces.eval.ts b/apps/workers-bindings/evals/kv_namespaces.eval.ts index bd9ecfd7..53f9cc64 100644 --- a/apps/workers-bindings/evals/kv_namespaces.eval.ts +++ b/apps/workers-bindings/evals/kv_namespaces.eval.ts @@ -29,7 +29,7 @@ eachModel('$modelName', ({ model }) => { }, scorers: [checkFactuality], threshold: 1, - timeout: 60000, // 60 seconds + timeout: 60000, }) describeEval('List Cloudflare KV Namespaces', { data: async () => [ @@ -51,7 +51,7 @@ eachModel('$modelName', ({ model }) => { }, scorers: [checkFactuality], threshold: 1, - timeout: 60000, // 60 seconds + timeout: 60000, }) describeEval('Rename Cloudflare KV Namespace', { data: async () => [ @@ -74,7 +74,7 @@ eachModel('$modelName', ({ model }) => { }, scorers: [checkFactuality], threshold: 1, - timeout: 60000, // 60 seconds + timeout: 60000, }) describeEval('Get Cloudflare KV Namespace Details', { data: async () => [ @@ -96,7 +96,7 @@ eachModel('$modelName', ({ model }) => { }, scorers: [checkFactuality], threshold: 1, - timeout: 60000, // 60 seconds + timeout: 60000, }) describeEval('Delete Cloudflare KV Namespace', { data: async () => [ @@ -118,6 +118,6 @@ eachModel('$modelName', ({ model }) => { }, scorers: [checkFactuality], threshold: 1, - timeout: 60000, // 60 seconds + timeout: 60000, }) }) diff --git a/apps/workers-bindings/evals/vectorize.eval.ts b/apps/workers-bindings/evals/vectorize.eval.ts new file mode 100644 index 00000000..b1ec3932 --- /dev/null +++ b/apps/workers-bindings/evals/vectorize.eval.ts @@ -0,0 +1,197 @@ +import { expect } from 'vitest' +import { describeEval } from 'vitest-evals' + +import { runTask } from '@repo/eval-tools/src/runTask' +import { checkFactuality } from '@repo/eval-tools/src/scorers' +import { eachModel } from '@repo/eval-tools/src/test-models' +import { VECTORIZE_TOOLS } from '@repo/mcp-common/src/tools/vectorize' + +import { initializeClient } from './utils' // Assuming utils.ts will exist here + +const MOCK_INDEX_NAME = 'test-vectorize-index' +const MOCK_INDEX_DESCRIPTION = 'A test index for evaluation' +const MOCK_DIMENSIONS = 32 +const MOCK_METRIC = 'cosine' +const MOCK_PRESET = '@cf/baai/bge-small-en-v1.5' + +eachModel('$modelName', ({ model }) => { + describeEval('Create Vectorize Index (Dimensions/Metric)', { + data: async () => [ + { + input: `Create a Vectorize index named "${MOCK_INDEX_NAME}" with ${MOCK_DIMENSIONS} dimensions using the "${MOCK_METRIC}" metric. Add description: "${MOCK_INDEX_DESCRIPTION}".`, + expected: `The ${VECTORIZE_TOOLS.vectorize_index_create} tool should be called with name "${MOCK_INDEX_NAME}", config specifying ${MOCK_DIMENSIONS} dimensions and "${MOCK_METRIC}" metric, and description "${MOCK_INDEX_DESCRIPTION}".`, + }, + ], + task: async (input: string) => { + const client = await initializeClient() + const { promptOutput, toolCalls } = await runTask(client, model, input) + const toolCall = toolCalls.find( + (call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_create + ) + expect(toolCall, 'Tool vectorize_index_create was not called').toBeDefined() + expect(toolCall?.args, 'Arguments did not match').toEqual( + expect.objectContaining({ + name: MOCK_INDEX_NAME, + config: expect.objectContaining({ + dimensions: MOCK_DIMENSIONS, + metric: MOCK_METRIC, + }), + description: MOCK_INDEX_DESCRIPTION, + }) + ) + return promptOutput + }, + scorers: [checkFactuality], + threshold: 1, + timeout: 60000, + }) + + // --- Test vectorize_index_create (with preset) --- + describeEval('Create Vectorize Index (Preset)', { + data: async () => [ + { + input: `Create a Vectorize index named "${MOCK_INDEX_NAME}-preset" using the "${MOCK_PRESET}" preset.`, + expected: `The ${VECTORIZE_TOOLS.vectorize_index_create} tool should be called with name "${MOCK_INDEX_NAME}-preset" and config specifying the preset "${MOCK_PRESET}".`, + }, + ], + task: async (input: string) => { + const client = await initializeClient() + const { promptOutput, toolCalls } = await runTask(client, model, input) + const toolCall = toolCalls.find( + (call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_create + ) + expect(toolCall, 'Tool vectorize_index_create was not called').toBeDefined() + expect(toolCall?.args, 'Arguments did not match').toEqual( + expect.objectContaining({ + name: `${MOCK_INDEX_NAME}-preset`, + config: expect.objectContaining({ + preset: MOCK_PRESET, + }), + }) + ) + return promptOutput + }, + scorers: [checkFactuality], + threshold: 1, + timeout: 60000, + }) + + // --- Test vectorize_index_list --- + describeEval('List Vectorize Indexes', { + data: async () => [ + { + input: 'List my Vectorize indexes.', + expected: `The ${VECTORIZE_TOOLS.vectorize_index_list} tool should be called.`, + }, + { + input: 'Show me page 2 of my Vectorize indexes, 10 per page, ordered by name descending.', + expected: + 'The ${VECTORIZE_TOOLS.vectorize_index_list} tool should be called with page 2, per_page 10, order name, direction desc.', + }, + ], + task: async (input: string) => { + const client = await initializeClient() + const { promptOutput, toolCalls } = await runTask(client, model, input) + const toolCall = toolCalls.find( + (call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_list + ) + expect(toolCall, 'Tool vectorize_index_list was not called').toBeDefined() + + // Check specific args only for the pagination case + if (input.includes('page 2')) { + expect(toolCall?.args, 'Pagination arguments did not match').toEqual( + expect.objectContaining({ + page: 2, + per_page: 10, + order: 'name', + direction: 'desc', + }) + ) + } + + return promptOutput + }, + scorers: [checkFactuality], + threshold: 1, + timeout: 60000, + }) + + // --- Test vectorize_index_get --- + describeEval('Get Vectorize Index Details', { + data: async () => [ + { + input: `Get the details for the Vectorize index named "${MOCK_INDEX_NAME}".`, + expected: `The ${VECTORIZE_TOOLS.vectorize_index_get} tool should be called with name "${MOCK_INDEX_NAME}".`, + }, + ], + task: async (input: string) => { + const client = await initializeClient() + const { promptOutput, toolCalls } = await runTask(client, model, input) + const toolCall = toolCalls.find( + (call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_get + ) + expect(toolCall, 'Tool vectorize_index_get was not called').toBeDefined() + expect(toolCall?.args, 'Arguments did not match').toEqual( + expect.objectContaining({ + name: MOCK_INDEX_NAME, + }) + ) + return promptOutput + }, + scorers: [checkFactuality], + threshold: 1, + timeout: 60000, + }) + + describeEval('Get Vectorize Index Info', { + data: async () => [ + { + input: `Get operational info for the Vectorize index "${MOCK_INDEX_NAME}".`, + expected: `The ${VECTORIZE_TOOLS.vectorize_index_info} tool should be called with name "${MOCK_INDEX_NAME}".`, + }, + ], + task: async (input: string) => { + const client = await initializeClient() + const { promptOutput, toolCalls } = await runTask(client, model, input) + const toolCall = toolCalls.find( + (call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_info + ) + expect(toolCall, 'Tool vectorize_index_info was not called').toBeDefined() + expect(toolCall?.args, 'Arguments did not match').toEqual( + expect.objectContaining({ + name: MOCK_INDEX_NAME, + }) + ) + return promptOutput + }, + scorers: [checkFactuality], + threshold: 1, + timeout: 60000, + }) + + describeEval('Delete Vectorize Index', { + data: async () => [ + { + input: `Delete the Vectorize index named "${MOCK_INDEX_NAME}".`, + expected: `The ${VECTORIZE_TOOLS.vectorize_index_delete} tool should be called with name "${MOCK_INDEX_NAME}".`, + }, + ], + task: async (input: string) => { + const client = await initializeClient() + const { promptOutput, toolCalls } = await runTask(client, model, input) + const toolCall = toolCalls.find( + (call) => call.toolName === VECTORIZE_TOOLS.vectorize_index_delete + ) + expect(toolCall, 'Tool vectorize_index_delete was not called').toBeDefined() + expect(toolCall?.args, 'Arguments did not match').toEqual( + expect.objectContaining({ + name: MOCK_INDEX_NAME, + }) + ) + return promptOutput + }, + scorers: [checkFactuality], + threshold: 1, + timeout: 60000, + }) +}) diff --git a/apps/workers-bindings/src/index.ts b/apps/workers-bindings/src/index.ts index 66cb15a8..a21c4d68 100644 --- a/apps/workers-bindings/src/index.ts +++ b/apps/workers-bindings/src/index.ts @@ -15,6 +15,7 @@ import { registerD1Tools } from '@repo/mcp-common/src/tools/d1' import { registerHyperdriveTools } from '@repo/mcp-common/src/tools/hyperdrive' import { registerKVTools } from '@repo/mcp-common/src/tools/kv_namespace' import { registerR2BucketTools } from '@repo/mcp-common/src/tools/r2_bucket' +import { registerVectorizeTools } from '@repo/mcp-common/src/tools/vectorize' import { registerWorkersTools } from '@repo/mcp-common/src/tools/worker' import { MetricsTracker } from '@repo/mcp-observability' @@ -74,6 +75,7 @@ export class WorkersBindingsMCP extends McpAgent { - try { - const account_id = await agent.getActiveAccountId() - if (!account_id) { - return MISSING_ACCOUNT_ID_RESPONSE - } - const client = getCloudflareClient(agent.props.accessToken) - - const result = await client.vectorize.indexes.insert(name, { - account_id, - body: vectors_ndjson, - 'unparsable-behavior': unparsable_behavior, - }) - - return { - content: [ - { - type: 'text', - text: JSON.stringify(result ?? 'Insert operation initiated (no detailed response).'), - }, - ], - } - } catch (error) { - return { - content: [ - { - type: 'text', - text: `Error inserting vectors into Vectorize Index "${name}": ${error instanceof Error ? error.message : String(error)}`, - }, - ], - } - } - } - ) - - // --- vectorize_index_upsert --- - agent.server.tool( - 'vectorize_index_upsert', - 'Upserts vectors into a specified Vectorize Index using NDJSON format (inserts new, updates existing). Returns a mutation ID.', - { - name: VectorizeIndexNameSchema, - vectors_ndjson: VectorizeNdjsonBodySchema, - unparsable_behavior: VectorizeUnparsableBehaviorSchema, - }, - async ({ name, vectors_ndjson, unparsable_behavior }) => { - try { - const account_id = await agent.getActiveAccountId() - if (!account_id) { - return MISSING_ACCOUNT_ID_RESPONSE - } - const client = getCloudflareClient(agent.props.accessToken) - - const result = await client.vectorize.indexes.upsert(name, { - account_id, - body: vectors_ndjson, - 'unparsable-behavior': unparsable_behavior, - }) - - return { - content: [ - { - type: 'text', - text: JSON.stringify(result ?? 'Upsert operation initiated (no detailed response).'), - }, - ], - } - } catch (error) { - return { - content: [ - { - type: 'text', - text: `Error upserting vectors into Vectorize Index "${name}": ${error instanceof Error ? error.message : String(error)}`, - }, - ], - } - } - } - ) - - // --- vectorize_index_query --- - agent.server.tool( - 'vectorize_index_query', - 'Finds vectors in an index that are closest (nearest neighbors) to a given query vector. Can optionally filter by metadata.', - { - name: VectorizeIndexNameSchema, - vector: VectorizeQueryVectorSchema, - filter: VectorizeQueryFilterSchema, - return_metadata: VectorizeQueryReturnMetadataSchema, - return_values: VectorizeQueryReturnValuesSchema, - top_k: VectorizeQueryTopKSchema, - }, - async ({ name, vector, filter, return_metadata, return_values, top_k }) => { - try { - const account_id = await agent.getActiveAccountId() - if (!account_id) { - return MISSING_ACCOUNT_ID_RESPONSE - } - const client = getCloudflareClient(agent.props.accessToken) - - const result = await client.vectorize.indexes.query(name, { - account_id, - vector, - filter, // Pass filter directly as SDK expects 'unknown' - returnMetadata: return_metadata, - returnValues: return_values, - topK: top_k, - }) - - return { - content: [ - { - type: 'text', - text: JSON.stringify(result ?? 'Query executed, but no results returned.'), - }, - ], - } - } catch (error) { - return { - content: [ - { - type: 'text', - text: `Error querying Vectorize Index "${name}": ${error instanceof Error ? error.message : String(error)}`, - }, - ], - } - } - } - ) - - // --- vectorize_index_get_by_ids --- - agent.server.tool( - 'vectorize_index_get_by_ids', - 'Retrieves specific vectors from an index by their unique identifiers.', - { - name: VectorizeIndexNameSchema, - ids: VectorizeVectorIdListSchema, - }, - async ({ name, ids }) => { - try { - const account_id = await agent.getActiveAccountId() - if (!account_id) { - return MISSING_ACCOUNT_ID_RESPONSE - } - const client = getCloudflareClient(agent.props.accessToken) - - const result = await client.vectorize.indexes.getByIds(name, { - account_id, - ids, - }) - - // The SDK types this response as 'unknown', needs careful handling - if (result === null || result === undefined) { - return { - content: [ - { - type: 'text', - text: `Error: No vectors found for the provided IDs in index "${name}".`, - }, - ], - } - } - // Format success response - return { - content: [ - { - type: 'text', - text: JSON.stringify(result), - }, - ], - } - } catch (error) { - return { - content: [ - { - type: 'text', - text: `Error getting vectors by ID from Vectorize Index "${name}": ${error instanceof Error ? error.message : String(error)}`, - }, - ], - } - } - } - ) - - // --- vectorize_index_delete_by_ids --- - agent.server.tool( - 'vectorize_index_delete_by_ids', - 'Deletes specific vectors from an index by their unique identifiers. Returns a mutation ID.', - { - name: VectorizeIndexNameSchema, - ids: VectorizeVectorIdListSchema, - }, - async ({ name, ids }) => { - try { - const account_id = await agent.getActiveAccountId() - if (!account_id) { - return MISSING_ACCOUNT_ID_RESPONSE - } - const client = getCloudflareClient(agent.props.accessToken) - - const result = await client.vectorize.indexes.deleteByIds(name, { - account_id, - ids, - }) - - return { - content: [ - { - type: 'text', - text: JSON.stringify( - result ?? 'Delete by IDs operation initiated (no detailed response).' - ), - }, - ], - } - } catch (error) { - return { - content: [ - { - type: 'text', - text: `Error deleting vectors by ID from Vectorize Index "${name}": ${error instanceof Error ? error.message : String(error)}`, - }, - ], - } - } - } - ) } diff --git a/packages/mcp-common/src/types/vectorize.ts b/packages/mcp-common/src/types/vectorize.ts index 9ea5d562..ade49595 100644 --- a/packages/mcp-common/src/types/vectorize.ts +++ b/packages/mcp-common/src/types/vectorize.ts @@ -2,16 +2,14 @@ import { z } from 'zod' import type { IndexCreateParams, - IndexDeleteByIDsParams, IndexDimensionConfigurationParam, - IndexGetByIDsParams, IndexInsertParams, IndexQueryParams, IndexUpsertParams, } from 'cloudflare/resources/vectorize/indexes/indexes' /** Zod schema for a Vectorize Index name. */ -export const VectorizeIndexNameSchema = z +export const VectorizeIndexNameSchema: z.ZodType = z .string() .min(1, 'Index name cannot be empty.') .max(64, 'Index name cannot exceed 64 characters.') @@ -19,7 +17,7 @@ export const VectorizeIndexNameSchema = z /^[a-zA-Z0-9_-]+$/, 'Index name can only contain alphanumeric characters, underscores, and hyphens.' ) - .describe('The unique name of the Vectorize Index.') + .describe('The name of the Vectorize Index.') /** Zod schema for a Vectorize Index description. */ export const VectorizeIndexDescriptionSchema: z.ZodType = z @@ -34,6 +32,8 @@ export const VectorizeIndexDimensionSchema: z.ZodType< > = z .number() .int() + .min(32, 'Dimensions must be at least 32.') + .max(1536, 'Dimensions must be at most 1536.') .positive('Dimensions must be a positive integer.') .describe('The number of dimensions for the vectors in the index.') @@ -76,10 +76,12 @@ export const VectorizeIndexConfigSchema: z.ZodType 'The configuration for the Vectorize Index, specifying either dimensions/metric or a preset model.' ) -/** Zod schema for a list of vector IDs. */ +/** Zod schema for a list of vector IDs. Parameter itself is optional in tools. */ export const VectorizeVectorIdListSchema = z .array(z.string().min(1)) .min(1, 'At least one vector ID must be provided.') + .optional() + .nullable() .describe('A list of vector identifiers.') /** Zod schema for the NDJSON body used in insert/upsert operations. */ @@ -108,7 +110,7 @@ export const VectorizeQueryVectorSchema: z.ZodType = /** Zod schema for the query metadata filter. */ export const VectorizeQueryFilterSchema: z.ZodType = z - .record(z.unknown()) // Using z.record(z.unknown()) to represent a generic JSON object + .record(z.unknown()) .optional() .describe('A metadata filter expression (JSON object) used to limit search results.') @@ -133,39 +135,34 @@ export const VectorizeQueryTopKSchema: z.ZodType = z .describe('The number of nearest neighbors to retrieve.') /** Zod schema for the page number for pagination. */ -export const VectorizeListPageParam = z // Corresponds roughly to PaginationPageParam in shared +export const VectorizeListPageParam = z .number() .int() .positive() .optional() + .nullable() .describe('Page number for pagination.') /** Zod schema for the number of items per page for pagination. */ -export const VectorizeListPerPageParam = z // Corresponds roughly to PaginationPerPageParam in shared +export const VectorizeListPerPageParam = z .number() .int() .positive() - .max(100) // Assuming a max page size, adjust if needed + .max(100) .optional() + .nullable() .describe('Number of indexes to return per page (max 100).') /** Zod schema for the order field for pagination. */ -export const VectorizeListOrderParam = z // Corresponds roughly to PaginationOrderParam in shared - .string() // Usually specific fields like 'name', 'created_on' - let LLM decide or refine later +export const VectorizeListOrderParam = z + .string() .optional() + .nullable() .describe('Field to order results by (e.g., "name", "created_on").') /** Zod schema for the direction for pagination. */ -export const VectorizeListDirectionParam = z // Corresponds roughly to PaginationDirectionParam in shared +export const VectorizeListDirectionParam = z .enum(['asc', 'desc']) .optional() + .nullable() .describe('Direction to order results (ascending or descending).') - -// Combine into a single schema for the list tool parameters (optional) -// Although the tool registration takes individual params, this can be useful internally -export const VectorizeIndexListParamsSchema = z.object({ - page: VectorizeListPageParam, - per_page: VectorizeListPerPageParam, - order: VectorizeListOrderParam, - direction: VectorizeListDirectionParam, -}) // Note: SDK IndexListParams only has account_id, these go in options.query