/**
 * Combines the optional `org` (scope) route param with the `name` route param
 * into a single package name, e.g. `@nuxt` + `kit` -> `@nuxt/kit`.
 * When there is no `org`, `name` is returned as-is (possibly `undefined`).
 */
const toPackageName = (org: string | undefined, name: string | undefined): string | undefined =>
  org ? `${org}/${name}` : name

/**
 * Serves llms.txt for an npm package.
 *
 * This single handler backs every URL shape; the sibling route files simply
 * re-export it:
 * - /package/:name/llms.txt
 * - /package/:org/:name/llms.txt
 * - /package/:name/v/:version/llms.txt
 * - /package/:org/:name/v/:version/llms.txt
 *
 * Responds with the generated markdown (`text/markdown`). A missing `name`
 * param yields a 404; any failure while validating params or generating the
 * document is delegated to `handleApiError` with a 502 fallback.
 */
export default defineCachedEventHandler(
  async function serveLlmsTxt(event) {
    const name = getRouterParam(event, 'name')
    if (!name) {
      throw createError({ statusCode: 404, message: 'Package name is required.' })
    }

    const rawPackageName = toPackageName(getRouterParam(event, 'org'), name)
    const rawVersion = getRouterParam(event, 'version')

    try {
      // Validate/normalise the raw route params before touching the registry.
      const params = v.parse(PackageRouteParamsSchema, {
        packageName: rawPackageName,
        version: rawVersion,
      })

      const content = await handleLlmsTxt(params.packageName, params.version)
      setHeader(event, 'Content-Type', 'text/markdown; charset=utf-8')
      return content
    } catch (error: unknown) {
      // NOTE(review): handleApiError is presumably `never`-returning (always throws) — confirm.
      handleApiError(error, {
        statusCode: 502,
        message: 'Failed to generate llms.txt.',
      })
    }
  },
  {
    maxAge: CACHE_MAX_AGE_ONE_HOUR,
    swr: true,
    // Cache per package and, when present, per version.
    getKey(event) {
      const pkg = toPackageName(getRouterParam(event, 'org'), getRouterParam(event, 'name'))
      const version = getRouterParam(event, 'version')
      const versionSuffix = version ? `@${version}` : ''
      return `llms-txt:${pkg}${versionSuffix}`
    },
  },
)
/** Well-known agent instruction files at the package root (file name -> display name). */
const ROOT_AGENT_FILES: Record<string, string> = {
  'CLAUDE.md': 'Claude Code',
  'AGENTS.md': 'Agent Instructions',
  'AGENT.md': 'Agent Instructions',
  '.cursorrules': 'Cursor Rules',
  '.windsurfrules': 'Windsurf Rules',
  '.clinerules': 'Cline Rules',
}

/** Well-known agent files inside specific directories (package-relative path -> display name). */
const DIRECTORY_AGENT_FILES: Record<string, string> = {
  '.github/copilot-instructions.md': 'GitHub Copilot',
}

/** Directories containing rule files (every `*.md` directly inside is matched). */
const RULE_DIRECTORIES: Record<string, string> = {
  '.cursor/rules': 'Cursor Rules',
  '.windsurf/rules': 'Windsurf Rules',
}

/**
 * Own-property lookup in one of the tables above.
 *
 * The `in` operator also matches inherited keys (`constructor`, `toString`,
 * `__proto__`, ...), so a package shipping a file with such a name would be
 * mistaken for an agent file. Only own keys count here.
 */
function lookupOwn(table: Record<string, string>, key: string): string | undefined {
  return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined
}

/**
 * Build the jsDelivr URL for a file inside a published package version.
 * Each path segment is percent-encoded so characters such as `#`, `?` or
 * spaces in file names cannot truncate or alter the request path; the `/`
 * separators themselves are preserved.
 */
function jsDelivrFileUrl(packageName: string, version: string, filePath: string): string {
  const encodedPath = filePath
    .split('/')
    .map(segment => encodeURIComponent(segment))
    .join('/')
  return `https://cdn.jsdelivr.net/npm/${packageName}@${version}/${encodedPath}`
}

/**
 * Discover agent instruction file paths from a jsDelivr file tree.
 *
 * Scans, for each top-level node in order:
 * - root-level files listed in `ROOT_AGENT_FILES`
 * - `.github/*` files listed in `DIRECTORY_AGENT_FILES`
 * - `*.md` files directly inside the directories listed in `RULE_DIRECTORIES`
 *
 * @param files Top-level nodes of the package file tree.
 * @returns Package-relative paths of the discovered files, in tree order.
 */
export function discoverAgentFiles(files: JsDelivrFileNode[]): string[] {
  const discovered: string[] = []

  for (const node of files) {
    // Root-level well-known files
    if (node.type === 'file') {
      if (lookupOwn(ROOT_AGENT_FILES, node.name) !== undefined) {
        discovered.push(node.name)
      }
      continue
    }

    // Directory-based files; a directory without children has nothing to offer.
    if (node.type !== 'directory' || !node.files) continue

    // .github/copilot-instructions.md
    if (node.name === '.github') {
      for (const child of node.files) {
        const fullPath = `.github/${child.name}`
        if (child.type === 'file' && lookupOwn(DIRECTORY_AGENT_FILES, fullPath) !== undefined) {
          discovered.push(fullPath)
        }
      }
    }

    // .cursor/rules/*.md and .windsurf/rules/*.md
    for (const dirPath of Object.keys(RULE_DIRECTORIES)) {
      const [topDir, subDir] = dirPath.split('/')
      if (node.name !== topDir) continue

      const rulesDir = node.files.find(f => f.type === 'directory' && f.name === subDir)
      for (const ruleFile of rulesDir?.files ?? []) {
        if (ruleFile.type === 'file' && ruleFile.name.endsWith('.md')) {
          discovered.push(`${dirPath}/${ruleFile.name}`)
        }
      }
    }
  }

  return discovered
}

/**
 * Get the display name for an agent file path.
 * Falls back to the path itself when the file is not a known agent file.
 */
function getDisplayName(filePath: string): string {
  const known =
    lookupOwn(ROOT_AGENT_FILES, filePath) ?? lookupOwn(DIRECTORY_AGENT_FILES, filePath)
  if (known !== undefined) return known

  for (const [dirPath, displayName] of Object.entries(RULE_DIRECTORIES)) {
    if (filePath.startsWith(`${dirPath}/`)) return `${displayName}: ${filePath.split('/').pop()}`
  }

  return filePath
}

/**
 * Fetch agent instruction files from jsDelivr CDN.
 * Fetches in parallel, gracefully skipping failures: a non-OK response or a
 * thrown network error drops that file instead of failing the whole batch.
 *
 * @param packageName Package name (scoped names such as `@nuxt/kit` are supported).
 * @param version Exact version to fetch from.
 * @param filePaths Package-relative paths, typically from `discoverAgentFiles`.
 * @returns The successfully fetched files, in the order of `filePaths`.
 */
export async function fetchAgentFiles(
  packageName: string,
  version: string,
  filePaths: string[],
): Promise<AgentFile[]> {
  const results = await Promise.all(
    filePaths.map(async (path): Promise<AgentFile | null> => {
      try {
        const response = await fetch(jsDelivrFileUrl(packageName, version, path))
        if (!response.ok) return null
        const content = await response.text()
        return { path, content, displayName: getDisplayName(path) }
      } catch {
        // Best effort: one unreachable file must not break llms.txt generation.
        return null
      }
    }),
  )

  return results.filter((r): r is AgentFile => r !== null)
}
/**
 * Generate llms.txt markdown content per the llmstxt.org spec.
 *
 * Structure:
 * - H1 title with package name and version
 * - Blockquote description (if available; every line is quoted)
 * - Metadata list (homepage, repository, npm)
 * - README section
 * - Agent Instructions section (one sub-heading per file)
 *
 * @param result Gathered package data.
 * @returns Markdown document ending in exactly one trailing newline.
 */
export function generateLlmsTxt(result: LlmsTxtResult): string {
  const lines: string[] = []

  // Title
  lines.push(`# ${result.packageName}@${result.version}`)
  lines.push('')

  // Description blockquote. Quote every line: a bare `> first line` followed
  // by unquoted lines would leave the rest of the description outside the quote.
  const description = result.description?.trim()
  if (description) {
    for (const line of description.split(/\r?\n/)) {
      lines.push(line ? `> ${line}` : '>')
    }
    lines.push('')
  }

  // Metadata
  const meta: string[] = []
  if (result.homepage) meta.push(`- Homepage: ${result.homepage}`)
  if (result.repositoryUrl) meta.push(`- Repository: ${result.repositoryUrl}`)
  meta.push(`- npm: https://www.npmjs.com/package/${result.packageName}/v/${result.version}`)
  lines.push(...meta)
  lines.push('')

  // README
  if (result.readme) {
    lines.push('## README')
    lines.push('')
    lines.push(result.readme)
    lines.push('')
  }

  // Agent instructions
  if (result.agentFiles.length > 0) {
    lines.push('## Agent Instructions')
    lines.push('')

    for (const file of result.agentFiles) {
      lines.push(`### ${file.displayName} (\`${file.path}\`)`)
      lines.push('')
      lines.push(file.content)
      lines.push('')
    }
  }

  // Normalise trailing whitespace to a single newline.
  return lines.join('\n').trimEnd() + '\n'
}

/** Standard README filenames to try from jsDelivr CDN */
const README_FILENAMES = ['README.md', 'readme.md', 'Readme.md']

/**
 * Fetch README from jsDelivr CDN as fallback.
 * Tries each known filename in order and returns the first successful body,
 * or `null` when none could be fetched.
 */
async function fetchReadmeFromCdn(packageName: string, version: string): Promise<string | null> {
  for (const filename of README_FILENAMES) {
    try {
      const url = `https://cdn.jsdelivr.net/npm/${packageName}@${version}/${filename}`
      const response = await fetch(url)
      if (response.ok) return await response.text()
    } catch {
      // Try next
    }
  }
  return null
}

/**
 * Extract README from packument data.
 * Uses the version-specific README when a version was requested, otherwise the
 * top-level one. The npm "missing README" sentinel is treated as no README.
 */
function getReadmeFromPackument(packageData: Packument, requestedVersion?: string): string | null {
  const readme = requestedVersion
    ? packageData.versions[requestedVersion]?.readme
    : packageData.readme

  if (readme && readme !== NPM_MISSING_README_SENTINEL) {
    return readme
  }
  return null
}

/**
 * Extract a clean repository URL from packument repository field.
 * Strips a leading `git+` and a trailing `.git`; returns `undefined` when no
 * URL is present.
 */
function parseRepoUrl(
  repository?: { type?: string; url?: string; directory?: string } | string,
): string | undefined {
  if (!repository) return undefined
  const url = typeof repository === 'string' ? repository : repository.url
  if (!url) return undefined
  return url.replace(/^git\+/, '').replace(/\.git$/, '')
}

/**
 * Pick description/homepage/repository for a specific version.
 *
 * The top-level packument fields are only used as a fallback, so a versioned
 * llms.txt does not show metadata that belongs to a different release.
 */
function pickVersionMetadata(
  packageData: Packument,
  version: string,
): { description?: string; homepage?: string; repositoryUrl?: string } {
  const manifest = packageData.versions?.[version]
  return {
    description: manifest?.description ?? packageData.description,
    homepage: manifest?.homepage ?? packageData.homepage,
    repositoryUrl: parseRepoUrl(manifest?.repository ?? packageData.repository),
  }
}

/**
 * Orchestrates fetching all data and generating llms.txt for a package.
 * Shared by both versioned and unversioned route handlers.
 *
 * @param packageName Package name (e.g. "nuxt" or "@nuxt/kit").
 * @param requestedVersion Version from the URL; defaults to the `latest` dist-tag.
 * @returns The generated llms.txt markdown.
 * @throws A 404 error (via `createError`) when no version can be resolved.
 */
export async function handleLlmsTxt(
  packageName: string,
  requestedVersion?: string,
): Promise<string> {
  const packageData = await fetchNpmPackage(packageName)
  const resolvedVersion = requestedVersion ?? packageData['dist-tags']?.latest

  if (!resolvedVersion) {
    throw createError({ statusCode: 404, message: 'Could not resolve package version.' })
  }

  // Extract README from packument (sync)
  const readmeFromPackument = getReadmeFromPackument(packageData, requestedVersion)

  // Fetch file tree (and README from CDN if packument didn't have one)
  const [fileTreeData, cdnReadme] = await Promise.all([
    fetchFileTree(packageName, resolvedVersion),
    readmeFromPackument ? null : fetchReadmeFromCdn(packageName, resolvedVersion),
  ])

  const readme = readmeFromPackument ?? cdnReadme ?? undefined

  // Discover and fetch agent files
  const agentFilePaths = discoverAgentFiles(fileTreeData.files)
  const agentFiles = await fetchAgentFiles(packageName, resolvedVersion, agentFilePaths)

  const result: LlmsTxtResult = {
    packageName,
    version: resolvedVersion,
    // Metadata of the resolved version, not of whatever `latest` happens to be.
    ...pickVersionMetadata(packageData, resolvedVersion),
    readme,
    agentFiles,
  }

  return generateLlmsTxt(result)
}
import { describe, expect, it, vi } from 'vitest'
import type { JsDelivrFileNode, LlmsTxtResult } from '../../../../shared/types'
import {
  discoverAgentFiles,
  fetchAgentFiles,
  generateLlmsTxt,
} from '../../../../server/utils/llms-txt'

// Unit tests for the llms.txt server utilities: file discovery in a jsDelivr
// file tree, fetching the discovered files, and rendering the final markdown.

describe('discoverAgentFiles', () => {
  it('discovers root-level agent files', () => {
    // Six known agent files plus two ordinary files that must be ignored.
    const files: JsDelivrFileNode[] = [
      { type: 'file', name: 'CLAUDE.md', size: 100 },
      { type: 'file', name: 'AGENTS.md', size: 200 },
      { type: 'file', name: 'AGENT.md', size: 50 },
      { type: 'file', name: '.cursorrules', size: 80 },
      { type: 'file', name: '.windsurfrules', size: 60 },
      { type: 'file', name: '.clinerules', size: 40 },
      { type: 'file', name: 'package.json', size: 500 },
      { type: 'file', name: 'README.md', size: 3000 },
    ]

    const result = discoverAgentFiles(files)

    expect(result).toContain('CLAUDE.md')
    expect(result).toContain('AGENTS.md')
    expect(result).toContain('AGENT.md')
    expect(result).toContain('.cursorrules')
    expect(result).toContain('.windsurfrules')
    expect(result).toContain('.clinerules')
    expect(result).not.toContain('package.json')
    expect(result).not.toContain('README.md')
    expect(result).toHaveLength(6)
  })

  it('discovers .github/copilot-instructions.md', () => {
    // Other files inside .github (FUNDING.yml) must not be picked up.
    const files: JsDelivrFileNode[] = [
      {
        type: 'directory',
        name: '.github',
        files: [
          { type: 'file', name: 'copilot-instructions.md', size: 150 },
          { type: 'file', name: 'FUNDING.yml', size: 30 },
        ],
      },
    ]

    const result = discoverAgentFiles(files)

    expect(result).toEqual(['.github/copilot-instructions.md'])
  })

  it('discovers .cursor/rules/*.md files', () => {
    // Only markdown files inside the rules directory count; config.json is skipped.
    const files: JsDelivrFileNode[] = [
      {
        type: 'directory',
        name: '.cursor',
        files: [
          {
            type: 'directory',
            name: 'rules',
            files: [
              { type: 'file', name: 'coding-style.md', size: 100 },
              { type: 'file', name: 'testing.md', size: 80 },
              { type: 'file', name: 'config.json', size: 50 },
            ],
          },
        ],
      },
    ]

    const result = discoverAgentFiles(files)

    expect(result).toContain('.cursor/rules/coding-style.md')
    expect(result).toContain('.cursor/rules/testing.md')
    expect(result).not.toContain('.cursor/rules/config.json')
    expect(result).toHaveLength(2)
  })

  it('discovers .windsurf/rules/*.md files', () => {
    const files: JsDelivrFileNode[] = [
      {
        type: 'directory',
        name: '.windsurf',
        files: [
          {
            type: 'directory',
            name: 'rules',
            files: [{ type: 'file', name: 'project.md', size: 200 }],
          },
        ],
      },
    ]

    const result = discoverAgentFiles(files)

    expect(result).toEqual(['.windsurf/rules/project.md'])
  })

  it('returns empty array for empty file tree', () => {
    expect(discoverAgentFiles([])).toEqual([])
  })

  it('returns empty array when no agent files exist', () => {
    // Ordinary files and unrelated directories only.
    const files: JsDelivrFileNode[] = [
      { type: 'file', name: 'package.json', size: 500 },
      { type: 'file', name: 'index.js', size: 1000 },
      {
        type: 'directory',
        name: 'src',
        files: [{ type: 'file', name: 'main.ts', size: 200 }],
      },
    ]

    expect(discoverAgentFiles(files)).toEqual([])
  })
})

describe('fetchAgentFiles', () => {
  // Each test replaces the global `fetch` with a stub and restores it in
  // `finally`, so a failing assertion cannot leak the stub into other tests.

  it('fetches files in parallel and returns results', async () => {
    // Stub answers by URL substring; anything else is reported as not OK.
    const fetchMock = vi.fn().mockImplementation((url: string) => {
      if (url.includes('CLAUDE.md')) {
        return Promise.resolve({ ok: true, text: () => Promise.resolve('# Claude instructions') })
      }
      if (url.includes('AGENTS.md')) {
        return Promise.resolve({ ok: true, text: () => Promise.resolve('# Agent config') })
      }
      return Promise.resolve({ ok: false })
    })
    vi.stubGlobal('fetch', fetchMock)

    try {
      const result = await fetchAgentFiles('test-pkg', '1.0.0', ['CLAUDE.md', 'AGENTS.md'])

      // Results keep the order of the requested paths.
      expect(result).toHaveLength(2)
      expect(result[0]).toMatchObject({
        path: 'CLAUDE.md',
        content: '# Claude instructions',
        displayName: 'Claude Code',
      })
      expect(result[1]).toMatchObject({
        path: 'AGENTS.md',
        content: '# Agent config',
        displayName: 'Agent Instructions',
      })
      expect(fetchMock).toHaveBeenCalledTimes(2)
    } finally {
      vi.unstubAllGlobals()
    }
  })

  it('gracefully skips failed fetches', async () => {
    // A non-OK response drops that file without failing the batch.
    const fetchMock = vi.fn().mockImplementation((url: string) => {
      if (url.includes('CLAUDE.md')) {
        return Promise.resolve({ ok: true, text: () => Promise.resolve('# Claude') })
      }
      return Promise.resolve({ ok: false })
    })
    vi.stubGlobal('fetch', fetchMock)

    try {
      const result = await fetchAgentFiles('test-pkg', '1.0.0', ['CLAUDE.md', 'missing.md'])

      expect(result).toHaveLength(1)
      expect(result[0]?.path).toBe('CLAUDE.md')
    } finally {
      vi.unstubAllGlobals()
    }
  })

  it('gracefully handles network errors', async () => {
    // A rejected fetch is swallowed and yields no file.
    vi.stubGlobal('fetch', vi.fn().mockRejectedValue(new Error('Network error')))

    try {
      const result = await fetchAgentFiles('test-pkg', '1.0.0', ['CLAUDE.md'])
      expect(result).toEqual([])
    } finally {
      vi.unstubAllGlobals()
    }
  })

  it('returns empty array for empty file paths', async () => {
    // No paths means no fetch is needed, so no stub is installed here.
    const result = await fetchAgentFiles('test-pkg', '1.0.0', [])
    expect(result).toEqual([])
  })

  it('constructs correct CDN URLs for scoped packages', async () => {
    const fetchMock = vi.fn().mockResolvedValue({
      ok: true,
      text: () => Promise.resolve('content'),
    })
    vi.stubGlobal('fetch', fetchMock)

    try {
      await fetchAgentFiles('@nuxt/kit', '1.0.0', ['CLAUDE.md'])
      // The scope separator must be kept verbatim in the URL.
      expect(fetchMock).toHaveBeenCalledWith(
        'https://cdn.jsdelivr.net/npm/@nuxt/kit@1.0.0/CLAUDE.md',
      )
    } finally {
      vi.unstubAllGlobals()
    }
  })
})

describe('generateLlmsTxt', () => {
  it('generates full output with all fields', () => {
    const result: LlmsTxtResult = {
      packageName: 'nuxt',
      version: '3.12.0',
      description: 'The Intuitive Vue Framework',
      homepage: 'https://nuxt.com',
      repositoryUrl: 'https://github.com/nuxt/nuxt',
      readme: '# Nuxt\n\nThe Intuitive Vue Framework.',
      agentFiles: [
        {
          path: 'CLAUDE.md',
          content: '# Claude\n\nUse Nuxt conventions.',
          displayName: 'Claude Code',
        },
        { path: '.cursorrules', content: 'Use composition API.', displayName: 'Cursor Rules' },
      ],
    }

    const output = generateLlmsTxt(result)

    // Title, description, metadata list, README and one sub-heading per agent file.
    expect(output).toContain('# nuxt@3.12.0')
    expect(output).toContain('> The Intuitive Vue Framework')
    expect(output).toContain('- Homepage: https://nuxt.com')
    expect(output).toContain('- Repository: https://github.com/nuxt/nuxt')
    expect(output).toContain('- npm: https://www.npmjs.com/package/nuxt/v/3.12.0')
    expect(output).toContain('## README')
    expect(output).toContain('# Nuxt')
    expect(output).toContain('## Agent Instructions')
    expect(output).toContain('### Claude Code (`CLAUDE.md`)')
    expect(output).toContain('Use Nuxt conventions.')
    expect(output).toContain('### Cursor Rules (`.cursorrules`)')
    expect(output).toContain('Use composition API.')
    expect(output.endsWith('\n')).toBe(true)
  })

  it('generates minimal output with no optional fields', () => {
    const result: LlmsTxtResult = {
      packageName: 'tiny-pkg',
      version: '0.1.0',
      agentFiles: [],
    }

    const output = generateLlmsTxt(result)

    // Only the title and the npm link remain; no blockquote marker at all.
    expect(output).toContain('# tiny-pkg@0.1.0')
    expect(output).toContain('- npm: https://www.npmjs.com/package/tiny-pkg/v/0.1.0')
    expect(output).not.toContain('>')
    expect(output).not.toContain('Homepage')
    expect(output).not.toContain('Repository')
    expect(output).not.toContain('## README')
    expect(output).not.toContain('## Agent Instructions')
  })

  it('omits Agent Instructions section when no agent files exist', () => {
    const result: LlmsTxtResult = {
      packageName: 'test-pkg',
      version: '1.0.0',
      description: 'A test package',
      readme: '# Test\n\nHello world.',
      agentFiles: [],
    }

    const output = generateLlmsTxt(result)

    expect(output).toContain('## README')
    expect(output).not.toContain('## Agent Instructions')
  })

  it('omits README section when no readme provided', () => {
    const result: LlmsTxtResult = {
      packageName: 'no-readme',
      version: '1.0.0',
      agentFiles: [
        { path: 'AGENTS.md', content: 'Agent rules here.', displayName: 'Agent Instructions' },
      ],
    }

    const output = generateLlmsTxt(result)

    expect(output).not.toContain('## README')
    expect(output).toContain('## Agent Instructions')
    expect(output).toContain('### Agent Instructions (`AGENTS.md`)')
  })

  it('handles scoped package names in npm URL', () => {
    const result: LlmsTxtResult = {
      packageName: '@nuxt/kit',
      version: '1.0.0',
      agentFiles: [],
    }

    const output = generateLlmsTxt(result)

    expect(output).toContain('# @nuxt/kit@1.0.0')
    expect(output).toContain('- npm: https://www.npmjs.com/package/@nuxt/kit/v/1.0.0')
  })
})