diff --git a/apps/api/src/lib/files.ts b/apps/api/src/lib/files.ts index 0621b4b6d..452d55039 100644 --- a/apps/api/src/lib/files.ts +++ b/apps/api/src/lib/files.ts @@ -1,5 +1,6 @@ import type { Entry } from "apache-autoindex-parse"; -import { trimTrailingSlash } from "@luxass/utils"; +import { trimLeadingSlash, trimTrailingSlash } from "@luxass/utils"; +import { createGlobMatcher } from "@ucdjs-internal/shared"; import { parse } from "apache-autoindex-parse"; /** @@ -20,13 +21,105 @@ import { parse } from "apache-autoindex-parse"; * console.log(entries); // [{ type: 'directory', name: 'UNIDATA', path: '/UNIDATA', ... }] * ``` */ -export async function parseUnicodeDirectory(html: string): Promise { - const files = parse(html, "F2"); - - return files.map(({ type, name, path, lastModified }) => ({ - type, - name: trimTrailingSlash(name), - path: trimTrailingSlash(path), - lastModified, - })); +export async function parseUnicodeDirectory(html: string, basePath = ""): Promise { + const files = parse(html, { + format: "F2", + basePath, + }); + + return files.map((entry) => { + entry.name = trimLeadingSlash(trimTrailingSlash(entry.name)); + return entry; + }); +} + +export interface DirectoryFilterOptions { + /** + * A string to filter file/directory names that start with this query (case-insensitive). + */ + query?: string; + + /** + * A glob pattern to filter file/directory names. + */ + pattern?: string; + + /** + * Type of entries to include: "all" (default), "files", or "directories". + */ + type?: string; + + /** + * Field to sort by: "name" (default) or "lastModified". + */ + sort?: string; + + /** + * Sort order: "asc" (default) or "desc". + */ + order?: string; +} + +/** + * Applies filtering and sorting to directory entries based on query parameters. + * + * @param {Entry[]} files - Array of directory entries to filter and sort + * @param {DirectoryFilterOptions} options - Filter and sort options + * @returns {Entry[]} Filtered and sorted array of entries + */ +export function applyDirectoryFiltersAndSort( + files: Entry[], + options: DirectoryFilterOptions, +): Entry[] { + let filtered = [...files]; + + // Apply query filter (prefix search, case-insensitive) + if (options.query) { + // eslint-disable-next-line no-console + console.info(`[v1_files]: applying query filter: ${options.query}`); + const queryLower = options.query.toLowerCase(); + filtered = filtered.filter((entry) => entry.name.toLowerCase().startsWith(queryLower)); + } + + // Apply pattern filter if provided + if (options.pattern) { + // eslint-disable-next-line no-console + console.info(`[v1_files]: applying glob pattern filter: ${options.pattern}`); + const matcher = createGlobMatcher(options.pattern); + filtered = filtered.filter((entry) => matcher(entry.name)); + } + + // Apply type filter + const type = options.type || "all"; + if (type === "files") { + filtered = filtered.filter((entry) => entry.type === "file"); + } else if (type === "directories") { + filtered = filtered.filter((entry) => entry.type === "directory"); + } + + // Apply sorting (directories always first, like Windows File Explorer) + const sort = options.sort || "name"; + const order = options.order || "asc"; + + filtered = filtered.toSorted((a, b) => { + // Directories always come first + if (a.type !== b.type) { + return a.type === "directory" ? -1 : 1; + } + + // Within same type, apply the requested sort + let comparison: number; + + if (sort === "lastModified") { + // lastModified is always available from parseUnicodeDirectory + comparison = (a.lastModified ?? 0) - (b.lastModified ?? 0); + } else { + // Natural name sorting (numeric aware) so 2.0.0 < 10.0.0 + comparison = a.name.localeCompare(b.name, undefined, { numeric: true, sensitivity: "base" }); + } + + return order === "desc" ? -comparison : comparison; + }); + + return filtered; } diff --git a/apps/api/src/routes/v1_files/$wildcard.ts b/apps/api/src/routes/v1_files/$wildcard.ts index 916017c45..c2deb1c16 100644 --- a/apps/api/src/routes/v1_files/$wildcard.ts +++ b/apps/api/src/routes/v1_files/$wildcard.ts @@ -1,123 +1,70 @@ +/* eslint-disable no-console */ import type { OpenAPIHono } from "@hono/zod-openapi"; +import type { Entry } from "apache-autoindex-parse"; import type { HonoEnv } from "../../types"; import { createRoute, z } from "@hono/zod-openapi"; import { dedent } from "@luxass/utils"; -import { createGlobMatcher, isValidGlobPattern } from "@ucdjs-internal/shared"; -import { DEFAULT_USER_AGENT, UCD_STAT_TYPE_HEADER } from "@ucdjs/env"; +import { isValidGlobPattern } from "@ucdjs-internal/shared"; +import { + DEFAULT_USER_AGENT, + UCD_STAT_CHILDREN_DIRS_HEADER, + UCD_STAT_CHILDREN_FILES_HEADER, + UCD_STAT_CHILDREN_HEADER, + UCD_STAT_SIZE_HEADER, + UCD_STAT_TYPE_HEADER, +} from "@ucdjs/env"; import { FileEntryListSchema } from "@ucdjs/schemas"; import { cache } from "hono/cache"; import { HTML_EXTENSIONS, MAX_AGE_ONE_WEEK_SECONDS } from "../../constants"; import { badGateway, badRequest, notFound } from "../../lib/errors"; -import { parseUnicodeDirectory } from "../../lib/files"; +import { applyDirectoryFiltersAndSort, parseUnicodeDirectory } from "../../lib/files"; import { generateReferences, OPENAPI_TAGS } from "../../openapi"; +import { + ORDER_QUERY_PARAM, + PATTERN_QUERY_PARAM, + QUERY_PARAM, + SORT_QUERY_PARAM, + TYPE_QUERY_PARAM, + WILDCARD_PARAM, +} from "./openapi-params"; import { determineContentTypeFromExtension, isInvalidPath } from "./utils"; -const WILDCARD_PARAM = { - in: "path", - name: "wildcard", - description: dedent` - The path to the Unicode data resource you want to access. This can be any valid path from the official Unicode Public directory structure. - - ## Path Format Options - - | Pattern | Description | Example | - |--------------------------------|--------------------------------|-------------------------------------| - | \`{version}/ucd/{filename}\` | UCD files for specific version | \`15.1.0/ucd/UnicodeData.txt\` | - | \`{version}/ucd/{sub}/{file}\` | Files in subdirectories | \`15.1.0/ucd/emoji/emoji-data.txt\` | - | \`{version}\` | List files for version | \`15.1.0\` | - | \`latest/ucd/{filename}\` | Latest version of file | \`latest/ucd/PropList.txt\` | - `, - required: true, - schema: { - type: "string", - pattern: ".*", - }, - examples: { - "UnicodeData.txt": { - summary: "UnicodeData.txt for Unicode 15.0.0", - value: "15.0.0/ucd/UnicodeData.txt", - }, - "emoji-data.txt": { - summary: "Emoji data file", - value: "15.1.0/ucd/emoji/emoji-data.txt", - }, - "root": { - summary: "Root path", - value: "", - }, - "list-version-dir": { - summary: "Versioned path", - value: "15.1.0", - }, - }, -} as const; - -const PATTERN_QUERY_PARAM = { - in: "query", - name: "pattern", - description: dedent` - A glob pattern to filter directory listing results by filename. Only applies when the response is a directory listing. - The matching is **case-insensitive**. - - ## Supported Glob Syntax - - | Pattern | Description | Example | - |-----------|-----------------------------------------------|------------------------------------------------------| - | \`*\` | Match any characters (except path separators) | \`*.txt\` matches \`file.txt\` | - | \`?\` | Match a single character | \`file?.txt\` matches \`file1.txt\` | - | \`{a,b}\` | Match any of the patterns | \`*.{txt,xml}\` matches \`file.txt\` or \`file.xml\` | - | \`[abc]\` | Match any character in the set | \`file[123].txt\` matches \`file1.txt\` | - - ## Examples - - - \`*.txt\` - Match all text files - - \`Uni*\` - Match files starting with "Uni" (e.g., UnicodeData.txt) - - \`*Data*\` - Match files containing "Data" - - \`*.{txt,xml}\` - Match text or XML files - `, - required: false, - schema: { - type: "string", - }, - examples: { - "txt-files": { - summary: "Match all .txt files", - value: "*.txt", - }, - "prefix-match": { - summary: "Match files starting with 'Uni'", - value: "Uni*", - }, - "contains-match": { - summary: "Match files containing 'Data'", - value: "*Data*", - }, - "multi-extension": { - summary: "Match .txt or .xml files", - value: "*.{txt,xml}", - }, - }, -} as const; - export const WILDCARD_ROUTE = createRoute({ method: "get", path: "/{wildcard}", tags: [OPENAPI_TAGS.FILES], - parameters: [WILDCARD_PARAM, PATTERN_QUERY_PARAM], + parameters: [ + WILDCARD_PARAM, + PATTERN_QUERY_PARAM, + QUERY_PARAM, + TYPE_QUERY_PARAM, + SORT_QUERY_PARAM, + ORDER_QUERY_PARAM, + ], description: dedent` - This endpoint proxies your request directly to Unicode.org, allowing you to access any file or directory under the Unicode Public directory structure with only slight [modifications](#tag/files/get/api/v1/files/{wildcard}/description/modifications). + This endpoint proxies requests to Unicode.org's Public directory, streaming files directly while transforming directory listings into structured JSON. + + All paths are relative to \`/api/v1/files\` — for example, requesting \`/api/v1/files/15.1.0/ucd/emoji/emoji-data.txt\` fetches the emoji data file from Unicode version 15.1.0. > [!IMPORTANT] - > The \`{wildcard}\` parameter can be any valid path, you are even allowed to use nested paths like \`15.1.0/ucd/emoji/emoji-data.txt\`. + > The \`{wildcard}\` parameter accepts any valid path, including deeply nested ones like \`15.1.0/ucd/emoji/emoji-data.txt\`. In directory listing responses, paths for directories include a trailing slash (e.g., \`/15.1.0/ucd/charts/\`), while file paths do not. > [!NOTE] - > If you wanna access only some metadata about the path, you can use a \`HEAD\` request instead. See [here](#tag/files/head/api/v1/files/{wildcard}) + > To retrieve only metadata without downloading content, use a \`HEAD\` request instead. See [here](#tag/files/head/api/v1/files/{wildcard}) + ### Directory Listing Features + + When accessing a directory, you can filter and sort entries using these query parameters: + + - \`query\` - Prefix-based search (case-insensitive) on entry names + - \`pattern\` - Glob pattern matching for filtering + - \`type\` - Filter by entry type: \`all\` (default), \`files\`, or \`directories\` + - \`sort\` - Sort by \`name\` (default) or \`lastModified\` + - \`order\` - Sort order: \`asc\` (default) or \`desc\` ### Modifications - We are doing a slight modification to the response, only if the response is for a directory. - If you request a directory, we will return a JSON listing of the files and subdirectories in that directory. + Directory responses are automatically transformed into JSON arrays containing file and directory entries. Files are streamed directly from Unicode.org with appropriate content types. `, responses: { 200: { @@ -131,6 +78,34 @@ export const WILDCARD_ROUTE = createRoute({ }, required: true, }, + [UCD_STAT_SIZE_HEADER]: { + description: "The size of the file in bytes (only for files)", + schema: { + type: "string", + }, + required: false, + }, + [UCD_STAT_CHILDREN_HEADER]: { + description: "Number of children (only for directories)", + schema: { + type: "string", + }, + required: false, + }, + [UCD_STAT_CHILDREN_FILES_HEADER]: { + description: "Number of child files (only for directories)", + schema: { + type: "string", + }, + required: false, + }, + [UCD_STAT_CHILDREN_DIRS_HEADER]: { + description: "Number of child directories (only for directories)", + schema: { + type: "string", + }, + required: false, + }, }, content: { "application/json": { @@ -142,13 +117,13 @@ export const WILDCARD_ROUTE = createRoute({ { type: "file", name: "ReadMe.txt", - path: "ReadMe.txt", + path: "/15.1.0/ucd/ReadMe.txt", lastModified: 1693213740000, }, { type: "directory", name: "charts", - path: "charts", + path: "/15.1.0/ucd/charts/", lastModified: 1697495340000, }, ], @@ -171,7 +146,7 @@ export const WILDCARD_ROUTE = createRoute({ 0004;;Cc;0;BN;;;;;N;END OF TRANSMISSION;;;; 0005;;Cc;0;BN;;;;;N;ENQUIRY;;;; 0006;;Cc;0;BN;;;;;N;ACKNOWLEDGE;;;; - `, + `.trim(), }, "15.1.0/ucd/emoji/emoji-data.txt": { summary: "Emoji data file for Unicode 15.1.0", @@ -180,7 +155,7 @@ export const WILDCARD_ROUTE = createRoute({ 2660 ; Emoji # E0.6 [1] (♠️) spade suit 2663 ; Emoji # E0.6 [1] (♣️) club suit 2665..2666 ; Emoji # E0.6 [2] (♥️..♦️) heart suit..diamond suit - `, + `.trim(), }, }, }, @@ -214,15 +189,21 @@ export const METADATA_WILDCARD_ROUTE = createRoute({ method: "head", path: "/{wildcard}", tags: [OPENAPI_TAGS.FILES], - parameters: [WILDCARD_PARAM], + parameters: [ + WILDCARD_PARAM, + PATTERN_QUERY_PARAM, + QUERY_PARAM, + TYPE_QUERY_PARAM, + SORT_QUERY_PARAM, + ORDER_QUERY_PARAM, + ], description: dedent` - This endpoint returns metadata about the requested file or directory without fetching the entire content. - It is useful for checking the existence of a file or directory and retrieving its metadata without downloading - the content. + Retrieve metadata about a file or directory without downloading the content. Useful for checking existence, file size, and other metadata. + + All paths are relative to \`/api/v1/files\`. Directory paths always include a trailing slash (e.g., \`/15.1.0/ucd/charts/\`), while file paths do not. > [!NOTE] - > The \`HEAD\` request will return the same headers as a \`GET\` request, but without the body. - > This means you can use it to check if a file exists or to get metadata like the last modified date, size, etc. + > This endpoint returns the same headers as the \`GET\` request (file size, directory entry counts, last modified timestamps, content type) without the response body. `, responses: { 200: { @@ -236,6 +217,34 @@ export const METADATA_WILDCARD_ROUTE = createRoute({ }, required: true, }, + [UCD_STAT_SIZE_HEADER]: { + description: "The size of the file in bytes (only for files)", + schema: { + type: "string", + }, + required: true, + }, + [UCD_STAT_CHILDREN_HEADER]: { + description: "Number of children (only for directories)", + schema: { + type: "string", + }, + required: false, + }, + [UCD_STAT_CHILDREN_FILES_HEADER]: { + description: "Number of child files (only for directories)", + schema: { + type: "string", + }, + required: false, + }, + [UCD_STAT_CHILDREN_DIRS_HEADER]: { + description: "Number of child directories (only for directories)", + schema: { + type: "string", + }, + required: false, + }, "Content-Type": { description: "The content type of the file", schema: { @@ -251,80 +260,109 @@ export const METADATA_WILDCARD_ROUTE = createRoute({ "Content-Length": { description: "Byte length when applicable", schema: { type: "string" }, - required: false, + required: true, }, }, }, }, }); +function buildDirectoryHeaders(files: Entry[], baseHeaders: Record): Record { + return { + ...baseHeaders, + [UCD_STAT_TYPE_HEADER]: "directory", + [UCD_STAT_CHILDREN_HEADER]: `${files.length}`, + [UCD_STAT_CHILDREN_FILES_HEADER]: `${files.filter((f) => f.type === "file").length}`, + [UCD_STAT_CHILDREN_DIRS_HEADER]: `${files.filter((f) => f.type === "directory").length}`, + }; +} + +function buildFileHeaders( + contentType: string, + baseHeaders: Record, + response: Response, + actualContentLength: number, +): Record { + const headers: Record = { + "Content-Type": contentType, + ...baseHeaders, + [UCD_STAT_TYPE_HEADER]: "file", + [UCD_STAT_SIZE_HEADER]: `${actualContentLength}`, + "Content-Length": `${actualContentLength}`, + }; + + const cd = response.headers.get("Content-Disposition"); + if (cd) headers["Content-Disposition"] = cd; + + return headers; +} + export function registerWildcardRoute(router: OpenAPIHono) { router.openAPIRegistry.registerPath(WILDCARD_ROUTE); router.openAPIRegistry.registerPath(METADATA_WILDCARD_ROUTE); - router.get("/:wildcard{.*}?", cache({ - cacheName: "ucdjs:v1_files:files", - cacheControl: `max-age=${MAX_AGE_ONE_WEEK_SECONDS}`, // 7 days - }), async (c) => { - const path = c.req.param("wildcard")?.trim() || ""; + router.get( + "/:wildcard{.*}?", + cache({ + cacheName: "ucdjs:v1_files:files", + cacheControl: `max-age=${MAX_AGE_ONE_WEEK_SECONDS}`, // 7 days + }), + async (c) => { + const path = c.req.param("wildcard")?.trim() || ""; + + // Validate path for path traversal attacks + if (isInvalidPath(path)) { + return badRequest({ + message: "Invalid path", + }); + } - // Validate path for path traversal attacks - if (isInvalidPath(path)) { - return badRequest({ - message: "Invalid path", - }); - } + const normalizedPath = path.replace(/^\/+|\/+$/g, ""); + const url = normalizedPath + ? `https://unicode.org/Public/${normalizedPath}?F=2` + : "https://unicode.org/Public?F=2"; - const normalizedPath = path.replace(/^\/+|\/+$/g, ""); - const url = normalizedPath - ? `https://unicode.org/Public/${normalizedPath}?F=2` - : "https://unicode.org/Public?F=2"; + console.info(`[v1_files]: fetching file at ${url}`); - // eslint-disable-next-line no-console - console.info(`[v1_files]: fetching file at ${url}`); + const response = await fetch(url, { + method: "GET", + headers: { + "User-Agent": DEFAULT_USER_AGENT, + }, + }); - const response = await fetch(url, { - method: "GET", - headers: { - "User-Agent": DEFAULT_USER_AGENT, - }, - }); + if (!response.ok) { + if (response.status === 404) { + return notFound(c, { + message: "Resource not found", + }); + } - if (!response.ok) { - if (response.status === 404) { - return notFound(c, { - message: "Resource not found", - }); + return badGateway(c); } - return badGateway(c); - } - - let contentType = response.headers.get("content-type") || ""; - const lastModified = response.headers.get("Last-Modified") || undefined; - const upstreamContentLength = response.headers.get("Content-Length") || undefined; - const baseHeaders: Record = {}; - if (lastModified) baseHeaders["Last-Modified"] = lastModified; - - const leaf = normalizedPath.split("/").pop() ?? ""; - const extName = leaf.includes(".") ? leaf.split(".").pop()!.toLowerCase() : ""; - const isHtmlFile = HTML_EXTENSIONS.includes(`.${extName}`); - - // check if this is a directory listing (HTML response for non-HTML files) - const isDirectoryListing = contentType.includes("text/html") && !isHtmlFile; - - // eslint-disable-next-line no-console - console.info(`[v1_files]: fetched content type: ${contentType} for .${extName} file`); - if (isDirectoryListing) { - const html = await response.text(); - let files = await parseUnicodeDirectory(html); - - // Apply pattern filter if provided - const pattern = c.req.query("pattern"); - if (pattern) { - // eslint-disable-next-line no-console - console.info(`[v1_files]: applying glob pattern filter: ${pattern}`); - if (!isValidGlobPattern(pattern, { + let contentType = response.headers.get("content-type") || ""; + const lastModified = response.headers.get("Last-Modified") || undefined; + const baseHeaders: Record = {}; + if (lastModified) baseHeaders["Last-Modified"] = lastModified; + + const leaf = normalizedPath.split("/").pop() ?? ""; + const extName = leaf.includes(".") ? leaf.split(".").pop()!.toLowerCase() : ""; + const isHtmlFile = HTML_EXTENSIONS.includes(`.${extName}`); + + // check if this is a directory listing (HTML response for non-HTML files) + const isDirectoryListing = contentType.includes("text/html") && !isHtmlFile; + + console.info(`[v1_files]: fetched content type: ${contentType} for .${extName} file`); + if (isDirectoryListing) { + const html = await response.text(); + const parsedFiles = await parseUnicodeDirectory(html, normalizedPath || "/"); + + // Get query parameters for filtering and sorting + const pattern = c.req.query("pattern"); + + // Validate glob pattern before applying + if (pattern && !isValidGlobPattern(pattern, { maxLength: 128, maxSegments: 8, maxBraceExpansions: 8, @@ -336,35 +374,46 @@ export function registerWildcardRoute(router: OpenAPIHono) { }); } - const matcher = createGlobMatcher(pattern); - files = files.filter((entry) => matcher(entry.name)); + const files = applyDirectoryFiltersAndSort(parsedFiles, { + query: c.req.query("query"), + pattern, + type: c.req.query("type"), + sort: c.req.query("sort"), + order: c.req.query("order"), + }); + + const headers = buildDirectoryHeaders(files, baseHeaders); + return c.json(files, 200, headers); + } + + // Handle file response + console.log(`[v1_files]: pre content type check: ${contentType} for .${extName} file`); + contentType ||= determineContentTypeFromExtension(extName); + console.log(`[v1_files]: inferred content type as ${contentType} for .${extName} file`); + + const isHeadRequest = c.req.method === "HEAD"; + + // For HEAD requests, buffer to calculate accurate size + if (isHeadRequest) { + const blob = await response.blob(); + const actualSize = blob.size; + const headers = buildFileHeaders(contentType, baseHeaders, response, actualSize); + console.log(`[v1_files]: HEAD request, calculated size: ${actualSize}`); + return c.newResponse(null, 200, headers); } - return c.json(files, 200, { + const headers: Record = { + "Content-Type": contentType, ...baseHeaders, + [UCD_STAT_TYPE_HEADER]: "file", + }; - // Custom STAT Headers - [UCD_STAT_TYPE_HEADER]: "directory", - }); - } - - // eslint-disable-next-line no-console - console.log(`[v1_files]: pre content type check: ${contentType} for .${extName} file`); - contentType ||= determineContentTypeFromExtension(extName); - // eslint-disable-next-line no-console - console.log(`[v1_files]: inferred content type as ${contentType} for .${extName} file`); - - const headers: Record = { - "Content-Type": contentType, - ...baseHeaders, - - // Custom STAT Headers - [UCD_STAT_TYPE_HEADER]: "file", - }; - - const cd = response.headers.get("Content-Disposition"); - if (cd) headers["Content-Disposition"] = cd; - if (upstreamContentLength) headers["Content-Length"] = upstreamContentLength; - return c.newResponse(response.body!, 200, headers); - }); + const cd = response.headers.get("Content-Disposition"); + if (cd) headers["Content-Disposition"] = cd; + + console.log(`[v1_files]: binary file, streaming without buffering`); + + return c.newResponse(response.body, 200, headers); + }, + ); } diff --git a/apps/api/src/routes/v1_files/openapi-params.ts b/apps/api/src/routes/v1_files/openapi-params.ts new file mode 100644 index 000000000..8f5a4a028 --- /dev/null +++ b/apps/api/src/routes/v1_files/openapi-params.ts @@ -0,0 +1,202 @@ +import { dedent } from "@luxass/utils"; + +export const WILDCARD_PARAM = { + in: "path", + name: "wildcard", + description: dedent` + The path to the Unicode data resource you want to access. This can be any valid path from the official Unicode Public directory structure. + + ## Path Format Options + + | Pattern | Description | Example | + |--------------------------------|--------------------------------|-------------------------------------| + | \`{version}/ucd/{filename}\` | UCD files for specific version | \`15.1.0/ucd/UnicodeData.txt\` | + | \`{version}/ucd/{sub}/{file}\` | Files in subdirectories | \`15.1.0/ucd/emoji/emoji-data.txt\` | + | \`{version}\` | List files for version | \`15.1.0\` | + | \`latest/ucd/{filename}\` | Latest version of file | \`latest/ucd/PropList.txt\` | + `, + required: true, + schema: { + type: "string", + pattern: ".*", + }, + examples: { + "UnicodeData.txt": { + summary: "UnicodeData.txt for Unicode 15.0.0", + value: "15.0.0/ucd/UnicodeData.txt", + }, + "emoji-data.txt": { + summary: "Emoji data file", + value: "15.1.0/ucd/emoji/emoji-data.txt", + }, + "root": { + summary: "Root path", + value: "", + }, + "list-version-dir": { + summary: "Versioned path", + value: "15.1.0", + }, + }, +} as const; + +export const PATTERN_QUERY_PARAM = { + in: "query", + name: "pattern", + description: dedent` + A glob pattern to filter directory listing results by filename. Only applies when the response is a directory listing. + The matching is **case-insensitive**. + + ## Supported Glob Syntax + + | Pattern | Description | Example | + |-----------|-----------------------------------------------|------------------------------------------------------| + | \`*\` | Match any characters (except path separators) | \`*.txt\` matches \`file.txt\` | + | \`?\` | Match a single character | \`file?.txt\` matches \`file1.txt\` | + | \`{a,b}\` | Match any of the patterns | \`*.{txt,xml}\` matches \`file.txt\` or \`file.xml\` | + | \`[abc]\` | Match any character in the set | \`file[123].txt\` matches \`file1.txt\` | + + ## Examples + + - \`*.txt\` - Match all text files + - \`Uni*\` - Match files starting with "Uni" (e.g., UnicodeData.txt) + - \`*Data*\` - Match files containing "Data" + - \`*.{txt,xml}\` - Match text or XML files + `, + required: false, + schema: { + type: "string", + }, + examples: { + "txt-files": { + summary: "Match all .txt files", + value: "*.txt", + }, + "prefix-match": { + summary: "Match files starting with 'Uni'", + value: "Uni*", + }, + "contains-match": { + summary: "Match files containing 'Data'", + value: "*Data*", + }, + "multi-extension": { + summary: "Match .txt or .xml files", + value: "*.{txt,xml}", + }, + }, +} as const; + +export const QUERY_PARAM = { + in: "query", + name: "query", + description: dedent` + A search query to filter directory listing results. Entries are matched if their name **starts with** this value (case-insensitive). + This is useful for quick prefix-based searching within a directory. + + ## Examples + + - \`Uni\` - Match entries starting with "Uni" (e.g., UnicodeData.txt) + - \`15\` - Match version directories starting with "15" + `, + required: false, + schema: { + type: "string", + }, + examples: { + "unicode-prefix": { + summary: "Search for entries starting with 'Uni'", + value: "Uni", + }, + "version-prefix": { + summary: "Search for version directories", + value: "15", + }, + }, +} as const; + +export const TYPE_QUERY_PARAM = { + in: "query", + name: "type", + description: dedent` + Filter directory listing results by entry type. + + - \`all\` (default) - Return both files and directories + - \`files\` - Return only files + - \`directories\` - Return only directories + `, + required: false, + schema: { + type: "string", + enum: ["all", "files", "directories"] as string[], + default: "all", + }, + examples: { + "all": { + summary: "Show all entries (default)", + value: "all", + }, + "files-only": { + summary: "Show only files", + value: "files", + }, + "directories-only": { + summary: "Show only directories", + value: "directories", + }, + }, +} as const; + +export const SORT_QUERY_PARAM = { + in: "query", + name: "sort", + description: dedent` + The field to sort directory listing results by. + + - \`name\` (default) - Sort alphabetically by entry name + - \`lastModified\` - Sort by last modification timestamp + `, + required: false, + schema: { + type: "string", + enum: ["name", "lastModified"] as string[], + default: "name", + }, + examples: { + "by-name": { + summary: "Sort by name (default)", + value: "name", + }, + "by-date": { + summary: "Sort by last modified date", + value: "lastModified", + }, + }, +} as const; + +export const ORDER_QUERY_PARAM = { + in: "query", + name: "order", + description: dedent` + The sort order for directory listing results. + + - \`asc\` (default) - Ascending order (A-Z, oldest first) + - \`desc\` - Descending order (Z-A, newest first) + `, + required: false, + schema: { + type: "string", + enum: ["asc", "desc"] as string[], + default: "asc", + }, + examples: { + ascending: { + summary: "Ascending order (default)", + value: "asc", + }, + descending: { + summary: "Descending order", + value: "desc", + }, + }, +} as const; diff --git a/apps/api/src/routes/v1_files/router.ts b/apps/api/src/routes/v1_files/router.ts index 00958a3c7..414dcaea3 100644 --- a/apps/api/src/routes/v1_files/router.ts +++ b/apps/api/src/routes/v1_files/router.ts @@ -2,10 +2,7 @@ import type { HonoEnv } from "../../types"; import { OpenAPIHono } from "@hono/zod-openapi"; import { V1_FILES_ROUTER_BASE_PATH } from "../../constants"; import { registerWildcardRoute } from "./$wildcard"; -import { registerSearchRoute } from "./search"; export const V1_FILES_ROUTER = new OpenAPIHono().basePath(V1_FILES_ROUTER_BASE_PATH); -// Search endpoint - must be registered BEFORE the wildcard route -registerSearchRoute(V1_FILES_ROUTER); registerWildcardRoute(V1_FILES_ROUTER); diff --git a/apps/api/src/routes/v1_files/search.ts b/apps/api/src/routes/v1_files/search.ts deleted file mode 100644 index 3299e1e1b..000000000 --- a/apps/api/src/routes/v1_files/search.ts +++ /dev/null @@ -1,209 +0,0 @@ -import type { OpenAPIHono } from "@hono/zod-openapi"; -import type { HonoEnv } from "../../types"; -import { createRoute } from "@hono/zod-openapi"; -import { dedent } from "@luxass/utils"; -import { DEFAULT_USER_AGENT } from "@ucdjs/env"; -import { FileEntryListSchema } from "@ucdjs/schemas"; -import { cache } from "hono/cache"; -import { MAX_AGE_ONE_WEEK_SECONDS } from "../../constants"; -import { badGateway, badRequest } from "../../lib/errors"; -import { parseUnicodeDirectory } from "../../lib/files"; -import { generateReferences, OPENAPI_TAGS } from "../../openapi"; -import { isInvalidPath } from "./utils"; - -const SEARCH_ROUTE_DOCS = dedent` - Search for files and directories within a path. This endpoint performs a **prefix-based search** on entry names. - - ## Search Behavior - - The search is **case-insensitive** and matches entries where the name **starts with** the query string. - - Results are sorted with **files first**, followed by **directories**. This prioritization means: - - If your query matches both files and directories, files appear first - - Within each group (files/directories), results maintain their original order - - ## Example - - Given a directory with: - - \`come/\` (directory) - - \`computer.txt\` (file) - - | Query | Result | - |----------|------------------------------------------------| - | \`com\` | \`computer.txt\` (file), \`come/\` (directory) | - | \`come\` | \`come/\` (exact directory match) | - | \`comp\` | \`computer.txt\` | - - > [!NOTE] - > If no entries match the query, an empty array is returned with a 200 status. -`; - -const SEARCH_QUERY_PARAM_DOCS = dedent` - The search query string. Entries are matched if their name **starts with** this value (case-insensitive). -`; - -const SEARCH_PATH_PARAM_DOCS = dedent` - The base path to search within. If not provided, searches from the root of the Unicode Public directory. -`; - -export const SEARCH_ROUTE = createRoute({ - method: "get", - path: "/search", - tags: [OPENAPI_TAGS.FILES], - middleware: [ - cache({ - cacheName: "ucdjs:v1_files:search", - cacheControl: `max-age=${MAX_AGE_ONE_WEEK_SECONDS}`, // 7 days - }), - ], - parameters: [ - { - in: "query", - name: "q", - description: SEARCH_QUERY_PARAM_DOCS, - required: true, - schema: { - type: "string", - minLength: 1, - }, - examples: { - "unicode-prefix": { - summary: "Search for entries starting with 'uni'", - value: "uni", - }, - "version-prefix": { - summary: "Search for version directories", - value: "15", - }, - }, - }, - { - in: "query", - name: "path", - description: SEARCH_PATH_PARAM_DOCS, - required: false, - schema: { - type: "string", - }, - examples: { - "root": { - summary: "Search from root", - value: "", - }, - "ucd-dir": { - summary: "Search within UCD directory", - value: "15.1.0/ucd", - }, - }, - }, - ], - description: SEARCH_ROUTE_DOCS, - responses: { - 200: { - description: "Search results sorted with files first, then directories", - content: { - "application/json": { - schema: FileEntryListSchema, - examples: { - "files-first": { - summary: "Files appear before directories", - value: [ - { - type: "file", - name: "computer.txt", - path: "computer.txt", - lastModified: 1693213740000, - }, - { - type: "directory", - name: "come", - path: "come", - lastModified: 1697495340000, - }, - ], - }, - "empty-results": { - summary: "No matching entries", - value: [], - }, - }, - }, - }, - }, - ...(generateReferences([ - 400, - 500, - 502, - ])), - }, -}); - -export function registerSearchRoute(router: OpenAPIHono) { - router.openapi(SEARCH_ROUTE, async (c) => { - const query = c.req.query("q"); - const basePath = c.req.query("path") || ""; - - if (!query) { - return badRequest({ - message: "Missing required query parameter: q", - }); - } - - // Validate basePath for path traversal attacks - if (isInvalidPath(basePath)) { - return badRequest({ - message: "Invalid path", - }); - } - - const normalizedPath = basePath.replace(/^\/+|\/+$/g, ""); - const url = normalizedPath - ? `https://unicode.org/Public/${normalizedPath}?F=2` - : "https://unicode.org/Public?F=2"; - - // eslint-disable-next-line no-console - console.info(`[v1_files:search]: fetching directory at ${url}`); - - const response = await fetch(url, { - method: "GET", - headers: { - "User-Agent": DEFAULT_USER_AGENT, - }, - }); - - if (!response.ok) { - if (response.status === 404) { - // Return empty array if the base path doesn't exist - return c.json([], 200); - } - return badGateway(c); - } - - const contentType = response.headers.get("content-type") || ""; - - // If not a directory listing, return empty results - if (!contentType.includes("text/html")) { - return c.json([], 200); - } - - const html = await response.text(); - const entries = await parseUnicodeDirectory(html); - - // Filter entries where name starts with query (case-insensitive) - const queryLower = query.toLowerCase(); - const matchingEntries = entries.filter((entry) => - entry.name.toLowerCase().startsWith(queryLower), - ); - - // Sort: files first, then directories - const sortedEntries = matchingEntries.toSorted((a, b) => { - // Files before directories - if (a.type === "file" && b.type === "directory") return -1; - if (a.type === "directory" && b.type === "file") return 1; - // Maintain original order within same type - return 0; - }); - - return c.json(sortedEntries, 200); - }); -} diff --git a/apps/api/test/routes/v1_files/$wildcard.test.ts b/apps/api/test/routes/v1_files/$wildcard.test.ts index 4864c6541..feca8c470 100644 --- a/apps/api/test/routes/v1_files/$wildcard.test.ts +++ b/apps/api/test/routes/v1_files/$wildcard.test.ts @@ -1,16 +1,12 @@ +/// + +import type { FileEntryList } from "@ucdjs/schemas"; import { HttpResponse, mockFetch, RawResponse } from "#test-utils/msw"; -import { UCD_STAT_TYPE_HEADER } from "@ucdjs/env"; +import { UCD_STAT_SIZE_HEADER, UCD_STAT_TYPE_HEADER } from "@ucdjs/env"; import { generateAutoIndexHtml } from "apache-autoindex-parse/test-utils"; import { env } from "cloudflare:workers"; import { describe, expect, it } from "vitest"; import { executeRequest } from "../../helpers/request"; -import { - expectApiError, - expectCacheHeaders, - expectContentType, - expectHeadError, - expectSuccess, -} from "../../helpers/response"; describe("v1_files", () => { // eslint-disable-next-line test/prefer-lowercase-title @@ -35,9 +31,47 @@ describe("v1_files", () => { env, ); - expectSuccess(response); - expectContentType(response, "text/plain; charset=utf-8"); - expectCacheHeaders(response); + expect(response).toMatchResponse({ + status: 200, + headers: { + "Content-Type": "text/plain; charset=utf-8", + }, + cache: true, + }); + + const content = await text(); + expect(content).toBe(mockFileContent); + }); + + it("should not forward content-length for streamed GET responses", async () => { + const mockFileContent = "Plain text content"; + + mockFetch([ + ["GET", "https://unicode.org/Public/15.1.0/ucd/ReadMe.txt", () => { + return HttpResponse.text(mockFileContent, { + headers: { + "content-type": "text/plain; charset=utf-8", + "content-length": mockFileContent.length.toString(), + }, + }); + }], + ]); + + const { response, text } = await executeRequest( + new Request("https://api.ucdjs.dev/api/v1/files/15.1.0/ucd/ReadMe.txt"), + env, + ); + + expect(response).toMatchResponse({ + status: 200, + headers: { + "Content-Type": "text/plain; charset=utf-8", + }, + cache: true, + }); + + expect(response.headers.has("Content-Length")).toBe(false); + expect(response.headers.has(UCD_STAT_SIZE_HEADER)).toBe(false); const content = await text(); expect(content).toBe(mockFileContent); @@ -51,7 +85,10 @@ describe("v1_files", () => { env, ); - await expectApiError(response, { status: 400, message: "Invalid path" }); + expect(response).toBeApiError({ + status: 400, + message: "Invalid path", + }); }); it("should reject paths with '//' segments", async () => { @@ -60,7 +97,10 @@ describe("v1_files", () => { env, ); - await expectApiError(response, { status: 400, message: "Invalid path" }); + expect(response).toBeApiError({ + status: 400, + message: "Invalid path", + }); }); }); @@ -77,7 +117,7 @@ describe("v1_files", () => { env, ); - await expectApiError(response, { status: 404, message: "Resource not found" }); + expect(response).toBeApiError({ status: 404, message: "Resource not found" }); }); it("should handle 502 from unicode.org", async () => { @@ -92,7 +132,7 @@ describe("v1_files", () => { env, ); - await expectApiError(response, { status: 502, message: "Bad Gateway" }); + expect(response).toBeApiError({ status: 502, message: "Bad Gateway" }); }); }); @@ -123,9 +163,13 @@ describe("v1_files", () => { env, ); - expectSuccess(response); - expectContentType(response, "application/octet-stream"); - expectCacheHeaders(response); + expect(response).toMatchResponse({ + status: 200, + headers: { + "Content-Type": "application/octet-stream", + }, + cache: true, + }); }); it("should infer content-type from .txt when upstream omits it", async () => { @@ -147,9 +191,13 @@ describe("v1_files", () => { env, ); - expectSuccess(response); - expectContentType(response, "text/plain"); - expectCacheHeaders(response); + expect(response).toMatchResponse({ + status: 200, + headers: { + "Content-Type": "text/plain", + }, + cache: true, + }); const content = await text(); expect(content).toBe(mockContent); @@ -173,9 +221,13 @@ describe("v1_files", () => { env, ); - expectSuccess(response); - expectContentType(response, "application/xml"); - expectCacheHeaders(response); + expect(response).toMatchResponse({ + status: 200, + headers: { + "Content-Type": "application/xml", + }, + cache: true, + }); const content = await text(); expect(content).toBe(mockContent); @@ -199,9 +251,13 @@ describe("v1_files", () => { env, ); - expectSuccess(response); - expectContentType(response, "application/octet-stream"); - expectCacheHeaders(response); + expect(response).toMatchResponse({ + status: 200, + headers: { + "Content-Type": "application/octet-stream", + }, + cache: true, + }); const content = await text(); expect(content).toBe(mockContent); @@ -211,10 +267,10 @@ describe("v1_files", () => { describe("pattern filter", () => { it("should filter directory listing by glob pattern *.txt", async () => { const html = generateAutoIndexHtml([ - { name: "UnicodeData.txt", path: "/Public/15.1.0/ucd/UnicodeData.txt", type: "file", lastModified: Date.now() }, - { name: "Blocks.txt", path: "/Public/15.1.0/ucd/Blocks.txt", type: "file", lastModified: Date.now() }, - { name: "emoji", path: "/Public/15.1.0/ucd/emoji", type: "directory", lastModified: Date.now() }, - { name: "data.xml", path: "/Public/15.1.0/ucd/data.xml", type: "file", lastModified: Date.now() }, + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: Date.now() }, + { name: "Blocks.txt", path: "Blocks.txt", type: "file", lastModified: Date.now() }, + { name: "emoji", path: "emoji/", type: "directory", lastModified: Date.now() }, + { name: "data.xml", path: "data.xml", type: "file", lastModified: Date.now() }, ], "F2"); mockFetch([ @@ -230,17 +286,20 @@ describe("v1_files", () => { env, ); - expectSuccess(response); - const files = await json() as { name: string }[]; + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const files = await json(); expect(files).toHaveLength(2); - expect(files.map((f) => f.name)).toEqual(["UnicodeData.txt", "Blocks.txt"]); + expect(files.map((f) => f.name)).toEqual(["Blocks.txt", "UnicodeData.txt"]); }); it("should filter directory listing by prefix pattern Uni*", async () => { const html = generateAutoIndexHtml([ - { name: "UnicodeData.txt", path: "/Public/15.1.0/ucd/UnicodeData.txt", type: "file", lastModified: Date.now() }, - { name: "Unihan.zip", path: "/Public/15.1.0/ucd/Unihan.zip", type: "file", lastModified: Date.now() }, - { name: "Blocks.txt", path: "/Public/15.1.0/ucd/Blocks.txt", type: "file", lastModified: Date.now() }, + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: Date.now() }, + { name: "Unihan.zip", path: "Unihan.zip", type: "file", lastModified: Date.now() }, + { name: "Blocks.txt", path: "Blocks.txt", type: "file", lastModified: Date.now() }, ], "F2"); mockFetch([ @@ -256,16 +315,19 @@ describe("v1_files", () => { env, ); - expectSuccess(response); - const files = await json() as { name: string }[]; + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const files = await json(); expect(files).toHaveLength(2); expect(files.map((f) => f.name)).toEqual(["UnicodeData.txt", "Unihan.zip"]); }); it("should filter case-insensitively", async () => { const html = generateAutoIndexHtml([ - { name: "UnicodeData.txt", path: "/Public/15.1.0/ucd/UnicodeData.txt", type: "file", lastModified: Date.now() }, - { name: "Blocks.txt", path: "/Public/15.1.0/ucd/Blocks.txt", type: "file", lastModified: Date.now() }, + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: Date.now() }, + { name: "Blocks.txt", path: "Blocks.txt", type: "file", lastModified: Date.now() }, ], "F2"); mockFetch([ @@ -281,16 +343,20 @@ describe("v1_files", () => { env, ); - expectSuccess(response); - const files = await json() as { name: string }[]; + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + + const files = await json(); expect(files).toHaveLength(1); expect(files[0]!.name).toBe("UnicodeData.txt"); }); it("should return empty array when no matches", async () => { const html = generateAutoIndexHtml([ - { name: "UnicodeData.txt", path: "/Public/15.1.0/ucd/UnicodeData.txt", type: "file", lastModified: Date.now() }, - { name: "Blocks.txt", path: "/Public/15.1.0/ucd/Blocks.txt", type: "file", lastModified: Date.now() }, + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: Date.now() }, + { name: "Blocks.txt", path: "Blocks.txt", type: "file", lastModified: Date.now() }, ], "F2"); mockFetch([ @@ -306,16 +372,19 @@ describe("v1_files", () => { env, ); - expectSuccess(response); - const files = await json() as { name: string }[]; + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const files = await json(); expect(files).toEqual([]); }); it("should support multi-extension pattern *.{txt,xml}", async () => { const html = generateAutoIndexHtml([ - { name: "UnicodeData.txt", path: "/Public/15.1.0/ucd/UnicodeData.txt", type: "file", lastModified: Date.now() }, - { name: "ucd.all.flat.xml", path: "/Public/15.1.0/ucd/ucd.all.flat.xml", type: "file", lastModified: Date.now() }, - { name: "Unihan.zip", path: "/Public/15.1.0/ucd/Unihan.zip", type: "file", lastModified: Date.now() }, + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: Date.now() }, + { name: "ucd.all.flat.xml", path: "ucd.all.flat.xml", type: "file", lastModified: Date.now() }, + { name: "Unihan.zip", path: "Unihan.zip", type: "file", lastModified: Date.now() }, ], "F2"); mockFetch([ @@ -331,17 +400,20 @@ describe("v1_files", () => { env, ); - expectSuccess(response); - const files = await json() as { name: string }[]; + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const files = await json(); expect(files).toHaveLength(2); - expect(files.map((f) => f.name)).toEqual(["UnicodeData.txt", "ucd.all.flat.xml"]); + expect(files.map((f) => f.name)).toEqual(["ucd.all.flat.xml", "UnicodeData.txt"]); }); - it("should support substring pattern *Data*", async () => { + it("should support substring pattern *Data* (case-insensitive)", async () => { const html = generateAutoIndexHtml([ - { name: "UnicodeData.txt", path: "/Public/15.1.0/ucd/UnicodeData.txt", type: "file", lastModified: Date.now() }, - { name: "emoji-data.txt", path: "/Public/15.1.0/ucd/emoji-data.txt", type: "file", lastModified: Date.now() }, - { name: "Blocks.txt", path: "/Public/15.1.0/ucd/Blocks.txt", type: "file", lastModified: Date.now() }, + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: Date.now() }, + { name: "emoji-data.txt", path: "emoji-data.txt", type: "file", lastModified: Date.now() }, + { name: "Blocks.txt", path: "Blocks.txt", type: "file", lastModified: Date.now() }, ], "F2"); mockFetch([ @@ -357,10 +429,13 @@ describe("v1_files", () => { env, ); - expectSuccess(response); - const files = await json() as { name: string }[]; + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const files = await json(); expect(files).toHaveLength(2); - expect(files.map((f) => f.name)).toEqual(["UnicodeData.txt", "emoji-data.txt"]); + expect(files.map((f) => f.name)).toEqual(["emoji-data.txt", "UnicodeData.txt"]); }); it("should not apply pattern filter for file requests", async () => { @@ -380,15 +455,20 @@ describe("v1_files", () => { env, ); - expectSuccess(response); - expectContentType(response, "text/plain; charset=utf-8"); + expect(response).toMatchResponse({ + status: 200, + headers: { + "Content-Type": "text/plain; charset=utf-8", + }, + }); + const content = await text(); expect(content).toBe(mockFileContent); }); it("should return 200 for empty pattern", async () => { const html = generateAutoIndexHtml([ - { name: "UnicodeData.txt", path: "/Public/15.1.0/ucd/UnicodeData.txt", type: "file", lastModified: Date.now() }, + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: Date.now() }, ], "F2"); mockFetch([ @@ -405,18 +485,494 @@ describe("v1_files", () => { env, ); - expectSuccess(response); + expect(response).toMatchResponse({ + status: 200, + json: true, + }); const result = await json(); expect(result).toEqual([ { lastModified: expect.any(Number), name: "UnicodeData.txt", - path: "/Public/15.1.0/ucd/UnicodeData.txt", + path: "/15.1.0/ucd/UnicodeData.txt", type: "file", }, ]); }); }); + + describe("query filter (prefix search)", () => { + it("should filter entries by prefix", async () => { + const html = generateAutoIndexHtml([ + { name: "come", path: "come/", type: "directory", lastModified: Date.now() }, + { name: "computer.txt", path: "computer.txt", type: "file", lastModified: Date.now() }, + { name: "other.txt", path: "other.txt", type: "file", lastModified: Date.now() }, + ], "F2"); + + mockFetch([ + ["GET", "https://unicode.org/Public", () => { + return HttpResponse.text(html, { + headers: { "content-type": "text/html; charset=utf-8" }, + }); + }], + ]); + + const { response, json } = await executeRequest( + new Request("https://api.ucdjs.dev/api/v1/files?query=com"), + env, + ); + + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const results = await json(); + + expect(results).toHaveLength(2); + expect(results.map((r) => r.name)).toContain("come"); + expect(results.map((r) => r.name)).toContain("computer.txt"); + }); + + it("should search case-insensitively", async () => { + const html = generateAutoIndexHtml([ + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: Date.now() }, + { name: "Blocks.txt", path: "Blocks.txt", type: "file", lastModified: Date.now() }, + ], "F2"); + + mockFetch([ + ["GET", "https://unicode.org/Public", () => { + return HttpResponse.text(html, { + headers: { "content-type": "text/html; charset=utf-8" }, + }); + }], + ]); + + const { response, json } = await executeRequest( + new Request("https://api.ucdjs.dev/api/v1/files?query=unicode"), + env, + ); + + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const results = await json(); + + expect(results).toHaveLength(1); + expect(results[0]!.name).toBe("UnicodeData.txt"); + }); + + it("should search within a specific path", async () => { + const html = generateAutoIndexHtml([ + { name: "emoji-data.txt", path: "emoji-data.txt", type: "file", lastModified: Date.now() }, + { name: "emoji-sequences.txt", path: "emoji-sequences.txt", type: "file", lastModified: Date.now() }, + { name: "other.txt", path: "other.txt", type: "file", lastModified: Date.now() }, + ], "F2"); + + mockFetch([ + ["GET", "https://unicode.org/Public/15.1.0/ucd/emoji", () => { + return HttpResponse.text(html, { + headers: { "content-type": "text/html; charset=utf-8" }, + }); + }], + ]); + + const { response, json } = await executeRequest( + new Request("https://api.ucdjs.dev/api/v1/files/15.1.0/ucd/emoji?query=emoji"), + env, + ); + + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const results = await json(); + + expect(results).toHaveLength(2); + expect(results.map((r) => r.name)).toEqual(["emoji-data.txt", "emoji-sequences.txt"]); + }); + + it("should return empty array when no matches found", async () => { + const html = generateAutoIndexHtml([ + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: Date.now() }, + { name: "Blocks.txt", path: "Blocks.txt", type: "file", lastModified: Date.now() }, + ], "F2"); + + mockFetch([ + ["GET", "https://unicode.org/Public", () => { + return HttpResponse.text(html, { + headers: { "content-type": "text/html; charset=utf-8" }, + }); + }], + ]); + + const { response, json } = await executeRequest( + new Request("https://api.ucdjs.dev/api/v1/files?query=nonexistent"), + env, + ); + + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const results = await json(); + expect(results).toEqual([]); + }); + + it("should match exact entry name when query matches exactly", async () => { + const html = generateAutoIndexHtml([ + { name: "come", path: "come/", type: "directory", lastModified: Date.now() }, + { name: "computer.txt", path: "computer.txt", type: "file", lastModified: Date.now() }, + ], "F2"); + + mockFetch([ + ["GET", "https://unicode.org/Public", () => { + return HttpResponse.text(html, { + headers: { "content-type": "text/html; charset=utf-8" }, + }); + }], + ]); + + const { response, json } = await executeRequest( + new Request("https://api.ucdjs.dev/api/v1/files?query=come"), + env, + ); + + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const results = await json(); + + // Only the directory matches exactly + expect(results).toHaveLength(1); + expect(results[0]).toMatchObject({ name: "come", type: "directory" }); + }); + + it("should combine query with pattern filter", async () => { + const html = generateAutoIndexHtml([ + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: Date.now() }, + { name: "Unicode.zip", path: "Unicode.zip", type: "file", lastModified: Date.now() }, + { name: "Blocks.txt", path: "Blocks.txt", type: "file", lastModified: Date.now() }, + ], "F2"); + + mockFetch([ + ["GET", "https://unicode.org/Public", () => { + return HttpResponse.text(html, { + headers: { "content-type": "text/html; charset=utf-8" }, + }); + }], + ]); + + const { response, json } = await executeRequest( + new Request("https://api.ucdjs.dev/api/v1/files?query=Uni&pattern=*.txt"), + env, + ); + + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const results = await json(); + + expect(results).toHaveLength(1); + expect(results[0]!.name).toBe("UnicodeData.txt"); + }); + }); + + describe("type filter", () => { + it("should return only files when type=files", async () => { + const html = generateAutoIndexHtml([ + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: Date.now() }, + { name: "emoji", path: "emoji/", type: "directory", lastModified: Date.now() }, + { name: "Blocks.txt", path: "Blocks.txt", type: "file", lastModified: Date.now() }, + ], "F2"); + + mockFetch([ + ["GET", "https://unicode.org/Public", () => { + return HttpResponse.text(html, { + headers: { "content-type": "text/html; charset=utf-8" }, + }); + }], + ]); + + const { response, json } = await executeRequest( + new Request("https://api.ucdjs.dev/api/v1/files?type=files"), + env, + ); + + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const results = await json(); + + expect(results).toHaveLength(2); + expect(results.every((r) => r.type === "file")).toBe(true); + }); + + it("should return only directories when type=directories", async () => { + const html = generateAutoIndexHtml([ + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: Date.now() }, + { name: "emoji", path: "emoji/", type: "directory", lastModified: Date.now() }, + { name: "charts", path: "charts/", type: "directory", lastModified: Date.now() }, + ], "F2"); + + mockFetch([ + ["GET", "https://unicode.org/Public", () => { + return HttpResponse.text(html, { + headers: { "content-type": "text/html; charset=utf-8" }, + }); + }], + ]); + + const { response, json } = await executeRequest( + new Request("https://api.ucdjs.dev/api/v1/files?type=directories"), + env, + ); + + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const results = await json(); + + expect(results).toHaveLength(2); + expect(results.every((r) => r.type === "directory")).toBe(true); + }); + + it("should return all entries when type=all", async () => { + const html = generateAutoIndexHtml([ + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: Date.now() }, + { name: "emoji", path: "emoji/", type: "directory", lastModified: Date.now() }, + ], "F2"); + + mockFetch([ + ["GET", "https://unicode.org/Public", () => { + return HttpResponse.text(html, { + headers: { "content-type": "text/html; charset=utf-8" }, + }); + }], + ]); + + const { response, json } = await executeRequest( + new Request("https://api.ucdjs.dev/api/v1/files?type=all"), + env, + ); + + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const results = await json(); + + expect(results).toHaveLength(2); + }); + + it("should combine type with query filter", async () => { + const html = generateAutoIndexHtml([ + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: Date.now() }, + { name: "Unicode", path: "Unicode/", type: "directory", lastModified: Date.now() }, + { name: "Blocks.txt", path: "Blocks.txt", type: "file", lastModified: Date.now() }, + ], "F2"); + + mockFetch([ + ["GET", "https://unicode.org/Public", () => { + return HttpResponse.text(html, { + headers: { "content-type": "text/html; charset=utf-8" }, + }); + }], + ]); + + const { response, json } = await executeRequest( + new Request("https://api.ucdjs.dev/api/v1/files?query=Uni&type=files"), + env, + ); + + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const results = await json(); + + expect(results).toHaveLength(1); + expect(results[0]).toMatchObject({ name: "UnicodeData.txt", type: "file" }); + }); + }); + + describe("sort and order", () => { + it("should sort by name ascending by default", async () => { + const html = generateAutoIndexHtml([ + { name: "Blocks.txt", path: "Blocks.txt", type: "file", lastModified: Date.now() }, + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: Date.now() }, + { name: "ArabicShaping.txt", path: "ArabicShaping.txt", type: "file", lastModified: Date.now() }, + ], "F2"); + + mockFetch([ + ["GET", "https://unicode.org/Public", () => { + return HttpResponse.text(html, { + headers: { "content-type": "text/html; charset=utf-8" }, + }); + }], + ]); + + const { response, json } = await executeRequest( + new Request("https://api.ucdjs.dev/api/v1/files"), + env, + ); + + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const results = await json(); + expect(results.map((r) => r.name)).toEqual([ + "ArabicShaping.txt", + "Blocks.txt", + "UnicodeData.txt", + ]); + }); + + it("should sort by name descending when order=desc", async () => { + const html = generateAutoIndexHtml([ + { name: "Blocks.txt", path: "Blocks.txt", type: "file", lastModified: Date.now() }, + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: Date.now() }, + { name: "ArabicShaping.txt", path: "ArabicShaping.txt", type: "file", lastModified: Date.now() }, + ], "F2"); + + mockFetch([ + ["GET", "https://unicode.org/Public", () => { + return HttpResponse.text(html, { + headers: { "content-type": "text/html; charset=utf-8" }, + }); + }], + ]); + + const { response, json } = await executeRequest( + new Request("https://api.ucdjs.dev/api/v1/files?sort=name&order=desc"), + env, + ); + + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const results = await json(); + + expect(results.map((r) => r.name)).toEqual([ + "UnicodeData.txt", + "Blocks.txt", + "ArabicShaping.txt", + ]); + }); + + it("should sort by lastModified ascending", async () => { + const now = Date.now(); + const html = generateAutoIndexHtml([ + { name: "Blocks.txt", path: "Blocks.txt", type: "file", lastModified: now - 2000 }, + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: now - 1000 }, + { name: "ArabicShaping.txt", path: "ArabicShaping.txt", type: "file", lastModified: now - 3000 }, + ], "F2"); + + mockFetch([ + ["GET", "https://unicode.org/Public", () => { + return HttpResponse.text(html, { + headers: { "content-type": "text/html; charset=utf-8" }, + }); + }], + ]); + + const { response, json } = await executeRequest( + new Request("https://api.ucdjs.dev/api/v1/files?sort=lastModified&order=asc"), + env, + ); + + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const results = await json(); + + // Check all entries returned and have lastModified + expect(results).toHaveLength(3); + expect(results.every((r) => typeof r.lastModified === "number")).toBe(true); + + // Verify sorted by lastModified ascending (oldest first) + for (let i = 1; i < results.length; i++) { + expect(results[i]!.lastModified!).toBeGreaterThanOrEqual(results[i - 1]!.lastModified!); + } + }); + + it("should sort by lastModified descending", async () => { + const now = Date.now(); + const html = generateAutoIndexHtml([ + { name: "Blocks.txt", path: "Blocks.txt", type: "file", lastModified: now - 2000 }, + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: now - 1000 }, + { name: "ArabicShaping.txt", path: "ArabicShaping.txt", type: "file", lastModified: now - 3000 }, + ], "F2"); + + mockFetch([ + ["GET", "https://unicode.org/Public", () => { + return HttpResponse.text(html, { + headers: { "content-type": "text/html; charset=utf-8" }, + }); + }], + ]); + + const { response, json } = await executeRequest( + new Request("https://api.ucdjs.dev/api/v1/files?sort=lastModified&order=desc"), + env, + ); + + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const results = await json(); + + // Check all entries returned and have lastModified + expect(results).toHaveLength(3); + expect(results.every((r) => typeof r.lastModified === "number")).toBe(true); + + // Verify sorted by lastModified descending (newest first) + for (let i = 1; i < results.length; i++) { + expect(results[i]!.lastModified!).toBeLessThanOrEqual(results[i - 1]!.lastModified!); + } + }); + + it("should combine sort with filters", async () => { + const now = Date.now(); + const html = generateAutoIndexHtml([ + { name: "UnicodeData.txt", path: "UnicodeData.txt", type: "file", lastModified: now - 1000 }, + { name: "Unihan.zip", path: "Unihan.zip", type: "file", lastModified: now - 3000 }, + { name: "Blocks.txt", path: "Blocks.txt", type: "file", lastModified: now - 2000 }, + ], "F2"); + + mockFetch([ + ["GET", "https://unicode.org/Public", () => { + return HttpResponse.text(html, { + headers: { "content-type": "text/html; charset=utf-8" }, + }); + }], + ]); + + const { response, json } = await executeRequest( + new Request("https://api.ucdjs.dev/api/v1/files?query=Uni&sort=lastModified&order=desc"), + env, + ); + + expect(response).toMatchResponse({ + status: 200, + json: true, + }); + const results = await json(); + expect(results).toHaveLength(2); + expect(results.map((r) => r.name)).toEqual([ + "UnicodeData.txt", + "Unihan.zip", + ]); + }); + }); }); // eslint-disable-next-line test/prefer-lowercase-title @@ -443,15 +999,53 @@ describe("v1_files", () => { env, ); - expectSuccess(response); - expectContentType(response, "text/plain; charset=utf-8"); - expectCacheHeaders(response); + expect(response).toMatchResponse({ + status: 200, + headers: { + "Content-Type": "text/plain; charset=utf-8", + }, + cache: true, + }); + }); + + it("should include size headers for HEAD file requests", async () => { + const mockFileContent = "Head response content"; + + mockFetch([ + ["GET", "https://unicode.org/Public/sample/file.txt", () => { + return HttpResponse.text(mockFileContent, { + headers: { + "content-type": "text/plain; charset=utf-8", + "content-length": mockFileContent.length.toString(), + }, + }); + }], + ]); + + const { response } = await executeRequest( + new Request("https://api.ucdjs.dev/api/v1/files/sample/file.txt", { + method: "HEAD", + }), + env, + ); + + expect(response).toMatchResponse({ + status: 200, + headers: { + "Content-Type": "text/plain; charset=utf-8", + }, + cache: true, + }); + + expect(response.headers.get("Content-Length")).toBe(`${mockFileContent.length}`); + expect(response.headers.get(UCD_STAT_SIZE_HEADER)).toBe(`${mockFileContent.length}`); + expect(response.headers.get(UCD_STAT_TYPE_HEADER)).toBe("file"); }); it("should handle HEAD requests for directories", async () => { const html = generateAutoIndexHtml([ - { name: "UnicodeData.txt", path: "/Public/15.1.0/ucd/UnicodeData.txt", type: "file", lastModified: Date.now() }, - { name: "Blocks.txt", path: "/Public/15.1.0/ucd/Blocks.txt", type: "file", lastModified: Date.now() }, + { name: "UnicodeData.txt", path: "15.1.0/ucd/UnicodeData.txt", type: "file", lastModified: Date.now() }, + { name: "Blocks.txt", path: "15.1.0/ucd/Blocks.txt", type: "file", lastModified: Date.now() }, ], "F2"); mockFetch([ @@ -472,9 +1066,11 @@ describe("v1_files", () => { env, ); - expectSuccess(response); - expectContentType(response, "application/json"); - expectCacheHeaders(response); + expect(response).toMatchResponse({ + status: 200, + json: true, + cache: true, + }); expect(response.headers.get(UCD_STAT_TYPE_HEADER)).toBe("directory"); expect(response.headers.get("content-length")).toBeDefined(); expect(response.headers.get("last-modified")).toBeDefined(); @@ -490,7 +1086,7 @@ describe("v1_files", () => { env, ); - expectHeadError(response, 400); + expect(response).toBeHeadError(400); }); it("should handle HEAD requests with '//' segments", async () => { @@ -501,7 +1097,7 @@ describe("v1_files", () => { env, ); - expectHeadError(response, 400); + expect(response).toBeHeadError(400); }); }); @@ -520,7 +1116,7 @@ describe("v1_files", () => { env, ); - expectHeadError(response, 404); + expect(response).toBeHeadError(404); }); it("should handle HEAD requests with 502 from unicode.org", async () => { @@ -539,7 +1135,7 @@ describe("v1_files", () => { env, ); - expectHeadError(response, 502); + expect(response).toBeHeadError(502); }); }); @@ -571,8 +1167,12 @@ describe("v1_files", () => { env, ); - expectSuccess(response); - expectContentType(response, "application/octet-stream"); + expect(response).toMatchResponse({ + status: 200, + headers: { + "Content-Type": "application/octet-stream", + }, + }); }); }); }); diff --git a/apps/api/test/routes/v1_files/search.test.ts b/apps/api/test/routes/v1_files/search.test.ts deleted file mode 100644 index 8bb3359aa..000000000 --- a/apps/api/test/routes/v1_files/search.test.ts +++ /dev/null @@ -1,189 +0,0 @@ -import { HttpResponse, mockFetch } from "#test-utils/msw"; - -import { generateAutoIndexHtml } from "apache-autoindex-parse/test-utils"; -import { env } from "cloudflare:workers"; -import { describe, expect, it } from "vitest"; -import { executeRequest } from "../../helpers/request"; -import { expectApiError, expectSuccess } from "../../helpers/response"; - -describe("v1_files", () => { - // eslint-disable-next-line test/prefer-lowercase-title - describe("GET /api/v1/files/search", () => { - it("should search files by prefix and return files first", async () => { - const html = generateAutoIndexHtml([ - { name: "come", path: "/Public/come", type: "directory", lastModified: Date.now() }, - { name: "computer.txt", path: "/Public/computer.txt", type: "file", lastModified: Date.now() }, - { name: "other.txt", path: "/Public/other.txt", type: "file", lastModified: Date.now() }, - ], "F2"); - - mockFetch([ - ["GET", "https://unicode.org/Public", () => { - return HttpResponse.text(html, { - headers: { "content-type": "text/html; charset=utf-8" }, - }); - }], - ]); - - const { response, json } = await executeRequest( - new Request("https://api.ucdjs.dev/api/v1/files/search?q=com"), - env, - ); - - expectSuccess(response); - const results = await json() as { name: string; type: string }[]; - - expect(results).toHaveLength(2); - // Files should come before directories - expect(results[0]).toMatchObject({ name: "computer.txt", type: "file" }); - expect(results[1]).toMatchObject({ name: "come", type: "directory" }); - }); - - it("should search case-insensitively", async () => { - const html = generateAutoIndexHtml([ - { name: "UnicodeData.txt", path: "/Public/UnicodeData.txt", type: "file", lastModified: Date.now() }, - { name: "Blocks.txt", path: "/Public/Blocks.txt", type: "file", lastModified: Date.now() }, - ], "F2"); - - mockFetch([ - ["GET", "https://unicode.org/Public", () => { - return HttpResponse.text(html, { - headers: { "content-type": "text/html; charset=utf-8" }, - }); - }], - ]); - - const { response, json } = await executeRequest( - new Request("https://api.ucdjs.dev/api/v1/files/search?q=unicode"), - env, - ); - - expectSuccess(response); - const results = await json() as { name: string; type: string }[]; - - expect(results).toHaveLength(1); - expect(results[0]!.name).toBe("UnicodeData.txt"); - }); - - it("should search within a specific path", async () => { - const html = generateAutoIndexHtml([ - { name: "emoji-data.txt", path: "/Public/15.1.0/ucd/emoji/emoji-data.txt", type: "file", lastModified: Date.now() }, - { name: "emoji-sequences.txt", path: "/Public/15.1.0/ucd/emoji/emoji-sequences.txt", type: "file", lastModified: Date.now() }, - { name: "other.txt", path: "/Public/15.1.0/ucd/emoji/other.txt", type: "file", lastModified: Date.now() }, - ], "F2"); - - mockFetch([ - ["GET", "https://unicode.org/Public/15.1.0/ucd/emoji", () => { - return HttpResponse.text(html, { - headers: { "content-type": "text/html; charset=utf-8" }, - }); - }], - ]); - - const { response, json } = await executeRequest( - new Request("https://api.ucdjs.dev/api/v1/files/search?q=emoji&path=15.1.0/ucd/emoji"), - env, - ); - - expectSuccess(response); - const results = await json() as { name: string; type: string }[]; - - expect(results).toHaveLength(2); - expect(results.map((r) => r.name)).toEqual(["emoji-data.txt", "emoji-sequences.txt"]); - }); - - it("should return empty array when no matches found", async () => { - const html = generateAutoIndexHtml([ - { name: "UnicodeData.txt", path: "/Public/UnicodeData.txt", type: "file", lastModified: Date.now() }, - { name: "Blocks.txt", path: "/Public/Blocks.txt", type: "file", lastModified: Date.now() }, - ], "F2"); - - mockFetch([ - ["GET", "https://unicode.org/Public", () => { - return HttpResponse.text(html, { - headers: { "content-type": "text/html; charset=utf-8" }, - }); - }], - ]); - - const { response, json } = await executeRequest( - new Request("https://api.ucdjs.dev/api/v1/files/search?q=nonexistent"), - env, - ); - - expectSuccess(response); - const results = await json(); - expect(results).toEqual([]); - }); - - it("should return empty array when path does not exist", async () => { - mockFetch([ - ["GET", "https://unicode.org/Public/nonexistent/path", () => { - return HttpResponse.text("Not Found", { status: 404 }); - }], - ]); - - const { response, json } = await executeRequest( - new Request("https://api.ucdjs.dev/api/v1/files/search?q=test&path=nonexistent/path"), - env, - ); - - expectSuccess(response); - const results = await json(); - expect(results).toEqual([]); - }); - - it("should reject invalid path with '..'", async () => { - const { response } = await executeRequest( - new Request("https://api.ucdjs.dev/api/v1/files/search?q=test&path=../etc"), - env, - ); - - await expectApiError(response, { status: 400, message: "Invalid path" }); - }); - - it("should reject invalid path with '//'", async () => { - const { response } = await executeRequest( - new Request("https://api.ucdjs.dev/api/v1/files/search?q=test&path=path//double"), - env, - ); - - await expectApiError(response, { status: 400, message: "Invalid path" }); - }); - - it("should return 400 when q parameter is missing", async () => { - const { response } = await executeRequest( - new Request("https://api.ucdjs.dev/api/v1/files/search"), - env, - ); - - await expectApiError(response, { status: 400 }); - }); - - it("should match exact directory name when query matches exactly", async () => { - const html = generateAutoIndexHtml([ - { name: "come", path: "/Public/come", type: "directory", lastModified: Date.now() }, - { name: "computer.txt", path: "/Public/computer.txt", type: "file", lastModified: Date.now() }, - ], "F2"); - - mockFetch([ - ["GET", "https://unicode.org/Public", () => { - return HttpResponse.text(html, { - headers: { "content-type": "text/html; charset=utf-8" }, - }); - }], - ]); - - const { response, json } = await executeRequest( - new Request("https://api.ucdjs.dev/api/v1/files/search?q=come"), - env, - ); - - expectSuccess(response); - const results = await json() as { name: string; type: string }[]; - - // Only the directory matches exactly - expect(results).toHaveLength(1); - expect(results[0]).toMatchObject({ name: "come", type: "directory" }); - }); - }); -}); diff --git a/apps/api/test/unit/files.test.ts b/apps/api/test/unit/files.test.ts index 3986d934b..b6498da4e 100644 --- a/apps/api/test/unit/files.test.ts +++ b/apps/api/test/unit/files.test.ts @@ -5,7 +5,7 @@ import { parseUnicodeDirectory } from "../../src/lib/files"; describe("parseUnicodeDirectory", () => { it("should parse HTML directory listing", async () => { const mockHtml = generateAutoIndexHtml([ - { type: "directory", name: "15.1.0", path: "/15.1.0", lastModified: Date.now() }, + { type: "directory", name: "15.1.0", path: "/15.1.0/", lastModified: Date.now() }, { type: "file", name: "UnicodeData.txt", path: "/UnicodeData.txt", lastModified: Date.now() }, ], "F2"); @@ -15,7 +15,7 @@ describe("parseUnicodeDirectory", () => { expect(result[0]).toEqual({ type: "directory", name: "15.1.0", - path: "/15.1.0", + path: "/15.1.0/", lastModified: expect.any(Number), }); expect(result[1]).toEqual({ @@ -26,7 +26,7 @@ describe("parseUnicodeDirectory", () => { }); }); - it("should trim trailing slashes from names and paths", async () => { + it("should trim trailing slashes from names", async () => { const mockHtml = generateAutoIndexHtml([ { type: "directory", name: "folder/", path: "/folder/", lastModified: Date.now() }, ], "F2"); @@ -34,7 +34,18 @@ describe("parseUnicodeDirectory", () => { const result = await parseUnicodeDirectory(mockHtml); expect(result[0]!.name).toBe("folder"); - expect(result[0]!.path).toBe("/folder"); + expect(result[0]!.path).toBe("/folder/"); + }); + + it("should trim leading slashes from names", async () => { + const mockHtml = generateAutoIndexHtml([ + { type: "directory", name: "/folder", path: "/folder/", lastModified: Date.now() }, + ], "F2"); + + const result = await parseUnicodeDirectory(mockHtml); + + expect(result[0]!.name).toBe("folder"); + expect(result[0]!.path).toBe("/folder/"); }); it("should return empty array when parsing fails", async () => { diff --git a/packages/schemas/src/fs.ts b/packages/schemas/src/fs.ts index 777aa5010..bd1920f44 100644 --- a/packages/schemas/src/fs.ts +++ b/packages/schemas/src/fs.ts @@ -59,10 +59,7 @@ export const FileEntrySchema = z.union([ This schema represents either a directory listing or a file response. `, -}); -// TODO: Add this to the FileEntrySchema -// But we need to add more of the features of #420, before we can do that. -/* .superRefine((data, ctx) => { +}).superRefine((data, ctx) => { // Ensure that directory paths end with a slash if (data.type === "directory" && !data.path.endsWith("/")) { ctx.addIssue({ @@ -78,7 +75,7 @@ export const FileEntrySchema = z.union([ message: "Paths must start with a leading slash ('/').", }); } -}); */ +}); export type FileEntry = z.infer; diff --git a/packages/schemas/test/fs.test.ts b/packages/schemas/test/fs.test.ts index ea4064bf1..4e8b40b66 100644 --- a/packages/schemas/test/fs.test.ts +++ b/packages/schemas/test/fs.test.ts @@ -8,7 +8,7 @@ describe("FileEntrySchema", () => { it("should validate a directory entry", () => { const validDirectory = { name: "docs", - path: "/docs", + path: "/docs/", lastModified: Date.now(), type: "directory", }; @@ -96,7 +96,7 @@ describe("FileEntryListSchema", () => { }, { name: "folder", - path: "/folder", + path: "/folder/", lastModified: Date.now(), type: "directory", },