diff --git a/src/api/crawler.ts b/src/api/crawler.ts
index c5679bcd..b6d0707e 100644
--- a/src/api/crawler.ts
+++ b/src/api/crawler.ts
@@ -260,6 +260,14 @@ export class CrawlerHost extends RPCHost {
         crawlerOptionsHeaderOnly: CrawlerOptionsHeaderOnly,
         crawlerOptionsParamsAllowed: CrawlerOptions,
     ) {
+        // Mask credential-bearing headers (Node lower-cases header names) so secrets never reach the logs.
+        const loggableHeaders: Record<string, unknown> = { ...ctx.request.headers };
+        for (const name of ['authorization', 'proxy-authorization', 'cookie', 'x-proxy-url']) {
+            if (name in loggableHeaders) {
+                loggableHeaders[name] = '[REDACTED]';
+            }
+        }
+        this.logger.debug('Incoming headers', { headers: loggableHeaders });
         const uid = await auth.solveUID();
         let chargeAmount = 0;
         const crawlerOptions = ctx.method === 'GET' ? crawlerOptionsHeaderOnly : crawlerOptionsParamsAllowed;
diff --git a/src/services/registry.ts b/src/services/registry.ts
index e4257a18..7c7640e3 100644
--- a/src/services/registry.ts
+++ b/src/services/registry.ts
@@ -24,6 +24,8 @@ export class RPCRegistry extends KoaRPCRegistry {
 
     override koaMiddlewares = [
         this.__CORSAllowAllMiddleware.bind(this),
+        // Map UI-friendly query params -> request headers so GET requests from interactive builder work
+        this.__mapQueryToHeaders.bind(this),
         bodyParser({
             encoding: 'utf-8',
             enableTypes: ['json', 'form'],
@@ -53,6 +55,87 @@ export class RPCRegistry extends KoaRPCRegistry {
         this.emit('ready');
     }
 
+    /**
+     * Koa middleware that translates well-known query parameters into request headers so that:
+     * - GET requests (used by the interactive builder) can pass options in query params,
+     * - downstream code that inspects headers (x-respond-with, x-with-generated-alt, Accept, etc.)
+     *   will behave identically.
+     *
+     * Rules:
+     * - The mapping runs for every HTTP method, not only GET.
+     * - A mapped query parameter overwrites a header of the same name sent by the client.
+     * - When a parameter is repeated (?timeout=10&timeout=30), the last occurrence wins.
+     * - Boolean flags given without a value (e.g. ?no_cache) are mapped to 'true'; any other
+     *   parameter with an empty value is ignored.
+     * - Values must be URL-encoded. In particular '#' must be sent as %23, otherwise the rest
+     *   of the URL is a fragment and never reaches the server.
+     * - Mapping failures are logged and never fail the request.
+     *
+     * Example mappings:
+     *   ?respond_with=markdown          => x-respond-with: markdown
+     *   ?with_generated_alt=true        => x-with-generated-alt: true
+     *   ?no_cache=true                  => x-no-cache: true
+     *   ?wait_for_selector=%23content   => x-wait-for-selector: #content
+     *   ?target_selector=.main          => x-target-selector: .main
+     *   ?proxy=https://proxy:3128       => x-proxy-url: https://proxy:3128
+     *   ?cache_tolerance=120            => x-cache-tolerance: 120
+     *   ?timeout=30                     => x-timeout: 30
+     *   ?accept=application/json        => Accept: application/json
+     */
+    async __mapQueryToHeaders(ctx: any, next: any) {
+        try {
+            const query = ctx.query || {};
+            // ctx is untyped here; give the header bag a concrete type once instead of casting per assignment.
+            const headers = ctx.request.headers as Record<string, string>;
+            const headerByParam: Record<string, string> = {
+                'respond_with': 'x-respond-with',
+                'respondWith': 'x-respond-with',
+                'with_generated_alt': 'x-with-generated-alt',
+                'withGeneratedAlt': 'x-with-generated-alt',
+                'no_cache': 'x-no-cache',
+                'noCache': 'x-no-cache',
+                'wait_for_selector': 'x-wait-for-selector',
+                'waitForSelector': 'x-wait-for-selector',
+                'target_selector': 'x-target-selector',
+                'targetSelector': 'x-target-selector',
+                'proxy': 'x-proxy-url',
+                'proxy_url': 'x-proxy-url',
+                'proxyUrl': 'x-proxy-url',
+                'cache_tolerance': 'x-cache-tolerance',
+                'cacheTolerance': 'x-cache-tolerance',
+                'timeout': 'x-timeout',
+                'accept': 'accept',
+            };
+
+            // Repeated parameters (?a=1&a=2) may arrive as an array; the last occurrence wins.
+            const lastValue = (param: string) => {
+                const raw = query[param];
+                return Array.isArray(raw) ? raw[raw.length - 1] : raw;
+            };
+
+            for (const [param, headerName] of Object.entries(headerByParam)) {
+                const value = lastValue(param);
+                if (value !== undefined && value !== null && String(value) !== '') {
+                    headers[headerName] = String(value);
+                }
+            }
+
+            // Support boolean flags present without value (e.g. ?with_generated_alt)
+            const booleanFlags = ['with_generated_alt', 'withGeneratedAlt', 'no_cache', 'noCache'];
+            for (const flag of booleanFlags) {
+                const value = lastValue(flag);
+                if (Object.prototype.hasOwnProperty.call(query, flag) && (value === '' || value === undefined)) {
+                    headers[headerByParam[flag]] = 'true';
+                }
+            }
+        } catch (err) {
+            // don't fail the request on mapping errors; log and continue
+            this.logger.warn(`Failed to map query params to headers: ${err}`, { err });
+        }
+
+        return next();
+    }
+
 }
 
 const instance = container.resolve(RPCRegistry);