diff --git a/src/outputs/writers.ts b/src/outputs/writers.ts index 1b41376..246c173 100644 --- a/src/outputs/writers.ts +++ b/src/outputs/writers.ts @@ -1,5 +1,6 @@ import * as fs from "node:fs/promises"; import * as path from "node:path"; +import * as os from "node:os"; import { ContentItem, Thread, @@ -17,33 +18,66 @@ async function ensureDir(p: string) { await fs.mkdir(p, { recursive: true }); } +const DEFAULT_COPY_CONCURRENCY = Math.max( + 2, + Math.min( + 32, + typeof os.availableParallelism === "function" + ? os.availableParallelism() + : 8 + ) +); + /** * Copy media attachments for a set of items into imagesDir, prefixing names with "_". * If an attachment lacks absPath, it will be skipped with a warning. + * Copies are performed with bounded concurrency to speed up large archives. */ async function copyMedia( items: ContentItem[], imagesDir: string, - logger: (l: Level, m: string) => void, + logger: (l: Level, m: string) => void ) { await ensureDir(imagesDir); + + const copies: Array<{ src: string; dest: string }> = []; for (const it of items) { for (const m of it.media ?? []) { - const base = m.absPath ? path.basename(m.absPath) : `${m.id}.bin`; + if (!m.absPath) { + logger("warn", `No absPath for media ${m.id}; skipping copy`); + continue; + } + const base = path.basename(m.absPath); + copies.push({ src: m.absPath, dest: path.join(imagesDir, `_${base}`) }); + } + } + + if (!copies.length) return; + + const parsedEnv = Number.parseInt( + process.env.SPLICE_MEDIA_CONCURRENCY ?? "", + 10 + ); + const concurrency = + Number.isFinite(parsedEnv) && parsedEnv > 0 + ? parsedEnv + : DEFAULT_COPY_CONCURRENCY; + + let next = 0; + const worker = async () => { + while (true) { + const idx = next++; + if (idx >= copies.length) break; + const { src, dest } = copies[idx]; try { - if (!m.absPath) { - logger("warn", `No absPath for media ${m.id}; skipping copy`); - continue; - } - await fs.copyFile(m.absPath, path.join(imagesDir, `_${base}`)); + await fs.copyFile(src, dest); } catch (e) { - logger( - "warn", - `Failed to copy media ${m.absPath ?? m.id}: ${(e as Error).message}`, - ); + logger("warn", `Failed to copy media ${src}: ${(e as Error).message}`); } } - } + }; + + await Promise.all(Array.from({ length: concurrency }, worker)); } /** @@ -83,7 +117,7 @@ function isolateQuotedTweetLinks(text: string): string { /** * Write Markdown outputs: - * - threads/<yyyymmdd>-thread-<slug>.md with frontmatter, cleaned text, media links, and link to Twitter + * - threads/<yyyymmdd>/<slug>.md with frontmatter, cleaned text, media links, and link to Twitter * - tweets/<yyyymmdd>-tweet-<slug>.md for non-thread tweets (excluding RTs) * - images/_<file> copied for referenced items */ @@ -142,7 +176,9 @@ export async function writeMarkdown( const ymd = date.replace(/-/g, ""); const filePath = path.join(threadsDir, `${ymd}/${name}.md`); const topLink = `https://twitter.com/i/web/status/${first.id}`; - const body = `${fm}\n${parts.join("\n\n")}\n\n[View on Twitter](${topLink})`; + const body = `${fm}\n${parts.join( + "\n\n", + )}\n\n[View on Twitter](${topLink})`; if (dryRun) { logger("info", `(dry-run) would write thread file: ${filePath}`);