Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 50 additions & 14 deletions src/outputs/writers.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import * as fs from "node:fs/promises";
import * as path from "node:path";
import * as os from "node:os";
import {
ContentItem,
Thread,
Expand All @@ -17,33 +18,66 @@ async function ensureDir(p: string) {
await fs.mkdir(p, { recursive: true });
}

const DEFAULT_COPY_CONCURRENCY = Math.max(
2,
Math.min(
32,
typeof os.availableParallelism === "function"
? os.availableParallelism()
: 8
)
);

/**
* Copy media attachments for a set of items into imagesDir, prefixing names with "_".
* If an attachment lacks absPath, it will be skipped with a warning.
* Copies are performed with bounded concurrency to speed up large archives.
*/
async function copyMedia(
items: ContentItem[],
imagesDir: string,
logger: (l: Level, m: string) => void,
logger: (l: Level, m: string) => void
) {
await ensureDir(imagesDir);

const copies: Array<{ src: string; dest: string }> = [];
for (const it of items) {
for (const m of it.media ?? []) {
const base = m.absPath ? path.basename(m.absPath) : `${m.id}.bin`;
if (!m.absPath) {
logger("warn", `No absPath for media ${m.id}; skipping copy`);
continue;
}
const base = path.basename(m.absPath);
copies.push({ src: m.absPath, dest: path.join(imagesDir, `_${base}`) });
}
}

if (!copies.length) return;

const parsedEnv = Number.parseInt(
process.env.SPLICE_MEDIA_CONCURRENCY ?? "",
10
);
const concurrency =
Number.isFinite(parsedEnv) && parsedEnv > 0
? parsedEnv
: DEFAULT_COPY_CONCURRENCY;

let next = 0;
const worker = async () => {
while (true) {
const idx = next++;
if (idx >= copies.length) break;
const { src, dest } = copies[idx];
try {
if (!m.absPath) {
logger("warn", `No absPath for media ${m.id}; skipping copy`);
continue;
}
await fs.copyFile(m.absPath, path.join(imagesDir, `_${base}`));
await fs.copyFile(src, dest);
} catch (e) {
logger(
"warn",
`Failed to copy media ${m.absPath ?? m.id}: ${(e as Error).message}`,
);
logger("warn", `Failed to copy media ${src}: ${(e as Error).message}`);
}
}
}
};

await Promise.all(Array.from({ length: concurrency }, worker));
}

/**
Expand Down Expand Up @@ -83,7 +117,7 @@ function isolateQuotedTweetLinks(text: string): string {

/**
* Write Markdown outputs:
* - threads/&lt;yyyymmdd&gt;-thread-&lt;slug&gt;.md with frontmatter, cleaned text, media links, and link to Twitter
* - threads/&lt;yyyymmdd&gt;/&lt;slug&gt;.md with frontmatter, cleaned text, media links, and link to Twitter
* - tweets/&lt;yyyymmdd&gt;-tweet-&lt;slug&gt;.md for non-thread tweets (excluding RTs)
* - images/_&lt;file&gt; copied for referenced items
*/
Expand Down Expand Up @@ -142,7 +176,9 @@ export async function writeMarkdown(
const ymd = date.replace(/-/g, "");
const filePath = path.join(threadsDir, `${ymd}/${name}.md`);
const topLink = `https://twitter.com/i/web/status/${first.id}`;
const body = `${fm}\n${parts.join("\n\n")}\n\n[View on Twitter](${topLink})`;
const body = `${fm}\n${parts.join(
"\n\n",
)}\n\n[View on Twitter](${topLink})`;

if (dryRun) {
logger("info", `(dry-run) would write thread file: ${filePath}`);
Expand Down
Loading