From bfae0313a4a778ab6b52bd0110eb81e5c7356a69 Mon Sep 17 00:00:00 2001 From: Chris Campbell Date: Mon, 7 Apr 2025 17:13:12 -0700 Subject: [PATCH 1/4] fix: throw error if invalid link syntax is detected --- packages/docs-builder/src/parse.spec.ts | 45 +++++++++++++++++++++++++ packages/docs-builder/src/parse.ts | 19 +++++++++++ 2 files changed, 64 insertions(+) diff --git a/packages/docs-builder/src/parse.spec.ts b/packages/docs-builder/src/parse.spec.ts index dcb1980..c6ce1e1 100644 --- a/packages/docs-builder/src/parse.spec.ts +++ b/packages/docs-builder/src/parse.spec.ts @@ -275,4 +275,49 @@ there 'Unresolved reference-style link found for lang=en link=[This is a link][unknown_ref]' ) }) + + it('should throw an error if invalid link syntax is detected', () => { + const links = `\ +This is a valid normal link: [page](https://climateinteractive.org) + +This is a valid reference-style link: [page][ref] + +This is an invalid normal link: [page] (https://climateinteractive.org) + +This is an invalid reference-style link: [page] [ref] +` + + const md = `\ +# Section 1 + + + +${links} + + + +[ref]: https://climateinteractive.org +` + + function expectedMsg(lang: string) { + const langPart = lang === 'en' ? '' : `lang=${lang} ` + return `\ +Detected invalid link syntax: +[page] (https://climateinteractive.org) +[page] [ref] +To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref] (${langPart}page=page_1.md scope=section_1)` + } + + // Verify that an error is thrown if the English content contains invalid link syntax + const enContext = new Context(config, 'en') + expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(expectedMsg('en')) + + // Verify that an error is thrown if the translated content contains invalid link syntax + const deBlocks = new Map([ + ['section_1__title', 'Section 1'], + ['section_1__block_1', links] + ]) + const deContext = new Context(config, 'de', undefined, deBlocks) + expect(() => parseMarkdownPageContent(deContext, 'page_1.md', md)).toThrow(expectedMsg('de')) + }) }) diff --git a/packages/docs-builder/src/parse.ts b/packages/docs-builder/src/parse.ts index 1cddb18..0363619 100644 --- a/packages/docs-builder/src/parse.ts +++ b/packages/docs-builder/src/parse.ts @@ -517,6 +517,7 @@ function addBlockForTokens( sep = '\n\n' } const blockText = blockParts.join(sep) + checkForInvalidLinkSyntax(context, blockText) context.addBlock(localBlockId, blockText) return tokens } else { @@ -525,6 +526,7 @@ function addBlockForTokens( const blockText = context.getTranslatedBlockText(fullBlockId) if (blockText) { // Parse the translated tokens and insert them + checkForInvalidLinkSyntax(context, blockText) // XXX: If there is more than one token (even if some are whitespace only), we // need to include extra newlines after the last block, otherwise marked will // not parse the text correctly @@ -539,6 +541,23 @@ function addBlockForTokens( } } +/** + * Throw an error if the given text contains invalid link syntax. + */ +function checkForInvalidLinkSyntax(context: Context, md: string): void { + const invalidLinkRegExp = /\[([^\]]+)\]\s+\(([^)]+)\)|\[([^\]]+)\]\s+\[([^\]]+)\]/g + const matches = md.match(invalidLinkRegExp) + if (matches) { + let msg = 'Detected invalid link syntax:\n' + for (const match of matches) { + msg += `${match}\n` + } + msg += + 'To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref]' + throw new Error(context.getScopedMessage(msg)) + } +} + /** * Log a warning if the given text token is on a translatable page but is * not included in a `def` or `begin/end-def` pair. From 63cf56ddcc8b7b453321900069996a821f442cbc Mon Sep 17 00:00:00 2001 From: Chris Campbell Date: Tue, 8 Apr 2025 12:41:13 -0700 Subject: [PATCH 2/4] fix: move invalid syntax detection to gen HTML phase --- packages/docs-builder/src/gen-html.spec.ts | 103 ++++++++++++++++++++- packages/docs-builder/src/gen-html.ts | 30 ++++++ packages/docs-builder/src/parse.spec.ts | 45 --------- packages/docs-builder/src/parse.ts | 19 ---- 4 files changed, 132 insertions(+), 65 deletions(-) diff --git a/packages/docs-builder/src/gen-html.spec.ts b/packages/docs-builder/src/gen-html.spec.ts index 7ce6f66..a82d4b9 100644 --- a/packages/docs-builder/src/gen-html.spec.ts +++ b/packages/docs-builder/src/gen-html.spec.ts @@ -2,7 +2,108 @@ import { describe, expect, it } from 'vitest' -import { convertMarkdownToHtml, subscriptify } from './gen-html' +import type { Config } from './config' +import { Context } from './context' +import { convertMarkdownToHtml, generateHtml, subscriptify } from './gen-html' +import { parseMarkdownPageContent } from './parse' + +const config: Config = { + mode: 'development', + baseProjDir: 'xxx', + sourceDir: 'xxx', + outDir: 'xxx', + version: '25.1.0', + langs: [{ code: 'de', version: '25.1.0' }], + formats: [], + template: 'default', + author: 'Climate Interactive', + logoPath: 'xxx', + defs: [], + pages: ['page_1.md'], + untranslated: [], + options: {} +} + +describe('generateHtml', () => { + it('should convert valid Markdown', () => { + const md = `\ +This is a valid normal link: [page](https://climateinteractive.org) + +This is a valid reference-style link: [page][ref] + +This is a valid normal link: [page](https://climateinteractive.org) (with parentheses after) and more text + +This is a valid reference-style link: [page][ref] (with parentheses after) and more text + +[ref]: https://climateinteractive.org +` + + const html = generateHtml(new Context(config, 'en'), 'page_1.md', { raw: md }) + expect(html.baseName).toBe('page_1') + expect(html.relPath).toBe('page_1.html') + expect(html.body).toBe(`\ +

This is a valid normal link: page

+

This is a valid reference-style link: page

+

This is a valid normal link: page (with parentheses after) and more text

+

This is a valid reference-style link: page (with parentheses after) and more text

+`) + }) + + it.only('should throw an error if invalid link syntax is detected', () => { + const links = `\ +This is a valid normal link: [page](https://climateinteractive.org) + +This is a valid reference-style link: [page][ref] + +This is a valid normal link: [page](https://climateinteractive.org) (with parentheses after) and more text + +This is a valid reference-style link: [page][ref] (with parentheses after) and more text + +This is an invalid normal link: [page] (https://climateinteractive.org) (with parentheses after) and more text + +This is an invalid reference-style link: [page] [ref] (with parentheses after) and more text +` + + const md = `\ +# Section 1 + + + +${links} + + + +[ref]: https://climateinteractive.org +` + + // Verify that an error is thrown if the English content contains invalid link syntax. + // Note that in the English case, the invalid ref link will be converted to an HTML link. + const enContext = new Context(config, 'en') + const enMd = parseMarkdownPageContent(enContext, 'page_1.md', md) + expect(() => generateHtml(enContext, 'page_1.md', { raw: enMd.raw })).toThrow(`\ +Detected invalid Markdown link syntax in the generated HTML: +[page] (<a href +[page] <a href +To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref] (page=page_1.md)`) + + // Verify that an error is thrown if the translated content contains invalid link syntax. + // Note that in the non-English case, the invalid ref link target will not be converted + // to an HTML link (unlike the English case above), so the error message will be different. + const deContext = enContext.derive( + 'de', + new Map([ + ['section_1__title', 'Section 1'], + ['section_1__block_1', links] + ]) + ) + const deMd = parseMarkdownPageContent(deContext, 'page_1.md', md) + expect(() => generateHtml(deContext, 'page_1.md', { raw: deMd.raw })).toThrow(`\ +Detected invalid Markdown link syntax in the generated HTML: +[page] (<a href +[page] [ref] +To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref] (lang=de page=page_1.md)`) + }) +}) describe('subscriptify', () => { it('should convert chemical formulas', () => { diff --git a/packages/docs-builder/src/gen-html.ts b/packages/docs-builder/src/gen-html.ts index 6406f54..70f1b22 100644 --- a/packages/docs-builder/src/gen-html.ts +++ b/packages/docs-builder/src/gen-html.ts @@ -144,6 +144,9 @@ export function generateHtml(context: Context, mdRelPath: string, mdPage: Markdo // Convert the Markdown content to HTML const body = convertMarkdownToHtml(context, md) + // Check for evidence of invalid Markdown link syntax that remains in the generated HTML + checkForInvalidLinkSyntax(context, body) + // Save the names of the `` fragments to include const headFragments = mdPage.frontmatter?.fragments?.head || [] @@ -617,3 +620,30 @@ export function subscriptify(s: string): string { return subscriptMap.get(m1) }) } + +// This will match cases where a space in the Markdown link syntax caused the link parts +// to be converted to separate elements in the HTML output, for example: +// Markdown: [text] (https://example.com) +// HTML: [text] (https://example.com) +// Markdown: [text] [ref] +// HTML (en): [text] ref +// HTML (xx): [text] [ref] +// Note that the generated HTML in the second example is different for the English and +// non-English cases (due to different parsing code paths), so we need to detect both. +const invalidLinkRegExp = /\[([^\]]+)\]\s+(\(? { - const links = `\ -This is a valid normal link: [page](https://climateinteractive.org) - -This is a valid reference-style link: [page][ref] - -This is an invalid normal link: [page] (https://climateinteractive.org) - -This is an invalid reference-style link: [page] [ref] -` - - const md = `\ -# Section 1 - - - -${links} - - - -[ref]: https://climateinteractive.org -` - - function expectedMsg(lang: string) { - const langPart = lang === 'en' ? '' : `lang=${lang} ` - return `\ -Detected invalid link syntax: -[page] (https://climateinteractive.org) -[page] [ref] -To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref] (${langPart}page=page_1.md scope=section_1)` - } - - // Verify that an error is thrown if the English content contains invalid link syntax - const enContext = new Context(config, 'en') - expect(() => parseMarkdownPageContent(enContext, 'page_1.md', md)).toThrow(expectedMsg('en')) - - // Verify that an error is thrown if the translated content contains invalid link syntax - const deBlocks = new Map([ - ['section_1__title', 'Section 1'], - ['section_1__block_1', links] - ]) - const deContext = new Context(config, 'de', undefined, deBlocks) - expect(() => parseMarkdownPageContent(deContext, 'page_1.md', md)).toThrow(expectedMsg('de')) - }) }) diff --git a/packages/docs-builder/src/parse.ts b/packages/docs-builder/src/parse.ts index 0363619..1cddb18 100644 --- a/packages/docs-builder/src/parse.ts +++ b/packages/docs-builder/src/parse.ts @@ -517,7 +517,6 @@ function addBlockForTokens( sep = '\n\n' } const blockText = blockParts.join(sep) - checkForInvalidLinkSyntax(context, blockText) context.addBlock(localBlockId, blockText) return tokens } else { @@ -526,7 +525,6 @@ function addBlockForTokens( const blockText = context.getTranslatedBlockText(fullBlockId) if (blockText) { // Parse the translated tokens and insert them - checkForInvalidLinkSyntax(context, blockText) // XXX: If there is more than one token (even if some are whitespace only), we // need to include extra newlines after the last block, otherwise marked will // not parse the text correctly @@ -541,23 +539,6 @@ function addBlockForTokens( } } -/** - * Throw an error if the given text contains invalid link syntax. - */ -function checkForInvalidLinkSyntax(context: Context, md: string): void { - const invalidLinkRegExp = /\[([^\]]+)\]\s+\(([^)]+)\)|\[([^\]]+)\]\s+\[([^\]]+)\]/g - const matches = md.match(invalidLinkRegExp) - if (matches) { - let msg = 'Detected invalid link syntax:\n' - for (const match of matches) { - msg += `${match}\n` - } - msg += - 'To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref]' - throw new Error(context.getScopedMessage(msg)) - } -} - /** * Log a warning if the given text token is on a translatable page but is * not included in a `def` or `begin/end-def` pair. From 650d274145545914d68b81bc708dd640f9820266 Mon Sep 17 00:00:00 2001 From: Chris Campbell Date: Tue, 8 Apr 2025 12:42:43 -0700 Subject: [PATCH 3/4] test: remove only --- packages/docs-builder/src/gen-html.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/docs-builder/src/gen-html.spec.ts b/packages/docs-builder/src/gen-html.spec.ts index a82d4b9..aaaa20c 100644 --- a/packages/docs-builder/src/gen-html.spec.ts +++ b/packages/docs-builder/src/gen-html.spec.ts @@ -49,7 +49,7 @@ This is a valid reference-style link: [page][ref] (with parentheses after) and m `) }) - it.only('should throw an error if invalid link syntax is detected', () => { + it('should throw an error if invalid link syntax is detected', () => { const links = `\ This is a valid normal link: [page](https://climateinteractive.org) From a2f73b6a9ab52caba0897c92f846cf164a2d825d Mon Sep 17 00:00:00 2001 From: Chris Campbell Date: Tue, 8 Apr 2025 12:52:38 -0700 Subject: [PATCH 4/4] fix: pass options to parse instead of using global setOptions, which can affect tests --- packages/docs-builder/src/gen-html.spec.ts | 2 +- packages/docs-builder/src/gen-html.ts | 4 +++- packages/docs-builder/src/parse.ts | 5 ----- 3 files changed, 4 insertions(+), 7 deletions(-) diff --git a/packages/docs-builder/src/gen-html.spec.ts b/packages/docs-builder/src/gen-html.spec.ts index aaaa20c..208ce2a 100644 --- a/packages/docs-builder/src/gen-html.spec.ts +++ b/packages/docs-builder/src/gen-html.spec.ts @@ -130,7 +130,7 @@ describe('convertMarkdownToHtml', () => { '

This is -CO2-

\n' ) expect(convertMarkdownToHtml(undefined, '# This is CO2')).toBe( - '

This is CO2

\n' + '

This is CO2

\n' ) expect(convertMarkdownToHtml(undefined, '> This is _CO2_')).toBe( '
\n

This is CO2

\n
\n' diff --git a/packages/docs-builder/src/gen-html.ts b/packages/docs-builder/src/gen-html.ts index 70f1b22..d80d85d 100644 --- a/packages/docs-builder/src/gen-html.ts +++ b/packages/docs-builder/src/gen-html.ts @@ -595,7 +595,9 @@ export function convertMarkdownToHtml(context: Context, md: string): string { }) // Parse the Markdown into HTML - return marked.parse(md) + return marked.parse(md, { + headerIds: false + }) } /** diff --git a/packages/docs-builder/src/parse.ts b/packages/docs-builder/src/parse.ts index 1cddb18..dd0b4b3 100644 --- a/packages/docs-builder/src/parse.ts +++ b/packages/docs-builder/src/parse.ts @@ -60,11 +60,6 @@ export function parseMarkdownPageContent( relPath: string, origMarkdownWithFrontmatter: string ): MarkdownPage { - // Configure marked.js - marked.setOptions({ - headerIds: false - }) - // Separate frontmatter from the content const origMarkdownSeparated = matter(origMarkdownWithFrontmatter) const origMarkdown = origMarkdownSeparated.content