climateinteractive · chrispcampbell · Apr 8, 2025 · Apr 8, 2025 · Apr 8, 2025 · Apr 8, 2025
diff --git a/packages/docs-builder/src/gen-html.spec.ts b/packages/docs-builder/src/gen-html.spec.ts
@@ -2,7 +2,108 @@
 
 import { describe, expect, it } from 'vitest'
 
-import { convertMarkdownToHtml, subscriptify } from './gen-html'
+import type { Config } from './config'
+import { Context } from './context'
+import { convertMarkdownToHtml, generateHtml, subscriptify } from './gen-html'
+import { parseMarkdownPageContent } from './parse'
+
+const config: Config = { // Config fixture shared by the generateHtml tests below
+  mode: 'development',
+  baseProjDir: 'xxx', // NOTE(review): the 'xxx' values look like placeholders — confirm these tests never read them
+  sourceDir: 'xxx',
+  outDir: 'xxx',
+  version: '25.1.0',
+  langs: [{ code: 'de', version: '25.1.0' }], // 'de' is the language the translated-content test derives a context for
+  formats: [],
+  template: 'default',
+  author: 'Climate Interactive',
+  logoPath: 'xxx',
+  defs: [],
+  pages: ['page_1.md'], // same page path that the tests below pass to generateHtml
+  untranslated: [],
+  options: {}
+}
+
+describe('generateHtml', () => {
+  it('should convert valid Markdown', () => {
+    const md = `\
+This is a valid normal link: [page](https://climateinteractive.org)
+
+This is a valid reference-style link: [page][ref]
+
+This is a valid normal link: [page](https://climateinteractive.org) (with parentheses after) and more text
+
+This is a valid reference-style link: [page][ref] (with parentheses after) and more text
+
+[ref]: https://climateinteractive.org
+`
+
+    const html = generateHtml(new Context(config, 'en'), 'page_1.md', { raw: md })
+    expect(html.baseName).toBe('page_1')
+    expect(html.relPath).toBe('page_1.html')
+    expect(html.body).toBe(`\
+<p>This is a valid normal link: <a href="https://climateinteractive.org">page</a></p>
+<p>This is a valid reference-style link: <a href="https://climateinteractive.org">page</a></p>
+<p>This is a valid normal link: <a href="https://climateinteractive.org">page</a> (with parentheses after) and more text</p>
+<p>This is a valid reference-style link: <a href="https://climateinteractive.org">page</a> (with parentheses after) and more text</p>
+`)
+  })
+
+  it('should throw an error if invalid link syntax is detected', () => {
+    const links = `\
+This is a valid normal link: [page](https://climateinteractive.org)
+
+This is a valid reference-style link: [page][ref]
+
+This is a valid normal link: [page](https://climateinteractive.org) (with parentheses after) and more text
+
+This is a valid reference-style link: [page][ref] (with parentheses after) and more text
+
+This is an invalid normal link: [page] (https://climateinteractive.org) (with parentheses after) and more text
+
+This is an invalid reference-style link: [page] [ref] (with parentheses after) and more text
+`
+
+    const md = `\
+# <!-- section:section_1 -->Section 1
+
+<!-- begin-def:block_1 -->
+
+${links}
+
+<!-- end-def -->
+
+[ref]: https://climateinteractive.org
+`
+
+    // Verify that an error is thrown if the English content contains invalid link syntax.
+    // Note that in the English case, the `[ref]` of the invalid ref link is still converted to an HTML link (hence `&lt;a href`).
+    const enContext = new Context(config, 'en')
+    const enMd = parseMarkdownPageContent(enContext, 'page_1.md', md)
+    expect(() => generateHtml(enContext, 'page_1.md', { raw: enMd.raw })).toThrow(`\
+Detected invalid Markdown link syntax in the generated HTML:
+[page] (&lt;a href
+[page] &lt;a href
+To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref] (page=page_1.md)`)
+
+    // Verify that an error is thrown if the translated content contains invalid link syntax.
+    // Note that in the non-English case, the invalid ref link target will not be converted to an
+    // HTML link (unlike the English case above), so it is reported as `[page] [ref]` with a `lang=de` scope.
+    const deContext = enContext.derive(
+      'de',
+      new Map([
+        ['section_1__title', 'Section 1'],
+        ['section_1__block_1', links] // the "translation" reuses the same text, invalid links included
+      ])
+    )
+    const deMd = parseMarkdownPageContent(deContext, 'page_1.md', md)
+    expect(() => generateHtml(deContext, 'page_1.md', { raw: deMd.raw })).toThrow(`\
+Detected invalid Markdown link syntax in the generated HTML:
+[page] (&lt;a href
+[page] [ref]
+To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref] (lang=de page=page_1.md)`)
+  })
+})
 
 describe('subscriptify', () => {
   it('should convert chemical formulas', () => {
@@ -29,7 +130,7 @@ describe('convertMarkdownToHtml', () => {
       '<p>This is -CO<sub>2</sub>-</p>\n'
     )
     expect(convertMarkdownToHtml(undefined, '# This is CO2')).toBe(
-      '<h1 id="this-is-co2">This is CO<sub>2</sub></h1>\n'
+      '<h1>This is CO<sub>2</sub></h1>\n'
     )
     expect(convertMarkdownToHtml(undefined, '> This is _CO2_')).toBe(
       '<blockquote>\n<p>This is <em>CO<sub>2</sub></em></p>\n</blockquote>\n'

diff --git a/packages/docs-builder/src/gen-html.ts b/packages/docs-builder/src/gen-html.ts
@@ -144,6 +144,9 @@ export function generateHtml(context: Context, mdRelPath: string, mdPage: Markdo
   // Convert the Markdown content to HTML
   const body = convertMarkdownToHtml(context, md)
 
+  // Check for evidence of invalid Markdown link syntax that remains in the generated HTML
+  checkForInvalidLinkSyntax(context, body)
+
   // Save the names of the `<head>` fragments to include
   const headFragments = mdPage.frontmatter?.fragments?.head || []
 
@@ -592,7 +595,9 @@ export function convertMarkdownToHtml(context: Context, md: string): string {
   })
 
   // Parse the Markdown into HTML
-  return marked.parse(md)
+  return marked.parse(md, {
+    headerIds: false
+  })
 }
 
 /**
@@ -617,3 +622,30 @@ export function subscriptify(s: string): string {
     return subscriptMap.get(m1)
   })
 }
+
+// Matches `[text]`, whitespace, then either an `<a ...` tag (optionally preceded by `(`) or a
+// second `[...]` group: the residue left when a space splits a Markdown link, for example:
+//   Markdown:  [text] (https://example.com)
+//   HTML:      [text] (<a href="https://example.com">https://example.com</a>)
+//   Markdown:  [text] [ref]
+//   HTML (en): [text] <a href="https://climateinteractive.org">ref</a>
+//   HTML (xx): [text] [ref]
+// The reference-style example renders differently for English and non-English content (due to
+// different parsing code paths), so both forms are matched; `g` makes `match` return them all.
+const invalidLinkRegExp = /\[([^\]]+)\]\s+(\(?<a\s\w+|\[([^\]]+)\])/g
+
+/**
+ * Throw an error if the given HTML text contains evidence of invalid Markdown link syntax.
+ * The message lists every match of `invalidLinkRegExp` in `md`, with each `<` escaped as
+ * `&lt;`, and is passed through `context.getScopedMessage` before being thrown.
+ */
+function checkForInvalidLinkSyntax(context: Context, md: string): void {
+  const matches = md.match(invalidLinkRegExp)
+  if (!matches) return
+  // Use a global pattern here; a string pattern would only replace the first `<`
+  const found = matches.map(match => `${match.replace(/</g, '&lt;')}\n`).join('')
+  const fix =
+    'To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref]'
+  const msg = `Detected invalid Markdown link syntax in the generated HTML:\n${found}${fix}`
+  throw new Error(context.getScopedMessage(msg))
+}
diff --git a/packages/docs-builder/src/parse.ts b/packages/docs-builder/src/parse.ts
@@ -60,11 +60,6 @@ export function parseMarkdownPageContent(
   relPath: string,
   origMarkdownWithFrontmatter: string
 ): MarkdownPage {
-  // Configure marked.js
-  marked.setOptions({
-    headerIds: false
-  })
-
   // Separate frontmatter from the content
   const origMarkdownSeparated = matter(origMarkdownWithFrontmatter)
   const origMarkdown = origMarkdownSeparated.content