Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 103 additions & 2 deletions packages/docs-builder/src/gen-html.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,108 @@

import { describe, expect, it } from 'vitest'

import { convertMarkdownToHtml, subscriptify } from './gen-html'
import type { Config } from './config'
import { Context } from './context'
import { convertMarkdownToHtml, generateHtml, subscriptify } from './gen-html'
import { parseMarkdownPageContent } from './parse'

// Minimal configuration shared by the `generateHtml` tests in this file.
// Path-like fields are set to the placeholder string 'xxx' (presumably never
// resolved on disk by these tests -- TODO confirm).
const config: Config = {
mode: 'development',
baseProjDir: 'xxx',
sourceDir: 'xxx',
outDir: 'xxx',
version: '25.1.0',
// One non-English language ('de'); the tests below derive a 'de' context to
// exercise the translated-content path
langs: [{ code: 'de', version: '25.1.0' }],
formats: [],
template: 'default',
author: 'Climate Interactive',
logoPath: 'xxx',
defs: [],
// The single page name that the tests below pass to `generateHtml`
pages: ['page_1.md'],
untranslated: [],
options: {}
}

describe('generateHtml', () => {
  // Every case renders the same (and only) page listed in `config.pages`
  const pagePath = 'page_1.md'

  it('should convert valid Markdown', () => {
    // Well-formed inline and reference-style links, with and without trailing parentheses
    const pageSource = `\
This is a valid normal link: [page](https://climateinteractive.org)

This is a valid reference-style link: [page][ref]

This is a valid normal link: [page](https://climateinteractive.org) (with parentheses after) and more text

This is a valid reference-style link: [page][ref] (with parentheses after) and more text

[ref]: https://climateinteractive.org
`

    // Each paragraph above should become a `<p>` containing a proper `<a>` element
    const expectedBody = `\
<p>This is a valid normal link: <a href="https://climateinteractive.org">page</a></p>
<p>This is a valid reference-style link: <a href="https://climateinteractive.org">page</a></p>
<p>This is a valid normal link: <a href="https://climateinteractive.org">page</a> (with parentheses after) and more text</p>
<p>This is a valid reference-style link: <a href="https://climateinteractive.org">page</a> (with parentheses after) and more text</p>
`

    const enContext = new Context(config, 'en')
    const page = generateHtml(enContext, pagePath, { raw: pageSource })
    expect(page.baseName).toBe('page_1')
    expect(page.relPath).toBe('page_1.html')
    expect(page.body).toBe(expectedBody)
  })

  it('should throw an error if invalid link syntax is detected', () => {
    // Four valid links followed by two invalid ones (a space separates the link
    // text from the url/reference in the last two paragraphs)
    const linkParagraphs = `\
This is a valid normal link: [page](https://climateinteractive.org)

This is a valid reference-style link: [page][ref]

This is a valid normal link: [page](https://climateinteractive.org) (with parentheses after) and more text

This is a valid reference-style link: [page][ref] (with parentheses after) and more text

This is an invalid normal link: [page] (https://climateinteractive.org) (with parentheses after) and more text

This is an invalid reference-style link: [page] [ref] (with parentheses after) and more text
`

    // Wrap the paragraphs in a section/def block so that they can be translated
    const pageSource = `\
# <!-- section:section_1 -->Section 1

<!-- begin-def:block_1 -->

${linkParagraphs}

<!-- end-def -->

[ref]: https://climateinteractive.org
`

    // English: the invalid ref link is still converted to an HTML link, so both
    // reported snippets end with an (escaped) anchor tag
    const expectedEnError = `\
Detected invalid Markdown link syntax in the generated HTML:
[page] (&lt;a href
[page] &lt;a href
To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref] (page=page_1.md)`

    // Non-English: the invalid ref link target is not converted to an HTML link
    // (unlike the English case), so the second reported snippet differs
    const expectedDeError = `\
Detected invalid Markdown link syntax in the generated HTML:
[page] (&lt;a href
[page] [ref]
To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref] (lang=de page=page_1.md)`

    // Verify that an error is thrown when the English content has invalid link syntax
    const enContext = new Context(config, 'en')
    const enPage = parseMarkdownPageContent(enContext, pagePath, pageSource)
    const renderEn = () => generateHtml(enContext, pagePath, { raw: enPage.raw })
    expect(renderEn).toThrow(expectedEnError)

    // Verify that an error is thrown when the translated content has invalid link syntax
    const translations = new Map([
      ['section_1__title', 'Section 1'],
      ['section_1__block_1', linkParagraphs]
    ])
    const deContext = enContext.derive('de', translations)
    const dePage = parseMarkdownPageContent(deContext, pagePath, pageSource)
    const renderDe = () => generateHtml(deContext, pagePath, { raw: dePage.raw })
    expect(renderDe).toThrow(expectedDeError)
  })
})

describe('subscriptify', () => {
it('should convert chemical formulas', () => {
Expand All @@ -29,7 +130,7 @@ describe('convertMarkdownToHtml', () => {
'<p>This is -CO<sub>2</sub>-</p>\n'
)
expect(convertMarkdownToHtml(undefined, '# This is CO2')).toBe(
'<h1 id="this-is-co2">This is CO<sub>2</sub></h1>\n'
'<h1>This is CO<sub>2</sub></h1>\n'
)
expect(convertMarkdownToHtml(undefined, '> This is _CO2_')).toBe(
'<blockquote>\n<p>This is <em>CO<sub>2</sub></em></p>\n</blockquote>\n'
Expand Down
34 changes: 33 additions & 1 deletion packages/docs-builder/src/gen-html.ts
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,9 @@ export function generateHtml(context: Context, mdRelPath: string, mdPage: Markdo
// Convert the Markdown content to HTML
const body = convertMarkdownToHtml(context, md)

// Check for evidence of invalid Markdown link syntax that remains in the generated HTML
checkForInvalidLinkSyntax(context, body)

// Save the names of the `<head>` fragments to include
const headFragments = mdPage.frontmatter?.fragments?.head || []

Expand Down Expand Up @@ -592,7 +595,9 @@ export function convertMarkdownToHtml(context: Context, md: string): string {
})

// Parse the Markdown into HTML
return marked.parse(md)
return marked.parse(md, {
headerIds: false
})
}

/**
Expand All @@ -617,3 +622,30 @@ export function subscriptify(s: string): string {
return subscriptMap.get(m1)
})
}

// This will match cases where a space in the Markdown link syntax caused the link parts
// to be converted to separate elements in the HTML output, for example:
//   Markdown:  [text] (https://example.com)
//   HTML:      [text] (<a href="https://example.com">https://example.com</a>)
//   Markdown:  [text] [ref]
//   HTML (en): [text] <a href="https://climateinteractive.org">ref</a>
//   HTML (xx): [text] [ref]
// Note that the generated HTML in the second example is different for the English and
// non-English cases (due to different parsing code paths), so we need to detect both.
// Each match ends right after the first attribute name of the `<a` tag (or after the
// closing bracket of the dangling `[ref]`), so only a short snippet is reported.
const invalidLinkRegExp = /\[([^\]]+)\]\s+(\(?<a\s\w+|\[([^\]]+)\])/g

/**
 * Throw an error if the given HTML text contains evidence of invalid Markdown link syntax.
 *
 * @param context The context whose `getScopedMessage` is used to build the final error message.
 * @param md The generated HTML to scan (despite the parameter name, this is the HTML
 * output of the Markdown conversion, not the Markdown source).
 * @throws Error listing every offending snippet, one per line, followed by a hint
 * describing how to fix the Markdown.
 */
function checkForInvalidLinkSyntax(context: Context, md: string): void {
  // With the global flag, `match` returns the full text of every match (or null if none)
  const matches = md.match(invalidLinkRegExp)
  if (!matches) {
    return
  }

  // Escape every `<` in each snippet, not only the first one: the bracketed link text
  // can itself contain inline tags (for example `[<em>text</em>] (<a href`), and a
  // string pattern passed to `replace` would only replace the first occurrence
  const snippets = matches.map(match => match.replace(/</g, '&lt;'))

  const msg = [
    'Detected invalid Markdown link syntax in the generated HTML:',
    ...snippets,
    'To fix, ensure there are no spaces between link text and link url/reference, for example: [text](url) or [text][ref]'
  ].join('\n')
  throw new Error(context.getScopedMessage(msg))
}
5 changes: 0 additions & 5 deletions packages/docs-builder/src/parse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,6 @@ export function parseMarkdownPageContent(
relPath: string,
origMarkdownWithFrontmatter: string
): MarkdownPage {
// Configure marked.js
marked.setOptions({
headerIds: false
})

// Separate frontmatter from the content
const origMarkdownSeparated = matter(origMarkdownWithFrontmatter)
const origMarkdown = origMarkdownSeparated.content
Expand Down