diff --git a/package.json b/package.json index c3723cea..e3b85bd7 100644 --- a/package.json +++ b/package.json @@ -248,6 +248,13 @@ } }, "configurationDefaults": { + "[html]": { + "codeBlocks.queries": [ + "(element (start_tag)) @item", + "(text) @item", + "(element (end_tag)) @item" + ] + }, "[github-actions-workflow]": { "codeBlocks.npmPackageName": "@tree-sitter-grammars/tree-sitter-yaml", "codeBlocks.parserName": "tree-sitter-yaml" diff --git a/src/FileTree.ts b/src/FileTree.ts index 079a854a..584e6f3a 100644 --- a/src/FileTree.ts +++ b/src/FileTree.ts @@ -279,7 +279,6 @@ export class FileTree implements vscode.Disposable { break; } case "swap-next": { - // TODO: if block mode, resolve previous block const nextSelection = selection.getNext(blocks); if (!nextSelection) { return err(`Can't move to ${direction}, next node of selection is null`); diff --git a/src/Selection.ts b/src/Selection.ts index b53622e0..c3f6d7c1 100644 --- a/src/Selection.ts +++ b/src/Selection.ts @@ -62,56 +62,70 @@ export class Selection { const parent = this.firstNode().parent; const range = this.getRange(); - let smallestBlockIndex: number | undefined = undefined; + let smallestBlock: Block | undefined = undefined; let smallestBlockLength: number | undefined = undefined; - for (let i = 0; i < blocks.length; i++) { - const block = blocks[i]; - - const blockRange = { - startIndex: block[0].startIndex, - endIndex: block[block.length - 1].endIndex, - }; + for (const block of blocks) { + const startIndex = block[0].startIndex; + const endIndex = block[block.length - 1].endIndex; // check if block contains selection - if (blockRange.startIndex <= range.startIndex && range.endIndex <= blockRange.endIndex) { + const contains = startIndex <= range.startIndex && range.endIndex <= endIndex; + if (contains) { // check if block is at the same hierarchy level as the selection - if ( + const isSibling = (parent === null && block[0].parent === null) || - (block[0].parent !== null && parent === block[0].parent) - ) { - const length = blockRange.endIndex - blockRange.startIndex; + (block[0].parent !== null && + parent?.startIndex === block[0].parent.startIndex && + parent?.endIndex === block[0].parent.endIndex); + + if (isSibling) { + const length = endIndex - startIndex; if (length <= (smallestBlockLength ?? length)) { - smallestBlockIndex = i; + smallestBlock = block; smallestBlockLength = length; } } } } - if (smallestBlockIndex !== undefined) { - const smallestBlock = blocks[smallestBlockIndex]; - this.selectedSiblings = smallestBlock; + if (smallestBlock === undefined) { + return this; } + this.selectedSiblings = smallestBlock; return this; } public getPrevious(blocks: Block[] | undefined): Selection | undefined { const previousNode = this.selectedSiblings[0].previousNamedSibling; - if (previousNode) { - return new Selection([previousNode], this.version).expandToBlock(blocks); - } else { + if (!previousNode) { + return undefined; + } + + const previous = Selection.fromNode(previousNode, this.version).expandToBlock(blocks); + const parent = this.getParent(blocks)?.toVscodeSelection(); + + if (parent !== undefined && previous.toVscodeSelection().isEqual(parent)) { return undefined; } + + return previous; } public getNext(blocks: Block[] | undefined): Selection | undefined { const nextNode = this.selectedSiblings.at(-1)?.nextNamedSibling; - if (nextNode) { - return new Selection([nextNode], this.version).expandToBlock(blocks); - } else { + if (!nextNode) { return undefined; } + + const next = Selection.fromNode(nextNode, this.version).expandToBlock(blocks); + const parent = this.getParent(blocks)?.toVscodeSelection(); + + if (parent !== undefined && next.toVscodeSelection().isEqual(parent)) { + return undefined; + } + + return next; } public getParent(blocks: Block[] | undefined): Selection | undefined { @@ -128,7 +142,7 @@ export class Selection { } if (parent) { - return new Selection([parent], this.version).expandToBlock(blocks); + return Selection.fromNode(parent, this.version).expandToBlock(blocks); } else { return undefined; } @@ -148,7 +162,7 @@ export class Selection { } if (child) { - return new Selection([child], this.version).expandToBlock(blocks); + return Selection.fromNode(child, this.version).expandToBlock(blocks); } else { return undefined; } diff --git a/src/test/suite/BlockTrees.test.ts b/src/test/suite/BlockTrees.test.ts index c17dfa60..df3c0d8d 100644 --- a/src/test/suite/BlockTrees.test.ts +++ b/src/test/suite/BlockTrees.test.ts @@ -60,6 +60,30 @@ suite("BlockTrees", function () { return void vscode.window.showInformationMessage("Start blockTrees.getBlockTrees tests"); }); + test("resolves html blocks", async function () { + const text = "\n \n \n"; + const { fileTree } = await openDocument(text, "html"); + const lang = fileTree.parser.getLanguage() as Language; + const queries = [new Query(lang, "(element) @item")]; + const blocksTrees = getBlockTrees(fileTree.tree, queries); + + expect("\n" + blockTreesToString(text, blocksTrees)).to.equal(` ++------------------------+ +| | +| | +| +-------------------+ | +| | | | +| | | | +| +-------------------+ | +| +-------------------+ | +| | | | +| +-------------------+ | +| | +| | ++------------------------+ +`); + }); + test("resolves sequential blocks", async function () { const text = "fn foo() {}\nfn bar() {}"; const { fileTree } = await openDocument(text, "rust"); diff --git a/src/test/suite/Selection.test.ts b/src/test/suite/Selection.test.ts index df477b8a..afb357bb 100644 --- a/src/test/suite/Selection.test.ts +++ b/src/test/suite/Selection.test.ts @@ -22,7 +22,7 @@ suite("Selection", function () { } for (const update of updates) { - selection.update(update, []); + selection.update(update, fileTree.blocks); } const selectionText = selection.getText(content); @@ -30,15 +30,34 @@ suite("Selection", function () { return selectionText; } + suite(".getPrevious", function () { + test("Ignores previous nodes that start with parent", async () => { + const text = "@12"; + + expect(await selectionAt("html", text)).to.equal("1"); + expect(await selectionAt("html", text, ["parent"])).to.equal("1"); + expect(await selectionAt("html", text, ["parent", "add-previous"])).to.equal("1"); + }); + }); + + suite(".getNext", function () { + test("Ignores next nodes that start with parent", async () => { + const text = "1@2"; + + expect(await selectionAt("html", text)).to.equal("2"); + expect(await selectionAt("html", text, ["parent"])).to.equal("2"); + expect(await selectionAt("html", text, ["parent", "add-next"])).to.equal("2"); + }); + }); + suite(".update", function () { test("Select source_file node is undefined", async () => { expect(await selectionAt("rust", "fn main() { }@")).to.be.undefined; }); - test("Update selection parent/child", async () => { - expect(await selectionAt("rust", "fn main() { @ }")).to.equal("{ }"); - expect(await selectionAt("rust", "fn main() { @ }", ["parent"])).to.equal("fn main() { }"); - expect(await selectionAt("rust", "fn main() { @ }", ["parent", "child"])).to.equal("main"); + test.only("Update selection parent/child", async () => { + expect(await selectionAt("rust", "fn foo() { fn nested() { @ } }")).to.equal("fn nested() { }"); + expect(await selectionAt("rust", "fn main() { @ }")).to.equal("fn main() { }"); expect(await selectionAt("rust", "if true { @ }", ["parent"])).to.equal("if true { }"); expect(await selectionAt("rust", "if true { @ }", ["parent", "child"])).to.equal("true"); expect( @@ -64,9 +83,7 @@ suite("Selection", function () { test("Update selection parent/child", async () => { const text = "function main() { @ }"; - expect(await selectionAt("typescriptreact", text)).to.equal("{ }"); - expect(await selectionAt("typescriptreact", text, ["parent"])).to.equal("function main() { }"); - expect(await selectionAt("typescriptreact", text, ["parent", "child"])).to.equal("main"); + expect(await selectionAt("typescriptreact", text)).to.equal("function main() { }"); }); test("Update selection previous/next", async () => { diff --git a/src/test/suite/extension.test.ts b/src/test/suite/extension.test.ts index c0975700..75c61df2 100644 --- a/src/test/suite/extension.test.ts +++ b/src/test/suite/extension.test.ts @@ -214,17 +214,17 @@ source_file [0:0 - 0:12] await testSelectionCommands({ content: "<>

@a

", selectionCommands: ["codeBlocks.selectPrevious"], - expectedSelectionContent: "

a", + expectedSelectionContent: "a", language: "typescriptreact", }); }); }); suite(".selectChild", function () { - test("contracts to first named child", async () => { + test.only("contracts to first named child", async () => { await testSelectionCommands({ - content: "pub fn foo() { @ }", - selectionCommands: ["codeBlocks.selectParent", "codeBlocks.selectChild"], + content: "pub fn foo() { fn nested() { @ } }", + selectionCommands: ["codeBlocks.selectBlock"], expectedSelectionContent: "pub", }); await testSelectionCommands({ diff --git a/test-parsers/tree-sitter-html/LICENSE b/test-parsers/tree-sitter-html/LICENSE new file mode 100644 index 00000000..4b52d191 --- /dev/null +++ b/test-parsers/tree-sitter-html/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2014 Max Brunsfeld + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/test-parsers/tree-sitter-html/README.md b/test-parsers/tree-sitter-html/README.md new file mode 100644 index 00000000..28684b21 --- /dev/null +++ b/test-parsers/tree-sitter-html/README.md @@ -0,0 +1,21 @@ +# tree-sitter-html + +[![CI][ci]](https://github.com/tree-sitter/tree-sitter-html/actions/workflows/ci.yml) +[![discord][discord]](https://discord.gg/w7nTvsVJhm) +[![matrix][matrix]](https://matrix.to/#/#tree-sitter-chat:matrix.org) +[![crates][crates]](https://crates.io/crates/tree-sitter-html) +[![npm][npm]](https://www.npmjs.com/package/tree-sitter-html) +[![pypi][pypi]](https://pypi.org/project/tree-sitter-html) + +HTML grammar for [tree-sitter](https://github.com/tree-sitter/tree-sitter). + +References + +- [The HTML5 Spec](https://www.w3.org/TR/html5/syntax.html) + +[ci]: https://img.shields.io/github/actions/workflow/status/tree-sitter/tree-sitter-html/ci.yml?logo=github&label=CI +[discord]: https://img.shields.io/discord/1063097320771698699?logo=discord&label=discord +[matrix]: https://img.shields.io/matrix/tree-sitter-chat%3Amatrix.org?logo=matrix&label=matrix +[npm]: https://img.shields.io/npm/v/tree-sitter-html?logo=npm +[crates]: https://img.shields.io/crates/v/tree-sitter-html?logo=rust +[pypi]: https://img.shields.io/pypi/v/tree-sitter-html?logo=pypi&logoColor=ffd242 diff --git a/test-parsers/tree-sitter-html/binding.gyp b/test-parsers/tree-sitter-html/binding.gyp new file mode 100644 index 00000000..64bfc648 --- /dev/null +++ b/test-parsers/tree-sitter-html/binding.gyp @@ -0,0 +1,30 @@ +{ + "targets": [ + { + "target_name": "tree_sitter_html_binding", + "dependencies": [ + " + +typedef struct TSLanguage TSLanguage; + +extern "C" TSLanguage *tree_sitter_html(); + +// "tree-sitter", "language" hashed with BLAKE2 +const napi_type_tag LANGUAGE_TYPE_TAG = { + 0x8AF2E5212AD58ABF, 0xD5006CAD83ABBA16 +}; + +Napi::Object Init(Napi::Env env, Napi::Object exports) { + exports["name"] = Napi::String::New(env, "html"); + auto language = Napi::External::New(env, tree_sitter_html()); + language.TypeTag(&LANGUAGE_TYPE_TAG); + exports["language"] = language; + return exports; +} + +NODE_API_MODULE(tree_sitter_html_binding, Init) diff --git a/test-parsers/tree-sitter-html/bindings/node/binding_test.js b/test-parsers/tree-sitter-html/bindings/node/binding_test.js new file mode 100644 index 00000000..afede30a --- /dev/null +++ b/test-parsers/tree-sitter-html/bindings/node/binding_test.js @@ -0,0 +1,9 @@ +/// + +const assert = require("node:assert"); +const { test } = require("node:test"); + +test("can load grammar", () => { + const parser = new (require("tree-sitter"))(); + assert.doesNotThrow(() => parser.setLanguage(require("."))); +}); diff --git a/test-parsers/tree-sitter-html/bindings/node/index.d.ts b/test-parsers/tree-sitter-html/bindings/node/index.d.ts new file mode 100644 index 00000000..efe259ee --- /dev/null +++ b/test-parsers/tree-sitter-html/bindings/node/index.d.ts @@ -0,0 +1,28 @@ +type BaseNode = { + type: string; + named: boolean; +}; + +type ChildNode = { + multiple: boolean; + required: boolean; + types: BaseNode[]; +}; + +type NodeInfo = + | (BaseNode & { + subtypes: BaseNode[]; + }) + | (BaseNode & { + fields: { [name: string]: ChildNode }; + children: ChildNode[]; + }); + +type Language = { + name: string; + language: unknown; + nodeTypeInfo: NodeInfo[]; +}; + +declare const language: Language; +export = language; diff --git a/test-parsers/tree-sitter-html/bindings/node/index.js b/test-parsers/tree-sitter-html/bindings/node/index.js new file mode 100644 index 00000000..6657bcf4 --- /dev/null +++ b/test-parsers/tree-sitter-html/bindings/node/index.js @@ -0,0 +1,7 @@ +const root = require("path").join(__dirname, "..", ".."); + +module.exports = require("node-gyp-build")(root); + +try { + module.exports.nodeTypeInfo = require("../../src/node-types.json"); +} catch (_) {} diff --git a/test-parsers/tree-sitter-html/grammar.js b/test-parsers/tree-sitter-html/grammar.js new file mode 100644 index 00000000..657b78d3 --- /dev/null +++ b/test-parsers/tree-sitter-html/grammar.js @@ -0,0 +1,141 @@ +/** + * @file HTML grammar for tree-sitter + * @author Max Brunsfeld + * @author Amaan Qureshi + * @license MIT + */ + +/// +// @ts-check + +module.exports = grammar({ + name: 'html', + + extras: $ => [ + $.comment, + /\s+/, + ], + + externals: $ => [ + $._start_tag_name, + $._script_start_tag_name, + $._style_start_tag_name, + $._end_tag_name, + $.erroneous_end_tag_name, + '/>', + $._implicit_end_tag, + $.raw_text, + $.comment, + ], + + rules: { + document: $ => repeat($._node), + + doctype: $ => seq( + ']+/, + '>', + ), + + _doctype: _ => /[Dd][Oo][Cc][Tt][Yy][Pp][Ee]/, + + _node: $ => choice( + $.doctype, + $.entity, + $.text, + $.element, + $.script_element, + $.style_element, + $.erroneous_end_tag, + ), + + element: $ => choice( + seq( + $.start_tag, + repeat($._node), + choice($.end_tag, $._implicit_end_tag), + ), + $.self_closing_tag, + ), + + script_element: $ => seq( + alias($.script_start_tag, $.start_tag), + optional($.raw_text), + $.end_tag, + ), + + style_element: $ => seq( + alias($.style_start_tag, $.start_tag), + optional($.raw_text), + $.end_tag, + ), + + start_tag: $ => seq( + '<', + alias($._start_tag_name, $.tag_name), + repeat($.attribute), + '>', + ), + + script_start_tag: $ => seq( + '<', + alias($._script_start_tag_name, $.tag_name), + repeat($.attribute), + '>', + ), + + style_start_tag: $ => seq( + '<', + alias($._style_start_tag_name, $.tag_name), + repeat($.attribute), + '>', + ), + + self_closing_tag: $ => seq( + '<', + alias($._start_tag_name, $.tag_name), + repeat($.attribute), + '/>', + ), + + end_tag: $ => seq( + '', + ), + + erroneous_end_tag: $ => seq( + '', + ), + + attribute: $ => seq( + $.attribute_name, + optional(seq( + '=', + choice( + $.attribute_value, + $.quoted_attribute_value, + ), + )), + ), + + attribute_name: _ => /[^<>"'/=\s]+/, + + attribute_value: _ => /[^<>"'=\s]+/, + + // An entity can be named, numeric (decimal), or numeric (hexacecimal). The + // longest entity name is 29 characters long, and the HTML spec says that + // no more will ever be added. + entity: _ => /&(#([xX][0-9a-fA-F]{1,6}|[0-9]{1,5})|[A-Za-z]{1,30});?/, + + quoted_attribute_value: $ => choice( + seq('\'', optional(alias(/[^']+/, $.attribute_value)), '\''), + seq('"', optional(alias(/[^"]+/, $.attribute_value)), '"'), + ), + + text: _ => /[^<>&\s]([^<>&]*[^<>&\s])?/, + }, +}); diff --git a/test-parsers/tree-sitter-html/package.json b/test-parsers/tree-sitter-html/package.json new file mode 100644 index 00000000..369bfc69 --- /dev/null +++ b/test-parsers/tree-sitter-html/package.json @@ -0,0 +1,112 @@ +{ + "name": "tree-sitter-html", + "version": "0.23.0", + "description": "HTML grammar for tree-sitter", + "repository": "github:tree-sitter/tree-sitter-html", + "license": "MIT", + "author": "Max Brunsfeld ", + "contributors": [ + "Ashi Krishnan ", + "Amaan Qureshi " + ], + "maintainers": [ + "Amaan Qureshi " + ], + "main": "bindings/node", + "types": "bindings/node", + "keywords": [ + "incremental", + "parsing", + "tree-sitter", + "html" + ], + "files": [ + "grammar.js", + "binding.gyp", + "prebuilds/**", + "bindings/node/*", + "queries/*", + "src/**" + ], + "dependencies": { + "node-addon-api": "^8.1.0", + "node-gyp-build": "^4.8.2" + }, + "peerDependencies": { + "tree-sitter": "^0.21.1" + }, + "peerDependenciesMeta": { + "tree-sitter": { + "optional": true + } + }, + "devDependencies": { + "eslint": "^8.57.0", + "eslint-config-google": "^0.14.0", + "tree-sitter-cli": "^0.23.0", + "prebuildify": "^6.0.1" + }, + "scripts": { + "install": "node-gyp-build", + "lint": "eslint grammar.js", + "prestart": "tree-sitter build --wasm", + "start": "tree-sitter playground", + "test": "node --test bindings/node/*_test.js" + }, + "tree-sitter": [ + { + "scope": "source.html", + "file-types": [ + "html" + ], + "injection-regex": "html", + "highlights": "queries/highlights.scm", + "injections": "queries/injections.scm", + "external-files": [ + "src/tag.h" + ] + } + ], + "eslintConfig": { + "env": { + "commonjs": true, + "es2021": true + }, + "extends": "google", + "parserOptions": { + "ecmaVersion": "latest", + "sourceType": "module" + }, + "rules": { + "arrow-parens": "off", + "camel-case": "off", + "indent": [ + "error", + 2, + { + "SwitchCase": 1 + } + ], + "max-len": [ + "error", + { + "code": 160, + "ignoreComments": true, + "ignoreUrls": true, + "ignoreStrings": true + } + ], + "spaced-comment": [ + "warn", + "always", + { + "line": { + "markers": [ + "/" + ] + } + } + ] + } + } +} diff --git a/test-parsers/tree-sitter-html/prebuilds/darwin-arm64/tree-sitter-html.node b/test-parsers/tree-sitter-html/prebuilds/darwin-arm64/tree-sitter-html.node new file mode 100644 index 00000000..ece64c66 Binary files /dev/null and b/test-parsers/tree-sitter-html/prebuilds/darwin-arm64/tree-sitter-html.node differ diff --git a/test-parsers/tree-sitter-html/prebuilds/darwin-x64/tree-sitter-html.node b/test-parsers/tree-sitter-html/prebuilds/darwin-x64/tree-sitter-html.node new file mode 100644 index 00000000..69929008 Binary files /dev/null and b/test-parsers/tree-sitter-html/prebuilds/darwin-x64/tree-sitter-html.node differ diff --git a/test-parsers/tree-sitter-html/prebuilds/linux-x64/tree-sitter-html.node b/test-parsers/tree-sitter-html/prebuilds/linux-x64/tree-sitter-html.node new file mode 100644 index 00000000..64108dd4 Binary files /dev/null and b/test-parsers/tree-sitter-html/prebuilds/linux-x64/tree-sitter-html.node differ diff --git a/test-parsers/tree-sitter-html/prebuilds/win32-x64/tree-sitter-html.node b/test-parsers/tree-sitter-html/prebuilds/win32-x64/tree-sitter-html.node new file mode 100644 index 00000000..747aed11 Binary files /dev/null and b/test-parsers/tree-sitter-html/prebuilds/win32-x64/tree-sitter-html.node differ diff --git a/test-parsers/tree-sitter-html/queries/highlights.scm b/test-parsers/tree-sitter-html/queries/highlights.scm new file mode 100644 index 00000000..ea0ff4e3 --- /dev/null +++ b/test-parsers/tree-sitter-html/queries/highlights.scm @@ -0,0 +1,13 @@ +(tag_name) @tag +(erroneous_end_tag_name) @tag.error +(doctype) @constant +(attribute_name) @attribute +(attribute_value) @string +(comment) @comment + +[ + "<" + ">" + "" +] @punctuation.bracket diff --git a/test-parsers/tree-sitter-html/queries/injections.scm b/test-parsers/tree-sitter-html/queries/injections.scm new file mode 100644 index 00000000..71e7c3ae --- /dev/null +++ b/test-parsers/tree-sitter-html/queries/injections.scm @@ -0,0 +1,7 @@ +((script_element + (raw_text) @injection.content) + (#set! injection.language "javascript")) + +((style_element + (raw_text) @injection.content) + (#set! injection.language "css")) diff --git a/test-parsers/tree-sitter-html/src/grammar.json b/test-parsers/tree-sitter-html/src/grammar.json new file mode 100644 index 00000000..cf346240 --- /dev/null +++ b/test-parsers/tree-sitter-html/src/grammar.json @@ -0,0 +1,500 @@ +{ + "name": "html", + "rules": { + "document": { + "type": "REPEAT", + "content": { + "type": "SYMBOL", + "name": "_node" + } + }, + "doctype": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "]+" + }, + { + "type": "STRING", + "value": ">" + } + ] + }, + "_doctype": { + "type": "PATTERN", + "value": "[Dd][Oo][Cc][Tt][Yy][Pp][Ee]" + }, + "_node": { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "doctype" + }, + { + "type": "SYMBOL", + "name": "entity" + }, + { + "type": "SYMBOL", + "name": "text" + }, + { + "type": "SYMBOL", + "name": "element" + }, + { + "type": "SYMBOL", + "name": "script_element" + }, + { + "type": "SYMBOL", + "name": "style_element" + }, + { + "type": "SYMBOL", + "name": "erroneous_end_tag" + } + ] + }, + "element": { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + { + "type": "SYMBOL", + "name": "start_tag" + }, + { + "type": "REPEAT", + "content": { + "type": "SYMBOL", + "name": "_node" + } + }, + { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "end_tag" + }, + { + "type": "SYMBOL", + "name": "_implicit_end_tag" + } + ] + } + ] + }, + { + "type": "SYMBOL", + "name": "self_closing_tag" + } + ] + }, + "script_element": { + "type": "SEQ", + "members": [ + { + "type": "ALIAS", + "content": { + "type": "SYMBOL", + "name": "script_start_tag" + }, + "named": true, + "value": "start_tag" + }, + { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "raw_text" + }, + { + "type": "BLANK" + } + ] + }, + { + "type": "SYMBOL", + "name": "end_tag" + } + ] + }, + "style_element": { + "type": "SEQ", + "members": [ + { + "type": "ALIAS", + "content": { + "type": "SYMBOL", + "name": "style_start_tag" + }, + "named": true, + "value": "start_tag" + }, + { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "raw_text" + }, + { + "type": "BLANK" + } + ] + }, + { + "type": "SYMBOL", + "name": "end_tag" + } + ] + }, + "start_tag": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "<" + }, + { + "type": "ALIAS", + "content": { + "type": "SYMBOL", + "name": "_start_tag_name" + }, + "named": true, + "value": "tag_name" + }, + { + "type": "REPEAT", + "content": { + "type": "SYMBOL", + "name": "attribute" + } + }, + { + "type": "STRING", + "value": ">" + } + ] + }, + "script_start_tag": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "<" + }, + { + "type": "ALIAS", + "content": { + "type": "SYMBOL", + "name": "_script_start_tag_name" + }, + "named": true, + "value": "tag_name" + }, + { + "type": "REPEAT", + "content": { + "type": "SYMBOL", + "name": "attribute" + } + }, + { + "type": "STRING", + "value": ">" + } + ] + }, + "style_start_tag": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "<" + }, + { + "type": "ALIAS", + "content": { + "type": "SYMBOL", + "name": "_style_start_tag_name" + }, + "named": true, + "value": "tag_name" + }, + { + "type": "REPEAT", + "content": { + "type": "SYMBOL", + "name": "attribute" + } + }, + { + "type": "STRING", + "value": ">" + } + ] + }, + "self_closing_tag": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "<" + }, + { + "type": "ALIAS", + "content": { + "type": "SYMBOL", + "name": "_start_tag_name" + }, + "named": true, + "value": "tag_name" + }, + { + "type": "REPEAT", + "content": { + "type": "SYMBOL", + "name": "attribute" + } + }, + { + "type": "STRING", + "value": "/>" + } + ] + }, + "end_tag": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "" + } + ] + }, + "erroneous_end_tag": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "" + } + ] + }, + "attribute": { + "type": "SEQ", + "members": [ + { + "type": "SYMBOL", + "name": "attribute_name" + }, + { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "=" + }, + { + "type": "CHOICE", + "members": [ + { + "type": "SYMBOL", + "name": "attribute_value" + }, + { + "type": "SYMBOL", + "name": "quoted_attribute_value" + } + ] + } + ] + }, + { + "type": "BLANK" + } + ] + } + ] + }, + "attribute_name": { + "type": "PATTERN", + "value": "[^<>\"'/=\\s]+" + }, + "attribute_value": { + "type": "PATTERN", + "value": "[^<>\"'=\\s]+" + }, + "entity": { + "type": "PATTERN", + "value": "&(#([xX][0-9a-fA-F]{1,6}|[0-9]{1,5})|[A-Za-z]{1,30});?" + }, + "quoted_attribute_value": { + "type": "CHOICE", + "members": [ + { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "'" + }, + { + "type": "CHOICE", + "members": [ + { + "type": "ALIAS", + "content": { + "type": "PATTERN", + "value": "[^']+" + }, + "named": true, + "value": "attribute_value" + }, + { + "type": "BLANK" + } + ] + }, + { + "type": "STRING", + "value": "'" + } + ] + }, + { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "\"" + }, + { + "type": "CHOICE", + "members": [ + { + "type": "ALIAS", + "content": { + "type": "PATTERN", + "value": "[^\"]+" + }, + "named": true, + "value": "attribute_value" + }, + { + "type": "BLANK" + } + ] + }, + { + "type": "STRING", + "value": "\"" + } + ] + } + ] + }, + "text": { + "type": "PATTERN", + "value": "[^<>&\\s]([^<>&]*[^<>&\\s])?" + } + }, + "extras": [ + { + "type": "SYMBOL", + "name": "comment" + }, + { + "type": "PATTERN", + "value": "\\s+" + } + ], + "conflicts": [], + "precedences": [], + "externals": [ + { + "type": "SYMBOL", + "name": "_start_tag_name" + }, + { + "type": "SYMBOL", + "name": "_script_start_tag_name" + }, + { + "type": "SYMBOL", + "name": "_style_start_tag_name" + }, + { + "type": "SYMBOL", + "name": "_end_tag_name" + }, + { + "type": "SYMBOL", + "name": "erroneous_end_tag_name" + }, + { + "type": "STRING", + "value": "/>" + }, + { + "type": "SYMBOL", + "name": "_implicit_end_tag" + }, + { + "type": "SYMBOL", + "name": "raw_text" + }, + { + "type": "SYMBOL", + "name": "comment" + } + ], + "inline": [], + "supertypes": [] +} diff --git a/test-parsers/tree-sitter-html/src/node-types.json b/test-parsers/tree-sitter-html/src/node-types.json new file mode 100644 index 00000000..07fc3750 --- /dev/null +++ b/test-parsers/tree-sitter-html/src/node-types.json @@ -0,0 +1,317 @@ +[ + { + "type": "attribute", + "named": true, + "fields": {}, + "children": { + "multiple": true, + "required": true, + "types": [ + { + "type": "attribute_name", + "named": true + }, + { + "type": "attribute_value", + "named": true + }, + { + "type": "quoted_attribute_value", + "named": true + } + ] + } + }, + { + "type": "doctype", + "named": true, + "fields": {} + }, + { + "type": "document", + "named": true, + "fields": {}, + "children": { + "multiple": true, + "required": false, + "types": [ + { + "type": "doctype", + "named": true + }, + { + "type": "element", + "named": true + }, + { + "type": "entity", + "named": true + }, + { + "type": "erroneous_end_tag", + "named": true + }, + { + "type": "script_element", + "named": true + }, + { + "type": "style_element", + "named": true + }, + { + "type": "text", + "named": true + } + ] + } + }, + { + "type": "element", + "named": true, + "fields": {}, + "children": { + "multiple": true, + "required": true, + "types": [ + { + "type": "doctype", + "named": true + }, + { + "type": "element", + "named": true + }, + { + "type": "end_tag", + "named": true + }, + { + "type": "entity", + "named": true + }, + { + "type": "erroneous_end_tag", + "named": true + }, + { + "type": "script_element", + "named": true + }, + { + "type": "self_closing_tag", + "named": true + }, + { + "type": "start_tag", + "named": true + }, + { + "type": "style_element", + "named": true + }, + { + "type": "text", + "named": true + } + ] + } + }, + { + "type": "end_tag", + "named": true, + "fields": {}, + "children": { + "multiple": false, + "required": true, + "types": [ + { + "type": "tag_name", + "named": true + } + ] + } + }, + { + "type": "erroneous_end_tag", + "named": true, + "fields": {}, + "children": { + "multiple": false, + "required": true, + "types": [ + { + "type": "erroneous_end_tag_name", + "named": true + } + ] + } + }, + { + "type": "quoted_attribute_value", + "named": true, + "fields": {}, + "children": { + "multiple": false, + "required": false, + "types": [ + { + "type": "attribute_value", + "named": true + } + ] + } + }, + { + "type": "script_element", + "named": true, + "fields": {}, + "children": { + "multiple": true, + "required": true, + "types": [ + { + "type": "end_tag", + "named": true + }, + { + "type": "raw_text", + "named": true + }, + { + "type": "start_tag", + "named": true + } + ] + } + }, + { + "type": "self_closing_tag", + "named": true, + "fields": {}, + "children": { + "multiple": true, + "required": true, + "types": [ + { + "type": "attribute", + "named": true + }, + { + "type": "tag_name", + "named": true + } + ] + } + }, + { + "type": "start_tag", + "named": true, + "fields": {}, + "children": { + "multiple": true, + "required": true, + "types": [ + { + "type": "attribute", + "named": true + }, + { + "type": "tag_name", + "named": true + } + ] + } + }, + { + "type": "style_element", + "named": true, + "fields": {}, + "children": { + "multiple": true, + "required": true, + "types": [ + { + "type": "end_tag", + "named": true + }, + { + "type": "raw_text", + "named": true + }, + { + "type": "start_tag", + "named": true + } + ] + } + }, + { + "type": "\"", + "named": false + }, + { + "type": "'", + "named": false + }, + { + "type": "/>", + "named": false + }, + { + "type": "<", + "named": false + }, + { + "type": "", + "named": false + }, + { + "type": "attribute_name", + "named": true + }, + { + "type": "attribute_value", + "named": true + }, + { + "type": "comment", + "named": true + }, + { + "type": "doctype", + "named": false + }, + { + "type": "entity", + "named": true + }, + { + "type": "erroneous_end_tag_name", + "named": true + }, + { + "type": "raw_text", + "named": true + }, + { + "type": "tag_name", + "named": true + }, + { + "type": "text", + "named": true + } +] \ No newline at end of file diff --git a/test-parsers/tree-sitter-html/src/parser.c b/test-parsers/tree-sitter-html/src/parser.c new file mode 100644 index 00000000..a342e0c9 --- /dev/null +++ b/test-parsers/tree-sitter-html/src/parser.c @@ -0,0 +1,2300 @@ +#include "tree_sitter/parser.h" + +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#endif + +#define LANGUAGE_VERSION 14 +#define STATE_COUNT 94 +#define LARGE_STATE_COUNT 2 +#define SYMBOL_COUNT 41 +#define ALIAS_COUNT 0 +#define TOKEN_COUNT 25 +#define EXTERNAL_TOKEN_COUNT 9 +#define FIELD_COUNT 0 +#define MAX_ALIAS_SEQUENCE_LENGTH 4 +#define PRODUCTION_ID_COUNT 1 + +enum ts_symbol_identifiers { + anon_sym_LT_BANG = 1, + aux_sym_doctype_token1 = 2, + anon_sym_GT = 3, + sym__doctype = 4, + anon_sym_LT = 5, + anon_sym_SLASH_GT = 6, + anon_sym_LT_SLASH = 7, + anon_sym_EQ = 8, + sym_attribute_name = 9, + sym_attribute_value = 10, + sym_entity = 11, + anon_sym_SQUOTE = 12, + aux_sym_quoted_attribute_value_token1 = 13, + anon_sym_DQUOTE = 14, + aux_sym_quoted_attribute_value_token2 = 15, + sym_text = 16, + sym__start_tag_name = 17, + sym__script_start_tag_name = 18, + sym__style_start_tag_name = 19, + sym__end_tag_name = 20, + sym_erroneous_end_tag_name = 21, + sym__implicit_end_tag = 22, + sym_raw_text = 23, + sym_comment = 24, + sym_document = 25, + sym_doctype = 26, + sym__node = 27, + sym_element = 28, + sym_script_element = 29, + sym_style_element = 30, + sym_start_tag = 31, + sym_script_start_tag = 32, + sym_style_start_tag = 33, + sym_self_closing_tag = 34, + sym_end_tag = 35, + sym_erroneous_end_tag = 36, + sym_attribute = 37, + sym_quoted_attribute_value = 38, + aux_sym_document_repeat1 = 39, + aux_sym_start_tag_repeat1 = 40, +}; + +static const char * const ts_symbol_names[] = { + [ts_builtin_sym_end] = "end", + [anon_sym_LT_BANG] = "", + [sym__doctype] = "doctype", + [anon_sym_LT] = "<", + [anon_sym_SLASH_GT] = "/>", + [anon_sym_LT_SLASH] = "eof(lexer); + switch (state) { + case 0: + if (eof) ADVANCE(18); + ADVANCE_MAP( + '"', 73, + '&', 3, + '\'', 70, + '/', 6, + '<', 24, + '=', 27, + '>', 22, + 'D', 9, + 'd', 9, + ); + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') SKIP(0); + END_STATE(); + case 1: + if (lookahead == '"') ADVANCE(73); + if (lookahead == '\'') ADVANCE(70); + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') SKIP(1); + if (lookahead != 0 && + (lookahead < '<' || '>' < lookahead)) ADVANCE(29); + END_STATE(); + case 2: + if (lookahead == '"') ADVANCE(73); + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') ADVANCE(74); + if (lookahead != 0) ADVANCE(75); + END_STATE(); + case 3: + if (lookahead == '#') ADVANCE(12); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(69); + END_STATE(); + case 4: + if (lookahead == '\'') ADVANCE(70); + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') ADVANCE(71); + if (lookahead != 0) ADVANCE(72); + END_STATE(); + case 5: + if (lookahead == '/') ADVANCE(6); + if (lookahead == '=') ADVANCE(27); + if (lookahead == '>') ADVANCE(22); + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') SKIP(5); + if (lookahead != 0 && + lookahead != '"' && + lookahead != '\'' && + (lookahead < '<' || '>' < lookahead)) ADVANCE(28); + END_STATE(); + case 6: + if (lookahead == '>') ADVANCE(25); + END_STATE(); + case 7: + if (lookahead == 'C' || + lookahead == 'c') ADVANCE(11); + END_STATE(); + case 8: + if (lookahead == 'E' || + lookahead == 'e') ADVANCE(23); + END_STATE(); + case 9: + if (lookahead == 'O' || + lookahead == 'o') ADVANCE(7); + END_STATE(); + case 10: + if (lookahead == 'P' || + lookahead == 'p') ADVANCE(8); + END_STATE(); + case 11: + if (lookahead == 'T' || + lookahead == 't') ADVANCE(13); + END_STATE(); + case 12: + if (lookahead == 'X' || + lookahead == 'x') ADVANCE(16); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(35); + END_STATE(); + case 13: + if (lookahead == 'Y' || + lookahead == 'y') ADVANCE(10); + END_STATE(); + case 14: + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') ADVANCE(14); + if (lookahead != 0 && + lookahead != '&' && + lookahead != '<' && + lookahead != '>') ADVANCE(76); + END_STATE(); + case 15: + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') ADVANCE(20); + if (lookahead != 0 && + lookahead != '>') ADVANCE(21); + END_STATE(); + case 16: + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(40); + END_STATE(); + case 17: + if (eof) ADVANCE(18); + if (lookahead == '&') ADVANCE(3); + if (lookahead == '<') ADVANCE(24); + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') SKIP(17); + if (lookahead != 0 && + lookahead != '>') ADVANCE(76); + END_STATE(); + case 18: + ACCEPT_TOKEN(ts_builtin_sym_end); + END_STATE(); + case 19: + ACCEPT_TOKEN(anon_sym_LT_BANG); + END_STATE(); + case 20: + ACCEPT_TOKEN(aux_sym_doctype_token1); + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') ADVANCE(20); + if (lookahead != 0 && + lookahead != '>') ADVANCE(21); + END_STATE(); + case 21: + ACCEPT_TOKEN(aux_sym_doctype_token1); + if (lookahead != 0 && + lookahead != '>') ADVANCE(21); + END_STATE(); + case 22: + ACCEPT_TOKEN(anon_sym_GT); + END_STATE(); + case 23: + ACCEPT_TOKEN(sym__doctype); + END_STATE(); + case 24: + ACCEPT_TOKEN(anon_sym_LT); + if (lookahead == '!') ADVANCE(19); + if (lookahead == '/') ADVANCE(26); + END_STATE(); + case 25: + ACCEPT_TOKEN(anon_sym_SLASH_GT); + END_STATE(); + case 26: + ACCEPT_TOKEN(anon_sym_LT_SLASH); + END_STATE(); + case 27: + ACCEPT_TOKEN(anon_sym_EQ); + END_STATE(); + case 28: + ACCEPT_TOKEN(sym_attribute_name); + if (lookahead != 0 && + (lookahead < '\t' || '\r' < lookahead) && + lookahead != ' ' && + lookahead != '"' && + lookahead != '\'' && + lookahead != '/' && + (lookahead < '<' || '>' < lookahead)) ADVANCE(28); + END_STATE(); + case 29: + ACCEPT_TOKEN(sym_attribute_value); + if (lookahead != 0 && + (lookahead < '\t' || '\r' < lookahead) && + lookahead != ' ' && + lookahead != '"' && + lookahead != '\'' && + (lookahead < '<' || '>' < lookahead)) ADVANCE(29); + END_STATE(); + case 30: + ACCEPT_TOKEN(sym_entity); + END_STATE(); + case 31: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + END_STATE(); + case 32: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(31); + END_STATE(); + case 33: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(32); + END_STATE(); + case 34: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(33); + END_STATE(); + case 35: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('0' <= lookahead && lookahead <= '9')) ADVANCE(34); + END_STATE(); + case 36: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(31); + END_STATE(); + case 37: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(36); + END_STATE(); + case 38: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(37); + END_STATE(); + case 39: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(38); + END_STATE(); + case 40: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('0' <= lookahead && lookahead <= '9') || + ('A' <= lookahead && lookahead <= 'F') || + ('a' <= lookahead && lookahead <= 'f')) ADVANCE(39); + END_STATE(); + case 41: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(31); + END_STATE(); + case 42: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(41); + END_STATE(); + case 43: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(42); + END_STATE(); + case 44: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(43); + END_STATE(); + case 45: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(44); + END_STATE(); + case 46: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(45); + END_STATE(); + case 47: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(46); + END_STATE(); + case 48: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(47); + END_STATE(); + case 49: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(48); + END_STATE(); + case 50: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(49); + END_STATE(); + case 51: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(50); + END_STATE(); + case 52: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(51); + END_STATE(); + case 53: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(52); + END_STATE(); + case 54: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(53); + END_STATE(); + case 55: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(54); + END_STATE(); + case 56: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(55); + END_STATE(); + case 57: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(56); + END_STATE(); + case 58: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(57); + END_STATE(); + case 59: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(58); + END_STATE(); + case 60: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(59); + END_STATE(); + case 61: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(60); + END_STATE(); + case 62: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(61); + END_STATE(); + case 63: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(62); + END_STATE(); + case 64: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(63); + END_STATE(); + case 65: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(64); + END_STATE(); + case 66: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(65); + END_STATE(); + case 67: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(66); + END_STATE(); + case 68: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(67); + END_STATE(); + case 69: + ACCEPT_TOKEN(sym_entity); + if (lookahead == ';') ADVANCE(30); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) ADVANCE(68); + END_STATE(); + case 70: + ACCEPT_TOKEN(anon_sym_SQUOTE); + END_STATE(); + case 71: + ACCEPT_TOKEN(aux_sym_quoted_attribute_value_token1); + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') ADVANCE(71); + if (lookahead != 0 && + lookahead != '\'') ADVANCE(72); + END_STATE(); + case 72: + ACCEPT_TOKEN(aux_sym_quoted_attribute_value_token1); + if (lookahead != 0 && + lookahead != '\'') ADVANCE(72); + END_STATE(); + case 73: + ACCEPT_TOKEN(anon_sym_DQUOTE); + END_STATE(); + case 74: + ACCEPT_TOKEN(aux_sym_quoted_attribute_value_token2); + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') ADVANCE(74); + if (lookahead != 0 && + lookahead != '"') ADVANCE(75); + END_STATE(); + case 75: + ACCEPT_TOKEN(aux_sym_quoted_attribute_value_token2); + if (lookahead != 0 && + lookahead != '"') ADVANCE(75); + END_STATE(); + case 76: + ACCEPT_TOKEN(sym_text); + if (('\t' <= lookahead && lookahead <= '\r') || + lookahead == ' ') ADVANCE(14); + if (lookahead != 0 && + lookahead != '&' && + lookahead != '<' && + lookahead != '>') ADVANCE(76); + END_STATE(); + default: + return false; + } +} + +static const TSLexMode ts_lex_modes[STATE_COUNT] = { + [0] = {.lex_state = 0, .external_lex_state = 1}, + [1] = {.lex_state = 17, .external_lex_state = 2}, + [2] = {.lex_state = 17, .external_lex_state = 3}, + [3] = {.lex_state = 17, .external_lex_state = 3}, + [4] = {.lex_state = 17, .external_lex_state = 3}, + [5] = {.lex_state = 17, .external_lex_state = 3}, + [6] = {.lex_state = 17, .external_lex_state = 3}, + [7] = {.lex_state = 17, .external_lex_state = 2}, + [8] = {.lex_state = 17, .external_lex_state = 2}, + [9] = {.lex_state = 17, .external_lex_state = 3}, + [10] = {.lex_state = 17, .external_lex_state = 2}, + [11] = {.lex_state = 17, .external_lex_state = 3}, + [12] = {.lex_state = 17, .external_lex_state = 2}, + [13] = {.lex_state = 17, .external_lex_state = 2}, + [14] = {.lex_state = 17, .external_lex_state = 2}, + [15] = {.lex_state = 17, .external_lex_state = 2}, + [16] = {.lex_state = 17, .external_lex_state = 2}, + [17] = {.lex_state = 17, .external_lex_state = 2}, + [18] = {.lex_state = 17, .external_lex_state = 3}, + [19] = {.lex_state = 17, .external_lex_state = 2}, + [20] = {.lex_state = 17, .external_lex_state = 2}, + [21] = {.lex_state = 17, .external_lex_state = 3}, + [22] = {.lex_state = 17, .external_lex_state = 3}, + [23] = {.lex_state = 17, .external_lex_state = 3}, + [24] = {.lex_state = 17, .external_lex_state = 3}, + [25] = {.lex_state = 17, .external_lex_state = 3}, + [26] = {.lex_state = 17, .external_lex_state = 3}, + [27] = {.lex_state = 17, .external_lex_state = 3}, + [28] = {.lex_state = 17, .external_lex_state = 3}, + [29] = {.lex_state = 17, .external_lex_state = 3}, + [30] = {.lex_state = 17, .external_lex_state = 3}, + [31] = {.lex_state = 17, .external_lex_state = 3}, + [32] = {.lex_state = 17, .external_lex_state = 2}, + [33] = {.lex_state = 17, .external_lex_state = 2}, + [34] = {.lex_state = 17, .external_lex_state = 2}, + [35] = {.lex_state = 5, .external_lex_state = 4}, + [36] = {.lex_state = 5, .external_lex_state = 4}, + [37] = {.lex_state = 5, .external_lex_state = 4}, + [38] = {.lex_state = 5, .external_lex_state = 4}, + [39] = {.lex_state = 5, .external_lex_state = 4}, + [40] = {.lex_state = 5, .external_lex_state = 2}, + [41] = {.lex_state = 5, .external_lex_state = 2}, + [42] = {.lex_state = 5, .external_lex_state = 2}, + [43] = {.lex_state = 5, .external_lex_state = 2}, + [44] = {.lex_state = 5, .external_lex_state = 2}, + [45] = {.lex_state = 5, .external_lex_state = 4}, + [46] = {.lex_state = 1, .external_lex_state = 2}, + [47] = {.lex_state = 1, .external_lex_state = 2}, + [48] = {.lex_state = 0, .external_lex_state = 5}, + [49] = {.lex_state = 5, .external_lex_state = 4}, + [50] = {.lex_state = 5, .external_lex_state = 4}, + [51] = {.lex_state = 5, .external_lex_state = 2}, + [52] = {.lex_state = 0, .external_lex_state = 6}, + [53] = {.lex_state = 0, .external_lex_state = 5}, + [54] = {.lex_state = 0, .external_lex_state = 5}, + [55] = {.lex_state = 0, .external_lex_state = 5}, + [56] = {.lex_state = 5, .external_lex_state = 4}, + [57] = {.lex_state = 0, .external_lex_state = 6}, + [58] = {.lex_state = 5, .external_lex_state = 2}, + [59] = {.lex_state = 0, .external_lex_state = 7}, + [60] = {.lex_state = 0, .external_lex_state = 2}, + [61] = {.lex_state = 0, .external_lex_state = 5}, + [62] = {.lex_state = 2, .external_lex_state = 2}, + [63] = {.lex_state = 5, .external_lex_state = 2}, + [64] = {.lex_state = 5, .external_lex_state = 2}, + [65] = {.lex_state = 0, .external_lex_state = 5}, + [66] = {.lex_state = 0, .external_lex_state = 5}, + [67] = {.lex_state = 0, .external_lex_state = 5}, + [68] = {.lex_state = 0, .external_lex_state = 7}, + [69] = {.lex_state = 0, .external_lex_state = 2}, + [70] = {.lex_state = 0, .external_lex_state = 2}, + [71] = {.lex_state = 0, .external_lex_state = 2}, + [72] = {.lex_state = 4, .external_lex_state = 2}, + [73] = {.lex_state = 4, .external_lex_state = 2}, + [74] = {.lex_state = 2, .external_lex_state = 2}, + [75] = {.lex_state = 0, .external_lex_state = 2}, + [76] = {.lex_state = 0, .external_lex_state = 2}, + [77] = {.lex_state = 0, .external_lex_state = 2}, + [78] = {.lex_state = 0, .external_lex_state = 2}, + [79] = {.lex_state = 0, .external_lex_state = 2}, + [80] = {.lex_state = 0, .external_lex_state = 2}, + [81] = {.lex_state = 15, .external_lex_state = 2}, + [82] = {.lex_state = 0, .external_lex_state = 2}, + [83] = {.lex_state = 0, .external_lex_state = 2}, + [84] = {.lex_state = 0, .external_lex_state = 2}, + [85] = {.lex_state = 0, .external_lex_state = 8}, + [86] = {.lex_state = 0, .external_lex_state = 2}, + [87] = {.lex_state = 0, .external_lex_state = 2}, + [88] = {.lex_state = 0, .external_lex_state = 9}, + [89] = {.lex_state = 0, .external_lex_state = 8}, + [90] = {.lex_state = 15, .external_lex_state = 2}, + [91] = {.lex_state = 0, .external_lex_state = 2}, + [92] = {.lex_state = 0, .external_lex_state = 9}, + [93] = {.lex_state = 0, .external_lex_state = 2}, +}; + +static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = { + [0] = { + [ts_builtin_sym_end] = ACTIONS(1), + [anon_sym_LT_BANG] = ACTIONS(1), + [anon_sym_GT] = ACTIONS(1), + [sym__doctype] = ACTIONS(1), + [anon_sym_LT] = ACTIONS(1), + [anon_sym_SLASH_GT] = ACTIONS(1), + [anon_sym_LT_SLASH] = ACTIONS(1), + [anon_sym_EQ] = ACTIONS(1), + [sym_entity] = ACTIONS(1), + [anon_sym_SQUOTE] = ACTIONS(1), + [anon_sym_DQUOTE] = ACTIONS(1), + [sym__start_tag_name] = ACTIONS(1), + [sym__script_start_tag_name] = ACTIONS(1), + [sym__style_start_tag_name] = ACTIONS(1), + [sym__end_tag_name] = ACTIONS(1), + [sym_erroneous_end_tag_name] = ACTIONS(1), + [sym__implicit_end_tag] = ACTIONS(1), + [sym_raw_text] = ACTIONS(1), + [sym_comment] = ACTIONS(3), + }, + [1] = { + [sym_document] = STATE(83), + [sym_doctype] = STATE(7), + [sym__node] = STATE(7), + [sym_element] = STATE(7), + [sym_script_element] = STATE(7), + [sym_style_element] = STATE(7), + [sym_start_tag] = STATE(2), + [sym_script_start_tag] = STATE(48), + [sym_style_start_tag] = STATE(53), + [sym_self_closing_tag] = STATE(33), + [sym_erroneous_end_tag] = STATE(7), + [aux_sym_document_repeat1] = STATE(7), + [ts_builtin_sym_end] = ACTIONS(5), + [anon_sym_LT_BANG] = ACTIONS(7), + [anon_sym_LT] = ACTIONS(9), + [anon_sym_LT_SLASH] = ACTIONS(11), + [sym_entity] = ACTIONS(13), + [sym_text] = ACTIONS(13), + [sym_comment] = ACTIONS(3), + }, +}; + +static const uint16_t ts_small_parse_table[] = { + [0] = 12, + ACTIONS(3), 1, + sym_comment, + ACTIONS(15), 1, + anon_sym_LT_BANG, + ACTIONS(17), 1, + anon_sym_LT, + ACTIONS(19), 1, + anon_sym_LT_SLASH, + ACTIONS(23), 1, + sym__implicit_end_tag, + STATE(5), 1, + sym_start_tag, + STATE(21), 1, + sym_self_closing_tag, + STATE(32), 1, + sym_end_tag, + STATE(54), 1, + sym_script_start_tag, + STATE(55), 1, + sym_style_start_tag, + ACTIONS(21), 2, + sym_entity, + sym_text, + STATE(3), 7, + sym_doctype, + sym__node, + sym_element, + sym_script_element, + sym_style_element, + sym_erroneous_end_tag, + aux_sym_document_repeat1, + [44] = 12, + ACTIONS(3), 1, + sym_comment, + ACTIONS(15), 1, + anon_sym_LT_BANG, + ACTIONS(17), 1, + anon_sym_LT, + ACTIONS(19), 1, + anon_sym_LT_SLASH, + ACTIONS(27), 1, + sym__implicit_end_tag, + STATE(5), 1, + sym_start_tag, + STATE(14), 1, + sym_end_tag, + STATE(21), 1, + sym_self_closing_tag, + STATE(54), 1, + sym_script_start_tag, + STATE(55), 1, + sym_style_start_tag, + ACTIONS(25), 2, + sym_entity, + sym_text, + STATE(6), 7, + sym_doctype, + sym__node, + sym_element, + sym_script_element, + sym_style_element, + sym_erroneous_end_tag, + aux_sym_document_repeat1, + [88] = 12, + ACTIONS(3), 1, + sym_comment, + ACTIONS(15), 1, + anon_sym_LT_BANG, + ACTIONS(17), 1, + anon_sym_LT, + ACTIONS(29), 1, + anon_sym_LT_SLASH, + ACTIONS(31), 1, + sym__implicit_end_tag, + STATE(5), 1, + sym_start_tag, + STATE(21), 1, + sym_self_closing_tag, + STATE(27), 1, + sym_end_tag, + STATE(54), 1, + sym_script_start_tag, + STATE(55), 1, + sym_style_start_tag, + ACTIONS(25), 2, + sym_entity, + sym_text, + STATE(6), 7, + sym_doctype, + sym__node, + sym_element, + sym_script_element, + sym_style_element, + sym_erroneous_end_tag, + aux_sym_document_repeat1, + [132] = 12, + ACTIONS(3), 1, + sym_comment, + ACTIONS(15), 1, + anon_sym_LT_BANG, + ACTIONS(17), 1, + anon_sym_LT, + ACTIONS(29), 1, + anon_sym_LT_SLASH, + ACTIONS(35), 1, + sym__implicit_end_tag, + STATE(5), 1, + sym_start_tag, + STATE(21), 1, + sym_self_closing_tag, + STATE(22), 1, + sym_end_tag, + STATE(54), 1, + sym_script_start_tag, + STATE(55), 1, + sym_style_start_tag, + ACTIONS(33), 2, + sym_entity, + sym_text, + STATE(4), 7, + sym_doctype, + sym__node, + sym_element, + sym_script_element, + sym_style_element, + sym_erroneous_end_tag, + aux_sym_document_repeat1, + [176] = 11, + ACTIONS(3), 1, + sym_comment, + ACTIONS(37), 1, + anon_sym_LT_BANG, + ACTIONS(40), 1, + anon_sym_LT, + ACTIONS(43), 1, + anon_sym_LT_SLASH, + ACTIONS(49), 1, + sym__implicit_end_tag, + STATE(5), 1, + sym_start_tag, + STATE(21), 1, + sym_self_closing_tag, + STATE(54), 1, + sym_script_start_tag, + STATE(55), 1, + sym_style_start_tag, + ACTIONS(46), 2, + sym_entity, + sym_text, + STATE(6), 7, + sym_doctype, + sym__node, + sym_element, + sym_script_element, + sym_style_element, + sym_erroneous_end_tag, + aux_sym_document_repeat1, + [217] = 11, + ACTIONS(3), 1, + sym_comment, + ACTIONS(7), 1, + anon_sym_LT_BANG, + ACTIONS(9), 1, + anon_sym_LT, + ACTIONS(11), 1, + anon_sym_LT_SLASH, + ACTIONS(51), 1, + ts_builtin_sym_end, + STATE(2), 1, + sym_start_tag, + STATE(33), 1, + sym_self_closing_tag, + STATE(48), 1, + sym_script_start_tag, + STATE(53), 1, + sym_style_start_tag, + ACTIONS(53), 2, + sym_entity, + sym_text, + STATE(8), 7, + sym_doctype, + sym__node, + sym_element, + sym_script_element, + sym_style_element, + sym_erroneous_end_tag, + aux_sym_document_repeat1, + [258] = 11, + ACTIONS(3), 1, + sym_comment, + ACTIONS(49), 1, + ts_builtin_sym_end, + ACTIONS(55), 1, + anon_sym_LT_BANG, + ACTIONS(58), 1, + anon_sym_LT, + ACTIONS(61), 1, + anon_sym_LT_SLASH, + STATE(2), 1, + sym_start_tag, + STATE(33), 1, + sym_self_closing_tag, + STATE(48), 1, + sym_script_start_tag, + STATE(53), 1, + sym_style_start_tag, + ACTIONS(64), 2, + sym_entity, + sym_text, + STATE(8), 7, + sym_doctype, + sym__node, + sym_element, + sym_script_element, + sym_style_element, + sym_erroneous_end_tag, + aux_sym_document_repeat1, + [299] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(69), 1, + anon_sym_LT, + ACTIONS(67), 5, + sym__implicit_end_tag, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [313] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(73), 1, + anon_sym_LT, + ACTIONS(71), 5, + ts_builtin_sym_end, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [327] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(77), 1, + anon_sym_LT, + ACTIONS(75), 5, + sym__implicit_end_tag, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [341] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(81), 1, + anon_sym_LT, + ACTIONS(79), 5, + ts_builtin_sym_end, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [355] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(85), 1, + anon_sym_LT, + ACTIONS(83), 5, + ts_builtin_sym_end, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [369] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(89), 1, + anon_sym_LT, + ACTIONS(87), 5, + ts_builtin_sym_end, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [383] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(93), 1, + anon_sym_LT, + ACTIONS(91), 5, + ts_builtin_sym_end, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [397] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(97), 1, + anon_sym_LT, + ACTIONS(95), 5, + ts_builtin_sym_end, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [411] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(101), 1, + anon_sym_LT, + ACTIONS(99), 5, + ts_builtin_sym_end, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [425] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(105), 1, + anon_sym_LT, + ACTIONS(103), 5, + sym__implicit_end_tag, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [439] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(109), 1, + anon_sym_LT, + ACTIONS(107), 5, + ts_builtin_sym_end, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [453] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(69), 1, + anon_sym_LT, + ACTIONS(67), 5, + ts_builtin_sym_end, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [467] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(113), 1, + anon_sym_LT, + ACTIONS(111), 5, + sym__implicit_end_tag, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [481] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(117), 1, + anon_sym_LT, + ACTIONS(115), 5, + sym__implicit_end_tag, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [495] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(109), 1, + anon_sym_LT, + ACTIONS(107), 5, + sym__implicit_end_tag, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [509] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(73), 1, + anon_sym_LT, + ACTIONS(71), 5, + sym__implicit_end_tag, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [523] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(81), 1, + anon_sym_LT, + ACTIONS(79), 5, + sym__implicit_end_tag, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [537] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(85), 1, + anon_sym_LT, + ACTIONS(83), 5, + sym__implicit_end_tag, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [551] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(89), 1, + anon_sym_LT, + ACTIONS(87), 5, + sym__implicit_end_tag, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [565] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(93), 1, + anon_sym_LT, + ACTIONS(91), 5, + sym__implicit_end_tag, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [579] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(97), 1, + anon_sym_LT, + ACTIONS(95), 5, + sym__implicit_end_tag, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [593] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(101), 1, + anon_sym_LT, + ACTIONS(99), 5, + sym__implicit_end_tag, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [607] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(121), 1, + anon_sym_LT, + ACTIONS(119), 5, + sym__implicit_end_tag, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [621] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(117), 1, + anon_sym_LT, + ACTIONS(115), 5, + ts_builtin_sym_end, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [635] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(113), 1, + anon_sym_LT, + ACTIONS(111), 5, + ts_builtin_sym_end, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [649] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(121), 1, + anon_sym_LT, + ACTIONS(119), 5, + ts_builtin_sym_end, + anon_sym_LT_BANG, + anon_sym_LT_SLASH, + sym_entity, + sym_text, + [663] = 4, + ACTIONS(3), 1, + sym_comment, + ACTIONS(125), 1, + sym_attribute_name, + ACTIONS(123), 2, + anon_sym_GT, + anon_sym_SLASH_GT, + STATE(35), 2, + sym_attribute, + aux_sym_start_tag_repeat1, + [678] = 5, + ACTIONS(3), 1, + sym_comment, + ACTIONS(128), 1, + anon_sym_GT, + ACTIONS(130), 1, + anon_sym_SLASH_GT, + ACTIONS(132), 1, + sym_attribute_name, + STATE(35), 2, + sym_attribute, + aux_sym_start_tag_repeat1, + [695] = 5, + ACTIONS(3), 1, + sym_comment, + ACTIONS(132), 1, + sym_attribute_name, + ACTIONS(134), 1, + anon_sym_GT, + ACTIONS(136), 1, + anon_sym_SLASH_GT, + STATE(38), 2, + sym_attribute, + aux_sym_start_tag_repeat1, + [712] = 5, + ACTIONS(3), 1, + sym_comment, + ACTIONS(128), 1, + anon_sym_GT, + ACTIONS(132), 1, + sym_attribute_name, + ACTIONS(138), 1, + anon_sym_SLASH_GT, + STATE(35), 2, + sym_attribute, + aux_sym_start_tag_repeat1, + [729] = 5, + ACTIONS(3), 1, + sym_comment, + ACTIONS(132), 1, + sym_attribute_name, + ACTIONS(134), 1, + anon_sym_GT, + ACTIONS(140), 1, + anon_sym_SLASH_GT, + STATE(36), 2, + sym_attribute, + aux_sym_start_tag_repeat1, + [746] = 4, + ACTIONS(3), 1, + sym_comment, + ACTIONS(142), 1, + anon_sym_GT, + ACTIONS(144), 1, + sym_attribute_name, + STATE(41), 2, + sym_attribute, + aux_sym_start_tag_repeat1, + [760] = 4, + ACTIONS(3), 1, + sym_comment, + ACTIONS(123), 1, + anon_sym_GT, + ACTIONS(146), 1, + sym_attribute_name, + STATE(41), 2, + sym_attribute, + aux_sym_start_tag_repeat1, + [774] = 4, + ACTIONS(3), 1, + sym_comment, + ACTIONS(144), 1, + sym_attribute_name, + ACTIONS(149), 1, + anon_sym_GT, + STATE(40), 2, + sym_attribute, + aux_sym_start_tag_repeat1, + [788] = 4, + ACTIONS(3), 1, + sym_comment, + ACTIONS(144), 1, + sym_attribute_name, + ACTIONS(151), 1, + anon_sym_GT, + STATE(44), 2, + sym_attribute, + aux_sym_start_tag_repeat1, + [802] = 4, + ACTIONS(3), 1, + sym_comment, + ACTIONS(144), 1, + sym_attribute_name, + ACTIONS(153), 1, + anon_sym_GT, + STATE(41), 2, + sym_attribute, + aux_sym_start_tag_repeat1, + [816] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(157), 1, + anon_sym_EQ, + ACTIONS(155), 3, + anon_sym_GT, + anon_sym_SLASH_GT, + sym_attribute_name, + [828] = 5, + ACTIONS(3), 1, + sym_comment, + ACTIONS(159), 1, + sym_attribute_value, + ACTIONS(161), 1, + anon_sym_SQUOTE, + ACTIONS(163), 1, + anon_sym_DQUOTE, + STATE(56), 1, + sym_quoted_attribute_value, + [844] = 5, + ACTIONS(3), 1, + sym_comment, + ACTIONS(165), 1, + sym_attribute_value, + ACTIONS(167), 1, + anon_sym_SQUOTE, + ACTIONS(169), 1, + anon_sym_DQUOTE, + STATE(58), 1, + sym_quoted_attribute_value, + [860] = 4, + ACTIONS(3), 1, + sym_comment, + ACTIONS(171), 1, + anon_sym_LT_SLASH, + ACTIONS(173), 1, + sym_raw_text, + STATE(19), 1, + sym_end_tag, + [873] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(175), 3, + anon_sym_GT, + anon_sym_SLASH_GT, + sym_attribute_name, + [882] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(177), 3, + anon_sym_GT, + anon_sym_SLASH_GT, + sym_attribute_name, + [891] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(179), 1, + anon_sym_EQ, + ACTIONS(155), 2, + anon_sym_GT, + sym_attribute_name, + [902] = 4, + ACTIONS(3), 1, + sym_comment, + ACTIONS(181), 1, + sym__start_tag_name, + ACTIONS(183), 1, + sym__script_start_tag_name, + ACTIONS(185), 1, + sym__style_start_tag_name, + [915] = 4, + ACTIONS(3), 1, + sym_comment, + ACTIONS(171), 1, + anon_sym_LT_SLASH, + ACTIONS(187), 1, + sym_raw_text, + STATE(10), 1, + sym_end_tag, + [928] = 4, + ACTIONS(3), 1, + sym_comment, + ACTIONS(189), 1, + anon_sym_LT_SLASH, + ACTIONS(191), 1, + sym_raw_text, + STATE(23), 1, + sym_end_tag, + [941] = 4, + ACTIONS(3), 1, + sym_comment, + ACTIONS(189), 1, + anon_sym_LT_SLASH, + ACTIONS(193), 1, + sym_raw_text, + STATE(24), 1, + sym_end_tag, + [954] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(195), 3, + anon_sym_GT, + anon_sym_SLASH_GT, + sym_attribute_name, + [963] = 4, + ACTIONS(3), 1, + sym_comment, + ACTIONS(183), 1, + sym__script_start_tag_name, + ACTIONS(185), 1, + sym__style_start_tag_name, + ACTIONS(197), 1, + sym__start_tag_name, + [976] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(195), 2, + anon_sym_GT, + sym_attribute_name, + [984] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(199), 1, + sym__end_tag_name, + ACTIONS(201), 1, + sym_erroneous_end_tag_name, + [994] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(171), 1, + anon_sym_LT_SLASH, + STATE(16), 1, + sym_end_tag, + [1004] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(203), 2, + sym_raw_text, + anon_sym_LT_SLASH, + [1012] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(205), 1, + anon_sym_DQUOTE, + ACTIONS(207), 1, + aux_sym_quoted_attribute_value_token2, + [1022] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(175), 2, + anon_sym_GT, + sym_attribute_name, + [1030] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(177), 2, + anon_sym_GT, + sym_attribute_name, + [1038] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(209), 2, + sym_raw_text, + anon_sym_LT_SLASH, + [1046] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(211), 2, + sym_raw_text, + anon_sym_LT_SLASH, + [1054] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(213), 2, + sym_raw_text, + anon_sym_LT_SLASH, + [1062] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(201), 1, + sym_erroneous_end_tag_name, + ACTIONS(215), 1, + sym__end_tag_name, + [1072] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(189), 1, + anon_sym_LT_SLASH, + STATE(28), 1, + sym_end_tag, + [1082] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(189), 1, + anon_sym_LT_SLASH, + STATE(29), 1, + sym_end_tag, + [1092] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(171), 1, + anon_sym_LT_SLASH, + STATE(15), 1, + sym_end_tag, + [1102] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(205), 1, + anon_sym_SQUOTE, + ACTIONS(217), 1, + aux_sym_quoted_attribute_value_token1, + [1112] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(219), 1, + anon_sym_SQUOTE, + ACTIONS(221), 1, + aux_sym_quoted_attribute_value_token1, + [1122] = 3, + ACTIONS(3), 1, + sym_comment, + ACTIONS(219), 1, + anon_sym_DQUOTE, + ACTIONS(223), 1, + aux_sym_quoted_attribute_value_token2, + [1132] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(225), 1, + anon_sym_DQUOTE, + [1139] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(227), 1, + anon_sym_DQUOTE, + [1146] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(229), 1, + sym__doctype, + [1153] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(227), 1, + anon_sym_SQUOTE, + [1160] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(231), 1, + anon_sym_GT, + [1167] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(233), 1, + anon_sym_GT, + [1174] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(235), 1, + aux_sym_doctype_token1, + [1181] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(237), 1, + anon_sym_GT, + [1188] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(239), 1, + ts_builtin_sym_end, + [1195] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(241), 1, + anon_sym_GT, + [1202] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(243), 1, + sym_erroneous_end_tag_name, + [1209] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(225), 1, + anon_sym_SQUOTE, + [1216] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(245), 1, + anon_sym_GT, + [1223] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(215), 1, + sym__end_tag_name, + [1230] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(201), 1, + sym_erroneous_end_tag_name, + [1237] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(247), 1, + aux_sym_doctype_token1, + [1244] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(249), 1, + anon_sym_GT, + [1251] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(199), 1, + sym__end_tag_name, + [1258] = 2, + ACTIONS(3), 1, + sym_comment, + ACTIONS(251), 1, + sym__doctype, +}; + +static const uint32_t ts_small_parse_table_map[] = { + [SMALL_STATE(2)] = 0, + [SMALL_STATE(3)] = 44, + [SMALL_STATE(4)] = 88, + [SMALL_STATE(5)] = 132, + [SMALL_STATE(6)] = 176, + [SMALL_STATE(7)] = 217, + [SMALL_STATE(8)] = 258, + [SMALL_STATE(9)] = 299, + [SMALL_STATE(10)] = 313, + [SMALL_STATE(11)] = 327, + [SMALL_STATE(12)] = 341, + [SMALL_STATE(13)] = 355, + [SMALL_STATE(14)] = 369, + [SMALL_STATE(15)] = 383, + [SMALL_STATE(16)] = 397, + [SMALL_STATE(17)] = 411, + [SMALL_STATE(18)] = 425, + [SMALL_STATE(19)] = 439, + [SMALL_STATE(20)] = 453, + [SMALL_STATE(21)] = 467, + [SMALL_STATE(22)] = 481, + [SMALL_STATE(23)] = 495, + [SMALL_STATE(24)] = 509, + [SMALL_STATE(25)] = 523, + [SMALL_STATE(26)] = 537, + [SMALL_STATE(27)] = 551, + [SMALL_STATE(28)] = 565, + [SMALL_STATE(29)] = 579, + [SMALL_STATE(30)] = 593, + [SMALL_STATE(31)] = 607, + [SMALL_STATE(32)] = 621, + [SMALL_STATE(33)] = 635, + [SMALL_STATE(34)] = 649, + [SMALL_STATE(35)] = 663, + [SMALL_STATE(36)] = 678, + [SMALL_STATE(37)] = 695, + [SMALL_STATE(38)] = 712, + [SMALL_STATE(39)] = 729, + [SMALL_STATE(40)] = 746, + [SMALL_STATE(41)] = 760, + [SMALL_STATE(42)] = 774, + [SMALL_STATE(43)] = 788, + [SMALL_STATE(44)] = 802, + [SMALL_STATE(45)] = 816, + [SMALL_STATE(46)] = 828, + [SMALL_STATE(47)] = 844, + [SMALL_STATE(48)] = 860, + [SMALL_STATE(49)] = 873, + [SMALL_STATE(50)] = 882, + [SMALL_STATE(51)] = 891, + [SMALL_STATE(52)] = 902, + [SMALL_STATE(53)] = 915, + [SMALL_STATE(54)] = 928, + [SMALL_STATE(55)] = 941, + [SMALL_STATE(56)] = 954, + [SMALL_STATE(57)] = 963, + [SMALL_STATE(58)] = 976, + [SMALL_STATE(59)] = 984, + [SMALL_STATE(60)] = 994, + [SMALL_STATE(61)] = 1004, + [SMALL_STATE(62)] = 1012, + [SMALL_STATE(63)] = 1022, + [SMALL_STATE(64)] = 1030, + [SMALL_STATE(65)] = 1038, + [SMALL_STATE(66)] = 1046, + [SMALL_STATE(67)] = 1054, + [SMALL_STATE(68)] = 1062, + [SMALL_STATE(69)] = 1072, + [SMALL_STATE(70)] = 1082, + [SMALL_STATE(71)] = 1092, + [SMALL_STATE(72)] = 1102, + [SMALL_STATE(73)] = 1112, + [SMALL_STATE(74)] = 1122, + [SMALL_STATE(75)] = 1132, + [SMALL_STATE(76)] = 1139, + [SMALL_STATE(77)] = 1146, + [SMALL_STATE(78)] = 1153, + [SMALL_STATE(79)] = 1160, + [SMALL_STATE(80)] = 1167, + [SMALL_STATE(81)] = 1174, + [SMALL_STATE(82)] = 1181, + [SMALL_STATE(83)] = 1188, + [SMALL_STATE(84)] = 1195, + [SMALL_STATE(85)] = 1202, + [SMALL_STATE(86)] = 1209, + [SMALL_STATE(87)] = 1216, + [SMALL_STATE(88)] = 1223, + [SMALL_STATE(89)] = 1230, + [SMALL_STATE(90)] = 1237, + [SMALL_STATE(91)] = 1244, + [SMALL_STATE(92)] = 1251, + [SMALL_STATE(93)] = 1258, +}; + +static const TSParseActionEntry ts_parse_actions[] = { + [0] = {.entry = {.count = 0, .reusable = false}}, + [1] = {.entry = {.count = 1, .reusable = false}}, RECOVER(), + [3] = {.entry = {.count = 1, .reusable = true}}, SHIFT_EXTRA(), + [5] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_document, 0, 0, 0), + [7] = {.entry = {.count = 1, .reusable = true}}, SHIFT(77), + [9] = {.entry = {.count = 1, .reusable = false}}, SHIFT(52), + [11] = {.entry = {.count = 1, .reusable = true}}, SHIFT(85), + [13] = {.entry = {.count = 1, .reusable = true}}, SHIFT(7), + [15] = {.entry = {.count = 1, .reusable = true}}, SHIFT(93), + [17] = {.entry = {.count = 1, .reusable = false}}, SHIFT(57), + [19] = {.entry = {.count = 1, .reusable = true}}, SHIFT(68), + [21] = {.entry = {.count = 1, .reusable = true}}, SHIFT(3), + [23] = {.entry = {.count = 1, .reusable = true}}, SHIFT(32), + [25] = {.entry = {.count = 1, .reusable = true}}, SHIFT(6), + [27] = {.entry = {.count = 1, .reusable = true}}, SHIFT(14), + [29] = {.entry = {.count = 1, .reusable = true}}, SHIFT(59), + [31] = {.entry = {.count = 1, .reusable = true}}, SHIFT(27), + [33] = {.entry = {.count = 1, .reusable = true}}, SHIFT(4), + [35] = {.entry = {.count = 1, .reusable = true}}, SHIFT(22), + [37] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_document_repeat1, 2, 0, 0), SHIFT_REPEAT(93), + [40] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_document_repeat1, 2, 0, 0), SHIFT_REPEAT(57), + [43] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_document_repeat1, 2, 0, 0), SHIFT_REPEAT(89), + [46] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_document_repeat1, 2, 0, 0), SHIFT_REPEAT(6), + [49] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_document_repeat1, 2, 0, 0), + [51] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_document, 1, 0, 0), + [53] = {.entry = {.count = 1, .reusable = true}}, SHIFT(8), + [55] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_document_repeat1, 2, 0, 0), SHIFT_REPEAT(77), + [58] = {.entry = {.count = 2, .reusable = false}}, REDUCE(aux_sym_document_repeat1, 2, 0, 0), SHIFT_REPEAT(52), + [61] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_document_repeat1, 2, 0, 0), SHIFT_REPEAT(85), + [64] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_document_repeat1, 2, 0, 0), SHIFT_REPEAT(8), + [67] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_end_tag, 3, 0, 0), + [69] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_end_tag, 3, 0, 0), + [71] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_style_element, 2, 0, 0), + [73] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_style_element, 2, 0, 0), + [75] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_start_tag, 3, 0, 0), + [77] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_start_tag, 3, 0, 0), + [79] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_self_closing_tag, 3, 0, 0), + [81] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_self_closing_tag, 3, 0, 0), + [83] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_erroneous_end_tag, 3, 0, 0), + [85] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_erroneous_end_tag, 3, 0, 0), + [87] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_element, 3, 0, 0), + [89] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_element, 3, 0, 0), + [91] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_script_element, 3, 0, 0), + [93] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_script_element, 3, 0, 0), + [95] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_style_element, 3, 0, 0), + [97] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_style_element, 3, 0, 0), + [99] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_doctype, 4, 0, 0), + [101] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_doctype, 4, 0, 0), + [103] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_start_tag, 4, 0, 0), + [105] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_start_tag, 4, 0, 0), + [107] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_script_element, 2, 0, 0), + [109] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_script_element, 2, 0, 0), + [111] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_element, 1, 0, 0), + [113] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_element, 1, 0, 0), + [115] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_element, 2, 0, 0), + [117] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_element, 2, 0, 0), + [119] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_self_closing_tag, 4, 0, 0), + [121] = {.entry = {.count = 1, .reusable = false}}, REDUCE(sym_self_closing_tag, 4, 0, 0), + [123] = {.entry = {.count = 1, .reusable = true}}, REDUCE(aux_sym_start_tag_repeat1, 2, 0, 0), + [125] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_start_tag_repeat1, 2, 0, 0), SHIFT_REPEAT(45), + [128] = {.entry = {.count = 1, .reusable = true}}, SHIFT(18), + [130] = {.entry = {.count = 1, .reusable = true}}, SHIFT(34), + [132] = {.entry = {.count = 1, .reusable = true}}, SHIFT(45), + [134] = {.entry = {.count = 1, .reusable = true}}, SHIFT(11), + [136] = {.entry = {.count = 1, .reusable = true}}, SHIFT(25), + [138] = {.entry = {.count = 1, .reusable = true}}, SHIFT(31), + [140] = {.entry = {.count = 1, .reusable = true}}, SHIFT(12), + [142] = {.entry = {.count = 1, .reusable = true}}, SHIFT(67), + [144] = {.entry = {.count = 1, .reusable = true}}, SHIFT(51), + [146] = {.entry = {.count = 2, .reusable = true}}, REDUCE(aux_sym_start_tag_repeat1, 2, 0, 0), SHIFT_REPEAT(51), + [149] = {.entry = {.count = 1, .reusable = true}}, SHIFT(66), + [151] = {.entry = {.count = 1, .reusable = true}}, SHIFT(61), + [153] = {.entry = {.count = 1, .reusable = true}}, SHIFT(65), + [155] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_attribute, 1, 0, 0), + [157] = {.entry = {.count = 1, .reusable = true}}, SHIFT(46), + [159] = {.entry = {.count = 1, .reusable = true}}, SHIFT(56), + [161] = {.entry = {.count = 1, .reusable = true}}, SHIFT(72), + [163] = {.entry = {.count = 1, .reusable = true}}, SHIFT(62), + [165] = {.entry = {.count = 1, .reusable = true}}, SHIFT(58), + [167] = {.entry = {.count = 1, .reusable = true}}, SHIFT(73), + [169] = {.entry = {.count = 1, .reusable = true}}, SHIFT(74), + [171] = {.entry = {.count = 1, .reusable = true}}, SHIFT(88), + [173] = {.entry = {.count = 1, .reusable = true}}, SHIFT(71), + [175] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_quoted_attribute_value, 2, 0, 0), + [177] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_quoted_attribute_value, 3, 0, 0), + [179] = {.entry = {.count = 1, .reusable = true}}, SHIFT(47), + [181] = {.entry = {.count = 1, .reusable = true}}, SHIFT(39), + [183] = {.entry = {.count = 1, .reusable = true}}, SHIFT(43), + [185] = {.entry = {.count = 1, .reusable = true}}, SHIFT(42), + [187] = {.entry = {.count = 1, .reusable = true}}, SHIFT(60), + [189] = {.entry = {.count = 1, .reusable = true}}, SHIFT(92), + [191] = {.entry = {.count = 1, .reusable = true}}, SHIFT(69), + [193] = {.entry = {.count = 1, .reusable = true}}, SHIFT(70), + [195] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_attribute, 3, 0, 0), + [197] = {.entry = {.count = 1, .reusable = true}}, SHIFT(37), + [199] = {.entry = {.count = 1, .reusable = true}}, SHIFT(82), + [201] = {.entry = {.count = 1, .reusable = true}}, SHIFT(91), + [203] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_script_start_tag, 3, 0, 0), + [205] = {.entry = {.count = 1, .reusable = false}}, SHIFT(49), + [207] = {.entry = {.count = 1, .reusable = true}}, SHIFT(76), + [209] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_script_start_tag, 4, 0, 0), + [211] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_style_start_tag, 3, 0, 0), + [213] = {.entry = {.count = 1, .reusable = true}}, REDUCE(sym_style_start_tag, 4, 0, 0), + [215] = {.entry = {.count = 1, .reusable = true}}, SHIFT(87), + [217] = {.entry = {.count = 1, .reusable = true}}, SHIFT(78), + [219] = {.entry = {.count = 1, .reusable = false}}, SHIFT(63), + [221] = {.entry = {.count = 1, .reusable = true}}, SHIFT(86), + [223] = {.entry = {.count = 1, .reusable = true}}, SHIFT(75), + [225] = {.entry = {.count = 1, .reusable = true}}, SHIFT(64), + [227] = {.entry = {.count = 1, .reusable = true}}, SHIFT(50), + [229] = {.entry = {.count = 1, .reusable = true}}, SHIFT(81), + [231] = {.entry = {.count = 1, .reusable = true}}, SHIFT(17), + [233] = {.entry = {.count = 1, .reusable = true}}, SHIFT(30), + [235] = {.entry = {.count = 1, .reusable = true}}, SHIFT(79), + [237] = {.entry = {.count = 1, .reusable = true}}, SHIFT(9), + [239] = {.entry = {.count = 1, .reusable = true}}, ACCEPT_INPUT(), + [241] = {.entry = {.count = 1, .reusable = true}}, SHIFT(13), + [243] = {.entry = {.count = 1, .reusable = true}}, SHIFT(84), + [245] = {.entry = {.count = 1, .reusable = true}}, SHIFT(20), + [247] = {.entry = {.count = 1, .reusable = true}}, SHIFT(80), + [249] = {.entry = {.count = 1, .reusable = true}}, SHIFT(26), + [251] = {.entry = {.count = 1, .reusable = true}}, SHIFT(90), +}; + +enum ts_external_scanner_symbol_identifiers { + ts_external_token__start_tag_name = 0, + ts_external_token__script_start_tag_name = 1, + ts_external_token__style_start_tag_name = 2, + ts_external_token__end_tag_name = 3, + ts_external_token_erroneous_end_tag_name = 4, + ts_external_token_SLASH_GT = 5, + ts_external_token__implicit_end_tag = 6, + ts_external_token_raw_text = 7, + ts_external_token_comment = 8, +}; + +static const TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = { + [ts_external_token__start_tag_name] = sym__start_tag_name, + [ts_external_token__script_start_tag_name] = sym__script_start_tag_name, + [ts_external_token__style_start_tag_name] = sym__style_start_tag_name, + [ts_external_token__end_tag_name] = sym__end_tag_name, + [ts_external_token_erroneous_end_tag_name] = sym_erroneous_end_tag_name, + [ts_external_token_SLASH_GT] = anon_sym_SLASH_GT, + [ts_external_token__implicit_end_tag] = sym__implicit_end_tag, + [ts_external_token_raw_text] = sym_raw_text, + [ts_external_token_comment] = sym_comment, +}; + +static const bool ts_external_scanner_states[10][EXTERNAL_TOKEN_COUNT] = { + [1] = { + [ts_external_token__start_tag_name] = true, + [ts_external_token__script_start_tag_name] = true, + [ts_external_token__style_start_tag_name] = true, + [ts_external_token__end_tag_name] = true, + [ts_external_token_erroneous_end_tag_name] = true, + [ts_external_token_SLASH_GT] = true, + [ts_external_token__implicit_end_tag] = true, + [ts_external_token_raw_text] = true, + [ts_external_token_comment] = true, + }, + [2] = { + [ts_external_token_comment] = true, + }, + [3] = { + [ts_external_token__implicit_end_tag] = true, + [ts_external_token_comment] = true, + }, + [4] = { + [ts_external_token_SLASH_GT] = true, + [ts_external_token_comment] = true, + }, + [5] = { + [ts_external_token_raw_text] = true, + [ts_external_token_comment] = true, + }, + [6] = { + [ts_external_token__start_tag_name] = true, + [ts_external_token__script_start_tag_name] = true, + [ts_external_token__style_start_tag_name] = true, + [ts_external_token_comment] = true, + }, + [7] = { + [ts_external_token__end_tag_name] = true, + [ts_external_token_erroneous_end_tag_name] = true, + [ts_external_token_comment] = true, + }, + [8] = { + [ts_external_token_erroneous_end_tag_name] = true, + [ts_external_token_comment] = true, + }, + [9] = { + [ts_external_token__end_tag_name] = true, + [ts_external_token_comment] = true, + }, +}; + +#ifdef __cplusplus +extern "C" { +#endif +void *tree_sitter_html_external_scanner_create(void); +void tree_sitter_html_external_scanner_destroy(void *); +bool tree_sitter_html_external_scanner_scan(void *, TSLexer *, const bool *); +unsigned tree_sitter_html_external_scanner_serialize(void *, char *); +void tree_sitter_html_external_scanner_deserialize(void *, const char *, unsigned); + +#ifdef TREE_SITTER_HIDE_SYMBOLS +#define TS_PUBLIC +#elif defined(_WIN32) +#define TS_PUBLIC __declspec(dllexport) +#else +#define TS_PUBLIC __attribute__((visibility("default"))) +#endif + +TS_PUBLIC const TSLanguage *tree_sitter_html(void) { + static const TSLanguage language = { + .version = LANGUAGE_VERSION, + .symbol_count = SYMBOL_COUNT, + .alias_count = ALIAS_COUNT, + .token_count = TOKEN_COUNT, + .external_token_count = EXTERNAL_TOKEN_COUNT, + .state_count = STATE_COUNT, + .large_state_count = LARGE_STATE_COUNT, + .production_id_count = PRODUCTION_ID_COUNT, + .field_count = FIELD_COUNT, + .max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH, + .parse_table = &ts_parse_table[0][0], + .small_parse_table = ts_small_parse_table, + .small_parse_table_map = ts_small_parse_table_map, + .parse_actions = ts_parse_actions, + .symbol_names = ts_symbol_names, + .symbol_metadata = ts_symbol_metadata, + .public_symbol_map = ts_symbol_map, + .alias_map = ts_non_terminal_alias_map, + .alias_sequences = &ts_alias_sequences[0][0], + .lex_modes = ts_lex_modes, + .lex_fn = ts_lex, + .external_scanner = { + &ts_external_scanner_states[0][0], + ts_external_scanner_symbol_map, + tree_sitter_html_external_scanner_create, + tree_sitter_html_external_scanner_destroy, + tree_sitter_html_external_scanner_scan, + tree_sitter_html_external_scanner_serialize, + tree_sitter_html_external_scanner_deserialize, + }, + .primary_state_ids = ts_primary_state_ids, + }; + return &language; +} +#ifdef __cplusplus +} +#endif diff --git a/test-parsers/tree-sitter-html/src/scanner.c b/test-parsers/tree-sitter-html/src/scanner.c new file mode 100644 index 00000000..eecef9ad --- /dev/null +++ b/test-parsers/tree-sitter-html/src/scanner.c @@ -0,0 +1,362 @@ +#include "tag.h" +#include "tree_sitter/parser.h" + +#include + +enum TokenType { + START_TAG_NAME, + SCRIPT_START_TAG_NAME, + STYLE_START_TAG_NAME, + END_TAG_NAME, + ERRONEOUS_END_TAG_NAME, + SELF_CLOSING_TAG_DELIMITER, + IMPLICIT_END_TAG, + RAW_TEXT, + COMMENT, +}; + +typedef struct { + Array(Tag) tags; +} Scanner; + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } + +static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); } + +static unsigned serialize(Scanner *scanner, char *buffer) { + uint16_t tag_count = scanner->tags.size > UINT16_MAX ? UINT16_MAX : scanner->tags.size; + uint16_t serialized_tag_count = 0; + + unsigned size = sizeof(tag_count); + memcpy(&buffer[size], &tag_count, sizeof(tag_count)); + size += sizeof(tag_count); + + for (; serialized_tag_count < tag_count; serialized_tag_count++) { + Tag tag = scanner->tags.contents[serialized_tag_count]; + if (tag.type == CUSTOM) { + unsigned name_length = tag.custom_tag_name.size; + if (name_length > UINT8_MAX) { + name_length = UINT8_MAX; + } + if (size + 2 + name_length >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) { + break; + } + buffer[size++] = (char)tag.type; + buffer[size++] = (char)name_length; + strncpy(&buffer[size], tag.custom_tag_name.contents, name_length); + size += name_length; + } else { + if (size + 1 >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) { + break; + } + buffer[size++] = (char)tag.type; + } + } + + memcpy(&buffer[0], &serialized_tag_count, sizeof(serialized_tag_count)); + return size; +} + +static void deserialize(Scanner *scanner, const char *buffer, unsigned length) { + for (unsigned i = 0; i < scanner->tags.size; i++) { + tag_free(&scanner->tags.contents[i]); + } + array_clear(&scanner->tags); + + if (length > 0) { + unsigned size = 0; + uint16_t tag_count = 0; + uint16_t serialized_tag_count = 0; + + memcpy(&serialized_tag_count, &buffer[size], sizeof(serialized_tag_count)); + size += sizeof(serialized_tag_count); + + memcpy(&tag_count, &buffer[size], sizeof(tag_count)); + size += sizeof(tag_count); + + array_reserve(&scanner->tags, tag_count); + if (tag_count > 0) { + unsigned iter = 0; + for (iter = 0; iter < serialized_tag_count; iter++) { + Tag tag = tag_new(); + tag.type = (TagType)buffer[size++]; + if (tag.type == CUSTOM) { + uint16_t name_length = (uint8_t)buffer[size++]; + array_reserve(&tag.custom_tag_name, name_length); + tag.custom_tag_name.size = name_length; + memcpy(tag.custom_tag_name.contents, &buffer[size], name_length); + size += name_length; + } + array_push(&scanner->tags, tag); + } + // add zero tags if we didn't read enough, this is because the + // buffer had no more room but we held more tags. + for (; iter < tag_count; iter++) { + array_push(&scanner->tags, tag_new()); + } + } + } +} + +static String scan_tag_name(TSLexer *lexer) { + String tag_name = array_new(); + while (iswalnum(lexer->lookahead) || lexer->lookahead == '-' || lexer->lookahead == ':') { + array_push(&tag_name, towupper(lexer->lookahead)); + advance(lexer); + } + return tag_name; +} + +static bool scan_comment(TSLexer *lexer) { + if (lexer->lookahead != '-') { + return false; + } + advance(lexer); + if (lexer->lookahead != '-') { + return false; + } + advance(lexer); + + unsigned dashes = 0; + while (lexer->lookahead) { + switch (lexer->lookahead) { + case '-': + ++dashes; + break; + case '>': + if (dashes >= 2) { + lexer->result_symbol = COMMENT; + advance(lexer); + lexer->mark_end(lexer); + return true; + } + default: + dashes = 0; + } + advance(lexer); + } + return false; +} + +static bool scan_raw_text(Scanner *scanner, TSLexer *lexer) { + if (scanner->tags.size == 0) { + return false; + } + + lexer->mark_end(lexer); + + const char *end_delimiter = array_back(&scanner->tags)->type == SCRIPT ? "lookahead) { + if (towupper(lexer->lookahead) == end_delimiter[delimiter_index]) { + delimiter_index++; + if (delimiter_index == strlen(end_delimiter)) { + break; + } + advance(lexer); + } else { + delimiter_index = 0; + advance(lexer); + lexer->mark_end(lexer); + } + } + + lexer->result_symbol = RAW_TEXT; + return true; +} + +static void pop_tag(Scanner *scanner) { + Tag popped_tag = array_pop(&scanner->tags); + tag_free(&popped_tag); +} + +static bool scan_implicit_end_tag(Scanner *scanner, TSLexer *lexer) { + Tag *parent = scanner->tags.size == 0 ? NULL : array_back(&scanner->tags); + + bool is_closing_tag = false; + if (lexer->lookahead == '/') { + is_closing_tag = true; + advance(lexer); + } else { + if (parent && tag_is_void(parent)) { + pop_tag(scanner); + lexer->result_symbol = IMPLICIT_END_TAG; + return true; + } + } + + String tag_name = scan_tag_name(lexer); + if (tag_name.size == 0 && !lexer->eof(lexer)) { + array_delete(&tag_name); + return false; + } + + Tag next_tag = tag_for_name(tag_name); + + if (is_closing_tag) { + // The tag correctly closes the topmost element on the stack + if (scanner->tags.size > 0 && tag_eq(array_back(&scanner->tags), &next_tag)) { + tag_free(&next_tag); + return false; + } + + // Otherwise, dig deeper and queue implicit end tags (to be nice in + // the case of malformed HTML) + for (unsigned i = scanner->tags.size; i > 0; i--) { + if (scanner->tags.contents[i - 1].type == next_tag.type) { + pop_tag(scanner); + lexer->result_symbol = IMPLICIT_END_TAG; + tag_free(&next_tag); + return true; + } + } + } else if ( + parent && + ( + !tag_can_contain(parent, &next_tag) || + ((parent->type == HTML || parent->type == HEAD || parent->type == BODY) && lexer->eof(lexer)) + ) + ) { + pop_tag(scanner); + lexer->result_symbol = IMPLICIT_END_TAG; + tag_free(&next_tag); + return true; + } + + tag_free(&next_tag); + return false; +} + +static bool scan_start_tag_name(Scanner *scanner, TSLexer *lexer) { + String tag_name = scan_tag_name(lexer); + if (tag_name.size == 0) { + array_delete(&tag_name); + return false; + } + + Tag tag = tag_for_name(tag_name); + array_push(&scanner->tags, tag); + switch (tag.type) { + case SCRIPT: + lexer->result_symbol = SCRIPT_START_TAG_NAME; + break; + case STYLE: + lexer->result_symbol = STYLE_START_TAG_NAME; + break; + default: + lexer->result_symbol = START_TAG_NAME; + break; + } + return true; +} + +static bool scan_end_tag_name(Scanner *scanner, TSLexer *lexer) { + String tag_name = scan_tag_name(lexer); + + if (tag_name.size == 0) { + array_delete(&tag_name); + return false; + } + + Tag tag = tag_for_name(tag_name); + if (scanner->tags.size > 0 && tag_eq(array_back(&scanner->tags), &tag)) { + pop_tag(scanner); + lexer->result_symbol = END_TAG_NAME; + } else { + lexer->result_symbol = ERRONEOUS_END_TAG_NAME; + } + + tag_free(&tag); + return true; +} + +static bool scan_self_closing_tag_delimiter(Scanner *scanner, TSLexer *lexer) { + advance(lexer); + if (lexer->lookahead == '>') { + advance(lexer); + if (scanner->tags.size > 0) { + pop_tag(scanner); + lexer->result_symbol = SELF_CLOSING_TAG_DELIMITER; + } + return true; + } + return false; +} + +static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) { + if (valid_symbols[RAW_TEXT] && !valid_symbols[START_TAG_NAME] && !valid_symbols[END_TAG_NAME]) { + return scan_raw_text(scanner, lexer); + } + + while (iswspace(lexer->lookahead)) { + skip(lexer); + } + + switch (lexer->lookahead) { + case '<': + lexer->mark_end(lexer); + advance(lexer); + + if (lexer->lookahead == '!') { + advance(lexer); + return scan_comment(lexer); + } + + if (valid_symbols[IMPLICIT_END_TAG]) { + return scan_implicit_end_tag(scanner, lexer); + } + break; + + case '\0': + if (valid_symbols[IMPLICIT_END_TAG]) { + return scan_implicit_end_tag(scanner, lexer); + } + break; + + case '/': + if (valid_symbols[SELF_CLOSING_TAG_DELIMITER]) { + return scan_self_closing_tag_delimiter(scanner, lexer); + } + break; + + default: + if ((valid_symbols[START_TAG_NAME] || valid_symbols[END_TAG_NAME]) && !valid_symbols[RAW_TEXT]) { + return valid_symbols[START_TAG_NAME] ? scan_start_tag_name(scanner, lexer) + : scan_end_tag_name(scanner, lexer); + } + } + + return false; +} + +void *tree_sitter_html_external_scanner_create() { + Scanner *scanner = (Scanner *)ts_calloc(1, sizeof(Scanner)); + return scanner; +} + +bool tree_sitter_html_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) { + Scanner *scanner = (Scanner *)payload; + return scan(scanner, lexer, valid_symbols); +} + +unsigned tree_sitter_html_external_scanner_serialize(void *payload, char *buffer) { + Scanner *scanner = (Scanner *)payload; + return serialize(scanner, buffer); +} + +void tree_sitter_html_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) { + Scanner *scanner = (Scanner *)payload; + deserialize(scanner, buffer, length); +} + +void tree_sitter_html_external_scanner_destroy(void *payload) { + Scanner *scanner = (Scanner *)payload; + for (unsigned i = 0; i < scanner->tags.size; i++) { + tag_free(&scanner->tags.contents[i]); + } + array_delete(&scanner->tags); + ts_free(scanner); +} diff --git a/test-parsers/tree-sitter-html/src/tag.h b/test-parsers/tree-sitter-html/src/tag.h new file mode 100644 index 00000000..b8dd7ff7 --- /dev/null +++ b/test-parsers/tree-sitter-html/src/tag.h @@ -0,0 +1,385 @@ +#include "tree_sitter/array.h" + +#include + +typedef enum { + AREA, + BASE, + BASEFONT, + BGSOUND, + BR, + COL, + COMMAND, + EMBED, + FRAME, + HR, + IMAGE, + IMG, + INPUT, + ISINDEX, + KEYGEN, + LINK, + MENUITEM, + META, + NEXTID, + PARAM, + SOURCE, + TRACK, + WBR, + END_OF_VOID_TAGS, + + A, + ABBR, + ADDRESS, + ARTICLE, + ASIDE, + AUDIO, + B, + BDI, + BDO, + BLOCKQUOTE, + BODY, + BUTTON, + CANVAS, + CAPTION, + CITE, + CODE, + COLGROUP, + DATA, + DATALIST, + DD, + DEL, + DETAILS, + DFN, + DIALOG, + DIV, + DL, + DT, + EM, + FIELDSET, + FIGCAPTION, + FIGURE, + FOOTER, + FORM, + H1, + H2, + H3, + H4, + H5, + H6, + HEAD, + HEADER, + HGROUP, + HTML, + I, + IFRAME, + INS, + KBD, + LABEL, + LEGEND, + LI, + MAIN, + MAP, + MARK, + MATH, + MENU, + METER, + NAV, + NOSCRIPT, + OBJECT, + OL, + OPTGROUP, + OPTION, + OUTPUT, + P, + PICTURE, + PRE, + PROGRESS, + Q, + RB, + RP, + RT, + RTC, + RUBY, + S, + SAMP, + SCRIPT, + SECTION, + SELECT, + SLOT, + SMALL, + SPAN, + STRONG, + STYLE, + SUB, + SUMMARY, + SUP, + SVG, + TABLE, + TBODY, + TD, + TEMPLATE, + TEXTAREA, + TFOOT, + TH, + THEAD, + TIME, + TITLE, + TR, + U, + UL, + VAR, + VIDEO, + + CUSTOM, + + END_, +} TagType; + +typedef Array(char) String; + +typedef struct { + char tag_name[16]; + TagType tag_type; +} TagMapEntry; + +typedef struct { + TagType type; + String custom_tag_name; +} Tag; + +static const TagMapEntry TAG_TYPES_BY_TAG_NAME[126] = { + {"AREA", AREA }, + {"BASE", BASE }, + {"BASEFONT", BASEFONT }, + {"BGSOUND", BGSOUND }, + {"BR", BR }, + {"COL", COL }, + {"COMMAND", COMMAND }, + {"EMBED", EMBED }, + {"FRAME", FRAME }, + {"HR", HR }, + {"IMAGE", IMAGE }, + {"IMG", IMG }, + {"INPUT", INPUT }, + {"ISINDEX", ISINDEX }, + {"KEYGEN", KEYGEN }, + {"LINK", LINK }, + {"MENUITEM", MENUITEM }, + {"META", META }, + {"NEXTID", NEXTID }, + {"PARAM", PARAM }, + {"SOURCE", SOURCE }, + {"TRACK", TRACK }, + {"WBR", WBR }, + {"A", A }, + {"ABBR", ABBR }, + {"ADDRESS", ADDRESS }, + {"ARTICLE", ARTICLE }, + {"ASIDE", ASIDE }, + {"AUDIO", AUDIO }, + {"B", B }, + {"BDI", BDI }, + {"BDO", BDO }, + {"BLOCKQUOTE", BLOCKQUOTE}, + {"BODY", BODY }, + {"BUTTON", BUTTON }, + {"CANVAS", CANVAS }, + {"CAPTION", CAPTION }, + {"CITE", CITE }, + {"CODE", CODE }, + {"COLGROUP", COLGROUP }, + {"DATA", DATA }, + {"DATALIST", DATALIST }, + {"DD", DD }, + {"DEL", DEL }, + {"DETAILS", DETAILS }, + {"DFN", DFN }, + {"DIALOG", DIALOG }, + {"DIV", DIV }, + {"DL", DL }, + {"DT", DT }, + {"EM", EM }, + {"FIELDSET", FIELDSET }, + {"FIGCAPTION", FIGCAPTION}, + {"FIGURE", FIGURE }, + {"FOOTER", FOOTER }, + {"FORM", FORM }, + {"H1", H1 }, + {"H2", H2 }, + {"H3", H3 }, + {"H4", H4 }, + {"H5", H5 }, + {"H6", H6 }, + {"HEAD", HEAD }, + {"HEADER", HEADER }, + {"HGROUP", HGROUP }, + {"HTML", HTML }, + {"I", I }, + {"IFRAME", IFRAME }, + {"INS", INS }, + {"KBD", KBD }, + {"LABEL", LABEL }, + {"LEGEND", LEGEND }, + {"LI", LI }, + {"MAIN", MAIN }, + {"MAP", MAP }, + {"MARK", MARK }, + {"MATH", MATH }, + {"MENU", MENU }, + {"METER", METER }, + {"NAV", NAV }, + {"NOSCRIPT", NOSCRIPT }, + {"OBJECT", OBJECT }, + {"OL", OL }, + {"OPTGROUP", OPTGROUP }, + {"OPTION", OPTION }, + {"OUTPUT", OUTPUT }, + {"P", P }, + {"PICTURE", PICTURE }, + {"PRE", PRE }, + {"PROGRESS", PROGRESS }, + {"Q", Q }, + {"RB", RB }, + {"RP", RP }, + {"RT", RT }, + {"RTC", RTC }, + {"RUBY", RUBY }, + {"S", S }, + {"SAMP", SAMP }, + {"SCRIPT", SCRIPT }, + {"SECTION", SECTION }, + {"SELECT", SELECT }, + {"SLOT", SLOT }, + {"SMALL", SMALL }, + {"SPAN", SPAN }, + {"STRONG", STRONG }, + {"STYLE", STYLE }, + {"SUB", SUB }, + {"SUMMARY", SUMMARY }, + {"SUP", SUP }, + {"SVG", SVG }, + {"TABLE", TABLE }, + {"TBODY", TBODY }, + {"TD", TD }, + {"TEMPLATE", TEMPLATE }, + {"TEXTAREA", TEXTAREA }, + {"TFOOT", TFOOT }, + {"TH", TH }, + {"THEAD", THEAD }, + {"TIME", TIME }, + {"TITLE", TITLE }, + {"TR", TR }, + {"U", U }, + {"UL", UL }, + {"VAR", VAR }, + {"VIDEO", VIDEO }, + {"CUSTOM", CUSTOM }, +}; + +static const TagType TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[] = { + ADDRESS, ARTICLE, ASIDE, BLOCKQUOTE, DETAILS, DIV, DL, + FIELDSET, FIGCAPTION, FIGURE, FOOTER, FORM, H1, H2, + H3, H4, H5, H6, HEADER, HR, MAIN, + NAV, OL, P, PRE, SECTION, +}; + +static TagType tag_type_for_name(const String *tag_name) { + for (int i = 0; i < 126; i++) { + const TagMapEntry *entry = &TAG_TYPES_BY_TAG_NAME[i]; + if ( + strlen(entry->tag_name) == tag_name->size && + memcmp(tag_name->contents, entry->tag_name, tag_name->size) == 0 + ) { + return entry->tag_type; + } + } + return CUSTOM; +} + +static inline Tag tag_new() { + Tag tag; + tag.type = END_; + tag.custom_tag_name = (String) array_new(); + return tag; +} + +static inline Tag tag_for_name(String name) { + Tag tag = tag_new(); + tag.type = tag_type_for_name(&name); + if (tag.type == CUSTOM) { + tag.custom_tag_name = name; + } else { + array_delete(&name); + } + return tag; +} + +static inline void tag_free(Tag *tag) { + if (tag->type == CUSTOM) { + array_delete(&tag->custom_tag_name); + } +} + +static inline bool tag_is_void(const Tag *self) { + return self->type < END_OF_VOID_TAGS; +} + +static inline bool tag_eq(const Tag *self, const Tag *other) { + if (self->type != other->type) return false; + if (self->type == CUSTOM) { + if (self->custom_tag_name.size != other->custom_tag_name.size) { + return false; + } + if (memcmp( + self->custom_tag_name.contents, + other->custom_tag_name.contents, + self->custom_tag_name.size + ) != 0) { + return false; + } + } + return true; +} + +static bool tag_can_contain(Tag *self, const Tag *other) { + TagType child = other->type; + + switch (self->type) { + case LI: + return child != LI; + + case DT: + case DD: + return child != DT && child != DD; + + case P: + for (int i = 0; i < 26; i++) { + if (child == TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[i]) { + return false; + } + } + return true; + + case COLGROUP: + return child == COL; + + case RB: + case RT: + case RP: + return child != RB && child != RT && child != RP; + + case OPTGROUP: + return child != OPTGROUP; + + case TR: + return child != TR; + + case TD: + case TH: + return child != TD && child != TH && child != TR; + + default: + return true; + } +} diff --git a/test-parsers/tree-sitter-html/src/tree_sitter/alloc.h b/test-parsers/tree-sitter-html/src/tree_sitter/alloc.h new file mode 100644 index 00000000..1f4466d7 --- /dev/null +++ b/test-parsers/tree-sitter-html/src/tree_sitter/alloc.h @@ -0,0 +1,54 @@ +#ifndef TREE_SITTER_ALLOC_H_ +#define TREE_SITTER_ALLOC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +// Allow clients to override allocation functions +#ifdef TREE_SITTER_REUSE_ALLOCATOR + +extern void *(*ts_current_malloc)(size_t); +extern void *(*ts_current_calloc)(size_t, size_t); +extern void *(*ts_current_realloc)(void *, size_t); +extern void (*ts_current_free)(void *); + +#ifndef ts_malloc +#define ts_malloc ts_current_malloc +#endif +#ifndef ts_calloc +#define ts_calloc ts_current_calloc +#endif +#ifndef ts_realloc +#define ts_realloc ts_current_realloc +#endif +#ifndef ts_free +#define ts_free ts_current_free +#endif + +#else + +#ifndef ts_malloc +#define ts_malloc malloc +#endif +#ifndef ts_calloc +#define ts_calloc calloc +#endif +#ifndef ts_realloc +#define ts_realloc realloc +#endif +#ifndef ts_free +#define ts_free free +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ALLOC_H_ diff --git a/test-parsers/tree-sitter-html/src/tree_sitter/array.h b/test-parsers/tree-sitter-html/src/tree_sitter/array.h new file mode 100644 index 00000000..15a3b233 --- /dev/null +++ b/test-parsers/tree-sitter-html/src/tree_sitter/array.h @@ -0,0 +1,290 @@ +#ifndef TREE_SITTER_ARRAY_H_ +#define TREE_SITTER_ARRAY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "./alloc.h" + +#include +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning(disable : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +#define Array(T) \ + struct { \ + T *contents; \ + uint32_t size; \ + uint32_t capacity; \ + } + +/// Initialize an array. +#define array_init(self) \ + ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) + +/// Create an empty array. +#define array_new() \ + { NULL, 0, 0 } + +/// Get a pointer to the element at a given `index` in the array. +#define array_get(self, _index) \ + (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) + +/// Get a pointer to the first element in the array. +#define array_front(self) array_get(self, 0) + +/// Get a pointer to the last element in the array. +#define array_back(self) array_get(self, (self)->size - 1) + +/// Clear the array, setting its size to zero. Note that this does not free any +/// memory allocated for the array's contents. +#define array_clear(self) ((self)->size = 0) + +/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is +/// less than the array's current capacity, this function has no effect. +#define array_reserve(self, new_capacity) \ + _array__reserve((Array *)(self), array_elem_size(self), new_capacity) + +/// Free any memory allocated for this array. Note that this does not free any +/// memory allocated for the array's contents. +#define array_delete(self) _array__delete((Array *)(self)) + +/// Push a new `element` onto the end of the array. +#define array_push(self, element) \ + (_array__grow((Array *)(self), 1, array_elem_size(self)), \ + (self)->contents[(self)->size++] = (element)) + +/// Increase the array's size by `count` elements. +/// New elements are zero-initialized. +#define array_grow_by(self, count) \ + do { \ + if ((count) == 0) break; \ + _array__grow((Array *)(self), count, array_elem_size(self)); \ + memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \ + (self)->size += (count); \ + } while (0) + +/// Append all elements from one array to the end of another. +#define array_push_all(self, other) \ + array_extend((self), (other)->size, (other)->contents) + +/// Append `count` elements to the end of the array, reading their values from the +/// `contents` pointer. +#define array_extend(self, count, contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), (self)->size, \ + 0, count, contents \ + ) + +/// Remove `old_count` elements from the array starting at the given `index`. At +/// the same index, insert `new_count` new elements, reading their values from the +/// `new_contents` pointer. +#define array_splice(self, _index, old_count, new_count, new_contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), _index, \ + old_count, new_count, new_contents \ + ) + +/// Insert one `element` into the array at the given `index`. +#define array_insert(self, _index, element) \ + _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element)) + +/// Remove one element from the array at the given `index`. +#define array_erase(self, _index) \ + _array__erase((Array *)(self), array_elem_size(self), _index) + +/// Pop the last element off the array, returning the element by value. +#define array_pop(self) ((self)->contents[--(self)->size]) + +/// Assign the contents of one array to another, reallocating if necessary. +#define array_assign(self, other) \ + _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self)) + +/// Swap one array with another +#define array_swap(self, other) \ + _array__swap((Array *)(self), (Array *)(other)) + +/// Get the size of the array contents +#define array_elem_size(self) (sizeof *(self)->contents) + +/// Search a sorted array for a given `needle` value, using the given `compare` +/// callback to determine the order. +/// +/// If an existing element is found to be equal to `needle`, then the `index` +/// out-parameter is set to the existing value's index, and the `exists` +/// out-parameter is set to true. Otherwise, `index` is set to an index where +/// `needle` should be inserted in order to preserve the sorting, and `exists` +/// is set to false. +#define array_search_sorted_with(self, compare, needle, _index, _exists) \ + _array__search_sorted(self, 0, compare, , needle, _index, _exists) + +/// Search a sorted array for a given `needle` value, using integer comparisons +/// of a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_with`. +#define array_search_sorted_by(self, field, needle, _index, _exists) \ + _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) + +/// Insert a given `value` into a sorted array, using the given `compare` +/// callback to determine the order. +#define array_insert_sorted_with(self, compare, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) + +/// Insert a given `value` into a sorted array, using integer comparisons of +/// a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_by`. +#define array_insert_sorted_by(self, field, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_by(self, field, (value) field, &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) + +// Private + +typedef Array(void) Array; + +/// This is not what you're looking for, see `array_delete`. +static inline void _array__delete(Array *self) { + if (self->contents) { + ts_free(self->contents); + self->contents = NULL; + self->size = 0; + self->capacity = 0; + } +} + +/// This is not what you're looking for, see `array_erase`. +static inline void _array__erase(Array *self, size_t element_size, + uint32_t index) { + assert(index < self->size); + char *contents = (char *)self->contents; + memmove(contents + index * element_size, contents + (index + 1) * element_size, + (self->size - index - 1) * element_size); + self->size--; +} + +/// This is not what you're looking for, see `array_reserve`. +static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { + if (new_capacity > self->capacity) { + if (self->contents) { + self->contents = ts_realloc(self->contents, new_capacity * element_size); + } else { + self->contents = ts_malloc(new_capacity * element_size); + } + self->capacity = new_capacity; + } +} + +/// This is not what you're looking for, see `array_assign`. +static inline void _array__assign(Array *self, const Array *other, size_t element_size) { + _array__reserve(self, element_size, other->size); + self->size = other->size; + memcpy(self->contents, other->contents, self->size * element_size); +} + +/// This is not what you're looking for, see `array_swap`. +static inline void _array__swap(Array *self, Array *other) { + Array swap = *other; + *other = *self; + *self = swap; +} + +/// This is not what you're looking for, see `array_push` or `array_grow_by`. +static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { + uint32_t new_size = self->size + count; + if (new_size > self->capacity) { + uint32_t new_capacity = self->capacity * 2; + if (new_capacity < 8) new_capacity = 8; + if (new_capacity < new_size) new_capacity = new_size; + _array__reserve(self, element_size, new_capacity); + } +} + +/// This is not what you're looking for, see `array_splice`. +static inline void _array__splice(Array *self, size_t element_size, + uint32_t index, uint32_t old_count, + uint32_t new_count, const void *elements) { + uint32_t new_size = self->size + new_count - old_count; + uint32_t old_end = index + old_count; + uint32_t new_end = index + new_count; + assert(old_end <= self->size); + + _array__reserve(self, element_size, new_size); + + char *contents = (char *)self->contents; + if (self->size > old_end) { + memmove( + contents + new_end * element_size, + contents + old_end * element_size, + (self->size - old_end) * element_size + ); + } + if (new_count > 0) { + if (elements) { + memcpy( + (contents + index * element_size), + elements, + new_count * element_size + ); + } else { + memset( + (contents + index * element_size), + 0, + new_count * element_size + ); + } + } + self->size += new_count - old_count; +} + +/// A binary search routine, based on Rust's `std::slice::binary_search_by`. +/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. +#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ + do { \ + *(_index) = start; \ + *(_exists) = false; \ + uint32_t size = (self)->size - *(_index); \ + if (size == 0) break; \ + int comparison; \ + while (size > 1) { \ + uint32_t half_size = size / 2; \ + uint32_t mid_index = *(_index) + half_size; \ + comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ + if (comparison <= 0) *(_index) = mid_index; \ + size -= half_size; \ + } \ + comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ + if (comparison == 0) *(_exists) = true; \ + else if (comparison < 0) *(_index) += 1; \ + } while (0) + +/// Helper macro for the `_sorted_by` routines below. This takes the left (existing) +/// parameter by reference in order to work with the generic sorting function above. +#define _compare_int(a, b) ((int)*(a) - (int)(b)) + +#ifdef _MSC_VER +#pragma warning(default : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ARRAY_H_ diff --git a/test-parsers/tree-sitter-html/src/tree_sitter/parser.h b/test-parsers/tree-sitter-html/src/tree_sitter/parser.h new file mode 100644 index 00000000..799f599b --- /dev/null +++ b/test-parsers/tree-sitter-html/src/tree_sitter/parser.h @@ -0,0 +1,266 @@ +#ifndef TREE_SITTER_PARSER_H_ +#define TREE_SITTER_PARSER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +#define ts_builtin_sym_error ((TSSymbol)-1) +#define ts_builtin_sym_end 0 +#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 + +#ifndef TREE_SITTER_API_H_ +typedef uint16_t TSStateId; +typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; +typedef struct TSLanguage TSLanguage; +#endif + +typedef struct { + TSFieldId field_id; + uint8_t child_index; + bool inherited; +} TSFieldMapEntry; + +typedef struct { + uint16_t index; + uint16_t length; +} TSFieldMapSlice; + +typedef struct { + bool visible; + bool named; + bool supertype; +} TSSymbolMetadata; + +typedef struct TSLexer TSLexer; + +struct TSLexer { + int32_t lookahead; + TSSymbol result_symbol; + void (*advance)(TSLexer *, bool); + void (*mark_end)(TSLexer *); + uint32_t (*get_column)(TSLexer *); + bool (*is_at_included_range_start)(const TSLexer *); + bool (*eof)(const TSLexer *); + void (*log)(const TSLexer *, const char *, ...); +}; + +typedef enum { + TSParseActionTypeShift, + TSParseActionTypeReduce, + TSParseActionTypeAccept, + TSParseActionTypeRecover, +} TSParseActionType; + +typedef union { + struct { + uint8_t type; + TSStateId state; + bool extra; + bool repetition; + } shift; + struct { + uint8_t type; + uint8_t child_count; + TSSymbol symbol; + int16_t dynamic_precedence; + uint16_t production_id; + } reduce; + uint8_t type; +} TSParseAction; + +typedef struct { + uint16_t lex_state; + uint16_t external_lex_state; +} TSLexMode; + +typedef union { + TSParseAction action; + struct { + uint8_t count; + bool reusable; + } entry; +} TSParseActionEntry; + +typedef struct { + int32_t start; + int32_t end; +} TSCharacterRange; + +struct TSLanguage { + uint32_t version; + uint32_t symbol_count; + uint32_t alias_count; + uint32_t token_count; + uint32_t external_token_count; + uint32_t state_count; + uint32_t large_state_count; + uint32_t production_id_count; + uint32_t field_count; + uint16_t max_alias_sequence_length; + const uint16_t *parse_table; + const uint16_t *small_parse_table; + const uint32_t *small_parse_table_map; + const TSParseActionEntry *parse_actions; + const char * const *symbol_names; + const char * const *field_names; + const TSFieldMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; + const TSSymbolMetadata *symbol_metadata; + const TSSymbol *public_symbol_map; + const uint16_t *alias_map; + const TSSymbol *alias_sequences; + const TSLexMode *lex_modes; + bool (*lex_fn)(TSLexer *, TSStateId); + bool (*keyword_lex_fn)(TSLexer *, TSStateId); + TSSymbol keyword_capture_token; + struct { + const bool *states; + const TSSymbol *symbol_map; + void *(*create)(void); + void (*destroy)(void *); + bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist); + unsigned (*serialize)(void *, char *); + void (*deserialize)(void *, const char *, unsigned); + } external_scanner; + const TSStateId *primary_state_ids; +}; + +static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { + uint32_t index = 0; + uint32_t size = len - index; + while (size > 1) { + uint32_t half_size = size / 2; + uint32_t mid_index = index + half_size; + TSCharacterRange *range = &ranges[mid_index]; + if (lookahead >= range->start && lookahead <= range->end) { + return true; + } else if (lookahead > range->end) { + index = mid_index; + } + size -= half_size; + } + TSCharacterRange *range = &ranges[index]; + return (lookahead >= range->start && lookahead <= range->end); +} + +/* + * Lexer Macros + */ + +#ifdef _MSC_VER +#define UNUSED __pragma(warning(suppress : 4101)) +#else +#define UNUSED __attribute__((unused)) +#endif + +#define START_LEXER() \ + bool result = false; \ + bool skip = false; \ + UNUSED \ + bool eof = false; \ + int32_t lookahead; \ + goto start; \ + next_state: \ + lexer->advance(lexer, skip); \ + start: \ + skip = false; \ + lookahead = lexer->lookahead; + +#define ADVANCE(state_value) \ + { \ + state = state_value; \ + goto next_state; \ + } + +#define ADVANCE_MAP(...) \ + { \ + static const uint16_t map[] = { __VA_ARGS__ }; \ + for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \ + if (map[i] == lookahead) { \ + state = map[i + 1]; \ + goto next_state; \ + } \ + } \ + } + +#define SKIP(state_value) \ + { \ + skip = true; \ + state = state_value; \ + goto next_state; \ + } + +#define ACCEPT_TOKEN(symbol_value) \ + result = true; \ + lexer->result_symbol = symbol_value; \ + lexer->mark_end(lexer); + +#define END_STATE() return result; + +/* + * Parse Table Macros + */ + +#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT) + +#define STATE(id) id + +#define ACTIONS(id) id + +#define SHIFT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = (state_value) \ + } \ + }} + +#define SHIFT_REPEAT(state_value) \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .state = (state_value), \ + .repetition = true \ + } \ + }} + +#define SHIFT_EXTRA() \ + {{ \ + .shift = { \ + .type = TSParseActionTypeShift, \ + .extra = true \ + } \ + }} + +#define REDUCE(symbol_name, children, precedence, prod_id) \ + {{ \ + .reduce = { \ + .type = TSParseActionTypeReduce, \ + .symbol = symbol_name, \ + .child_count = children, \ + .dynamic_precedence = precedence, \ + .production_id = prod_id \ + }, \ + }} + +#define RECOVER() \ + {{ \ + .type = TSParseActionTypeRecover \ + }} + +#define ACCEPT_INPUT() \ + {{ \ + .type = TSParseActionTypeAccept \ + }} + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_PARSER_H_