ES HTML Parser is an HTML parser that generates an abstract syntax tree similar to the ESTree specification.
This project began as a fork of hyntax and is developed to follow an ESTree-like AST specification.
See online demo.
npm install es-html-parser
import { parse } from "es-html-parser";
const input = `
<button type="button"> press here </button>
const { ast, tokens } = parse(input);
parse(html: string, options?: Options): ParseResult;
- `html`: HTML string to parse.
- `options` (optional)
  - `tokenAdapter`: The adapter option for changing token information.
- Returns `ParseResult`: the result of parsing.
interface ParseResult {
ast: DocumentNode;
tokens: AnyToken[];
- `ast`: The root node of the AST.
- `tokens`: An array of resulting tokens.
The `AnyNode` type is a union type of all nodes.
type AnyNode =
| DocumentNode
| TextNode
| TagNode
| OpenTagStartNode
| OpenTagEndNode
| CloseTagNode
| AttributeNode
| AttributeKeyNode
| AttributeValueNode
| AttributeValueWrapperStartNode
| AttributeValueWrapperEndNode
| ScriptTagNode
| OpenScriptTagStartNode
| CloseScriptTagNode
| OpenScriptTagEndNode
| ScriptTagContentNode
| StyleTagNode
| OpenStyleTagStartNode
| OpenStyleTagEndNode
| StyleTagContentNode
| CloseStyleTagNode
| CommentNode
| CommentOpenNode
| CommentCloseNode
| CommentContentNode
| DoctypeNode
| DoctypeOpenNode
| DoctypeCloseNode
| DoctypeAttributeNode
| DoctypeAttributeValueNode
| DoctypeAttributeWrapperStartNode
| DoctypeAttributeWrapperEndNode;
The `AnyToken` type is a union type of all tokens.
type AnyToken =
| Token<TokenTypes.Text>
| Token<TokenTypes.OpenTagStart>
| Token<TokenTypes.OpenTagEnd>
| Token<TokenTypes.CloseTag>
| Token<TokenTypes.AttributeKey>
| Token<TokenTypes.AttributeAssignment>
| Token<TokenTypes.AttributeValueWrapperStart>
| Token<TokenTypes.AttributeValue>
| Token<TokenTypes.AttributeValueWrapperEnd>
| Token<TokenTypes.DoctypeOpen>
| Token<TokenTypes.DoctypeAttributeValue>
| Token<TokenTypes.DoctypeAttributeWrapperStart>
| Token<TokenTypes.DoctypeAttributeWrapperEnd>
| Token<TokenTypes.DoctypeClose>
| Token<TokenTypes.CommentOpen>
| Token<TokenTypes.CommentContent>
| Token<TokenTypes.CommentClose>
| Token<TokenTypes.OpenScriptTagStart>
| Token<TokenTypes.OpenScriptTagEnd>
| Token<TokenTypes.ScriptTagContent>
| Token<TokenTypes.CloseScriptTag>
| Token<TokenTypes.OpenStyleTagStart>
| Token<TokenTypes.OpenStyleTagEnd>
| Token<TokenTypes.StyleTagContent>
| Token<TokenTypes.CloseStyleTag>;
enum TokenTypes {
Text = "Text",
OpenTagStart = "OpenTagStart",
OpenTagEnd = "OpenTagEnd",
CloseTag = "CloseTag",
AttributeKey = "AttributeKey",
AttributeAssignment = "AttributeAssignment",
AttributeValueWrapperStart = "AttributeValueWrapperStart",
AttributeValue = "AttributeValue",
AttributeValueWrapperEnd = "AttributeValueWrapperEnd",
DoctypeOpen = "DoctypeOpen",
DoctypeAttributeValue = "DoctypeAttributeValue",
DoctypeAttributeWrapperStart = "DoctypeAttributeWrapperStart",
DoctypeAttributeWrapperEnd = "DoctypeAttributeWrapperEnd",
DoctypeClose = "DoctypeClose",
CommentOpen = "CommentOpen",
CommentContent = "CommentContent",
CommentClose = "CommentClose",
OpenScriptTagStart = "OpenScriptTagStart",
OpenScriptTagEnd = "OpenScriptTagEnd",
ScriptTagContent = "ScriptTagContent",
CloseScriptTag = "CloseScriptTag",
OpenStyleTagStart = "OpenStyleTagStart",
OpenStyleTagEnd = "OpenStyleTagEnd",
StyleTagContent = "StyleTagContent",
CloseStyleTag = "CloseStyleTag",
enum NodeTypes {
Document = "Document",
Tag = "Tag",
Text = "Text",
Doctype = "Doctype",
Comment = "Comment",
CommentOpen = "CommentOpen",
CommentClose = "CommentClose",
CommentContent = "CommentContent",
Attribute = "Attribute",
AttributeKey = "AttributeKey",
AttributeValue = "AttributeValue",
AttributeValueWrapperStart = "AttributeValueWrapperStart",
AttributeValueWrapperEnd = "AttributeValueWrapperEnd",
CloseTag = "CloseTag",
OpenTagEnd = "OpenTagEnd",
OpenTagStart = "OpenTagStart",
DoctypeOpen = "DoctypeOpen",
DoctypeAttribute = "DoctypeAttribute",
DoctypeClose = "DoctypeClose",
ScriptTag = "ScriptTag",
OpenScriptTagStart = "OpenScriptTagStart",
OpenScriptTagEnd = "OpenScriptTagEnd",
ScriptTagContent = "ScriptTagContent",
StyleTag = "StyleTag",
OpenStyleTagStart = "OpenStyleTagStart",
OpenStyleTagEnd = "OpenStyleTagEnd",
StyleTagContent = "StyleTagContent",
CloseStyleTag = "CloseStyleTag",
CloseScriptTag = "CloseScriptTag",
DoctypeAttributeValue = "DoctypeAttributeValue",
DoctypeAttributeWrapperStart = "DoctypeAttributeWrapperStart",
DoctypeAttributeWrapperEnd = "DoctypeAttributeWrapperEnd",
Every AST node and token implements the `BaseNode` interface.
interface BaseNode {
type: string;
loc: SourceLocation;
range: [number, number];
The `type` field represents the AST type. Its value is one of the `NodeTypes` or `TokenTypes`.
The loc
and range
fields represent the source location of the node.
interface SourceLocation {
start: Position;
end: Position;
The start
field represents the start location of the node.
The end
field represents the end location of the node.
interface Position {
line: number; // >= 1
column: number; // >= 0
The `line` field is a number representing the line number where the node is positioned (1-based index).
The column
field is a number representing the offset in the line. (0-based index).
All tokens implement the `Token` interface.
interface Token<T extends TokenTypes> extends BaseNode {
type: T;
value: string;
`DocumentNode` represents a whole parsed document. It is the root node of the AST.
interface DocumentNode extends BaseNode {
type: "Document";
children: Array<TextNode | TagNode | ScriptNode | StyleNode | CommentNode>;
`TextNode` represents any plain text in HTML.
interface TextNode extends BaseNode {
type: "Text";
value: string;
`TagNode` represents all kinds of tag nodes in HTML except for doctype, script, style, and comment (e.g. `<div></div>`, `<span></span>`).
interface TagNode extends BaseNode {
type: "Tag";
selfClosing: boolean;
name: string;
openStart: OpenTagStartNode;
openEnd: OpenTagEndNode;
close?: CloseTagNode;
children: Array<TextNode | TagNode | ScriptNode | StyleNode | CommentNode>;
attributes: Array<AttributeNode>;
`OpenTagStartNode` represents the opening part of a start tag (e.g. `<div`).
interface OpenTagStartNode extends BaseNode {
type: "OpenTagStart";
value: string;
`OpenTagEndNode` represents the closing part of a start tag (e.g. `>`, `/>`).
interface OpenTagEndNode extends BaseNode {
type: "OpenTagEnd";
value: string;
`CloseTagNode` represents an end tag (e.g. `</div>`).
interface CloseTagNode extends BaseNode {
type: "CloseTag";
value: string;
`AttributeNode` represents an attribute (e.g. `id="foo"`).
interface AttributeNode extends BaseNode {
type: "Attribute";
key: AttributeKeyNode;
value?: AttributeValueNode;
startWrapper?: AttributeValueWrapperStartNode;
endWrapper?: AttributeValueWrapperEndNode;
`AttributeKeyNode` represents the key part of an attribute (e.g. `id`).
interface AttributeKeyNode extends BaseNode {
type: "AttributeKey";
value: string;
`AttributeValueWrapperStartNode` represents the left side character that wraps the value of the attribute (e.g. `"`, `'`).
interface AttributeValueWrapperStartNode extends BaseNode {
type: "AttributeValueWrapperStart";
value: string;
`AttributeValueWrapperEndNode` represents the right side character that wraps the value of the attribute (e.g. `"`, `'`).
interface AttributeValueWrapperEndNode extends BaseNode {
type: "AttributeValueWrapperEnd";
value: string;
`AttributeValueNode` represents the value part of the attribute. It does not include wrapper characters (e.g. `foo`).
interface AttributeValueNode extends BaseNode {
type: "AttributeValue";
value: string;
The `ScriptTagNode` represents a script tag in HTML (e.g. `<script> console.log('hello'); </script>`).
interface ScriptTagNode extends BaseNode {
type: "ScriptTag";
attributes: Array<AttributeNode>;
openStart: OpenScriptTagStartNode;
openEnd: OpenScriptTagEndNode;
close: CloseScriptTagNode;
value?: ScriptTagContentNode;
`OpenScriptTagStartNode` represents the opening part of a start script tag (e.g. `<script`).
interface OpenScriptTagStartNode extends BaseNode {
type: "OpenScriptTagStart";
value: string;
`OpenScriptTagEndNode` represents the closing part of a start script tag (e.g. `>`).
interface OpenScriptTagEndNode extends BaseNode {
type: "OpenScriptTagEnd";
value: string;
`CloseScriptTagNode` represents a closing script tag (e.g. `</script>`).
interface CloseScriptTagNode extends BaseNode {
type: "CloseScriptTag";
value: string;
`ScriptTagContentNode` represents the script content inside a script tag (e.g. `console.log('hello');`).
interface ScriptTagContentNode extends BaseNode {
type: "ScriptTagContent";
value: string;
`StyleTagNode` represents a style tag (e.g. `<style> .foo {} </style>`).
interface StyleTagNode extends BaseNode {
type: "StyleTag";
attributes: Array<AttributeNode>;
openStart: OpenStyleTagStartNode;
openEnd: OpenStyleTagEndNode;
close: CloseStyleTagNode;
value?: StyleTagContentNode;
`OpenStyleTagStartNode` represents the opening part of a start style tag (e.g. `<style`).
interface OpenStyleTagStartNode extends BaseNode {
type: "OpenStyleTagStart";
value: string;
`OpenStyleTagEndNode` represents the closing part of a start style tag (e.g. `>`).
interface OpenStyleTagEndNode extends BaseNode {
type: "OpenStyleTagEnd";
value: string;
`CloseStyleTagNode` represents a closing style tag (e.g. `</style>`).
interface CloseStyleTagNode extends BaseNode {
type: "CloseStyleTag";
value: string;
`StyleTagContentNode` represents the style content inside a style tag.
interface StyleTagContentNode extends BaseNode {
type: "StyleTagContent";
value: string;
`CommentNode` represents a comment in HTML (e.g. `<!-- content -->`).
interface CommentNode extends BaseNode {
type: "Comment";
open: CommentOpenNode;
close: CommentCloseNode;
value: CommentContentNode;
`CommentOpenNode` represents the comment start character sequence (e.g. `<!--`).
interface CommentOpenNode extends BaseNode {
type: "CommentOpen";
value: string;
`CommentCloseNode` represents the comment end character sequence (e.g. `-->`).
interface CommentCloseNode extends BaseNode {
type: "CommentClose";
value: string;
The `CommentContentNode` represents the text inside a comment.
interface CommentContentNode extends BaseNode {
type: "CommentContent";
value: string;
`DoctypeNode` represents the DOCTYPE in HTML.
interface DoctypeNode extends BaseNode {
type: "Doctype";
attributes: Array<DoctypeAttributeNode>;
open: DoctypeOpenNode;
close: DoctypeCloseNode;
`DoctypeOpenNode` represents the doctype start character sequence (e.g. `<!DOCTYPE`).
interface DoctypeOpenNode extends BaseNode {
type: "DoctypeOpen";
value: string;
`DoctypeCloseNode` represents the doctype end character sequence (e.g. `>`).
interface DoctypeCloseNode extends BaseNode {
type: "DoctypeClose";
value: string;
`DoctypeAttributeNode` represents an attribute of a doctype node (e.g. `html`, `"-//W3C//DTD HTML 4.01 Transitional//EN"`).
interface DoctypeAttributeNode extends BaseNode {
type: "DoctypeAttribute";
key: DoctypeAttributeKey;
`DoctypeAttributeValueNode` represents the value of a doctype node's attribute (e.g. `html`, `-//W3C//DTD HTML 4.01 Transitional//EN`). It does not include wrapper characters (`'`, `"`).
interface DoctypeAttributeValueNode extends BaseNode {
type: "DoctypeAttributeValue";
value: string;
`DoctypeAttributeWrapperStartNode` represents the left side character that wraps the value of the attribute (e.g. `"`, `'`).
interface DoctypeAttributeWrapperStartNode extends BaseNode {
type: "DoctypeAttributeWrapperStart";
value: string;
`DoctypeAttributeWrapperEndNode` represents the right side character that wraps the value of the attribute (e.g. `"`, `'`).
interface DoctypeAttributeWrapperEndNode extends BaseNode {
type: "DoctypeAttributeWrapperEnd";
value: string;