From eb76dfaab455b6ae16e1c1a025b21925d1a129b0 Mon Sep 17 00:00:00 2001 From: Jason Evans Date: Thu, 15 Aug 2024 19:20:29 -0700 Subject: [PATCH] Continue work on LR-based parser (compiles, but goes into infinite loop) --- bootstrap/bin/hocc/ParseLR.hmh | 485 +++++++++++++++++++++++++++------ 1 file changed, 397 insertions(+), 88 deletions(-) diff --git a/bootstrap/bin/hocc/ParseLR.hmh b/bootstrap/bin/hocc/ParseLR.hmh index 9e530346..52e828e1 100644 --- a/bootstrap/bin/hocc/ParseLR.hmh +++ b/bootstrap/bin/hocc/ParseLR.hmh @@ -1,7 +1,128 @@ open Basis open! Basis.Rudiments +module Error = struct + module T = struct + type t = { + source: Hmc.Source.Slice.t; + msg: string; + } + + let cmp t0 t1 = + Hmc.Source.Slice.cmp t0.source t1.source + + let pp {source; msg} formatter = + formatter + |> Fmt.fmt "{source=" |> Hmc.Source.Slice.pp source + |> Fmt.fmt "; msg=" |> String.pp msg + |> Fmt.fmt "}" + + let fmt ?(alt=false) ({source; msg} as t) formatter = + match alt with + | false -> pp t formatter + | true -> begin + formatter + |> Fmt.fmt "hocc: At " + |> Hmc.Source.Slice.pp source + |> Fmt.fmt ": " + |> Fmt.fmt msg + |> Fmt.fmt "\n" + end + end + include T + include Cmpable.Make(T) + + let init_token token_ msg = + {source=Scan.Token.source token_; msg} + + let init_mal mal = + let open Hmc.Scan.AbstractToken.Rendition.Malformation in + {source=source mal; msg=description mal} + + let init_scanner scanner msg = + let cursor = Scan.cursor scanner in + let source = Hmc.Source.Slice.of_cursors ~base:cursor ~past:cursor in + {source; msg} + end + module X = struct (* XXX Workaround for qualified type syntax limitation. 
*) +type token_hocc = + | HOCC of {token_: Scan.Token.t} +type token_nonterm = + | NONTERM of {token_: Scan.Token.t} +type token_epsilon = + | EPSILON of {token_: Scan.Token.t} +type token_start = + | START of {token_: Scan.Token.t} +type token_token = + | TOKEN of {token_: Scan.Token.t} +type token_neutral = + | NEUTRAL of {token_: Scan.Token.t} +type token_left = + | LEFT of {token_: Scan.Token.t} +type token_right = + | RIGHT of {token_: Scan.Token.t} +type token_prec = + | PREC of {token_: Scan.Token.t} +type token_uident = + | UIDENT of {token_: Scan.Token.t} +type token_cident = + | CIDENT of {token_: Scan.Token.t} +type token_uscore = + | USCORE of {token_: Scan.Token.t} +(* XXX Should be normal string. *) +type token_alias = + | ALIAS of {token_: Scan.Token.t} +type token_colon_colon_eq = + | COLON_COLON_EQ of {token_: Scan.Token.t} +type token_of = + | OF of {token_: Scan.Token.t} +type token_colon = + | COLON of {token_: Scan.Token.t} +type token_dot = + | DOT of {token_: Scan.Token.t} +type token_arrow = + | ARROW of {token_: Scan.Token.t} +type token_bar = + | BAR of {token_: Scan.Token.t} +type token_lt = + | LT of {token_: Scan.Token.t} +type token_comma = + | COMMA of {token_: Scan.Token.t} +type token_semi = + | SEMI of {token_: Scan.Token.t} +type token_line_delim = + | LINE_DELIM of {token_: Scan.Token.t} +type token_indent = + | INDENT of {token_: Scan.Token.t} +type token_dedent = + | DEDENT of {token_: Scan.Token.t} +type token_lparen = + | LPAREN of {token_: Scan.Token.t} +type token_rparen = + | RPAREN of {token_: Scan.Token.t} +type token_lcapture = + | LCAPTURE of {token_: Scan.Token.t} +type token_rcapture = + | RCAPTURE of {token_: Scan.Token.t} +type token_lbrack = + | LBRACK of {token_: Scan.Token.t} +type token_rbrack = + | RBRACK of {token_: Scan.Token.t} +type token_larray = + | LARRAY of {token_: Scan.Token.t} +type token_rarray = + | RARRAY of {token_: Scan.Token.t} +type token_lcurly = + | LCURLY of {token_: Scan.Token.t} +type 
token_rcurly = + | RCURLY of {token_: Scan.Token.t} +type token_code_token = + | CODE_TOKEN of {token_: Scan.Token.t} +type token_eoi = + | EOI of {token_: Scan.Token.t} + +(* XXX Add nonterm_ prefixes. *) type uident = | Uident of {uident: Scan.Token.t} and cident = @@ -32,12 +153,12 @@ type uident = and prec_ref = | PrecRefPrecUident of {prec_: Scan.Token.t; uident: uident} | PrecRefEpsilon - and token_alias = + and nonterm_token_alias = | TokenAlias of {alias: Scan.Token.t} | TokenAliasEpsilon and token_ = - | Token of {token_: Scan.Token.t; cident: cident; token_alias: token_alias; of_type0: of_type0; - prec_ref: prec_ref} + | Token of {token_: Scan.Token.t; cident: cident; token_alias: nonterm_token_alias; + of_type0: of_type0; prec_ref: prec_ref} and sep = | SepLineDelim of {line_delim: Scan.Token.t} | SepSemi of {semi: Scan.Token.t} @@ -57,13 +178,15 @@ type uident = | DelimitedList of {lbrack: Scan.Token.t; codes0: codes0; rbrack: Scan.Token.t} | DelimitedArray of {larray: Scan.Token.t; codes0: codes0; rarray: Scan.Token.t} | DelimitedModule of {lcurly: Scan.Token.t; codes0: codes0; rcurly: Scan.Token.t} + and nonterm_code_token = + | CodeToken of {token_: Scan.Token.t} and code_tl = | CodeTlDelimited of {delimited: delimited; code_tl: code_tl} | CodeTlToken of {token_: Scan.Token.t; code_tl: code_tl} | CodeTlEpsilon and code = | CodeDelimited of {delimited: delimited; code_tl: code_tl} - | CodeToken of {token_: Scan.Token.t; code_tl: code_tl} + | CodeCodeToken of {token_: Scan.Token.t; code_tl: code_tl} and prod_param_symbol = | ProdParamSymbolCident of {cident: cident} | ProdParamSymbolAlias of {alias: Scan.Token.t} @@ -130,88 +253,109 @@ module ScanToken = Scan.Token include hocc (* hocc-specific keywords *) - token HOCC "hocc" of ScanToken.t - token NONTERM "nonterm" of ScanToken.t - token EPSILON_ "epsilon" of ScanToken.t - token START "start" of ScanToken.t - token TOKEN "token" of ScanToken.t - token NEUTRAL "neutral" of ScanToken.t - token LEFT 
"left" of ScanToken.t - token RIGHT "right" of ScanToken.t - token PREC "prec" of ScanToken.t + token HOCC "hocc" of X.token_hocc + token NONTERM "nonterm" of X.token_nonterm + token EPSILON_ "epsilon" of X.token_epsilon + token START "start" of X.token_start + token TOKEN "token" of X.token_token + token NEUTRAL "neutral" of X.token_neutral + token LEFT "left" of X.token_left + token RIGHT "right" of X.token_right + token PREC "prec" of X.token_prec (* Identifiers *) - token UIDENT of ScanToken.t # Uncapitalized - token CIDENT of ScanToken.t # Capitalized - token USCORE "_" of ScanToken.t + token UIDENT of X.token_uident # Uncapitalized + token CIDENT of X.token_cident # Capitalized + token USCORE "_" of X.token_uscore (* Token alias *) - token STRING of ScanToken.t + token ALIAS of X.token_alias (* Punctuation/separators *) - token COLON_COLON_EQ "::=" of ScanToken.t - token OF "of" of ScanToken.t - token COLON ":" of ScanToken.t - token DOT "." of ScanToken.t - token ARROW "->" of ScanToken.t - token BAR "|" of ScanToken.t - token LT "<" of ScanToken.t - token COMMA "," of ScanToken.t - token SEMI ";" of ScanToken.t - token LINE_DELIM of ScanToken.t + token COLON_COLON_EQ "::=" of X.token_colon_colon_eq + token OF "of" of X.token_of + token COLON ":" of X.token_colon + token DOT "." 
of X.token_dot + token ARROW "->" of X.token_arrow + neutral pBar + token BAR "|" of X.token_bar prec pBar + token LT "<" of X.token_lt + token COMMA "," of X.token_comma + neutral pSemi + token SEMI ";" of X.token_semi prec pSemi + neutral pLineDelim + token LINE_DELIM of X.token_line_delim prec pLineDelim (* Left-right paired delimiters *) - token INDENT of ScanToken.t - token DEDENT of ScanToken.t - token LPAREN "(" of ScanToken.t - token RPAREN ")" of ScanToken.t - token LCAPTURE "(|" of ScanToken.t - token RCAPTURE "|)" of ScanToken.t - token LBRACK "[" of ScanToken.t - token RBRACK "]" of ScanToken.t - token LARRAY "[|" of ScanToken.t - token RARRAY "|]" of ScanToken.t - token LCURLY "{" of ScanToken.t - token RCURLY "}" of ScanToken.t + token INDENT of X.token_indent + token DEDENT of X.token_dedent + token LPAREN "(" of X.token_lparen + token RPAREN ")" of X.token_rparen + token LCAPTURE "(|" of X.token_lcapture + token RCAPTURE "|)" of X.token_rcapture + token LBRACK "[" of X.token_lbrack + token RBRACK "]" of X.token_rbrack + token LARRAY "[|" of X.token_larray + token RARRAY "|]" of X.token_rarray + token LCURLY "{" of X.token_lcurly + token RCURLY "}" of X.token_rcurly (* Miscellaneous Hemlock token in embedded code *) - token CODE_TOKEN of ScanToken.t + token CODE_TOKEN of X.token_code_token (* End of input, used to terminate start symbols *) - token EOI of ScanToken.t + token EOI of X.token_eoi nonterm Uident of X.uident ::= - | uident:UIDENT -> Uident {uident} + | uident:UIDENT -> + let X.(UIDENT {token_=uident; _}) = uident in + Uident {uident} nonterm Cident of X.cident ::= - | cident:CIDENT -> Cident {cident} + | cident:CIDENT -> + let X.(CIDENT {token_=cident; _}) = cident in + Cident {cident} nonterm Ident of X.ident ::= | uident:Uident -> IdentUident {uident} | cident:Cident -> IdentCident {cident} - | uscore:"_" -> IdentUscore {uscore} + | uscore:"_" -> + let X.(USCORE {token_=uscore; _}) = uscore in + IdentUscore {uscore} nonterm PrecsTl of 
X.precs_tl ::= - | comma:"," uident:Uident precs_tl:PrecsTl -> PrecsTlCommaUident {comma; uident; precs_tl} + | comma:"," uident:Uident precs_tl:PrecsTl -> + let X.(COMMA {token_=comma; _}) = comma in + PrecsTlCommaUident {comma; uident; precs_tl} | epsilon -> PrecsTlEpsilon nonterm Precs of X.precs ::= | uident:Uident precs_tl:PrecsTl -> Precs {uident; precs_tl} nonterm PrecRels of X.prec_rels ::= - | lt:"<" precs:Precs -> PrecRelsLtPrecs {lt; precs} + | lt:"<" precs:Precs -> + let X.(LT {token_=lt; _}) = lt in + PrecRelsLtPrecs {lt; precs} | epsilon -> PrecRelsEpsilon nonterm PrecType of X.prec_type ::= - | neutral_:"neutral" -> PrecTypeNeutral {neutral_} - | left_:"left" -> PrecTypeLeft {left_} - | right_:"right" -> PrecTypeRight {right_} + | neutral_:"neutral" -> + let X.(NEUTRAL {token_=neutral_; _}) = neutral_ in + PrecTypeNeutral {neutral_} + | left_:"left" -> + let X.(LEFT {token_=left_; _}) = left_ in + PrecTypeLeft {left_} + | right_:"right" -> + let X.(RIGHT {token_=right_; _}) = right_ in + PrecTypeRight {right_} nonterm Prec of X.prec_ ::= | prec_type:PrecType uident:Uident prec_rels:PrecRels -> Prec {prec_type; uident; prec_rels} nonterm OfType of X.of_type ::= | of_:"of" type_module:Cident dot:"." 
type_type:Uident -> + let X.(OF {token_=of_; _}) = of_ in + let X.(DOT {token_=dot; _}) = dot in OfType {of_; type_module; dot; type_type} nonterm OfType0 of X.of_type0 ::= @@ -219,21 +363,32 @@ include hocc | epsilon -> OfType0Epsilon nonterm PrecRef of X.prec_ref ::= - | prec_:"prec" uident:Uident -> PrecRefPrecUident {prec_; uident} + | prec_:"prec" uident:Uident -> + let X.(PREC {token_=prec_; _}) = prec_ in + PrecRefPrecUident {prec_; uident} | epsilon -> PrecRefEpsilon - nonterm TokenAlias of X.token_alias ::= - | alias:STRING -> TokenAlias {alias} + nonterm TokenAlias of X.nonterm_token_alias ::= + | alias:ALIAS -> + let X.(ALIAS {token_=alias; _}) = alias in + TokenAlias {alias} | epsilon -> TokenAliasEpsilon nonterm Token of X.token_ ::= | token_:"token" cident:Cident token_alias:TokenAlias of_type0:OfType0 prec_ref:PrecRef -> + let X.(TOKEN {token_; _}) = token_ in Token {token_; cident; token_alias; of_type0; prec_ref} nonterm Sep of X.sep ::= - | line_delim:LINE_DELIM -> SepLineDelim {line_delim} - | semi:";" -> SepSemi {semi} - | bar:"|" -> SepBar {bar} + | line_delim:LINE_DELIM -> + let X.(LINE_DELIM {token_=line_delim; _}) = line_delim in + SepLineDelim {line_delim} + | semi:";" -> + let X.(SEMI {token_=semi; _}) = semi in + SepSemi {semi} + | bar:"|" -> + let X.(BAR {token_=bar; _}) = bar in + SepBar {bar} nonterm CodesTl of X.codes_tl ::= | sep:Sep code:Code codes_tl:CodesTl -> CodesTlSepCode {sep; code; codes_tl} @@ -247,28 +402,96 @@ include hocc | epsilon -> Codes0Epsilon nonterm Delimited of X.delimited ::= - | indent:INDENT codes:Codes dedent:DEDENT -> DelimitedBlock {indent; codes; dedent} - | lparen:"(" codes0:Codes0 rparen:")" -> DelimitedParen {lparen; codes0; rparen} - | lcapture:"(|" codes0:Codes0 rcapture:"|)" -> DelimitedCapture {lcapture; codes0; rcapture} - | lbrack:"[" codes0:Codes0 rbrack:"]" -> DelimitedList {lbrack; codes0; rbrack} - | larray:"[|" codes0:Codes0 rarray:"|]" -> DelimitedArray {larray; codes0; rarray} - | lcurly:"{" 
codes0:Codes0 rcurly:"}" -> DelimitedModule {lcurly; codes0; rcurly} - + | indent:INDENT codes:Codes dedent:DEDENT -> + let X.(INDENT {token_=indent; _}) = indent in + let X.(DEDENT {token_=dedent; _}) = dedent in + DelimitedBlock {indent; codes; dedent} + | lparen:"(" codes0:Codes0 rparen:")" -> + let X.(LPAREN {token_=lparen; _}) = lparen in + let X.(RPAREN {token_=rparen; _}) = rparen in + DelimitedParen {lparen; codes0; rparen} + | lcapture:"(|" codes0:Codes0 rcapture:"|)" -> + let X.(LCAPTURE {token_=lcapture; _}) = lcapture in + let X.(RCAPTURE {token_=rcapture; _}) = rcapture in + DelimitedCapture {lcapture; codes0; rcapture} + | lbrack:"[" codes0:Codes0 rbrack:"]" -> + let X.(LBRACK {token_=lbrack; _}) = lbrack in + let X.(RBRACK {token_=rbrack; _}) = rbrack in + DelimitedList {lbrack; codes0; rbrack} + | larray:"[|" codes0:Codes0 rarray:"|]" -> + let X.(LARRAY {token_=larray; _}) = larray in + let X.(RARRAY {token_=rarray; _}) = rarray in + DelimitedArray {larray; codes0; rarray} + | lcurly:"{" codes0:Codes0 rcurly:"}" -> + let X.(LCURLY {token_=lcurly; _}) = lcurly in + let X.(RCURLY {token_=rcurly; _}) = rcurly in + DelimitedModule {lcurly; codes0; rcurly} + + nonterm CodeToken of X.nonterm_code_token ::= + | token_:UIDENT -> + let X.(UIDENT {token_; _}) = token_ in + CodeToken {token_} + | token_:CIDENT -> + let X.(CIDENT {token_; _}) = token_ in + CodeToken {token_} + | token_:"_"-> + let X.(USCORE {token_; _}) = token_ in + CodeToken {token_} + | token_:"of" -> + let X.(OF {token_; _}) = token_ in + CodeToken {token_} + | token_:":" -> + let X.(COLON {token_; _}) = token_ in + CodeToken {token_} + | token_:"." 
-> + let X.(DOT {token_; _}) = token_ in + CodeToken {token_} + | token_:"->" -> + let X.(ARROW {token_; _}) = token_ in + CodeToken {token_} + | token_:"|" -> + let X.(BAR {token_; _}) = token_ in + CodeToken {token_} + | token_:"<" -> + let X.(LT {token_; _}) = token_ in + CodeToken {token_} + | token_:"," -> + let X.(COMMA {token_; _}) = token_ in + CodeToken {token_} + | token_:";" -> + let X.(SEMI {token_; _}) = token_ in + CodeToken {token_} + | token_:LINE_DELIM -> + let X.(LINE_DELIM {token_; _}) = token_ in + CodeToken {token_} + | token_:CODE_TOKEN -> + let X.(CODE_TOKEN {token_; _}) = token_ in + CodeToken {token_} + + neutral pCodeTl < pBar, pSemi, pLineDelim nonterm CodeTl of X.code_tl ::= | delimited:Delimited code_tl:CodeTl -> CodeTlDelimited {delimited; code_tl} - | token_:CODE_TOKEN code_tl:CodeTl -> CodeTlToken {token_; code_tl} - | epsilon -> CodeTlEpsilon + | code_token:CodeToken code_tl:CodeTl -> + let X.(CodeToken {token_}) = code_token in + CodeTlToken {token_; code_tl} + + | epsilon prec pCodeTl -> CodeTlEpsilon nonterm Code of X.code ::= | delimited:Delimited code_tl:CodeTl -> CodeDelimited {delimited; code_tl} - | token_:CODE_TOKEN code_tl:CodeTl -> CodeToken {token_; code_tl} + | code_token:CodeToken code_tl:CodeTl -> + let X.(CodeToken {token_}) = code_token in + CodeCodeToken {token_; code_tl} nonterm ProdParamSymbol of X.prod_param_symbol ::= | cident:Cident -> ProdParamSymbolCident {cident} - | alias:STRING -> ProdParamSymbolAlias {alias} + | alias:ALIAS -> + let X.(ALIAS {token_=alias; _}) = alias in + ProdParamSymbolAlias {alias} nonterm ProdParam of X.prod_param ::= | ident:Ident colon:":" prod_param_symbol:ProdParamSymbol -> + let X.(COLON {token_=colon; _}) = colon in ProdParamBinding {ident; colon; prod_param_symbol} | prod_param_symbol:ProdParamSymbol -> ProdParam {prod_param_symbol} @@ -283,24 +506,33 @@ include hocc nonterm ProdPattern of X.prod_pattern ::= | prod_params:ProdParams -> ProdPatternParams {prod_params} - | 
epsilon_:"epsilon" -> ProdPatternEpsilon {epsilon_} + | epsilon_:"epsilon" -> + let X.(EPSILON {token_=epsilon_; _}) = epsilon_ in + ProdPatternEpsilon {epsilon_} nonterm Prod of X.prod ::= | prod_pattern:ProdPattern prec_ref:PrecRef -> Prod {prod_pattern; prec_ref} nonterm ProdsTl of X.prods_tl ::= - | bar:"|" prod:Prod prods_tl:ProdsTl -> ProdsTlBarProd {bar; prod; prods_tl} + | bar:"|" prod:Prod prods_tl:ProdsTl -> + let X.(BAR {token_=bar; _}) = bar in + ProdsTlBarProd {bar; prod; prods_tl} | epsilon -> ProdsTlEpsilon nonterm Prods of X.prods ::= - | bar:"|" prod:Prod prods_tl:ProdsTl -> ProdsBarProd {bar; prod; prods_tl} + | bar:"|" prod:Prod prods_tl:ProdsTl -> + let X.(BAR {token_=bar; _}) = bar in + ProdsBarProd {bar; prod; prods_tl} | prod:Prod prods_tl:ProdsTl -> ProdsProd {prod; prods_tl} nonterm Reduction of X.reduction ::= - | prods:Prods arrow:"->" code:Code -> Reduction {prods; arrow; code} + | prods:Prods arrow:"->" code:Code -> + let X.(ARROW {token_=arrow; _}) = arrow in + Reduction {prods; arrow; code} nonterm ReductionsTl of X.reductions_tl ::= | bar:"|" reduction:Reduction reductions_tl:ReductionsTl -> + let X.(BAR {token_=bar; _}) = bar in ReductionsTlBarReduction {bar; reduction; reductions_tl} | epsilon -> ReductionsTlEpsilon @@ -309,14 +541,20 @@ include hocc ReductionsReduction {reduction; reductions_tl} nonterm NontermType of X.nonterm_type ::= - | nonterm_:"nonterm" -> NontermTypeNonterm {nonterm_} - | start_:"start" -> NontermTypeStart {start_} + | nonterm_:"nonterm" -> + let X.(NONTERM {token_=nonterm_; _}) = nonterm_ in + NontermTypeNonterm {nonterm_} + | start_:"start" -> + let X.(START {token_=start_; _}) = start_ in + NontermTypeStart {start_} nonterm Nonterm of X.nonterm_ ::= | nonterm_type:NontermType cident:Cident prec_ref:PrecRef cce:"::=" prods:Prods -> + let X.(COLON_COLON_EQ {token_=cce; _}) = cce in NontermProds {nonterm_type; cident; prec_ref; cce; prods} | nonterm_type:NontermType cident:Cident of_type:OfType 
prec_ref:PrecRef cce:"::="
         reductions:Reductions ->
+        let X.(COLON_COLON_EQ {token_=cce; _}) = cce in
         NontermReductions {nonterm_type; cident; of_type; prec_ref; cce; reductions}
 
     nonterm Stmt of X.stmt ::=
@@ -326,37 +564,124 @@ include hocc
       | code:Code -> StmtCode {code}
 
     nonterm StmtsTl of X.stmts_tl ::=
-      | line_delim:LINE_DELIM stmt:Stmt stmts_tl:StmtsTl -> StmtsTl {line_delim; stmt; stmts_tl}
+      | line_delim:LINE_DELIM stmt:Stmt stmts_tl:StmtsTl ->
+        let X.(LINE_DELIM {token_=line_delim; _}) = line_delim in
+        StmtsTl {line_delim; stmt; stmts_tl}
       | epsilon -> StmtsTlEpsilon
 
     nonterm Stmts of X.stmts ::=
       | stmt:Stmt stmts_tl:StmtsTl -> Stmts {stmt; stmts_tl}
 
     nonterm Hocc of X.hocc_ ::=
-      | hocc_:"hocc" indent:INDENT stmts:Stmts dedent:DEDENT -> Hocc {hocc_; indent; stmts; dedent}
+      | hocc_:"hocc" indent:INDENT stmts:Stmts dedent:DEDENT ->
+        let X.(HOCC {token_=hocc_; _}) = hocc_ in
+        let X.(INDENT {token_=indent; _}) = indent in
+        let X.(DEDENT {token_=dedent; _}) = dedent in
+        Hocc {hocc_; indent; stmts; dedent}
 
     nonterm Eoi of X.eoi ::=
-      | eoi:EOI -> Eoi {eoi}
+      | eoi:EOI ->
+        let X.(EOI {token_=eoi; _}) = eoi in
+        Eoi {eoi}
 
     nonterm Matter of X.matter ::=
-      | token_:CODE_TOKEN matter:Matter -> Matter {token_; matter}
+      | code_token:CodeToken matter:Matter ->
+        let X.(CodeToken {token_}) = code_token in
+        Matter {token_; matter}
       | epsilon -> MatterEpsilon
 
     start Hmh of X.hmh ::=
       | prelude:Matter hocc_:Hocc postlude:Matter eoi:Eoi -> Hmh {prelude; hocc_; postlude; eoi}
 
     start Hmhi of X.hmhi ::=
-      | prelude:Matter hocc_:"hocc" postlude:Matter eoi:Eoi -> Hmhi {prelude; hocc_; postlude; eoi}
+      | prelude:Matter hocc_:"hocc" postlude:Matter eoi:Eoi ->
+        let X.(HOCC {token_=hocc_; _}) = hocc_ in
+        Hmhi {prelude; hocc_; postlude; eoi}
+
+include X (* XXX Work around qualified type syntax limitations. *)
+
+(* Wrap a scanner token in the parser `Token` constructor that the grammar declares for it.
+ * Hemlock tokens without an arm of their own fall through to `CODE_TOKEN`.
+ *
+ * End of input must map to `EOI`: as a `CODE_TOKEN` it would be absorbed by `Matter` (via
+ * `CodeToken`), `Eoi` could never be reduced, and `hmh`/`hmhi` would presumably never leave the
+ * `Prefix` state.
+ *
+ * NOTE(review): no arm constructs `Token.ALIAS`, so the `ALIAS` productions look unreachable for
+ * now -- confirm which scanner token should feed them. *)
+let token_of_scan_token token_ =
+  match token_ with
+  | Scan.Token.HmcToken {atok; _} -> begin
+      match atok with
+      | Tok_uident _ -> Token.UIDENT (UIDENT {token_})
+      | Tok_cident _ -> Token.CIDENT (CIDENT {token_})
+      | Tok_uscore -> Token.USCORE (USCORE {token_})
+      | Tok_of -> Token.OF (OF {token_})
+      | Tok_colon -> Token.COLON (COLON {token_})
+      | Tok_dot -> Token.DOT (DOT {token_})
+      | Tok_arrow -> Token.ARROW (ARROW {token_})
+      | Tok_bar -> Token.BAR (BAR {token_})
+      | Tok_lt -> Token.LT (LT {token_})
+      | Tok_comma -> Token.COMMA (COMMA {token_})
+      | Tok_semi -> Token.SEMI (SEMI {token_})
+      | Tok_line_delim -> Token.LINE_DELIM (LINE_DELIM {token_})
+      | Tok_indent _ -> Token.INDENT (INDENT {token_})
+      | Tok_dedent _ -> Token.DEDENT (DEDENT {token_})
+      | Tok_lparen -> Token.LPAREN (LPAREN {token_})
+      | Tok_rparen -> Token.RPAREN (RPAREN {token_})
+      | Tok_lcapture -> Token.LCAPTURE (LCAPTURE {token_})
+      | Tok_rcapture -> Token.RCAPTURE (RCAPTURE {token_})
+      | Tok_lbrack -> Token.LBRACK (LBRACK {token_})
+      | Tok_rbrack -> Token.RBRACK (RBRACK {token_})
+      | Tok_larray -> Token.LARRAY (LARRAY {token_})
+      | Tok_rarray -> Token.RARRAY (RARRAY {token_})
+      | Tok_lcurly -> Token.LCURLY (LCURLY {token_})
+      | Tok_rcurly -> Token.RCURLY (RCURLY {token_})
+      | Tok_end_of_input -> Token.EOI (EOI {token_})
+      | _ -> Token.CODE_TOKEN (CODE_TOKEN {token_})
+    end
+  | HoccToken {atok; _} -> begin
+      match atok with
+      | Tok_hocc -> Token.HOCC (HOCC {token_})
+      | Tok_token -> Token.TOKEN (TOKEN {token_})
+      | Tok_nonterm -> Token.NONTERM (NONTERM {token_})
+      | Tok_start -> Token.START (START {token_})
+      | Tok_epsilon -> Token.EPSILON_ (EPSILON {token_})
+      | Tok_neutral -> Token.NEUTRAL (NEUTRAL {token_})
+      | Tok_left -> Token.LEFT (LEFT {token_})
+      | Tok_right -> Token.RIGHT (RIGHT {token_})
+      | Tok_prec -> Token.PREC (PREC {token_})
+      | Tok_colon_colon_eq -> Token.COLON_COLON_EQ (COLON_COLON_EQ {token_})
+    end
+
+(* Drive the parser from `Start.Hmhi.boi`, feeding it one scanner token at a time until its
+ * status is no longer `Prefix`. Returns the advanced scanner with `Ok hmhi` on acceptance, or
+ * the scanner positioned before the rejected token with an `Error` describing that token. *)
+let hmhi scanner =
+  let rec inner scanner parser = begin
+    let scanner', scan_token = Scan.next scanner in
+    let token_ = token_of_scan_token scan_token in
+    let {status; _} as parser' = next token_ parser in
+    match status with
+    | Prefix -> inner scanner' parser'
+    | Accept (Hmhi hmhi) -> scanner', Ok hmhi
+    | Reject _ -> scanner, Error [Error.init_token scan_token "Unexpected token"]
+    | _ -> not_reached ()
+  end in
+  let parser = Start.Hmhi.boi in
+  inner scanner parser
 
+(* Same driver loop as `hmhi`, but starting from `Start.Hmh.boi` and yielding `Ok hmh` on
+ * acceptance. *)
 let hmh scanner =
   let rec inner scanner parser = begin
-    let scanner', tok = Scan.next scanner in
-    (* XXX Wrap scanner token in parser Token constructor. *)
-    let {status; _} as parser' = next tok parser in
+    let scanner', scan_token = Scan.next scanner in
+    let token_ = token_of_scan_token scan_token in
+    let {status; _} as parser' = next token_ parser in
     match status with
     | Prefix -> inner scanner' parser'
-    | Accept (Hmh hmh) -> scanner', (hmh, [])
-    | Reject _ -> halt "Parse error"
+    | Accept (Hmh hmh) -> scanner', Ok hmh
+    | Reject _ -> scanner, Error [Error.init_token scan_token "Unexpected token"]
     | _ -> not_reached ()
   end in
   let parser = Start.Hmh.boi in
@@ -505,7 +814,7 @@ let rec source_of_uident = function
   | CodeDelimited {delimited; code_tl} ->
     source_of_delimited delimited
     |> join_sources (source_of_code_tl code_tl)
-  | CodeToken {token_; code_tl} ->
+  | CodeCodeToken {token_; code_tl} ->
     token_source token_
     |> join_sources (source_of_code_tl code_tl)
 
@@ -1009,8 +1318,8 @@ let rec fmt_lcurly ~alt ~width formatter =
     |> fmt_semi ~alt ~width
     |> Fmt.fmt "code_tl=" |> fmt_code_tl ~alt ~width:width' code_tl
     |> fmt_rcurly ~alt ~width
-  | CodeToken {token_; code_tl} ->
-    formatter |> Fmt.fmt "CodeToken "
+  | CodeCodeToken {token_; code_tl} ->
+    formatter |> Fmt.fmt "CodeCodeToken "
     |> fmt_lcurly ~alt ~width
     |> Fmt.fmt "token_=" |> Scan.Token.pp token_
     |> fmt_semi ~alt ~width
@@ -1409,7 +1718,7 @@ let base_of_code code =
     | DelimitedModule {lcurly=token_; _} -> of_token token_
   and of_code = function
     | CodeDelimited {delimited; _} -> of_delimited delimited
-    | CodeToken {token_; _} -> of_token token_
+    | CodeCodeToken {token_; _} -> of_token token_
   in
   of_code code
 
@@ -1470,7
+1779,7 @@ let last_token_of_code hocc_block code = and of_code = function | CodeDelimited {delimited; code_tl} -> of_code_tl code_tl |> Option.some_or_thunk ~f:(fun () -> Some (of_delimited delimited)) - | CodeToken {token_; code_tl} -> + | CodeCodeToken {token_; code_tl} -> of_code_tl code_tl |> Option.some_or_thunk ~f:(fun () -> Some token_) in of_code code @@ -1497,7 +1806,7 @@ let indentation_of_code hocc_block code = let min_comment_indentation = min_comment_indentation_of_hocc_block hocc_block in match code with | CodeDelimited _ -> min_comment_indentation + (extend_of_int 4) - | CodeToken _ -> min_comment_indentation + | CodeCodeToken _ -> min_comment_indentation (* Find the base cursor for the postlude that preserves comments/whitespace that fall outside the * `hocc` block. *)