diff --git a/bootstrap/src/hmc/scan.ml b/bootstrap/src/hmc/scan.ml index eee46ccb..157d3fe1 100644 --- a/bootstrap/src/hmc/scan.ml +++ b/bootstrap/src/hmc/scan.ml @@ -1618,6 +1618,143 @@ module Token = struct ) |> Fmt.fmt ">" + let source = function + | Tok_and {source} + | Tok_also {source} + | Tok_as {source} + | Tok_conceal {source} + | Tok_effect {source} + | Tok_else {source} + | Tok_expose {source} + | Tok_external {source} + | Tok_false {source} + | Tok_fn {source} + | Tok_function {source} + | Tok_if {source} + | Tok_import {source} + | Tok_include {source} + | Tok_lazy {source} + | Tok_let {source} + | Tok_match {source} + | Tok_mutability {source} + | Tok_of {source} + | Tok_open {source} + | Tok_or {source} + | Tok_rec {source} + | Tok_then {source} + | Tok_true {source} + | Tok_type {source} + | Tok_when {source} + | Tok_with {source} + | Tok_tilde_op {source; _} + | Tok_qmark_op {source; _} + | Tok_star_star_op {source; _} + | Tok_star_op {source; _} + | Tok_slash_op {source; _} + | Tok_pct_op {source; _} + | Tok_plus_op {source; _} + | Tok_minus_op {source; _} + | Tok_at_op {source; _} + | Tok_caret_op {source; _} + | Tok_dollar_op {source; _} + | Tok_lt_op {source; _} + | Tok_eq_op {source; _} + | Tok_gt_op {source; _} + | Tok_bar_op {source; _} + | Tok_colon_op {source; _} + | Tok_dot_op {source; _} + | Tok_tilde {source} + | Tok_qmark {source} + | Tok_minus {source} + | Tok_lt {source} + | Tok_lt_eq {source} + | Tok_eq {source} + | Tok_lt_gt {source} + | Tok_gt_eq {source} + | Tok_gt {source} + | Tok_comma {source} + | Tok_dot {source} + | Tok_dot_dot {source} + | Tok_semi {source} + | Tok_colon {source} + | Tok_colon_colon {source} + | Tok_colon_eq {source} + | Tok_lparen {source} + | Tok_rparen {source} + | Tok_lbrack {source} + | Tok_rbrack {source} + | Tok_lcurly {source} + | Tok_rcurly {source} + | Tok_bar {source} + | Tok_lcapture {source} + | Tok_rcapture {source} + | Tok_larray {source} + | Tok_rarray {source} + | Tok_bslash {source} + | 
Tok_tick {source} + | Tok_caret {source} + | Tok_amp {source} + | Tok_xmark {source} + | Tok_arrow {source} + | Tok_carrow {source} + | Tok_source_directive {source; _} + | Tok_line_delim {source} + | Tok_indent {source; _} + | Tok_dedent {source; _} + | Tok_whitespace {source} + | Tok_hash_comment {source} + | Tok_paren_comment {source; _} + | Tok_uscore {source} + | Tok_uident {source; _} + | Tok_cident {source; _} + | Tok_codepoint {source; _} + | Tok_rstring {source; _} + | Tok_istring {source; _} + | Tok_fstring_lditto {source} + | Tok_fstring_interpolated {source; _} + | Tok_fstring_pct {source} + | Tok_fstring_pad {source; _} + | Tok_fstring_just {source; _} + | Tok_fstring_sign {source; _} + | Tok_fstring_alt {source} + | Tok_fstring_zpad {source} + | Tok_fstring_width_star {source} + | Tok_fstring_width {source; _} + | Tok_fstring_pmode {source; _} + | Tok_fstring_precision_star {source} + | Tok_fstring_precision {source; _} + | Tok_fstring_radix {source; _} + | Tok_fstring_notation {source; _} + | Tok_fstring_pretty {source} + | Tok_fstring_fmt {source; _} + | Tok_fstring_sep {source; _} + | Tok_fstring_label {source; _} + | Tok_fstring_lparen_caret {source} + | Tok_fstring_caret_rparen {source} + | Tok_fstring_rditto {source} + | Tok_r32 {source; _} + | Tok_r64 {source; _} + | Tok_u8 {source; _} + | Tok_i8 {source; _} + | Tok_u16 {source; _} + | Tok_i16 {source; _} + | Tok_u32 {source; _} + | Tok_i32 {source; _} + | Tok_u64 {source; _} + | Tok_i64 {source; _} + | Tok_u128 {source; _} + | Tok_i128 {source; _} + | Tok_u256 {source; _} + | Tok_i256 {source; _} + | Tok_u512 {source; _} + | Tok_i512 {source; _} + | Tok_nat {source; _} + | Tok_zint {source; _} + | Tok_end_of_input {source} + | Tok_misaligned {source} + | Tok_error {source; _} + -> source + let malformations = function (* Keywords. 
*) | Tok_and _ | Tok_also _ | Tok_as _ | Tok_conceal _ | Tok_effect _ | Tok_else _ | Tok_expose _ @@ -1842,7 +1979,7 @@ let pp_line_state line_state formatter = * completing specifier scanning the state transition to `Fstring_body`, which is capable of * initiating the scan of a subsequent specifier. *) type fstring_state = - | Fstring_spec_pct_seen of ConcreteToken.t list + | Fstring_spec_pct_seen of Token.t list | Fstring_spec_pad_seen | Fstring_spec_just_seen | Fstring_spec_sign_seen @@ -1863,14 +2000,14 @@ type fstring_state = | Fstring_spec_fmt_seen | Fstring_spec_sep_seen | Fstring_expr_value of Source.Cursor.t option (* Cursor is start of captured value expression. *) - | Fstring_value_seen of ConcreteToken.t + | Fstring_value_seen of Token.t | Fstring_body - | Fstring_rditto_seen of ConcreteToken.t + | Fstring_rditto_seen of Token.t let pp_fstring_state fstring_state formatter = match fstring_state with | Fstring_spec_pct_seen ctoks -> - formatter |> Fmt.fmt "Fstring_spec_pct " |> (List.pp ConcreteToken.pp) ctoks + formatter |> Fmt.fmt "Fstring_spec_pct_seen " |> (List.pp Token.pp) ctoks | Fstring_spec_pad_seen -> formatter |> Fmt.fmt "Fstring_spec_pad_seen" | Fstring_spec_just_seen -> formatter |> Fmt.fmt "Fstring_spec_just_seen" | Fstring_spec_sign_seen -> formatter |> Fmt.fmt "Fstring_spec_sign_seen" @@ -1892,9 +2029,9 @@ let pp_fstring_state fstring_state formatter = | Fstring_spec_sep_seen -> formatter |> Fmt.fmt "Fstring_spec_sep_seen" | Fstring_expr_value cursor_opt -> formatter |> Fmt.fmt "Fstring_exp_value " |> (Option.pp Source.Cursor.pp) cursor_opt - | Fstring_value_seen ctok -> formatter |> Fmt.fmt "Fstring_spec_pct " |> ConcreteToken.pp ctok + | Fstring_value_seen ctok -> formatter |> Fmt.fmt "Fstring_value_seen " |> Token.pp ctok | Fstring_body -> formatter |> Fmt.fmt "Fstring_body" - | Fstring_rditto_seen ctok -> formatter |> Fmt.fmt "Fstring_spec_pct " |> ConcreteToken.pp ctok + | Fstring_rditto_seen ctok -> (* Payload-carrying states print their own constructor name. *) formatter |> Fmt.fmt "Fstring_rditto_seen " 
|> Token.pp ctok type t = { tok_base: Source.Cursor.t; @@ -1947,10 +2084,10 @@ let in_fstring t = * Convenience routines for reporting malformations. *) let malformation ~base ~past description = - AbstractToken.Rendition.Malformation.init ~base ~past ~description + Token.Rendition.Malformation.init ~base ~past ~description let malformed malformation = - AbstractToken.Rendition.of_mals [malformation] + Token.Rendition.of_mals [malformation] let unexpected_codepoint_source_directive base past = malformation ~base ~past "Unexpected codepoint in source directive" @@ -2023,7 +2160,7 @@ module State = struct m: Realer.t; point_shift: sint; } - | Malformations of AbstractToken.Rendition.Malformation.t list + | Malformations of Token.Rendition.Malformation.t list let pp t formatter = match t with @@ -2034,7 +2171,7 @@ module State = struct |> Fmt.fmt "}" | Malformations mals -> formatter - |> Fmt.fmt "Malformations " |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "Malformations " |> (List.pp Token.Rendition.Malformation.pp) mals let init ~m = R {m; point_shift=Sint.zero} @@ -2083,7 +2220,7 @@ module State = struct m: real; ds: real; (* (ds * digit) scales digit to its fractional value. *) } - | Malformations of AbstractToken.Rendition.Malformation.t list + | Malformations of Token.Rendition.Malformation.t list let pp t formatter = match t with @@ -2094,7 +2231,7 @@ module State = struct |> Fmt.fmt "}" | Malformations mals -> formatter - |> Fmt.fmt "Malformations " |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "Malformations " |> (List.pp Token.Rendition.Malformation.pp) mals let init ~m = R {m; ds=1. /. 
10.} @@ -2120,7 +2257,7 @@ module State = struct | R of { m: Realer.t; } - | Malformations of AbstractToken.Rendition.Malformation.t list + | Malformations of Token.Rendition.Malformation.t list let pp t formatter = match t with @@ -2130,7 +2267,7 @@ module State = struct |> Fmt.fmt "}" | Malformations mals -> formatter - |> Fmt.fmt "Malformations " |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "Malformations " |> (List.pp Token.Rendition.Malformation.pp) mals let init ~m = R {m} @@ -2148,7 +2285,7 @@ module State = struct | R of { m: real; } - | Malformations of AbstractToken.Rendition.Malformation.t list + | Malformations of Token.Rendition.Malformation.t list let pp t formatter = match t with @@ -2158,7 +2295,7 @@ module State = struct |> Fmt.fmt "}" | Malformations mals -> formatter - |> Fmt.fmt "Malformations " |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "Malformations " |> (List.pp Token.Rendition.Malformation.pp) mals let init ~m = R {m} @@ -2177,7 +2314,7 @@ module State = struct m: Realer.t; exp_sign: Sign.t; } - | Malformations of AbstractToken.Rendition.Malformation.t list + | Malformations of Token.Rendition.Malformation.t list let pp t formatter = match t with @@ -2188,7 +2325,7 @@ module State = struct |> Fmt.fmt "}" | Malformations mals -> formatter - |> Fmt.fmt "Malformations " |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "Malformations " |> (List.pp Token.Rendition.Malformation.pp) mals let init ~m ~exp_sign = R {m; exp_sign} @@ -2207,7 +2344,7 @@ module State = struct m: real; exp_sign: Sign.t; } - | Malformations of AbstractToken.Rendition.Malformation.t list + | Malformations of Token.Rendition.Malformation.t list let pp t formatter = match t with @@ -2218,7 +2355,7 @@ module State = struct |> Fmt.fmt "}" | Malformations mals -> formatter - |> Fmt.fmt "Malformations " |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "Malformations " |> (List.pp 
Token.Rendition.Malformation.pp) mals let init ~m ~exp_sign = R {m; exp_sign} @@ -2238,7 +2375,7 @@ module State = struct exp_sign: Sign.t; exp: Nat.t; } - | Malformations of AbstractToken.Rendition.Malformation.t list + | Malformations of Token.Rendition.Malformation.t list let pp t formatter = match t with @@ -2250,7 +2387,7 @@ module State = struct |> Fmt.fmt "}" | Malformations mals -> formatter - |> Fmt.fmt "Malformations " |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "Malformations " |> (List.pp Token.Rendition.Malformation.pp) mals let init ~m ~exp_sign ~exp = R {m; exp_sign; exp} @@ -2286,7 +2423,7 @@ module State = struct exp_sign: Sign.t; exp: Nat.t; } - | Malformations of AbstractToken.Rendition.Malformation.t list + | Malformations of Token.Rendition.Malformation.t list let pp t formatter = match t with @@ -2298,7 +2435,7 @@ module State = struct |> Fmt.fmt "}" | Malformations mals -> formatter - |> Fmt.fmt "Malformations " |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "Malformations " |> (List.pp Token.Rendition.Malformation.pp) mals let init ~m ~exp_sign ~exp = R {m; exp_sign; exp} @@ -2332,7 +2469,7 @@ module State = struct | R of { r: Realer.t; } - | Malformations of AbstractToken.Rendition.Malformation.t list + | Malformations of Token.Rendition.Malformation.t list let pp t formatter = match t with @@ -2342,7 +2479,7 @@ module State = struct |> Fmt.fmt "}" | Malformations mals -> formatter - |> Fmt.fmt "Malformations " |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "Malformations " |> (List.pp Token.Rendition.Malformation.pp) mals let init ~r = R {r} @@ -2360,7 +2497,7 @@ module State = struct | R of { r: real; } - | Malformations of AbstractToken.Rendition.Malformation.t list + | Malformations of Token.Rendition.Malformation.t list let pp t formatter = match t with @@ -2370,7 +2507,7 @@ module State = struct |> Fmt.fmt "}" | Malformations mals -> formatter - |> Fmt.fmt 
"Malformations " |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "Malformations " |> (List.pp Token.Rendition.Malformation.pp) mals let init ~r = R {r} @@ -2389,7 +2526,7 @@ module State = struct r: Realer.t; bitwidth: Nat.t; } - | Malformations of AbstractToken.Rendition.Malformation.t list + | Malformations of Token.Rendition.Malformation.t list let pp t formatter = match t with @@ -2400,7 +2537,7 @@ module State = struct |> Fmt.fmt "}" | Malformations mals -> formatter - |> Fmt.fmt "Malformations " |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "Malformations " |> (List.pp Token.Rendition.Malformation.pp) mals let init ~r ~bitwidth = R {r; bitwidth} @@ -2423,7 +2560,7 @@ module State = struct r: real; bitwidth: Nat.t; } - | Malformations of AbstractToken.Rendition.Malformation.t list + | Malformations of Token.Rendition.Malformation.t list let pp t formatter = match t with @@ -2434,7 +2571,7 @@ module State = struct |> Fmt.fmt "}" | Malformations mals -> formatter - |> Fmt.fmt "Malformations " |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "Malformations " |> (List.pp Token.Rendition.Malformation.pp) mals let init ~r ~bitwidth = R {r; bitwidth} @@ -2562,13 +2699,13 @@ module State = struct module Src_directive_path = struct type t = { - mals: AbstractToken.Rendition.Malformation.t list; + mals: Token.Rendition.Malformation.t list; path: codepoint list option; } let pp {mals; path} formatter = formatter - |> Fmt.fmt "{mals=" |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "{mals=" |> (List.pp Token.Rendition.Malformation.pp) mals |> Fmt.fmt "; path=" |> (Option.pp (List.pp Codepoint.pp)) path |> Fmt.fmt "}" @@ -2585,14 +2722,14 @@ module State = struct module Src_directive_path_bslash = struct type t = { - mals: AbstractToken.Rendition.Malformation.t list; + mals: Token.Rendition.Malformation.t list; path: codepoint list option; bslash_cursor: Source.Cursor.t; } let pp 
{mals; path; bslash_cursor} formatter = formatter - |> Fmt.fmt "{mals=" |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "{mals=" |> (List.pp Token.Rendition.Malformation.pp) mals |> Fmt.fmt "; path=" |> (Option.pp (List.pp Codepoint.pp)) path |> Fmt.fmt "; bslash_cursor=" |> Text.Pos.pp (Source.Cursor.pos bslash_cursor) |> Fmt.fmt "}" @@ -2610,14 +2747,14 @@ module State = struct module Src_directive_path_bslash_u_lcurly = struct type t = { - mals: AbstractToken.Rendition.Malformation.t list; + mals: Token.Rendition.Malformation.t list; path: codepoint list option; bslash_cursor: Source.Cursor.t; } let pp {mals; path; bslash_cursor} formatter = formatter - |> Fmt.fmt "{mals=" |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "{mals=" |> (List.pp Token.Rendition.Malformation.pp) mals |> Fmt.fmt "; path=" |> (Option.pp (List.pp Codepoint.pp)) path |> Fmt.fmt "; bslash_cursor=" |> Text.Pos.pp (Source.Cursor.pos bslash_cursor) |> Fmt.fmt "}" @@ -2631,7 +2768,7 @@ module State = struct module Src_directive_path_bslash_u_lcurly_hex = struct type t = { - mals: AbstractToken.Rendition.Malformation.t list; + mals: Token.Rendition.Malformation.t list; path: codepoint list option; bslash_cursor: Source.Cursor.t; hex: Nat.t; @@ -2639,7 +2776,7 @@ module State = struct let pp {mals; path; bslash_cursor; hex} formatter = formatter - |> Fmt.fmt "{mals=" |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "{mals=" |> (List.pp Token.Rendition.Malformation.pp) mals |> Fmt.fmt "; path=" |> (Option.pp (List.pp Codepoint.pp)) path |> Fmt.fmt "; bslash_cursor=" |> Text.Pos.pp (Source.Cursor.pos bslash_cursor) |> Fmt.fmt "; hex=" |> Nat.fmt ~alt:true ~radix:Radix.Hex ~pretty:true hex @@ -2666,7 +2803,7 @@ module State = struct module Src_directive_line = struct type t = { - mals: AbstractToken.Rendition.Malformation.t list; + mals: Token.Rendition.Malformation.t list; path: codepoint list option; line_cursor: Source.Cursor.t; 
line: Nat.t option; @@ -2674,7 +2811,7 @@ module State = struct let pp {mals; path; line_cursor; line} formatter = formatter - |> Fmt.fmt "{mals=" |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "{mals=" |> (List.pp Token.Rendition.Malformation.pp) mals |> Fmt.fmt "; path=" |> (Option.pp (List.pp Codepoint.pp)) path |> Fmt.fmt "; line_cursor=" |> Text.Pos.pp (Source.Cursor.pos line_cursor) |> Fmt.fmt "; line=" |> (Option.pp Nat.pp) line @@ -2689,14 +2826,14 @@ module State = struct module Src_directive_line_colon = struct type t = { - mals: AbstractToken.Rendition.Malformation.t list; + mals: Token.Rendition.Malformation.t list; path: codepoint list option; line: Nat.t option; } let pp {mals; path; line} formatter = formatter - |> Fmt.fmt "{mals=" |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "{mals=" |> (List.pp Token.Rendition.Malformation.pp) mals |> Fmt.fmt "; path=" |> (Option.pp (List.pp Codepoint.pp)) path |> Fmt.fmt "; line=" |> (Option.pp Nat.pp) line |> Fmt.fmt "}" @@ -2710,7 +2847,7 @@ module State = struct module Src_directive_indent = struct type t = { - mals: AbstractToken.Rendition.Malformation.t list; + mals: Token.Rendition.Malformation.t list; path: codepoint list option; line: Nat.t option; indent_cursor: Source.Cursor.t; @@ -2719,7 +2856,7 @@ module State = struct let pp {mals; path; line; indent_cursor; indent} formatter = formatter - |> Fmt.fmt "{mals=" |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "{mals=" |> (List.pp Token.Rendition.Malformation.pp) mals |> Fmt.fmt "; path=" |> (Option.pp (List.pp Codepoint.pp)) path |> Fmt.fmt "; line=" |> (Option.pp Nat.pp) line |> Fmt.fmt "; indent_cursor=" |> Text.Pos.pp (Source.Cursor.pos indent_cursor) @@ -2738,14 +2875,14 @@ module State = struct module Src_directive_indent_0 = struct type t = { - mals: AbstractToken.Rendition.Malformation.t list; + mals: Token.Rendition.Malformation.t list; path: codepoint list option; line: 
Nat.t option; } let pp {mals; path; line} formatter = formatter - |> Fmt.fmt "{mals=" |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "{mals=" |> (List.pp Token.Rendition.Malformation.pp) mals |> Fmt.fmt "; path=" |> (Option.pp (List.pp Codepoint.pp)) path |> Fmt.fmt "; line=" |> (Option.pp Nat.pp) line |> Fmt.fmt "}" @@ -2759,7 +2896,7 @@ module State = struct module Src_directive_indent_plus = struct type t = { - mals: AbstractToken.Rendition.Malformation.t list; + mals: Token.Rendition.Malformation.t list; path: codepoint list option; line: Nat.t option; indent: Nat.t option; @@ -2767,7 +2904,7 @@ module State = struct let pp {mals; path; line; indent} formatter = formatter - |> Fmt.fmt "{mals=" |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "{mals=" |> (List.pp Token.Rendition.Malformation.pp) mals |> Fmt.fmt "; path=" |> (Option.pp (List.pp Codepoint.pp)) path |> Fmt.fmt "; line=" |> (Option.pp Nat.pp) line |> Fmt.fmt "; indent=" |> (Option.pp Nat.pp) indent @@ -2782,7 +2919,7 @@ module State = struct module Src_directive_omit = struct type t = { - mals: AbstractToken.Rendition.Malformation.t list; + mals: Token.Rendition.Malformation.t list; path: codepoint list option; line: Nat.t option; indent: Nat.t option; @@ -2792,7 +2929,7 @@ module State = struct let pp {mals; path; line; indent; omit_cursor; omit} formatter = formatter - |> Fmt.fmt "{mals=" |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "{mals=" |> (List.pp Token.Rendition.Malformation.pp) mals |> Fmt.fmt "; path=" |> (Option.pp (List.pp Codepoint.pp)) path |> Fmt.fmt "; line=" |> (Option.pp Nat.pp) line |> Fmt.fmt "; indent=" |> (Option.pp Nat.pp) indent @@ -2864,12 +3001,12 @@ module State = struct module Codepoint_mal = struct type t = { - mal: AbstractToken.Rendition.Malformation.t; + mal: Token.Rendition.Malformation.t; } let pp {mal} formatter = formatter - |> Fmt.fmt "{mal=" |> AbstractToken.Rendition.Malformation.pp mal |> 
Fmt.fmt "}" + |> Fmt.fmt "{mal=" |> Token.Rendition.Malformation.pp mal |> Fmt.fmt "}" let init ~mal = {mal} @@ -2877,13 +3014,13 @@ module State = struct module Rstring_ltag = struct type t = { - mals: AbstractToken.Rendition.Malformation.t list; + mals: Token.Rendition.Malformation.t list; ltag_base: Source.Cursor.t; } let pp {mals; ltag_base} formatter = formatter - |> Fmt.fmt "{mals=" |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "{mals=" |> (List.pp Token.Rendition.Malformation.pp) mals |> Fmt.fmt "; ltag_base=" |> Source.Cursor.pp ltag_base |> Fmt.fmt "}" @@ -2896,14 +3033,14 @@ module State = struct module Rstring_body = struct type t = { - mals: AbstractToken.Rendition.Malformation.t list; + mals: Token.Rendition.Malformation.t list; ltag: Source.Slice.t; body_base: Source.Cursor.t; } let pp {mals; ltag; body_base} formatter = formatter - |> Fmt.fmt "{mals=" |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "{mals=" |> (List.pp Token.Rendition.Malformation.pp) mals |> Fmt.fmt "; ltag=" |> Source.Slice.pp ltag |> Fmt.fmt "; body_base=" |> Source.Cursor.pp body_base |> Fmt.fmt "}" @@ -2917,7 +3054,7 @@ module State = struct module Rstring_rtag = struct type t = { - mals: AbstractToken.Rendition.Malformation.t list; + mals: Token.Rendition.Malformation.t list; ltag: Source.Slice.t; body: Source.Slice.t; ltag_cursor: Source.Cursor.t; @@ -2925,7 +3062,7 @@ module State = struct let pp {mals; ltag; ltag_cursor; body} formatter = formatter - |> Fmt.fmt "{mals=" |> (List.pp AbstractToken.Rendition.Malformation.pp) mals + |> Fmt.fmt "{mals=" |> (List.pp Token.Rendition.Malformation.pp) mals |> Fmt.fmt "; ltag=" |> Source.Slice.pp ltag |> Fmt.fmt "; body=" |> Source.Slice.pp body |> Fmt.fmt "; ltag_cursor=" |> Source.Cursor.pp ltag_cursor @@ -2944,7 +3081,7 @@ module State = struct module CodepointAccum = struct type t = | Codepoints of codepoint list - | Malformations of AbstractToken.Rendition.Malformation.t list + | 
Malformations of Token.Rendition.Malformation.t list let pp t formatter = match t with @@ -2952,7 +3089,7 @@ module State = struct formatter |> Fmt.fmt "(Codepoints " |> (List.pp Codepoint.pp) cps |> Fmt.fmt ")" | Malformations mals -> formatter |> Fmt.fmt "(Malformations " - |> (List.pp AbstractToken.Rendition.Malformation.pp) mals |> Fmt.fmt ")" + |> (List.pp Token.Rendition.Malformation.pp) mals |> Fmt.fmt ")" let empty = Codepoints [] @@ -2964,6 +3101,7 @@ module State = struct | Codepoints _ -> Malformations [mal] | Malformations mals -> Malformations (mal :: mals) +(* XXX let to_atok_istring = function | Codepoints cps -> AbstractToken.Tok_istring (Constant (String.of_list_rev cps)) | Malformations mals -> AbstractToken.Tok_istring (AbstractToken.Rendition.of_mals mals) @@ -2972,6 +3110,14 @@ module State = struct | Codepoints cps -> AbstractToken.Tok_fstring_interpolated (Constant (String.of_list_rev cps)) | Malformations mals -> AbstractToken.Tok_fstring_interpolated (AbstractToken.Rendition.of_mals mals) +*) + let to_istring = function + | Codepoints cps -> Token.Rendition.Constant (String.of_list_rev cps) + | Malformations mals -> Token.Rendition.of_mals mals + + let to_fstring_interpolated = function + | Codepoints cps -> Token.Rendition.Constant (String.of_list_rev cps) + | Malformations mals -> Token.Rendition.of_mals mals end module Istring_body = struct @@ -3448,7 +3594,7 @@ module Dfa = struct type transition = | Advance of View.t * State.t | Retry of State.t - | Accept of ConcreteToken.t + | Accept of Token.t type action0 = View.t -> t -> t * transition and node0 = { @@ -3514,6 +3660,7 @@ module Dfa = struct let t', transition = action1 state_payload view' t in t', transition +(* XXX Remove. 
let accept_ctok ctok cursor t = {t with tok_base=cursor}, Accept ctok @@ -3528,8 +3675,24 @@ module Dfa = struct let accept_pexcl atok View.{ppcursor; _} t = accept atok ppcursor t +*) + + let accept_tok tok cursor t = + {t with tok_base=cursor}, Accept tok + + let source_at cursor t = + Source.Slice.of_cursors ~base:t.tok_base ~past:cursor + + let source_incl View.{cursor; _} t = + source_at cursor t + + let source_excl View.{pcursor; _} t = + source_at pcursor t - let accept_source_directive atok View.{cursor; _} t = + let source_pexcl {View.ppcursor; _} t = + source_at ppcursor t + + let accept_source_directive source_directive View.{cursor; _} t = (* Treat the directive as having come from the unbiased source. Rebias the cursors such that * they are unbiased. This is different than unbiasing, in that it preserves the cursors' bias * chain, which enables recovering source bias when moving leftwards. *) @@ -3542,13 +3705,13 @@ module Dfa = struct end in f base cursor end in - let cursor', level' = match atok with - | AbstractToken.Tok_source_directive Constant {path=None; line=None; io=None} + let cursor', level' = match source_directive with + | Token.Rendition.Constant Token.{path=None; line=None; io=None} -> begin (* Rebias the source such that it is unbiased. 
*) past, Level.reset t.level end - | AbstractToken.Tok_source_directive Constant {path; line; io} -> begin + | Constant {path; line; io} -> begin let source = Source.Cursor.container past in let path = match path with | None -> Source.path source @@ -3570,17 +3733,19 @@ module Dfa = struct let source' = Source.bias ~path ~line_bias ~col_bias source in (Source.Cursor.bias source' past), Level.embed (indent / 4L) t.level end - | AbstractToken.Tok_source_directive Malformed _ -> past, t.level - | _ -> not_reached () + | Malformed _ -> past, t.level in - {t with tok_base=cursor'; level=level'}, Accept (ConcreteToken.ctok_at ~base ~past atok) + let source = Source.Slice.of_cursors ~base ~past in + let tok = Token.Tok_source_directive {source; source_directive} in + {t with tok_base=cursor'; level=level'}, Accept tok - let accept_line_break atok cursor t = + let accept_whitespace_incl View.{cursor; _} t = {t with tok_base=cursor; line_state=Line_begin}, - Accept (ConcreteToken.ctok_at ~base:t.tok_base ~past:cursor atok) + Accept (Token.Tok_whitespace {source=source_at cursor t}) - let accept_line_break_incl atok View.{cursor; _} t = - accept_line_break atok cursor t + let accept_hash_comment_incl View.{cursor; _} t = + {t with tok_base=cursor; line_state=Line_begin}, + Accept (Token.Tok_hash_comment {source=source_at cursor t}) let fstring_push fstring_state t = {t with fstring_states=fstring_state :: t.fstring_states} @@ -3591,27 +3756,59 @@ module Dfa = struct let fstring_trans fstring_state t = fstring_push fstring_state (fstring_pop t) +(* let accept_fstring_push_ctok fstring_state ctok cursor t = accept_ctok ctok cursor (fstring_push fstring_state t) +*) + let accept_fstring_push_tok fstring_state ctok cursor t = + accept_tok ctok cursor (fstring_push fstring_state t) + +(* let accept_fstring_push_ctok_incl fstring_state ctok View.{cursor; _} t = accept_fstring_push_ctok fstring_state ctok cursor t +*) + + let accept_fstring_push_tok_incl fstring_state tok 
View.{cursor; _} t = + accept_fstring_push_tok fstring_state tok cursor t +(* let accept_fstring_pop_ctok ctok cursor t = accept_ctok ctok cursor (fstring_pop t) +*) + + let accept_fstring_pop_tok tok cursor t = + accept_tok tok cursor (fstring_pop t) +(* let accept_fstring_pop atok cursor t = accept_fstring_pop_ctok (ConcreteToken.ctok_at ~base:t.tok_base ~past:cursor atok) cursor t +*) +(* let accept_fstring_trans_ctok fstring_state ctok cursor t = accept_ctok ctok cursor (fstring_trans fstring_state t) +*) + let accept_fstring_trans_tok fstring_state tok cursor t = + accept_tok tok cursor (fstring_trans fstring_state t) + +(* let accept_fstring_trans_ctok_incl fstring_state ctok View.{cursor; _} t = accept_fstring_trans_ctok fstring_state ctok cursor t +*) + + let accept_fstring_trans_tok_incl fstring_state tok View.{cursor; _} t = + accept_fstring_trans_tok fstring_state tok cursor t +(* let accept_fstring_trans fstring_state atok cursor t = accept_fstring_trans_ctok fstring_state (ConcreteToken.ctok_at ~base:t.tok_base ~past:cursor atok) cursor t +*) + + let accept_fstring_trans fstring_state tok cursor t = + accept_fstring_trans_tok fstring_state tok cursor t let accept_fstring_trans_incl trans atok View.{cursor; _} t = accept_fstring_trans trans atok cursor t @@ -3637,18 +3834,27 @@ module Dfa = struct let node0_start = { edges0=map_of_cps_alist [ - (",", accept_incl Tok_comma); - (";", accept_incl Tok_semi); + (",", fun (View.{cursor; _} as view) t -> + accept_tok (Tok_comma {source=source_incl view t}) cursor t); + (";", fun (View.{cursor; _} as view) t -> + accept_tok (Tok_semi {source=source_incl view t}) cursor t); ("(", advance State_lparen); - (")", accept_incl Tok_rparen); + (")", fun (View.{cursor; _} as view) t -> + accept_tok (Tok_rparen {source=source_incl view t}) cursor t); ("[", advance State_lbrack); - ("]", accept_incl Tok_rbrack); - ("{", accept_incl Tok_lcurly); - ("}", accept_incl Tok_rcurly); - ("\\", accept_incl Tok_bslash); - ("&", 
accept_incl Tok_amp); - ("!", accept_incl Tok_xmark); - ("\n", accept_line_break_incl Tok_whitespace); + ("]", fun (View.{cursor; _} as view) t -> + accept_tok (Tok_rbrack {source=source_incl view t}) cursor t); + ("{", fun (View.{cursor; _} as view) t -> + accept_tok (Tok_lcurly {source=source_incl view t}) cursor t); + ("}", fun (View.{cursor; _} as view) t -> + accept_tok (Tok_rcurly {source=source_incl view t}) cursor t); + ("\\", fun (View.{cursor; _} as view) t -> + accept_tok (Tok_bslash {source=source_incl view t}) cursor t); + ("&", fun (View.{cursor; _} as view) t -> + accept_tok (Tok_amp {source=source_incl view t}) cursor t); + ("!", fun (View.{cursor; _} as view) t -> + accept_tok (Tok_xmark {source=source_incl view t}) cursor t); + ("\n", accept_whitespace_incl); ("~", advance State_tilde); ("?", advance State_qmark); ("*", advance State_star); @@ -3682,16 +3888,18 @@ module Dfa = struct ]; default0=(fun (View.{cursor; _} as view) t -> let mal = malformation ~base:t.tok_base ~past:cursor "Unsupported codepoint" in - accept_incl (Tok_error [mal]) view t + accept_tok (Tok_error {source=source_incl view t; error=[mal]}) cursor t ); - eoi0=(fun view t -> - let accept_dentation atok View.{cursor; _} t = begin - accept atok cursor {t with line_state=Line_body} - end in + eoi0=(fun (View.{cursor; _} as view) t -> match Level.level t.level, t.line_state with - | 0L, Line_begin -> accept_dentation Tok_line_delim view t - | 0L, _ -> accept_incl Tok_end_of_input view t - | _ -> accept_dentation (Tok_dedent (Constant ())) view {t with level=Level.pred t.level} + (* As in the removed accept_dentation helper, line_delim/dedent set line_state=Line_body. *) + | 0L, Line_begin -> + accept_tok (Tok_line_delim {source=source_incl view t}) cursor {t with line_state=Line_body} + | 0L, _ -> accept_tok (Tok_end_of_input {source=source_incl view t}) cursor t + | _ -> begin + let t' = {t with level=Level.pred t.level; line_state=Line_body} in + accept_tok (Tok_dedent {source=source_incl view t; dedent=(Constant ())}) cursor t' + end ); } @@ -4630,7 +4838,10 @@ module Dfa = struct advance (State_real_dec_dot 
(State.Real_dec_dot.mals ~mals:[mal])) view t ); ]; + default0=(fun (View.{pcursor; _} as view) t -> + accept_tok (Token.Tok_r64 {source=source_excl view t; r64=(Constant 0.)}) pcursor t); (*XXX Exclusive accept: token ends at pcursor. Previous form: default0=accept_excl (AbstractToken.Tok_r64 (Constant 0.)); +*) eoi0=accept_incl (AbstractToken.Tok_r64 (Constant 0.)); } @@ -5197,9 +5408,9 @@ module Dfa = struct | Some _, None | None, Some _ -> not_reached () in - AbstractToken.Tok_source_directive (Constant {path; line; io}) + Token.Rendition.Constant Token.{path; line; io} end - | _ :: _ -> AbstractToken.Tok_source_directive (AbstractToken.Rendition.of_mals mals) + | _ :: _ -> Token.Rendition.of_mals mals let node0_colon = { edges0=map_of_cps_alist [ @@ -5798,7 +6009,7 @@ module Dfa = struct match t.line_state with | Line_begin -> accept_incl Tok_whitespace view t | Line_whitespace - | Line_start_col _ -> accept_line_break_incl Tok_whitespace view t + | Line_start_col _ -> accept_whitespace_incl view t | Line_body -> not_reached () ); ]; @@ -5835,7 +6046,7 @@ module Dfa = struct let node0_space = { edges0=map_of_cps_alist [ (" ", advance State_dentation_space); - ("\n", accept_line_break_incl Tok_whitespace); + ("\n", accept_whitespace_incl); ]; default0=accept_whitespace_excl; eoi0=accept_whitespace_incl; @@ -5845,7 +6056,7 @@ module Dfa = struct let node0_whitespace = { edges0=map_of_cps_alist [ (" ", advance State_whitespace); - ("\n", accept_line_break_incl Tok_whitespace); + ("\n", accept_whitespace_incl); ]; default0=accept_excl Tok_whitespace; eoi0=accept_incl Tok_whitespace; @@ -5853,10 +6064,10 @@ module Dfa = struct let node0_hash_comment = { edges0=map_of_cps_alist [ - ("\n", accept_line_break_incl Tok_hash_comment); + ("\n", accept_hash_comment_incl); ]; default0=advance State_hash_comment; - eoi0=accept_line_break_incl Tok_hash_comment; + eoi0=accept_hash_comment_incl; } module Codepoint_ = struct @@ -6183,29 +6394,33 @@ module Dfa = struct match in_fstring t with | false -> begin let interpolated_base = Source.Cursor.succ t.tok_base in - let 
lditto_ctok = ConcreteToken.ctok_at ~base:t.tok_base ~past:interpolated_base - Tok_fstring_lditto in - let pct_ctok = ConcreteToken.ctok_at ~base:pcursor ~past:cursor Tok_fstring_pct in + let lditto_tok = + Token.Tok_fstring_lditto {source=source_at interpolated_base t} in + let pct_tok = Token.Tok_fstring_pct {source=Source.Slice.of_cursors + ~base:pcursor ~past:cursor} in let fstring_state = match Source.Cursor.(interpolated_base = pcursor) with - | true -> Fstring_spec_pct_seen [pct_ctok] + | true -> Fstring_spec_pct_seen [pct_tok] | false -> begin - let interpolated_ctok = ConcreteToken.ctok_at ~base:interpolated_base - ~past:pcursor (State.CodepointAccum.to_atok_fstring_interpolated accum) - in - Fstring_spec_pct_seen [interpolated_ctok; pct_ctok] + let interpolated_tok = Token.Tok_fstring_interpolated { + source=Source.Slice.of_cursors ~base:interpolated_base ~past:pcursor; + fstring_interpolated=State.CodepointAccum.to_fstring_interpolated accum + } in + Fstring_spec_pct_seen [interpolated_tok; pct_tok] end in - accept_fstring_push_ctok_incl fstring_state lditto_ctok view t + accept_fstring_push_ctok_incl fstring_state lditto_tok view t end | true -> begin - let pct_ctok = ConcreteToken.ctok_at ~base:pcursor ~past:cursor Tok_fstring_pct in + let pct_tok = Token.Tok_fstring_pct {source=Source.Slice.of_cursors ~base:pcursor + ~past:cursor} in let fstring_state, ctok = match Source.Cursor.(t.tok_base = pcursor) with - | true -> Fstring_spec_pct_seen [], pct_ctok + | true -> Fstring_spec_pct_seen [], pct_tok | false -> begin - let interpolated_ctok = ConcreteToken.ctok_at ~base:t.tok_base - ~past:pcursor (State.CodepointAccum.to_atok_fstring_interpolated accum) - in - Fstring_spec_pct_seen [pct_ctok], interpolated_ctok + let interpolated_tok = Token.Tok_fstring_interpolated { + source=source_excl view t; + fstring_interpolated=State.CodepointAccum.to_fstring_interpolated accum + } in + Fstring_spec_pct_seen [pct_tok], interpolated_tok end in 
accept_fstring_trans_ctok_incl fstring_state ctok view t @@ -6214,17 +6429,19 @@ module Dfa = struct ("\"", fun {accum} (View.{pcursor; cursor; _} as view) t -> match in_fstring t with | true -> begin - let rditto_ctok = ConcreteToken.ctok_at ~base:pcursor ~past:cursor - Tok_fstring_rditto in + let rditto_tok = Token.Tok_fstring_rditto {source=Source.Slice.of_cursors + ~base:pcursor ~past:cursor} in let fstring_states' = List.tl t.fstring_states in match Source.Cursor.(t.tok_base = pcursor) with | true -> - {t with tok_base=cursor; fstring_states=fstring_states'}, Accept rditto_ctok + {t with tok_base=cursor; fstring_states=fstring_states'}, Accept rditto_tok | false -> begin - let interpolated_ctok = ConcreteToken.ctok_at ~base:t.tok_base - ~past:pcursor (State.CodepointAccum.to_atok_fstring_interpolated accum) in - accept_fstring_trans_ctok_incl (Fstring_rditto_seen rditto_ctok) - interpolated_ctok view t + let interpolated_tok = Token.Tok_fstring_interpolated { + source=source_excl view t; + fstring_interpolated=State.CodepointAccum.to_fstring_interpolated accum + } in + accept_fstring_trans_ctok_incl (Fstring_rditto_seen rditto_tok) + interpolated_tok view t end end | false -> accept_istring_incl accum view t diff --git a/bootstrap/src/hmc/scan.mli b/bootstrap/src/hmc/scan.mli index a1b99d63..ff26cbed 100644 --- a/bootstrap/src/hmc/scan.mli +++ b/bootstrap/src/hmc/scan.mli @@ -461,6 +461,8 @@ module Token : sig val pp: t -> (module Fmt.Formatter) -> (module Fmt.Formatter) + val source: t -> Source.Slice.t + val malformations: t -> Rendition.Malformation.t list (** [malformations t] returns a list of malformations associated with [t], or an empty list if there are no malformations. This function can be used on any token variant, even if no @@ -481,7 +483,7 @@ val cursor: t -> Source.Cursor.t (** [cursor t] returns the cursor at the scanner's current position. This cursor is equivalent to the base of the token returned by [next t]. 
*) -val next: t -> t * ConcreteToken.t +val next: t -> t * Token.t (** [next t] scans the next token past the tokens scanned by [t]'s predecessor state(s) and returns the scanner's successor state along with a token. If [t] is at the end of input, there is no successor state, and [t] is returned along with a [Tok_end_of_input] token. *)